{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 32553, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 0.6326836347579956, "eval_runtime": 164.9174, "eval_samples_per_second": 145.521, "eval_steps_per_second": 18.191, "step": 0 }, { "epoch": 3.0719134949159833e-05, "grad_norm": 2.2060632705688477, "learning_rate": 0.0, "loss": 0.8824, "step": 1 }, { "epoch": 6.143826989831967e-05, "grad_norm": 1.6191656589508057, "learning_rate": 2.0000000000000002e-07, "loss": 0.7731, "step": 2 }, { "epoch": 9.21574048474795e-05, "grad_norm": 1.736921787261963, "learning_rate": 4.0000000000000003e-07, "loss": 0.8734, "step": 3 }, { "epoch": 0.00012287653979663933, "grad_norm": 1.4968067407608032, "learning_rate": 6.000000000000001e-07, "loss": 0.7427, "step": 4 }, { "epoch": 0.00015359567474579916, "grad_norm": 1.4621388912200928, "learning_rate": 8.000000000000001e-07, "loss": 0.77, "step": 5 }, { "epoch": 0.000184314809694959, "grad_norm": 1.5482256412506104, "learning_rate": 1.0000000000000002e-06, "loss": 0.8165, "step": 6 }, { "epoch": 0.0002150339446441188, "grad_norm": 1.5582091808319092, "learning_rate": 1.2000000000000002e-06, "loss": 0.819, "step": 7 }, { "epoch": 0.00024575307959327867, "grad_norm": 1.513546347618103, "learning_rate": 1.4000000000000001e-06, "loss": 0.8363, "step": 8 }, { "epoch": 0.0002764722145424385, "grad_norm": 1.4339336156845093, "learning_rate": 1.6000000000000001e-06, "loss": 0.8125, "step": 9 }, { "epoch": 0.0003071913494915983, "grad_norm": 1.2125071287155151, "learning_rate": 1.8000000000000001e-06, "loss": 0.7789, "step": 10 }, { "epoch": 0.00033791048444075815, "grad_norm": 1.23660147190094, "learning_rate": 2.0000000000000003e-06, "loss": 0.7282, "step": 11 }, { "epoch": 0.000368629619389918, "grad_norm": 1.056510329246521, "learning_rate": 2.2e-06, "loss": 0.7939, "step": 12 }, { "epoch": 0.0003993487543390778, "grad_norm": 0.9758861064910889, "learning_rate": 2.4000000000000003e-06, "loss": 0.8209, "step": 13 }, { "epoch": 0.0004300678892882376, "grad_norm": 0.7711737751960754, "learning_rate": 2.6e-06, "loss": 0.7855, "step": 14 }, { "epoch": 0.00046078702423739745, "grad_norm": 0.7413145303726196, "learning_rate": 2.8000000000000003e-06, "loss": 0.8504, "step": 15 }, { "epoch": 0.0004915061591865573, "grad_norm": 0.5642073750495911, "learning_rate": 3e-06, "loss": 0.7012, "step": 16 }, { "epoch": 0.0005222252941357172, "grad_norm": 0.53790283203125, "learning_rate": 3.2000000000000003e-06, "loss": 0.7729, "step": 17 }, { "epoch": 0.000552944429084877, "grad_norm": 0.5495612621307373, "learning_rate": 3.4000000000000005e-06, "loss": 0.7109, "step": 18 }, { "epoch": 0.0005836635640340368, "grad_norm": 0.6268466711044312, "learning_rate": 3.6000000000000003e-06, "loss": 0.7447, "step": 19 }, { "epoch": 0.0006143826989831966, "grad_norm": 0.6037240624427795, "learning_rate": 3.8000000000000005e-06, "loss": 0.7352, "step": 20 }, { "epoch": 0.0006451018339323565, "grad_norm": 0.6931290626525879, "learning_rate": 4.000000000000001e-06, "loss": 0.8001, "step": 21 }, { "epoch": 0.0006758209688815163, "grad_norm": 0.7875220775604248, "learning_rate": 4.2000000000000004e-06, "loss": 0.7863, "step": 22 }, { "epoch": 0.0007065401038306761, "grad_norm": 0.6257062554359436, "learning_rate": 4.4e-06, "loss": 0.653, "step": 23 }, { "epoch": 0.000737259238779836, "grad_norm": 0.6351224780082703, "learning_rate": 4.600000000000001e-06, "loss": 0.7564, "step": 24 }, { "epoch": 0.0007679783737289958, "grad_norm": 0.6733613014221191, "learning_rate": 4.800000000000001e-06, "loss": 0.7801, "step": 25 }, { "epoch": 0.0007986975086781556, "grad_norm": 0.5667824745178223, "learning_rate": 5e-06, "loss": 0.7634, "step": 26 }, { "epoch": 0.0008294166436273154, "grad_norm": 0.6039953231811523, "learning_rate": 5.2e-06, "loss": 0.7154, "step": 27 }, { "epoch": 0.0008601357785764753, "grad_norm": 0.49980399012565613, "learning_rate": 5.400000000000001e-06, "loss": 0.7394, "step": 28 }, { "epoch": 0.0008908549135256351, "grad_norm": 0.5537846088409424, "learning_rate": 5.600000000000001e-06, "loss": 0.7897, "step": 29 }, { "epoch": 0.0009215740484747949, "grad_norm": 0.4041750133037567, "learning_rate": 5.8e-06, "loss": 0.6538, "step": 30 }, { "epoch": 0.0009522931834239547, "grad_norm": 0.4942256808280945, "learning_rate": 6e-06, "loss": 0.7338, "step": 31 }, { "epoch": 0.0009830123183731147, "grad_norm": 0.45916399359703064, "learning_rate": 6.200000000000001e-06, "loss": 0.733, "step": 32 }, { "epoch": 0.0010137314533222744, "grad_norm": 0.47235965728759766, "learning_rate": 6.4000000000000006e-06, "loss": 0.6785, "step": 33 }, { "epoch": 0.0010444505882714343, "grad_norm": 0.426445335149765, "learning_rate": 6.600000000000001e-06, "loss": 0.6756, "step": 34 }, { "epoch": 0.001075169723220594, "grad_norm": 0.41867509484291077, "learning_rate": 6.800000000000001e-06, "loss": 0.6992, "step": 35 }, { "epoch": 0.001105888858169754, "grad_norm": 0.5253549814224243, "learning_rate": 7e-06, "loss": 0.7337, "step": 36 }, { "epoch": 0.0011366079931189137, "grad_norm": 0.5028246641159058, "learning_rate": 7.2000000000000005e-06, "loss": 0.8443, "step": 37 }, { "epoch": 0.0011673271280680736, "grad_norm": 0.5090922117233276, "learning_rate": 7.4e-06, "loss": 0.7386, "step": 38 }, { "epoch": 0.0011980462630172333, "grad_norm": 0.5473935008049011, "learning_rate": 7.600000000000001e-06, "loss": 0.7312, "step": 39 }, { "epoch": 0.0012287653979663933, "grad_norm": 0.4583193361759186, "learning_rate": 7.800000000000002e-06, "loss": 0.6579, "step": 40 }, { "epoch": 0.001259484532915553, "grad_norm": 0.4007287323474884, "learning_rate": 8.000000000000001e-06, "loss": 0.7216, "step": 41 }, { "epoch": 0.001290203667864713, "grad_norm": 0.4102514386177063, "learning_rate": 8.2e-06, "loss": 0.7233, "step": 42 }, { "epoch": 0.0013209228028138727, "grad_norm": 0.43275970220565796, "learning_rate": 8.400000000000001e-06, "loss": 0.6587, "step": 43 }, { "epoch": 0.0013516419377630326, "grad_norm": 0.4226611852645874, "learning_rate": 8.6e-06, "loss": 0.7169, "step": 44 }, { "epoch": 0.0013823610727121925, "grad_norm": 0.3732317090034485, "learning_rate": 8.8e-06, "loss": 0.6469, "step": 45 }, { "epoch": 0.0014130802076613522, "grad_norm": 0.38714149594306946, "learning_rate": 9e-06, "loss": 0.7462, "step": 46 }, { "epoch": 0.0014437993426105122, "grad_norm": 0.4405876398086548, "learning_rate": 9.200000000000002e-06, "loss": 0.6285, "step": 47 }, { "epoch": 0.001474518477559672, "grad_norm": 0.4298103153705597, "learning_rate": 9.4e-06, "loss": 0.6269, "step": 48 }, { "epoch": 0.0015052376125088318, "grad_norm": 0.42390310764312744, "learning_rate": 9.600000000000001e-06, "loss": 0.6781, "step": 49 }, { "epoch": 0.0015359567474579915, "grad_norm": 0.7648850679397583, "learning_rate": 9.800000000000001e-06, "loss": 0.6272, "step": 50 }, { "epoch": 0.0015666758824071515, "grad_norm": 0.7110830545425415, "learning_rate": 1e-05, "loss": 0.6519, "step": 51 }, { "epoch": 0.0015973950173563112, "grad_norm": 0.4206750988960266, "learning_rate": 1.02e-05, "loss": 0.7422, "step": 52 }, { "epoch": 0.0016281141523054711, "grad_norm": 0.40987375378608704, "learning_rate": 1.04e-05, "loss": 0.7703, "step": 53 }, { "epoch": 0.0016588332872546309, "grad_norm": 0.3642449975013733, "learning_rate": 1.0600000000000002e-05, "loss": 0.7055, "step": 54 }, { "epoch": 0.0016895524222037908, "grad_norm": 0.4124760329723358, "learning_rate": 1.0800000000000002e-05, "loss": 0.701, "step": 55 }, { "epoch": 0.0017202715571529505, "grad_norm": 0.43547341227531433, "learning_rate": 1.1000000000000001e-05, "loss": 0.7221, "step": 56 }, { "epoch": 0.0017509906921021104, "grad_norm": 0.40725192427635193, "learning_rate": 1.1200000000000001e-05, "loss": 0.6446, "step": 57 }, { "epoch": 0.0017817098270512702, "grad_norm": 0.47742313146591187, "learning_rate": 1.14e-05, "loss": 0.7022, "step": 58 }, { "epoch": 0.00181242896200043, "grad_norm": 0.4527803659439087, "learning_rate": 1.16e-05, "loss": 0.7304, "step": 59 }, { "epoch": 0.0018431480969495898, "grad_norm": 0.3985797166824341, "learning_rate": 1.18e-05, "loss": 0.7555, "step": 60 }, { "epoch": 0.0018738672318987497, "grad_norm": 0.40599048137664795, "learning_rate": 1.2e-05, "loss": 0.6678, "step": 61 }, { "epoch": 0.0019045863668479095, "grad_norm": 0.46422773599624634, "learning_rate": 1.22e-05, "loss": 0.7375, "step": 62 }, { "epoch": 0.0019353055017970694, "grad_norm": 0.41334936022758484, "learning_rate": 1.2400000000000002e-05, "loss": 0.6874, "step": 63 }, { "epoch": 0.0019660246367462293, "grad_norm": 0.40186694264411926, "learning_rate": 1.2600000000000001e-05, "loss": 0.659, "step": 64 }, { "epoch": 0.001996743771695389, "grad_norm": 0.41983962059020996, "learning_rate": 1.2800000000000001e-05, "loss": 0.6874, "step": 65 }, { "epoch": 0.0020274629066445488, "grad_norm": 0.41536056995391846, "learning_rate": 1.3000000000000001e-05, "loss": 0.6925, "step": 66 }, { "epoch": 0.002058182041593709, "grad_norm": 0.431587278842926, "learning_rate": 1.3200000000000002e-05, "loss": 0.7492, "step": 67 }, { "epoch": 0.0020889011765428686, "grad_norm": 0.41855955123901367, "learning_rate": 1.3400000000000002e-05, "loss": 0.6918, "step": 68 }, { "epoch": 0.0021196203114920284, "grad_norm": 0.4394783079624176, "learning_rate": 1.3600000000000002e-05, "loss": 0.7476, "step": 69 }, { "epoch": 0.002150339446441188, "grad_norm": 0.4353363811969757, "learning_rate": 1.38e-05, "loss": 0.7128, "step": 70 }, { "epoch": 0.0021810585813903482, "grad_norm": 0.40552258491516113, "learning_rate": 1.4e-05, "loss": 0.6977, "step": 71 }, { "epoch": 0.002211777716339508, "grad_norm": 0.388932466506958, "learning_rate": 1.4200000000000001e-05, "loss": 0.6838, "step": 72 }, { "epoch": 0.0022424968512886677, "grad_norm": 0.44696030020713806, "learning_rate": 1.4400000000000001e-05, "loss": 0.7916, "step": 73 }, { "epoch": 0.0022732159862378274, "grad_norm": 0.3827476501464844, "learning_rate": 1.46e-05, "loss": 0.7288, "step": 74 }, { "epoch": 0.0023039351211869875, "grad_norm": 0.4317185580730438, "learning_rate": 1.48e-05, "loss": 0.6822, "step": 75 }, { "epoch": 0.0023346542561361473, "grad_norm": 0.4701237380504608, "learning_rate": 1.5000000000000002e-05, "loss": 0.7314, "step": 76 }, { "epoch": 0.002365373391085307, "grad_norm": 0.4364073574542999, "learning_rate": 1.5200000000000002e-05, "loss": 0.6788, "step": 77 }, { "epoch": 0.0023960925260344667, "grad_norm": 0.40740522742271423, "learning_rate": 1.54e-05, "loss": 0.7423, "step": 78 }, { "epoch": 0.002426811660983627, "grad_norm": 0.561620831489563, "learning_rate": 1.5600000000000003e-05, "loss": 0.6379, "step": 79 }, { "epoch": 0.0024575307959327866, "grad_norm": 0.41717392206192017, "learning_rate": 1.58e-05, "loss": 0.6998, "step": 80 }, { "epoch": 0.0024882499308819463, "grad_norm": 0.39351895451545715, "learning_rate": 1.6000000000000003e-05, "loss": 0.7005, "step": 81 }, { "epoch": 0.002518969065831106, "grad_norm": 0.45525091886520386, "learning_rate": 1.62e-05, "loss": 0.669, "step": 82 }, { "epoch": 0.002549688200780266, "grad_norm": 0.49478185176849365, "learning_rate": 1.64e-05, "loss": 0.7142, "step": 83 }, { "epoch": 0.002580407335729426, "grad_norm": 0.3819921314716339, "learning_rate": 1.66e-05, "loss": 0.6877, "step": 84 }, { "epoch": 0.0026111264706785856, "grad_norm": 0.6958513855934143, "learning_rate": 1.6800000000000002e-05, "loss": 0.695, "step": 85 }, { "epoch": 0.0026418456056277453, "grad_norm": 0.4809763431549072, "learning_rate": 1.7e-05, "loss": 0.6389, "step": 86 }, { "epoch": 0.0026725647405769055, "grad_norm": 0.5927045941352844, "learning_rate": 1.72e-05, "loss": 0.6551, "step": 87 }, { "epoch": 0.002703283875526065, "grad_norm": 0.6305676698684692, "learning_rate": 1.7400000000000003e-05, "loss": 0.6817, "step": 88 }, { "epoch": 0.002734003010475225, "grad_norm": 2.6431007385253906, "learning_rate": 1.76e-05, "loss": 0.5582, "step": 89 }, { "epoch": 0.002764722145424385, "grad_norm": 0.4043395221233368, "learning_rate": 1.7800000000000002e-05, "loss": 0.6677, "step": 90 }, { "epoch": 0.0027954412803735448, "grad_norm": 0.49771586060523987, "learning_rate": 1.8e-05, "loss": 0.7156, "step": 91 }, { "epoch": 0.0028261604153227045, "grad_norm": 0.43804931640625, "learning_rate": 1.8200000000000002e-05, "loss": 0.6696, "step": 92 }, { "epoch": 0.002856879550271864, "grad_norm": 0.39800825715065, "learning_rate": 1.8400000000000003e-05, "loss": 0.7048, "step": 93 }, { "epoch": 0.0028875986852210244, "grad_norm": 0.40523940324783325, "learning_rate": 1.86e-05, "loss": 0.6458, "step": 94 }, { "epoch": 0.002918317820170184, "grad_norm": 0.3812132477760315, "learning_rate": 1.88e-05, "loss": 0.6346, "step": 95 }, { "epoch": 0.002949036955119344, "grad_norm": 0.4807397425174713, "learning_rate": 1.9e-05, "loss": 0.7656, "step": 96 }, { "epoch": 0.0029797560900685035, "grad_norm": 0.4357774257659912, "learning_rate": 1.9200000000000003e-05, "loss": 0.6349, "step": 97 }, { "epoch": 0.0030104752250176637, "grad_norm": 0.45396918058395386, "learning_rate": 1.94e-05, "loss": 0.7982, "step": 98 }, { "epoch": 0.0030411943599668234, "grad_norm": 0.461595356464386, "learning_rate": 1.9600000000000002e-05, "loss": 0.709, "step": 99 }, { "epoch": 0.003071913494915983, "grad_norm": 0.4501061737537384, "learning_rate": 1.98e-05, "loss": 0.7047, "step": 100 }, { "epoch": 0.003102632629865143, "grad_norm": 0.3854588568210602, "learning_rate": 2e-05, "loss": 0.6107, "step": 101 }, { "epoch": 0.003133351764814303, "grad_norm": 0.5553824305534363, "learning_rate": 1.999999998832215e-05, "loss": 0.7218, "step": 102 }, { "epoch": 0.0031640708997634627, "grad_norm": 0.4018421769142151, "learning_rate": 1.9999999953288605e-05, "loss": 0.5644, "step": 103 }, { "epoch": 0.0031947900347126224, "grad_norm": 0.39340701699256897, "learning_rate": 1.9999999894899357e-05, "loss": 0.6485, "step": 104 }, { "epoch": 0.003225509169661782, "grad_norm": 0.6755144596099854, "learning_rate": 1.9999999813154413e-05, "loss": 0.7112, "step": 105 }, { "epoch": 0.0032562283046109423, "grad_norm": 0.591421902179718, "learning_rate": 1.9999999708053776e-05, "loss": 0.7892, "step": 106 }, { "epoch": 0.003286947439560102, "grad_norm": 0.45002928376197815, "learning_rate": 1.9999999579597435e-05, "loss": 0.6684, "step": 107 }, { "epoch": 0.0033176665745092617, "grad_norm": 0.6268222332000732, "learning_rate": 1.9999999427785397e-05, "loss": 0.657, "step": 108 }, { "epoch": 0.003348385709458422, "grad_norm": 0.7430928945541382, "learning_rate": 1.9999999252617665e-05, "loss": 0.7015, "step": 109 }, { "epoch": 0.0033791048444075816, "grad_norm": 1.0364269018173218, "learning_rate": 1.999999905409423e-05, "loss": 0.7512, "step": 110 }, { "epoch": 0.0034098239793567413, "grad_norm": 0.43804025650024414, "learning_rate": 1.9999998832215105e-05, "loss": 0.6397, "step": 111 }, { "epoch": 0.003440543114305901, "grad_norm": 0.4407244920730591, "learning_rate": 1.9999998586980283e-05, "loss": 0.6981, "step": 112 }, { "epoch": 0.003471262249255061, "grad_norm": 0.43327078223228455, "learning_rate": 1.9999998318389764e-05, "loss": 0.6826, "step": 113 }, { "epoch": 0.003501981384204221, "grad_norm": 0.42031970620155334, "learning_rate": 1.999999802644355e-05, "loss": 0.7271, "step": 114 }, { "epoch": 0.0035327005191533806, "grad_norm": 0.4113335609436035, "learning_rate": 1.9999997711141646e-05, "loss": 0.6303, "step": 115 }, { "epoch": 0.0035634196541025403, "grad_norm": 0.4468602240085602, "learning_rate": 1.9999997372484046e-05, "loss": 0.6167, "step": 116 }, { "epoch": 0.0035941387890517005, "grad_norm": 0.45326897501945496, "learning_rate": 1.9999997010470754e-05, "loss": 0.6268, "step": 117 }, { "epoch": 0.00362485792400086, "grad_norm": 0.40296611189842224, "learning_rate": 1.9999996625101774e-05, "loss": 0.6144, "step": 118 }, { "epoch": 0.00365557705895002, "grad_norm": 0.43666744232177734, "learning_rate": 1.99999962163771e-05, "loss": 0.659, "step": 119 }, { "epoch": 0.0036862961938991796, "grad_norm": 0.4473617374897003, "learning_rate": 1.999999578429674e-05, "loss": 0.7029, "step": 120 }, { "epoch": 0.0037170153288483398, "grad_norm": 0.41308149695396423, "learning_rate": 1.9999995328860685e-05, "loss": 0.6833, "step": 121 }, { "epoch": 0.0037477344637974995, "grad_norm": 0.4639977514743805, "learning_rate": 1.9999994850068946e-05, "loss": 0.7144, "step": 122 }, { "epoch": 0.0037784535987466592, "grad_norm": 0.43590787053108215, "learning_rate": 1.999999434792152e-05, "loss": 0.7556, "step": 123 }, { "epoch": 0.003809172733695819, "grad_norm": 0.4142620265483856, "learning_rate": 1.9999993822418413e-05, "loss": 0.6164, "step": 124 }, { "epoch": 0.003839891868644979, "grad_norm": 0.43738052248954773, "learning_rate": 1.999999327355962e-05, "loss": 0.7166, "step": 125 }, { "epoch": 0.003870611003594139, "grad_norm": 0.6542159914970398, "learning_rate": 1.999999270134514e-05, "loss": 0.7099, "step": 126 }, { "epoch": 0.0039013301385432985, "grad_norm": 0.4329463839530945, "learning_rate": 1.999999210577498e-05, "loss": 0.648, "step": 127 }, { "epoch": 0.003932049273492459, "grad_norm": 0.45176026225090027, "learning_rate": 1.9999991486849144e-05, "loss": 0.7054, "step": 128 }, { "epoch": 0.003962768408441618, "grad_norm": 0.4022347629070282, "learning_rate": 1.9999990844567628e-05, "loss": 0.6461, "step": 129 }, { "epoch": 0.003993487543390778, "grad_norm": 0.537884533405304, "learning_rate": 1.9999990178930432e-05, "loss": 0.6664, "step": 130 }, { "epoch": 0.004024206678339938, "grad_norm": 0.39311736822128296, "learning_rate": 1.9999989489937563e-05, "loss": 0.6682, "step": 131 }, { "epoch": 0.0040549258132890975, "grad_norm": 0.42505621910095215, "learning_rate": 1.999998877758902e-05, "loss": 0.7108, "step": 132 }, { "epoch": 0.004085644948238257, "grad_norm": 0.5580906867980957, "learning_rate": 1.9999988041884806e-05, "loss": 0.7079, "step": 133 }, { "epoch": 0.004116364083187418, "grad_norm": 0.4688013195991516, "learning_rate": 1.999998728282492e-05, "loss": 0.6672, "step": 134 }, { "epoch": 0.004147083218136578, "grad_norm": 0.42082464694976807, "learning_rate": 1.9999986500409366e-05, "loss": 0.6729, "step": 135 }, { "epoch": 0.004177802353085737, "grad_norm": 0.38066428899765015, "learning_rate": 1.9999985694638144e-05, "loss": 0.667, "step": 136 }, { "epoch": 0.004208521488034897, "grad_norm": 0.4260127544403076, "learning_rate": 1.999998486551126e-05, "loss": 0.7275, "step": 137 }, { "epoch": 0.004239240622984057, "grad_norm": 0.4907187521457672, "learning_rate": 1.999998401302871e-05, "loss": 0.7384, "step": 138 }, { "epoch": 0.0042699597579332164, "grad_norm": 1.410542607307434, "learning_rate": 1.9999983137190496e-05, "loss": 0.7092, "step": 139 }, { "epoch": 0.004300678892882376, "grad_norm": 0.43692833185195923, "learning_rate": 1.999998223799663e-05, "loss": 0.6813, "step": 140 }, { "epoch": 0.004331398027831536, "grad_norm": 0.36459484696388245, "learning_rate": 1.9999981315447104e-05, "loss": 0.6672, "step": 141 }, { "epoch": 0.0043621171627806965, "grad_norm": 0.4163941442966461, "learning_rate": 1.9999980369541922e-05, "loss": 0.6917, "step": 142 }, { "epoch": 0.004392836297729856, "grad_norm": 0.4046642482280731, "learning_rate": 1.999997940028109e-05, "loss": 0.6789, "step": 143 }, { "epoch": 0.004423555432679016, "grad_norm": 0.5259842872619629, "learning_rate": 1.9999978407664606e-05, "loss": 0.6927, "step": 144 }, { "epoch": 0.004454274567628176, "grad_norm": 0.4637850821018219, "learning_rate": 1.9999977391692472e-05, "loss": 0.7026, "step": 145 }, { "epoch": 0.004484993702577335, "grad_norm": 0.3918446898460388, "learning_rate": 1.9999976352364692e-05, "loss": 0.6085, "step": 146 }, { "epoch": 0.004515712837526495, "grad_norm": 0.36016353964805603, "learning_rate": 1.9999975289681273e-05, "loss": 0.6761, "step": 147 }, { "epoch": 0.004546431972475655, "grad_norm": 0.4215168356895447, "learning_rate": 1.9999974203642215e-05, "loss": 0.6938, "step": 148 }, { "epoch": 0.0045771511074248145, "grad_norm": 0.43267011642456055, "learning_rate": 1.9999973094247514e-05, "loss": 0.7183, "step": 149 }, { "epoch": 0.004607870242373975, "grad_norm": 0.36801448464393616, "learning_rate": 1.999997196149718e-05, "loss": 0.7292, "step": 150 }, { "epoch": 0.004638589377323135, "grad_norm": 0.4573562741279602, "learning_rate": 1.999997080539121e-05, "loss": 0.6087, "step": 151 }, { "epoch": 0.0046693085122722945, "grad_norm": 0.44994789361953735, "learning_rate": 1.9999969625929613e-05, "loss": 0.7521, "step": 152 }, { "epoch": 0.004700027647221454, "grad_norm": 0.4156774878501892, "learning_rate": 1.999996842311239e-05, "loss": 0.7256, "step": 153 }, { "epoch": 0.004730746782170614, "grad_norm": 0.3742092549800873, "learning_rate": 1.999996719693954e-05, "loss": 0.5957, "step": 154 }, { "epoch": 0.004761465917119774, "grad_norm": 0.39144250750541687, "learning_rate": 1.9999965947411067e-05, "loss": 0.5903, "step": 155 }, { "epoch": 0.004792185052068933, "grad_norm": 0.4047008156776428, "learning_rate": 1.9999964674526976e-05, "loss": 0.706, "step": 156 }, { "epoch": 0.004822904187018094, "grad_norm": 0.41821542382240295, "learning_rate": 1.999996337828727e-05, "loss": 0.6303, "step": 157 }, { "epoch": 0.004853623321967254, "grad_norm": 0.39450523257255554, "learning_rate": 1.9999962058691947e-05, "loss": 0.6936, "step": 158 }, { "epoch": 0.004884342456916413, "grad_norm": 0.4165686070919037, "learning_rate": 1.9999960715741016e-05, "loss": 0.677, "step": 159 }, { "epoch": 0.004915061591865573, "grad_norm": 0.4115440547466278, "learning_rate": 1.9999959349434484e-05, "loss": 0.6726, "step": 160 }, { "epoch": 0.004945780726814733, "grad_norm": 0.4328663945198059, "learning_rate": 1.9999957959772345e-05, "loss": 0.608, "step": 161 }, { "epoch": 0.0049764998617638926, "grad_norm": 0.42647549510002136, "learning_rate": 1.9999956546754605e-05, "loss": 0.7748, "step": 162 }, { "epoch": 0.005007218996713052, "grad_norm": 0.37625035643577576, "learning_rate": 1.999995511038127e-05, "loss": 0.7053, "step": 163 }, { "epoch": 0.005037938131662212, "grad_norm": 0.4004875123500824, "learning_rate": 1.999995365065234e-05, "loss": 0.7369, "step": 164 }, { "epoch": 0.005068657266611373, "grad_norm": 0.3631172776222229, "learning_rate": 1.9999952167567817e-05, "loss": 0.6568, "step": 165 }, { "epoch": 0.005099376401560532, "grad_norm": 0.4089898467063904, "learning_rate": 1.9999950661127713e-05, "loss": 0.665, "step": 166 }, { "epoch": 0.005130095536509692, "grad_norm": 0.40408825874328613, "learning_rate": 1.999994913133202e-05, "loss": 0.7395, "step": 167 }, { "epoch": 0.005160814671458852, "grad_norm": 0.4584376811981201, "learning_rate": 1.9999947578180753e-05, "loss": 0.5894, "step": 168 }, { "epoch": 0.0051915338064080115, "grad_norm": 0.39729058742523193, "learning_rate": 1.9999946001673907e-05, "loss": 0.6294, "step": 169 }, { "epoch": 0.005222252941357171, "grad_norm": 0.39025962352752686, "learning_rate": 1.999994440181149e-05, "loss": 0.7337, "step": 170 }, { "epoch": 0.005252972076306331, "grad_norm": 0.37553146481513977, "learning_rate": 1.99999427785935e-05, "loss": 0.6327, "step": 171 }, { "epoch": 0.005283691211255491, "grad_norm": 0.4529859125614166, "learning_rate": 1.999994113201995e-05, "loss": 0.6937, "step": 172 }, { "epoch": 0.005314410346204651, "grad_norm": 0.4317130148410797, "learning_rate": 1.9999939462090837e-05, "loss": 0.7041, "step": 173 }, { "epoch": 0.005345129481153811, "grad_norm": 0.40663379430770874, "learning_rate": 1.9999937768806167e-05, "loss": 0.6595, "step": 174 }, { "epoch": 0.005375848616102971, "grad_norm": 0.4241580069065094, "learning_rate": 1.9999936052165947e-05, "loss": 0.7627, "step": 175 }, { "epoch": 0.00540656775105213, "grad_norm": 0.39949244260787964, "learning_rate": 1.999993431217018e-05, "loss": 0.6418, "step": 176 }, { "epoch": 0.00543728688600129, "grad_norm": 0.5206996202468872, "learning_rate": 1.999993254881886e-05, "loss": 0.7191, "step": 177 }, { "epoch": 0.00546800602095045, "grad_norm": 0.439113587141037, "learning_rate": 1.9999930762112002e-05, "loss": 0.65, "step": 178 }, { "epoch": 0.0054987251558996095, "grad_norm": 1.0398123264312744, "learning_rate": 1.999992895204961e-05, "loss": 0.5634, "step": 179 }, { "epoch": 0.00552944429084877, "grad_norm": 0.3822295665740967, "learning_rate": 1.9999927118631682e-05, "loss": 0.6217, "step": 180 }, { "epoch": 0.00556016342579793, "grad_norm": 0.4115304946899414, "learning_rate": 1.999992526185823e-05, "loss": 0.6477, "step": 181 }, { "epoch": 0.0055908825607470895, "grad_norm": 0.4077301621437073, "learning_rate": 1.9999923381729253e-05, "loss": 0.7123, "step": 182 }, { "epoch": 0.005621601695696249, "grad_norm": 0.37694165110588074, "learning_rate": 1.9999921478244752e-05, "loss": 0.6258, "step": 183 }, { "epoch": 0.005652320830645409, "grad_norm": 0.4390418231487274, "learning_rate": 1.999991955140474e-05, "loss": 0.6808, "step": 184 }, { "epoch": 0.005683039965594569, "grad_norm": 0.5014764666557312, "learning_rate": 1.999991760120922e-05, "loss": 0.5788, "step": 185 }, { "epoch": 0.005713759100543728, "grad_norm": 0.3456033170223236, "learning_rate": 1.999991562765819e-05, "loss": 0.6716, "step": 186 }, { "epoch": 0.005744478235492888, "grad_norm": 0.39686164259910583, "learning_rate": 1.999991363075166e-05, "loss": 0.6912, "step": 187 }, { "epoch": 0.005775197370442049, "grad_norm": 0.4266481101512909, "learning_rate": 1.9999911610489633e-05, "loss": 0.679, "step": 188 }, { "epoch": 0.005805916505391208, "grad_norm": 0.3565369248390198, "learning_rate": 1.9999909566872113e-05, "loss": 0.63, "step": 189 }, { "epoch": 0.005836635640340368, "grad_norm": 0.4318750202655792, "learning_rate": 1.9999907499899107e-05, "loss": 0.6051, "step": 190 }, { "epoch": 0.005867354775289528, "grad_norm": 0.39390119910240173, "learning_rate": 1.999990540957062e-05, "loss": 0.7896, "step": 191 }, { "epoch": 0.005898073910238688, "grad_norm": 0.3787881135940552, "learning_rate": 1.9999903295886653e-05, "loss": 0.6158, "step": 192 }, { "epoch": 0.005928793045187847, "grad_norm": 0.4039481282234192, "learning_rate": 1.9999901158847213e-05, "loss": 0.7293, "step": 193 }, { "epoch": 0.005959512180137007, "grad_norm": 0.42687293887138367, "learning_rate": 1.9999898998452307e-05, "loss": 0.657, "step": 194 }, { "epoch": 0.005990231315086168, "grad_norm": 0.35309216380119324, "learning_rate": 1.999989681470194e-05, "loss": 0.6857, "step": 195 }, { "epoch": 0.006020950450035327, "grad_norm": 0.41449591517448425, "learning_rate": 1.999989460759611e-05, "loss": 0.7634, "step": 196 }, { "epoch": 0.006051669584984487, "grad_norm": 0.34888550639152527, "learning_rate": 1.9999892377134836e-05, "loss": 0.7087, "step": 197 }, { "epoch": 0.006082388719933647, "grad_norm": 0.38623496890068054, "learning_rate": 1.999989012331811e-05, "loss": 0.742, "step": 198 }, { "epoch": 0.0061131078548828065, "grad_norm": 0.3828396201133728, "learning_rate": 1.9999887846145944e-05, "loss": 0.6124, "step": 199 }, { "epoch": 0.006143826989831966, "grad_norm": 0.405282586812973, "learning_rate": 1.999988554561834e-05, "loss": 0.675, "step": 200 }, { "epoch": 0.006174546124781126, "grad_norm": 0.39675867557525635, "learning_rate": 1.9999883221735304e-05, "loss": 0.5764, "step": 201 }, { "epoch": 0.006205265259730286, "grad_norm": 0.4089547395706177, "learning_rate": 1.9999880874496845e-05, "loss": 0.7398, "step": 202 }, { "epoch": 0.006235984394679446, "grad_norm": 0.4142422676086426, "learning_rate": 1.9999878503902964e-05, "loss": 0.6223, "step": 203 }, { "epoch": 0.006266703529628606, "grad_norm": 0.3979097008705139, "learning_rate": 1.999987610995367e-05, "loss": 0.5788, "step": 204 }, { "epoch": 0.006297422664577766, "grad_norm": 0.38964784145355225, "learning_rate": 1.9999873692648966e-05, "loss": 0.6611, "step": 205 }, { "epoch": 0.006328141799526925, "grad_norm": 0.47635364532470703, "learning_rate": 1.999987125198886e-05, "loss": 0.611, "step": 206 }, { "epoch": 0.006358860934476085, "grad_norm": 0.3504014015197754, "learning_rate": 1.9999868787973352e-05, "loss": 0.5862, "step": 207 }, { "epoch": 0.006389580069425245, "grad_norm": 0.41913169622421265, "learning_rate": 1.9999866300602457e-05, "loss": 0.645, "step": 208 }, { "epoch": 0.0064202992043744045, "grad_norm": 0.4096449017524719, "learning_rate": 1.9999863789876174e-05, "loss": 0.6304, "step": 209 }, { "epoch": 0.006451018339323564, "grad_norm": 0.3602648675441742, "learning_rate": 1.999986125579451e-05, "loss": 0.6444, "step": 210 }, { "epoch": 0.006481737474272725, "grad_norm": 0.402296245098114, "learning_rate": 1.9999858698357472e-05, "loss": 0.7308, "step": 211 }, { "epoch": 0.0065124566092218845, "grad_norm": 0.3972809910774231, "learning_rate": 1.9999856117565067e-05, "loss": 0.6929, "step": 212 }, { "epoch": 0.006543175744171044, "grad_norm": 0.40088996291160583, "learning_rate": 1.9999853513417298e-05, "loss": 0.6613, "step": 213 }, { "epoch": 0.006573894879120204, "grad_norm": 1.0582997798919678, "learning_rate": 1.9999850885914173e-05, "loss": 0.7935, "step": 214 }, { "epoch": 0.006604614014069364, "grad_norm": 0.38180917501449585, "learning_rate": 1.9999848235055698e-05, "loss": 0.6419, "step": 215 }, { "epoch": 0.006635333149018523, "grad_norm": 0.4208572506904602, "learning_rate": 1.999984556084188e-05, "loss": 0.6565, "step": 216 }, { "epoch": 0.006666052283967683, "grad_norm": 0.369809091091156, "learning_rate": 1.9999842863272723e-05, "loss": 0.599, "step": 217 }, { "epoch": 0.006696771418916844, "grad_norm": 0.3528952896595001, "learning_rate": 1.9999840142348238e-05, "loss": 0.6812, "step": 218 }, { "epoch": 0.0067274905538660034, "grad_norm": 0.41435348987579346, "learning_rate": 1.9999837398068425e-05, "loss": 0.6297, "step": 219 }, { "epoch": 0.006758209688815163, "grad_norm": 0.39285483956336975, "learning_rate": 1.999983463043329e-05, "loss": 0.7044, "step": 220 }, { "epoch": 0.006788928823764323, "grad_norm": 0.38261091709136963, "learning_rate": 1.9999831839442847e-05, "loss": 0.7441, "step": 221 }, { "epoch": 0.006819647958713483, "grad_norm": 0.36022257804870605, "learning_rate": 1.99998290250971e-05, "loss": 0.6705, "step": 222 }, { "epoch": 0.006850367093662642, "grad_norm": 0.3939741849899292, "learning_rate": 1.999982618739605e-05, "loss": 0.7046, "step": 223 }, { "epoch": 0.006881086228611802, "grad_norm": 0.3867575228214264, "learning_rate": 1.9999823326339708e-05, "loss": 0.6751, "step": 224 }, { "epoch": 0.006911805363560962, "grad_norm": 0.4314223527908325, "learning_rate": 1.9999820441928082e-05, "loss": 0.6127, "step": 225 }, { "epoch": 0.006942524498510122, "grad_norm": 0.37257808446884155, "learning_rate": 1.9999817534161177e-05, "loss": 0.7028, "step": 226 }, { "epoch": 0.006973243633459282, "grad_norm": 0.3947150707244873, "learning_rate": 1.9999814603039e-05, "loss": 0.7125, "step": 227 }, { "epoch": 0.007003962768408442, "grad_norm": 0.3422849476337433, "learning_rate": 1.9999811648561555e-05, "loss": 0.5899, "step": 228 }, { "epoch": 0.0070346819033576015, "grad_norm": 0.34524112939834595, "learning_rate": 1.9999808670728856e-05, "loss": 0.621, "step": 229 }, { "epoch": 0.007065401038306761, "grad_norm": 0.3779149055480957, "learning_rate": 1.99998056695409e-05, "loss": 0.6688, "step": 230 }, { "epoch": 0.007096120173255921, "grad_norm": 0.40354642271995544, "learning_rate": 1.9999802644997707e-05, "loss": 0.7037, "step": 231 }, { "epoch": 0.007126839308205081, "grad_norm": 0.36464571952819824, "learning_rate": 1.999979959709927e-05, "loss": 0.6438, "step": 232 }, { "epoch": 0.00715755844315424, "grad_norm": 0.38330891728401184, "learning_rate": 1.9999796525845606e-05, "loss": 0.6663, "step": 233 }, { "epoch": 0.007188277578103401, "grad_norm": 0.4111490845680237, "learning_rate": 1.9999793431236717e-05, "loss": 0.6654, "step": 234 }, { "epoch": 0.007218996713052561, "grad_norm": 0.40398624539375305, "learning_rate": 1.9999790313272612e-05, "loss": 0.6901, "step": 235 }, { "epoch": 0.00724971584800172, "grad_norm": 0.4054335653781891, "learning_rate": 1.9999787171953298e-05, "loss": 0.6193, "step": 236 }, { "epoch": 0.00728043498295088, "grad_norm": 0.45651334524154663, "learning_rate": 1.9999784007278782e-05, "loss": 0.6828, "step": 237 }, { "epoch": 0.00731115411790004, "grad_norm": 0.4088228940963745, "learning_rate": 1.9999780819249074e-05, "loss": 0.6513, "step": 238 }, { "epoch": 0.0073418732528491995, "grad_norm": 0.3671337366104126, "learning_rate": 1.999977760786418e-05, "loss": 0.6647, "step": 239 }, { "epoch": 0.007372592387798359, "grad_norm": 0.3689892292022705, "learning_rate": 1.9999774373124105e-05, "loss": 0.6374, "step": 240 }, { "epoch": 0.00740331152274752, "grad_norm": 0.4682154059410095, "learning_rate": 1.999977111502886e-05, "loss": 0.6567, "step": 241 }, { "epoch": 0.0074340306576966796, "grad_norm": 0.35766980051994324, "learning_rate": 1.9999767833578453e-05, "loss": 0.7022, "step": 242 }, { "epoch": 0.007464749792645839, "grad_norm": 0.38158681988716125, "learning_rate": 1.9999764528772883e-05, "loss": 0.7223, "step": 243 }, { "epoch": 0.007495468927594999, "grad_norm": 0.40947428345680237, "learning_rate": 1.9999761200612172e-05, "loss": 0.6185, "step": 244 }, { "epoch": 0.007526188062544159, "grad_norm": 0.48650962114334106, "learning_rate": 1.9999757849096316e-05, "loss": 0.7419, "step": 245 }, { "epoch": 0.0075569071974933184, "grad_norm": 0.3855099678039551, "learning_rate": 1.999975447422533e-05, "loss": 0.7115, "step": 246 }, { "epoch": 0.007587626332442478, "grad_norm": 0.383861780166626, "learning_rate": 1.9999751075999215e-05, "loss": 0.6983, "step": 247 }, { "epoch": 0.007618345467391638, "grad_norm": 0.3975721597671509, "learning_rate": 1.999974765441799e-05, "loss": 0.6461, "step": 248 }, { "epoch": 0.0076490646023407985, "grad_norm": 0.4092705249786377, "learning_rate": 1.9999744209481647e-05, "loss": 0.6444, "step": 249 }, { "epoch": 0.007679783737289958, "grad_norm": 0.3585764765739441, "learning_rate": 1.999974074119021e-05, "loss": 0.6558, "step": 250 }, { "epoch": 0.007710502872239118, "grad_norm": 0.34911248087882996, "learning_rate": 1.999973724954368e-05, "loss": 0.6142, "step": 251 }, { "epoch": 0.007741222007188278, "grad_norm": 0.3658337891101837, "learning_rate": 1.999973373454206e-05, "loss": 0.7271, "step": 252 }, { "epoch": 0.007771941142137437, "grad_norm": 0.39929425716400146, "learning_rate": 1.999973019618537e-05, "loss": 0.6636, "step": 253 }, { "epoch": 0.007802660277086597, "grad_norm": 0.4133506417274475, "learning_rate": 1.999972663447361e-05, "loss": 0.6044, "step": 254 }, { "epoch": 0.007833379412035758, "grad_norm": 0.46768030524253845, "learning_rate": 1.9999723049406785e-05, "loss": 0.6723, "step": 255 }, { "epoch": 0.007864098546984917, "grad_norm": 0.3781800866127014, "learning_rate": 1.999971944098491e-05, "loss": 0.6659, "step": 256 }, { "epoch": 0.007894817681934077, "grad_norm": 0.3629230558872223, "learning_rate": 1.9999715809207995e-05, "loss": 0.56, "step": 257 }, { "epoch": 0.007925536816883237, "grad_norm": 0.34650668501853943, "learning_rate": 1.9999712154076046e-05, "loss": 0.6414, "step": 258 }, { "epoch": 0.007956255951832397, "grad_norm": 0.4147944450378418, "learning_rate": 1.9999708475589067e-05, "loss": 0.7194, "step": 259 }, { "epoch": 0.007986975086781556, "grad_norm": 0.38641059398651123, "learning_rate": 1.9999704773747072e-05, "loss": 0.6329, "step": 260 }, { "epoch": 0.008017694221730716, "grad_norm": 0.3812808394432068, "learning_rate": 1.9999701048550072e-05, "loss": 0.7474, "step": 261 }, { "epoch": 0.008048413356679876, "grad_norm": 0.3580913543701172, "learning_rate": 1.9999697299998066e-05, "loss": 0.6158, "step": 262 }, { "epoch": 0.008079132491629035, "grad_norm": 0.40415745973587036, "learning_rate": 1.999969352809107e-05, "loss": 0.7341, "step": 263 }, { "epoch": 0.008109851626578195, "grad_norm": 0.37502536177635193, "learning_rate": 1.9999689732829095e-05, "loss": 0.6728, "step": 264 }, { "epoch": 0.008140570761527355, "grad_norm": 0.3997909724712372, "learning_rate": 1.9999685914212143e-05, "loss": 0.6767, "step": 265 }, { "epoch": 0.008171289896476515, "grad_norm": 0.3480750322341919, "learning_rate": 1.9999682072240226e-05, "loss": 0.6943, "step": 266 }, { "epoch": 0.008202009031425674, "grad_norm": 0.4592234492301941, "learning_rate": 1.9999678206913355e-05, "loss": 0.7125, "step": 267 }, { "epoch": 0.008232728166374836, "grad_norm": 0.5680233836174011, "learning_rate": 1.9999674318231535e-05, "loss": 0.6677, "step": 268 }, { "epoch": 0.008263447301323995, "grad_norm": 0.46107569336891174, "learning_rate": 1.999967040619478e-05, "loss": 0.7797, "step": 269 }, { "epoch": 0.008294166436273155, "grad_norm": 0.48123297095298767, "learning_rate": 1.9999666470803095e-05, "loss": 0.7401, "step": 270 }, { "epoch": 0.008324885571222315, "grad_norm": 0.43247920274734497, "learning_rate": 1.999966251205649e-05, "loss": 0.5787, "step": 271 }, { "epoch": 0.008355604706171475, "grad_norm": 0.4157200753688812, "learning_rate": 1.9999658529954975e-05, "loss": 0.702, "step": 272 }, { "epoch": 0.008386323841120634, "grad_norm": 0.40017959475517273, "learning_rate": 1.999965452449856e-05, "loss": 0.6436, "step": 273 }, { "epoch": 0.008417042976069794, "grad_norm": 0.46761250495910645, "learning_rate": 1.9999650495687253e-05, "loss": 0.7264, "step": 274 }, { "epoch": 0.008447762111018954, "grad_norm": 0.38971495628356934, "learning_rate": 1.9999646443521064e-05, "loss": 0.6621, "step": 275 }, { "epoch": 0.008478481245968113, "grad_norm": 0.335269957780838, "learning_rate": 1.9999642368000002e-05, "loss": 0.6791, "step": 276 }, { "epoch": 0.008509200380917273, "grad_norm": 0.37881919741630554, "learning_rate": 1.9999638269124076e-05, "loss": 0.6573, "step": 277 }, { "epoch": 0.008539919515866433, "grad_norm": 0.41773054003715515, "learning_rate": 1.9999634146893298e-05, "loss": 0.5777, "step": 278 }, { "epoch": 0.008570638650815593, "grad_norm": 1.0363740921020508, "learning_rate": 1.9999630001307676e-05, "loss": 0.6935, "step": 279 }, { "epoch": 0.008601357785764752, "grad_norm": 0.3823683261871338, "learning_rate": 1.999962583236722e-05, "loss": 0.6592, "step": 280 }, { "epoch": 0.008632076920713912, "grad_norm": 0.4405483901500702, "learning_rate": 1.9999621640071937e-05, "loss": 0.6655, "step": 281 }, { "epoch": 0.008662796055663072, "grad_norm": 0.38129326701164246, "learning_rate": 1.999961742442184e-05, "loss": 0.6721, "step": 282 }, { "epoch": 0.008693515190612233, "grad_norm": 0.35498368740081787, "learning_rate": 1.9999613185416945e-05, "loss": 0.6929, "step": 283 }, { "epoch": 0.008724234325561393, "grad_norm": 0.43427222967147827, "learning_rate": 1.9999608923057247e-05, "loss": 0.6333, "step": 284 }, { "epoch": 0.008754953460510553, "grad_norm": 0.3337889313697815, "learning_rate": 1.9999604637342767e-05, "loss": 0.586, "step": 285 }, { "epoch": 0.008785672595459712, "grad_norm": 0.4489438235759735, "learning_rate": 1.9999600328273508e-05, "loss": 0.6355, "step": 286 }, { "epoch": 0.008816391730408872, "grad_norm": 0.42258504033088684, "learning_rate": 1.999959599584949e-05, "loss": 0.6204, "step": 287 }, { "epoch": 0.008847110865358032, "grad_norm": 0.33534491062164307, "learning_rate": 1.9999591640070715e-05, "loss": 0.5926, "step": 288 }, { "epoch": 0.008877830000307192, "grad_norm": 0.41799235343933105, "learning_rate": 1.9999587260937195e-05, "loss": 0.7327, "step": 289 }, { "epoch": 0.008908549135256351, "grad_norm": 0.40292006731033325, "learning_rate": 1.999958285844894e-05, "loss": 0.6498, "step": 290 }, { "epoch": 0.008939268270205511, "grad_norm": 0.7016105055809021, "learning_rate": 1.999957843260596e-05, "loss": 0.6208, "step": 291 }, { "epoch": 0.00896998740515467, "grad_norm": 0.3725782036781311, "learning_rate": 1.999957398340827e-05, "loss": 0.6172, "step": 292 }, { "epoch": 0.00900070654010383, "grad_norm": 0.39071616530418396, "learning_rate": 1.9999569510855873e-05, "loss": 0.715, "step": 293 }, { "epoch": 0.00903142567505299, "grad_norm": 0.3588090240955353, "learning_rate": 1.9999565014948785e-05, "loss": 0.6251, "step": 294 }, { "epoch": 0.00906214481000215, "grad_norm": 0.40229129791259766, "learning_rate": 1.9999560495687013e-05, "loss": 0.6563, "step": 295 }, { "epoch": 0.00909286394495131, "grad_norm": 0.3493892550468445, "learning_rate": 1.999955595307057e-05, "loss": 0.6356, "step": 296 }, { "epoch": 0.00912358307990047, "grad_norm": 0.35623547434806824, "learning_rate": 1.9999551387099468e-05, "loss": 0.6282, "step": 297 }, { "epoch": 0.009154302214849629, "grad_norm": 0.38631561398506165, "learning_rate": 1.9999546797773712e-05, "loss": 0.6539, "step": 298 }, { "epoch": 0.00918502134979879, "grad_norm": 0.3356530964374542, "learning_rate": 1.9999542185093316e-05, "loss": 0.69, "step": 299 }, { "epoch": 0.00921574048474795, "grad_norm": 0.3222956955432892, "learning_rate": 1.999953754905829e-05, "loss": 0.5983, "step": 300 }, { "epoch": 0.00924645961969711, "grad_norm": 0.336961030960083, "learning_rate": 1.999953288966865e-05, "loss": 0.6822, "step": 301 }, { "epoch": 0.00927717875464627, "grad_norm": 0.3628532886505127, "learning_rate": 1.99995282069244e-05, "loss": 0.6674, "step": 302 }, { "epoch": 0.00930789788959543, "grad_norm": 0.3731553256511688, "learning_rate": 1.9999523500825554e-05, "loss": 0.606, "step": 303 }, { "epoch": 0.009338617024544589, "grad_norm": 0.35044553875923157, "learning_rate": 1.9999518771372122e-05, "loss": 0.6227, "step": 304 }, { "epoch": 0.009369336159493749, "grad_norm": 0.35342147946357727, "learning_rate": 1.999951401856412e-05, "loss": 0.6714, "step": 305 }, { "epoch": 0.009400055294442908, "grad_norm": 0.3623189926147461, "learning_rate": 1.9999509242401546e-05, "loss": 0.6875, "step": 306 }, { "epoch": 0.009430774429392068, "grad_norm": 0.37557607889175415, "learning_rate": 1.9999504442884426e-05, "loss": 0.6599, "step": 307 }, { "epoch": 0.009461493564341228, "grad_norm": 0.3845899999141693, "learning_rate": 1.9999499620012764e-05, "loss": 0.6536, "step": 308 }, { "epoch": 0.009492212699290388, "grad_norm": 0.3518262803554535, "learning_rate": 1.9999494773786574e-05, "loss": 0.6633, "step": 309 }, { "epoch": 0.009522931834239547, "grad_norm": 0.35807764530181885, "learning_rate": 1.999948990420586e-05, "loss": 0.659, "step": 310 }, { "epoch": 0.009553650969188707, "grad_norm": 0.5970296859741211, "learning_rate": 1.9999485011270644e-05, "loss": 0.5517, "step": 311 }, { "epoch": 0.009584370104137867, "grad_norm": 0.3578811287879944, "learning_rate": 1.999948009498093e-05, "loss": 0.6496, "step": 312 }, { "epoch": 0.009615089239087026, "grad_norm": 0.3578011691570282, "learning_rate": 1.999947515533673e-05, "loss": 0.6423, "step": 313 }, { "epoch": 0.009645808374036188, "grad_norm": 0.41529035568237305, "learning_rate": 1.999947019233806e-05, "loss": 0.669, "step": 314 }, { "epoch": 0.009676527508985348, "grad_norm": 0.36085063219070435, "learning_rate": 1.999946520598493e-05, "loss": 0.6035, "step": 315 }, { "epoch": 0.009707246643934507, "grad_norm": 0.41266924142837524, "learning_rate": 1.999946019627735e-05, "loss": 0.6619, "step": 316 }, { "epoch": 0.009737965778883667, "grad_norm": 0.37145596742630005, "learning_rate": 1.999945516321533e-05, "loss": 0.7088, "step": 317 }, { "epoch": 0.009768684913832827, "grad_norm": 0.38889577984809875, "learning_rate": 1.9999450106798888e-05, "loss": 0.6121, "step": 318 }, { "epoch": 0.009799404048781987, "grad_norm": 0.4038635492324829, "learning_rate": 1.999944502702803e-05, "loss": 0.6331, "step": 319 }, { "epoch": 0.009830123183731146, "grad_norm": 0.42760738730430603, "learning_rate": 1.9999439923902766e-05, "loss": 0.6331, "step": 320 }, { "epoch": 0.009860842318680306, "grad_norm": 0.3894347846508026, "learning_rate": 1.999943479742312e-05, "loss": 0.7133, "step": 321 }, { "epoch": 0.009891561453629466, "grad_norm": 0.35994023084640503, "learning_rate": 1.999942964758909e-05, "loss": 0.6835, "step": 322 }, { "epoch": 0.009922280588578625, "grad_norm": 0.37454649806022644, "learning_rate": 1.9999424474400697e-05, "loss": 0.6567, "step": 323 }, { "epoch": 0.009952999723527785, "grad_norm": 0.3578818142414093, "learning_rate": 1.9999419277857946e-05, "loss": 0.6133, "step": 324 }, { "epoch": 0.009983718858476945, "grad_norm": 0.35288044810295105, "learning_rate": 1.9999414057960854e-05, "loss": 0.6021, "step": 325 }, { "epoch": 0.010014437993426105, "grad_norm": 0.42110273241996765, "learning_rate": 1.999940881470943e-05, "loss": 0.6784, "step": 326 }, { "epoch": 0.010045157128375264, "grad_norm": 0.3750227987766266, "learning_rate": 1.9999403548103692e-05, "loss": 0.6934, "step": 327 }, { "epoch": 0.010075876263324424, "grad_norm": 0.49750158190727234, "learning_rate": 1.999939825814365e-05, "loss": 0.6628, "step": 328 }, { "epoch": 0.010106595398273585, "grad_norm": 0.4880644381046295, "learning_rate": 1.999939294482931e-05, "loss": 0.6438, "step": 329 }, { "epoch": 0.010137314533222745, "grad_norm": 0.39149752259254456, "learning_rate": 1.9999387608160693e-05, "loss": 0.6487, "step": 330 }, { "epoch": 0.010168033668171905, "grad_norm": 0.3597683608531952, "learning_rate": 1.999938224813781e-05, "loss": 0.6257, "step": 331 }, { "epoch": 0.010198752803121065, "grad_norm": 0.35471296310424805, "learning_rate": 1.9999376864760664e-05, "loss": 0.5989, "step": 332 }, { "epoch": 0.010229471938070224, "grad_norm": 0.38467466831207275, "learning_rate": 1.9999371458029283e-05, "loss": 0.6762, "step": 333 }, { "epoch": 0.010260191073019384, "grad_norm": 1.087361216545105, "learning_rate": 1.9999366027943665e-05, "loss": 0.616, "step": 334 }, { "epoch": 0.010290910207968544, "grad_norm": 0.36453086137771606, "learning_rate": 1.999936057450383e-05, "loss": 0.6441, "step": 335 }, { "epoch": 0.010321629342917703, "grad_norm": 0.3416441082954407, "learning_rate": 1.9999355097709795e-05, "loss": 0.6209, "step": 336 }, { "epoch": 0.010352348477866863, "grad_norm": 0.3924543559551239, "learning_rate": 1.9999349597561565e-05, "loss": 0.7156, "step": 337 }, { "epoch": 0.010383067612816023, "grad_norm": 0.3669499456882477, "learning_rate": 1.9999344074059157e-05, "loss": 0.69, "step": 338 }, { "epoch": 0.010413786747765183, "grad_norm": 0.36612990498542786, "learning_rate": 1.999933852720258e-05, "loss": 0.7049, "step": 339 }, { "epoch": 0.010444505882714342, "grad_norm": 0.36199188232421875, "learning_rate": 1.999933295699185e-05, "loss": 0.5896, "step": 340 }, { "epoch": 0.010475225017663502, "grad_norm": 0.40106144547462463, "learning_rate": 1.9999327363426985e-05, "loss": 0.6656, "step": 341 }, { "epoch": 0.010505944152612662, "grad_norm": 0.3442155718803406, "learning_rate": 1.9999321746507986e-05, "loss": 0.6688, "step": 342 }, { "epoch": 0.010536663287561822, "grad_norm": 1.9197670221328735, "learning_rate": 1.9999316106234876e-05, "loss": 0.6966, "step": 343 }, { "epoch": 0.010567382422510981, "grad_norm": 0.4096560478210449, "learning_rate": 1.9999310442607662e-05, "loss": 0.6671, "step": 344 }, { "epoch": 0.010598101557460143, "grad_norm": 0.31059959530830383, "learning_rate": 1.999930475562636e-05, "loss": 0.623, "step": 345 }, { "epoch": 0.010628820692409302, "grad_norm": 0.49931785464286804, "learning_rate": 1.9999299045290988e-05, "loss": 0.724, "step": 346 }, { "epoch": 0.010659539827358462, "grad_norm": 0.4200448989868164, "learning_rate": 1.9999293311601552e-05, "loss": 0.7036, "step": 347 }, { "epoch": 0.010690258962307622, "grad_norm": 0.4106113314628601, "learning_rate": 1.9999287554558066e-05, "loss": 0.7176, "step": 348 }, { "epoch": 0.010720978097256782, "grad_norm": 0.34463292360305786, "learning_rate": 1.999928177416055e-05, "loss": 0.746, "step": 349 }, { "epoch": 0.010751697232205941, "grad_norm": 0.35606110095977783, "learning_rate": 1.999927597040901e-05, "loss": 0.6822, "step": 350 }, { "epoch": 0.010782416367155101, "grad_norm": 0.34727346897125244, "learning_rate": 1.9999270143303465e-05, "loss": 0.5823, "step": 351 }, { "epoch": 0.01081313550210426, "grad_norm": 0.3678117096424103, "learning_rate": 1.9999264292843923e-05, "loss": 0.6291, "step": 352 }, { "epoch": 0.01084385463705342, "grad_norm": 0.39331579208374023, "learning_rate": 1.99992584190304e-05, "loss": 0.6074, "step": 353 }, { "epoch": 0.01087457377200258, "grad_norm": 0.3936454653739929, "learning_rate": 1.9999252521862917e-05, "loss": 0.6587, "step": 354 }, { "epoch": 0.01090529290695174, "grad_norm": 0.43232443928718567, "learning_rate": 1.9999246601341475e-05, "loss": 0.7187, "step": 355 }, { "epoch": 0.0109360120419009, "grad_norm": 0.4093814492225647, "learning_rate": 1.9999240657466095e-05, "loss": 0.7158, "step": 356 }, { "epoch": 0.01096673117685006, "grad_norm": 0.3415032625198364, "learning_rate": 1.9999234690236794e-05, "loss": 0.6987, "step": 357 }, { "epoch": 0.010997450311799219, "grad_norm": 1.124853491783142, "learning_rate": 1.9999228699653578e-05, "loss": 0.6117, "step": 358 }, { "epoch": 0.011028169446748379, "grad_norm": 0.4822380542755127, "learning_rate": 1.9999222685716468e-05, "loss": 0.6702, "step": 359 }, { "epoch": 0.01105888858169754, "grad_norm": 0.33252909779548645, "learning_rate": 1.9999216648425473e-05, "loss": 0.6723, "step": 360 }, { "epoch": 0.0110896077166467, "grad_norm": 0.3503856062889099, "learning_rate": 1.999921058778061e-05, "loss": 0.5881, "step": 361 }, { "epoch": 0.01112032685159586, "grad_norm": 0.3378940522670746, "learning_rate": 1.9999204503781896e-05, "loss": 0.6501, "step": 362 }, { "epoch": 0.01115104598654502, "grad_norm": 0.3460848331451416, "learning_rate": 1.9999198396429337e-05, "loss": 0.6441, "step": 363 }, { "epoch": 0.011181765121494179, "grad_norm": 0.3655117452144623, "learning_rate": 1.999919226572295e-05, "loss": 0.7047, "step": 364 }, { "epoch": 0.011212484256443339, "grad_norm": 0.4178818166255951, "learning_rate": 1.9999186111662756e-05, "loss": 0.7082, "step": 365 }, { "epoch": 0.011243203391392498, "grad_norm": 0.4033735394477844, "learning_rate": 1.999917993424876e-05, "loss": 0.7278, "step": 366 }, { "epoch": 0.011273922526341658, "grad_norm": 0.32997870445251465, "learning_rate": 1.9999173733480987e-05, "loss": 0.6431, "step": 367 }, { "epoch": 0.011304641661290818, "grad_norm": 0.3790479898452759, "learning_rate": 1.999916750935944e-05, "loss": 0.5667, "step": 368 }, { "epoch": 0.011335360796239978, "grad_norm": 0.3373287320137024, "learning_rate": 1.9999161261884142e-05, "loss": 0.6058, "step": 369 }, { "epoch": 0.011366079931189137, "grad_norm": 0.3604663014411926, "learning_rate": 1.9999154991055102e-05, "loss": 0.6194, "step": 370 }, { "epoch": 0.011396799066138297, "grad_norm": 0.3682262599468231, "learning_rate": 1.999914869687234e-05, "loss": 0.6911, "step": 371 }, { "epoch": 0.011427518201087457, "grad_norm": 0.35400623083114624, "learning_rate": 1.9999142379335867e-05, "loss": 0.6196, "step": 372 }, { "epoch": 0.011458237336036617, "grad_norm": 0.3947265148162842, "learning_rate": 1.9999136038445695e-05, "loss": 0.6581, "step": 373 }, { "epoch": 0.011488956470985776, "grad_norm": 0.4798475503921509, "learning_rate": 1.9999129674201847e-05, "loss": 0.6726, "step": 374 }, { "epoch": 0.011519675605934938, "grad_norm": 0.35179272294044495, "learning_rate": 1.999912328660433e-05, "loss": 0.6398, "step": 375 }, { "epoch": 0.011550394740884097, "grad_norm": 0.5301125645637512, "learning_rate": 1.9999116875653167e-05, "loss": 0.6353, "step": 376 }, { "epoch": 0.011581113875833257, "grad_norm": 0.38167908787727356, "learning_rate": 1.9999110441348364e-05, "loss": 0.6339, "step": 377 }, { "epoch": 0.011611833010782417, "grad_norm": 0.3411039412021637, "learning_rate": 1.9999103983689944e-05, "loss": 0.6464, "step": 378 }, { "epoch": 0.011642552145731577, "grad_norm": 0.36295250058174133, "learning_rate": 1.9999097502677914e-05, "loss": 0.5786, "step": 379 }, { "epoch": 0.011673271280680736, "grad_norm": 0.5244048833847046, "learning_rate": 1.9999090998312295e-05, "loss": 0.6284, "step": 380 }, { "epoch": 0.011703990415629896, "grad_norm": 0.3522769808769226, "learning_rate": 1.99990844705931e-05, "loss": 0.7016, "step": 381 }, { "epoch": 0.011734709550579056, "grad_norm": 0.3721171021461487, "learning_rate": 1.9999077919520346e-05, "loss": 0.6314, "step": 382 }, { "epoch": 0.011765428685528215, "grad_norm": 0.3392050564289093, "learning_rate": 1.9999071345094047e-05, "loss": 0.6272, "step": 383 }, { "epoch": 0.011796147820477375, "grad_norm": 0.4184630215167999, "learning_rate": 1.999906474731422e-05, "loss": 0.6294, "step": 384 }, { "epoch": 0.011826866955426535, "grad_norm": 0.4917213022708893, "learning_rate": 1.9999058126180877e-05, "loss": 0.6854, "step": 385 }, { "epoch": 0.011857586090375695, "grad_norm": 0.362532377243042, "learning_rate": 1.9999051481694036e-05, "loss": 0.6606, "step": 386 }, { "epoch": 0.011888305225324854, "grad_norm": 0.3755333423614502, "learning_rate": 1.999904481385371e-05, "loss": 0.6519, "step": 387 }, { "epoch": 0.011919024360274014, "grad_norm": 0.40992051362991333, "learning_rate": 1.999903812265992e-05, "loss": 0.6902, "step": 388 }, { "epoch": 0.011949743495223174, "grad_norm": 0.4052127003669739, "learning_rate": 1.9999031408112676e-05, "loss": 0.7251, "step": 389 }, { "epoch": 0.011980462630172335, "grad_norm": 0.3541010022163391, "learning_rate": 1.9999024670211996e-05, "loss": 0.6237, "step": 390 }, { "epoch": 0.012011181765121495, "grad_norm": 0.354582279920578, "learning_rate": 1.9999017908957896e-05, "loss": 0.6678, "step": 391 }, { "epoch": 0.012041900900070655, "grad_norm": 0.5061604380607605, "learning_rate": 1.9999011124350393e-05, "loss": 0.6559, "step": 392 }, { "epoch": 0.012072620035019814, "grad_norm": 0.3392725884914398, "learning_rate": 1.99990043163895e-05, "loss": 0.6907, "step": 393 }, { "epoch": 0.012103339169968974, "grad_norm": 0.388367623090744, "learning_rate": 1.9998997485075228e-05, "loss": 0.6801, "step": 394 }, { "epoch": 0.012134058304918134, "grad_norm": 0.3592967391014099, "learning_rate": 1.9998990630407607e-05, "loss": 0.6578, "step": 395 }, { "epoch": 0.012164777439867294, "grad_norm": 0.3641335070133209, "learning_rate": 1.9998983752386644e-05, "loss": 0.6405, "step": 396 }, { "epoch": 0.012195496574816453, "grad_norm": 0.3645850419998169, "learning_rate": 1.9998976851012353e-05, "loss": 0.6921, "step": 397 }, { "epoch": 0.012226215709765613, "grad_norm": 0.3746393918991089, "learning_rate": 1.9998969926284756e-05, "loss": 0.6901, "step": 398 }, { "epoch": 0.012256934844714773, "grad_norm": 0.3751750886440277, "learning_rate": 1.9998962978203865e-05, "loss": 0.6872, "step": 399 }, { "epoch": 0.012287653979663932, "grad_norm": 0.36580783128738403, "learning_rate": 1.9998956006769696e-05, "loss": 0.6844, "step": 400 }, { "epoch": 0.012318373114613092, "grad_norm": 0.33628973364830017, "learning_rate": 1.999894901198227e-05, "loss": 0.5891, "step": 401 }, { "epoch": 0.012349092249562252, "grad_norm": 0.37504246830940247, "learning_rate": 1.9998941993841597e-05, "loss": 0.6494, "step": 402 }, { "epoch": 0.012379811384511412, "grad_norm": 0.4191441237926483, "learning_rate": 1.99989349523477e-05, "loss": 0.6378, "step": 403 }, { "epoch": 0.012410530519460571, "grad_norm": 0.31190699338912964, "learning_rate": 1.9998927887500593e-05, "loss": 0.6079, "step": 404 }, { "epoch": 0.012441249654409731, "grad_norm": 0.36322441697120667, "learning_rate": 1.999892079930029e-05, "loss": 0.7134, "step": 405 }, { "epoch": 0.012471968789358892, "grad_norm": 0.38948509097099304, "learning_rate": 1.9998913687746807e-05, "loss": 0.6873, "step": 406 }, { "epoch": 0.012502687924308052, "grad_norm": 0.346432626247406, "learning_rate": 1.999890655284017e-05, "loss": 0.6548, "step": 407 }, { "epoch": 0.012533407059257212, "grad_norm": 0.4184305965900421, "learning_rate": 1.9998899394580378e-05, "loss": 0.6886, "step": 408 }, { "epoch": 0.012564126194206372, "grad_norm": 0.3532783091068268, "learning_rate": 1.9998892212967465e-05, "loss": 0.7087, "step": 409 }, { "epoch": 0.012594845329155531, "grad_norm": 0.35029298067092896, "learning_rate": 1.9998885008001437e-05, "loss": 0.6724, "step": 410 }, { "epoch": 0.012625564464104691, "grad_norm": 0.4311799705028534, "learning_rate": 1.9998877779682318e-05, "loss": 0.6089, "step": 411 }, { "epoch": 0.01265628359905385, "grad_norm": 0.3698045611381531, "learning_rate": 1.9998870528010118e-05, "loss": 0.6595, "step": 412 }, { "epoch": 0.01268700273400301, "grad_norm": 0.385349303483963, "learning_rate": 1.999886325298486e-05, "loss": 0.6558, "step": 413 }, { "epoch": 0.01271772186895217, "grad_norm": 0.3399409055709839, "learning_rate": 1.999885595460656e-05, "loss": 0.606, "step": 414 }, { "epoch": 0.01274844100390133, "grad_norm": 0.3290490508079529, "learning_rate": 1.9998848632875232e-05, "loss": 0.6109, "step": 415 }, { "epoch": 0.01277916013885049, "grad_norm": 0.43345990777015686, "learning_rate": 1.9998841287790897e-05, "loss": 0.6037, "step": 416 }, { "epoch": 0.01280987927379965, "grad_norm": 0.3873814046382904, "learning_rate": 1.9998833919353565e-05, "loss": 0.7078, "step": 417 }, { "epoch": 0.012840598408748809, "grad_norm": 0.39010190963745117, "learning_rate": 1.999882652756326e-05, "loss": 0.7128, "step": 418 }, { "epoch": 0.012871317543697969, "grad_norm": 0.36289942264556885, "learning_rate": 1.999881911242e-05, "loss": 0.6815, "step": 419 }, { "epoch": 0.012902036678647128, "grad_norm": 0.36228346824645996, "learning_rate": 1.9998811673923796e-05, "loss": 0.5598, "step": 420 }, { "epoch": 0.01293275581359629, "grad_norm": 0.3516004681587219, "learning_rate": 1.999880421207467e-05, "loss": 0.6214, "step": 421 }, { "epoch": 0.01296347494854545, "grad_norm": 0.3334968090057373, "learning_rate": 1.999879672687264e-05, "loss": 0.6706, "step": 422 }, { "epoch": 0.01299419408349461, "grad_norm": 0.43286290764808655, "learning_rate": 1.9998789218317723e-05, "loss": 0.6315, "step": 423 }, { "epoch": 0.013024913218443769, "grad_norm": 0.40665122866630554, "learning_rate": 1.999878168640993e-05, "loss": 0.6175, "step": 424 }, { "epoch": 0.013055632353392929, "grad_norm": 0.3742751181125641, "learning_rate": 1.999877413114929e-05, "loss": 0.563, "step": 425 }, { "epoch": 0.013086351488342089, "grad_norm": 0.4124602973461151, "learning_rate": 1.9998766552535813e-05, "loss": 0.7052, "step": 426 }, { "epoch": 0.013117070623291248, "grad_norm": 0.3557966947555542, "learning_rate": 1.9998758950569516e-05, "loss": 0.7493, "step": 427 }, { "epoch": 0.013147789758240408, "grad_norm": 0.3962048590183258, "learning_rate": 1.999875132525042e-05, "loss": 0.6723, "step": 428 }, { "epoch": 0.013178508893189568, "grad_norm": 0.3799072206020355, "learning_rate": 1.9998743676578547e-05, "loss": 0.7543, "step": 429 }, { "epoch": 0.013209228028138727, "grad_norm": 0.3397122621536255, "learning_rate": 1.9998736004553904e-05, "loss": 0.655, "step": 430 }, { "epoch": 0.013239947163087887, "grad_norm": 0.33056625723838806, "learning_rate": 1.9998728309176515e-05, "loss": 0.6709, "step": 431 }, { "epoch": 0.013270666298037047, "grad_norm": 0.37026017904281616, "learning_rate": 1.99987205904464e-05, "loss": 0.6711, "step": 432 }, { "epoch": 0.013301385432986207, "grad_norm": 0.4748310446739197, "learning_rate": 1.9998712848363577e-05, "loss": 0.6709, "step": 433 }, { "epoch": 0.013332104567935366, "grad_norm": 0.31772613525390625, "learning_rate": 1.999870508292806e-05, "loss": 0.6029, "step": 434 }, { "epoch": 0.013362823702884526, "grad_norm": 0.3760957717895508, "learning_rate": 1.9998697294139866e-05, "loss": 0.6413, "step": 435 }, { "epoch": 0.013393542837833687, "grad_norm": 0.3804575502872467, "learning_rate": 1.9998689481999015e-05, "loss": 0.6301, "step": 436 }, { "epoch": 0.013424261972782847, "grad_norm": 0.3949744403362274, "learning_rate": 1.999868164650553e-05, "loss": 0.7035, "step": 437 }, { "epoch": 0.013454981107732007, "grad_norm": 0.36304038763046265, "learning_rate": 1.999867378765943e-05, "loss": 0.6092, "step": 438 }, { "epoch": 0.013485700242681167, "grad_norm": 0.33922120928764343, "learning_rate": 1.999866590546072e-05, "loss": 0.6859, "step": 439 }, { "epoch": 0.013516419377630326, "grad_norm": 0.3809610903263092, "learning_rate": 1.9998657999909432e-05, "loss": 0.6901, "step": 440 }, { "epoch": 0.013547138512579486, "grad_norm": 0.340664267539978, "learning_rate": 1.999865007100558e-05, "loss": 0.5094, "step": 441 }, { "epoch": 0.013577857647528646, "grad_norm": 0.34022846817970276, "learning_rate": 1.9998642118749184e-05, "loss": 0.6508, "step": 442 }, { "epoch": 0.013608576782477805, "grad_norm": 0.3372526168823242, "learning_rate": 1.999863414314026e-05, "loss": 0.5852, "step": 443 }, { "epoch": 0.013639295917426965, "grad_norm": 0.3284006416797638, "learning_rate": 1.9998626144178824e-05, "loss": 0.5451, "step": 444 }, { "epoch": 0.013670015052376125, "grad_norm": 0.3568611443042755, "learning_rate": 1.99986181218649e-05, "loss": 0.5759, "step": 445 }, { "epoch": 0.013700734187325285, "grad_norm": 0.39511221647262573, "learning_rate": 1.9998610076198507e-05, "loss": 0.6191, "step": 446 }, { "epoch": 0.013731453322274444, "grad_norm": 0.37458544969558716, "learning_rate": 1.9998602007179662e-05, "loss": 0.731, "step": 447 }, { "epoch": 0.013762172457223604, "grad_norm": 0.35774555802345276, "learning_rate": 1.9998593914808383e-05, "loss": 0.6403, "step": 448 }, { "epoch": 0.013792891592172764, "grad_norm": 0.3269951045513153, "learning_rate": 1.999858579908469e-05, "loss": 0.534, "step": 449 }, { "epoch": 0.013823610727121923, "grad_norm": 0.36316394805908203, "learning_rate": 1.99985776600086e-05, "loss": 0.6796, "step": 450 }, { "epoch": 0.013854329862071083, "grad_norm": 0.45881199836730957, "learning_rate": 1.9998569497580137e-05, "loss": 0.6818, "step": 451 }, { "epoch": 0.013885048997020245, "grad_norm": 0.3773094117641449, "learning_rate": 1.999856131179931e-05, "loss": 0.5793, "step": 452 }, { "epoch": 0.013915768131969404, "grad_norm": 0.4114103615283966, "learning_rate": 1.999855310266615e-05, "loss": 0.6635, "step": 453 }, { "epoch": 0.013946487266918564, "grad_norm": 0.39250558614730835, "learning_rate": 1.999854487018067e-05, "loss": 0.6543, "step": 454 }, { "epoch": 0.013977206401867724, "grad_norm": 0.39579471945762634, "learning_rate": 1.999853661434289e-05, "loss": 0.6709, "step": 455 }, { "epoch": 0.014007925536816884, "grad_norm": 0.33862248063087463, "learning_rate": 1.999852833515283e-05, "loss": 0.6579, "step": 456 }, { "epoch": 0.014038644671766043, "grad_norm": 0.36259809136390686, "learning_rate": 1.9998520032610507e-05, "loss": 0.6782, "step": 457 }, { "epoch": 0.014069363806715203, "grad_norm": 0.4477958083152771, "learning_rate": 1.9998511706715944e-05, "loss": 0.6075, "step": 458 }, { "epoch": 0.014100082941664363, "grad_norm": 0.4580737054347992, "learning_rate": 1.999850335746916e-05, "loss": 0.6711, "step": 459 }, { "epoch": 0.014130802076613522, "grad_norm": 0.3472817540168762, "learning_rate": 1.999849498487017e-05, "loss": 0.6435, "step": 460 }, { "epoch": 0.014161521211562682, "grad_norm": 0.3777022063732147, "learning_rate": 1.9998486588919e-05, "loss": 0.6518, "step": 461 }, { "epoch": 0.014192240346511842, "grad_norm": 0.35256606340408325, "learning_rate": 1.9998478169615662e-05, "loss": 0.6282, "step": 462 }, { "epoch": 0.014222959481461002, "grad_norm": 0.3950614035129547, "learning_rate": 1.9998469726960183e-05, "loss": 0.6714, "step": 463 }, { "epoch": 0.014253678616410161, "grad_norm": 0.3608599603176117, "learning_rate": 1.9998461260952577e-05, "loss": 0.6027, "step": 464 }, { "epoch": 0.014284397751359321, "grad_norm": 1.1909950971603394, "learning_rate": 1.999845277159287e-05, "loss": 0.6617, "step": 465 }, { "epoch": 0.01431511688630848, "grad_norm": 0.3181091845035553, "learning_rate": 1.9998444258881077e-05, "loss": 0.6252, "step": 466 }, { "epoch": 0.014345836021257642, "grad_norm": 0.3597394526004791, "learning_rate": 1.999843572281722e-05, "loss": 0.6043, "step": 467 }, { "epoch": 0.014376555156206802, "grad_norm": 0.3624703884124756, "learning_rate": 1.9998427163401314e-05, "loss": 0.5565, "step": 468 }, { "epoch": 0.014407274291155962, "grad_norm": 0.354559987783432, "learning_rate": 1.9998418580633388e-05, "loss": 0.6388, "step": 469 }, { "epoch": 0.014437993426105121, "grad_norm": 0.39622339606285095, "learning_rate": 1.9998409974513455e-05, "loss": 0.6887, "step": 470 }, { "epoch": 0.014468712561054281, "grad_norm": 0.683638870716095, "learning_rate": 1.9998401345041535e-05, "loss": 0.646, "step": 471 }, { "epoch": 0.01449943169600344, "grad_norm": 0.3551875352859497, "learning_rate": 1.9998392692217655e-05, "loss": 0.6936, "step": 472 }, { "epoch": 0.0145301508309526, "grad_norm": 0.35588881373405457, "learning_rate": 1.9998384016041826e-05, "loss": 0.6654, "step": 473 }, { "epoch": 0.01456086996590176, "grad_norm": 0.33561286330223083, "learning_rate": 1.9998375316514077e-05, "loss": 0.6105, "step": 474 }, { "epoch": 0.01459158910085092, "grad_norm": 0.6108133792877197, "learning_rate": 1.999836659363442e-05, "loss": 0.734, "step": 475 }, { "epoch": 0.01462230823580008, "grad_norm": 0.3363146185874939, "learning_rate": 1.9998357847402884e-05, "loss": 0.6213, "step": 476 }, { "epoch": 0.01465302737074924, "grad_norm": 0.4399028420448303, "learning_rate": 1.9998349077819484e-05, "loss": 0.6105, "step": 477 }, { "epoch": 0.014683746505698399, "grad_norm": 0.34152963757514954, "learning_rate": 1.9998340284884235e-05, "loss": 0.6596, "step": 478 }, { "epoch": 0.014714465640647559, "grad_norm": 0.3742964267730713, "learning_rate": 1.9998331468597172e-05, "loss": 0.7002, "step": 479 }, { "epoch": 0.014745184775596719, "grad_norm": 0.3924769163131714, "learning_rate": 1.9998322628958306e-05, "loss": 0.5889, "step": 480 }, { "epoch": 0.014775903910545878, "grad_norm": 0.4139409065246582, "learning_rate": 1.9998313765967656e-05, "loss": 0.5764, "step": 481 }, { "epoch": 0.01480662304549504, "grad_norm": 0.3535204231739044, "learning_rate": 1.999830487962525e-05, "loss": 0.5987, "step": 482 }, { "epoch": 0.0148373421804442, "grad_norm": 0.37848347425460815, "learning_rate": 1.99982959699311e-05, "loss": 0.68, "step": 483 }, { "epoch": 0.014868061315393359, "grad_norm": 0.4101581871509552, "learning_rate": 1.9998287036885235e-05, "loss": 0.6625, "step": 484 }, { "epoch": 0.014898780450342519, "grad_norm": 0.360139399766922, "learning_rate": 1.999827808048767e-05, "loss": 0.7029, "step": 485 }, { "epoch": 0.014929499585291679, "grad_norm": 0.5190515518188477, "learning_rate": 1.999826910073843e-05, "loss": 0.6072, "step": 486 }, { "epoch": 0.014960218720240838, "grad_norm": 0.40456128120422363, "learning_rate": 1.9998260097637537e-05, "loss": 0.6654, "step": 487 }, { "epoch": 0.014990937855189998, "grad_norm": 0.4523564577102661, "learning_rate": 1.9998251071185005e-05, "loss": 0.6575, "step": 488 }, { "epoch": 0.015021656990139158, "grad_norm": 0.38104692101478577, "learning_rate": 1.999824202138086e-05, "loss": 0.6989, "step": 489 }, { "epoch": 0.015052376125088317, "grad_norm": 0.35218968987464905, "learning_rate": 1.9998232948225123e-05, "loss": 0.7114, "step": 490 }, { "epoch": 0.015083095260037477, "grad_norm": 0.3803866505622864, "learning_rate": 1.9998223851717816e-05, "loss": 0.6345, "step": 491 }, { "epoch": 0.015113814394986637, "grad_norm": 0.57049560546875, "learning_rate": 1.9998214731858956e-05, "loss": 0.5909, "step": 492 }, { "epoch": 0.015144533529935797, "grad_norm": 0.3569631576538086, "learning_rate": 1.999820558864857e-05, "loss": 0.6669, "step": 493 }, { "epoch": 0.015175252664884956, "grad_norm": 0.3685276508331299, "learning_rate": 1.9998196422086672e-05, "loss": 0.6752, "step": 494 }, { "epoch": 0.015205971799834116, "grad_norm": 0.37512511014938354, "learning_rate": 1.9998187232173295e-05, "loss": 0.6267, "step": 495 }, { "epoch": 0.015236690934783276, "grad_norm": 0.3476499617099762, "learning_rate": 1.9998178018908446e-05, "loss": 0.5931, "step": 496 }, { "epoch": 0.015267410069732437, "grad_norm": 0.5179610848426819, "learning_rate": 1.9998168782292163e-05, "loss": 0.698, "step": 497 }, { "epoch": 0.015298129204681597, "grad_norm": 0.3833017647266388, "learning_rate": 1.9998159522324448e-05, "loss": 0.703, "step": 498 }, { "epoch": 0.015328848339630757, "grad_norm": 0.38522037863731384, "learning_rate": 1.999815023900534e-05, "loss": 0.6898, "step": 499 }, { "epoch": 0.015359567474579916, "grad_norm": 0.4121200144290924, "learning_rate": 1.9998140932334854e-05, "loss": 0.6283, "step": 500 }, { "epoch": 0.015390286609529076, "grad_norm": 0.32847708463668823, "learning_rate": 1.999813160231301e-05, "loss": 0.6833, "step": 501 }, { "epoch": 0.015421005744478236, "grad_norm": 0.33107444643974304, "learning_rate": 1.999812224893983e-05, "loss": 0.6341, "step": 502 }, { "epoch": 0.015451724879427396, "grad_norm": 0.3629991114139557, "learning_rate": 1.9998112872215342e-05, "loss": 0.7044, "step": 503 }, { "epoch": 0.015482444014376555, "grad_norm": 0.37641698122024536, "learning_rate": 1.999810347213956e-05, "loss": 0.6606, "step": 504 }, { "epoch": 0.015513163149325715, "grad_norm": 0.37335672974586487, "learning_rate": 1.9998094048712507e-05, "loss": 0.6255, "step": 505 }, { "epoch": 0.015543882284274875, "grad_norm": 0.36234939098358154, "learning_rate": 1.999808460193421e-05, "loss": 0.694, "step": 506 }, { "epoch": 0.015574601419224034, "grad_norm": 0.42407071590423584, "learning_rate": 1.9998075131804685e-05, "loss": 0.7052, "step": 507 }, { "epoch": 0.015605320554173194, "grad_norm": 0.42430660128593445, "learning_rate": 1.999806563832396e-05, "loss": 0.6378, "step": 508 }, { "epoch": 0.015636039689122354, "grad_norm": 0.3993668556213379, "learning_rate": 1.9998056121492057e-05, "loss": 0.6179, "step": 509 }, { "epoch": 0.015666758824071515, "grad_norm": 0.3538491129875183, "learning_rate": 1.999804658130899e-05, "loss": 0.614, "step": 510 }, { "epoch": 0.015697477959020673, "grad_norm": 0.39544254541397095, "learning_rate": 1.999803701777479e-05, "loss": 0.6662, "step": 511 }, { "epoch": 0.015728197093969835, "grad_norm": 0.35190457105636597, "learning_rate": 1.9998027430889476e-05, "loss": 0.6239, "step": 512 }, { "epoch": 0.015758916228918993, "grad_norm": 0.3516015112400055, "learning_rate": 1.9998017820653072e-05, "loss": 0.6962, "step": 513 }, { "epoch": 0.015789635363868154, "grad_norm": 0.3892490565776825, "learning_rate": 1.9998008187065592e-05, "loss": 0.6338, "step": 514 }, { "epoch": 0.015820354498817312, "grad_norm": 0.32652202248573303, "learning_rate": 1.9997998530127075e-05, "loss": 0.5617, "step": 515 }, { "epoch": 0.015851073633766474, "grad_norm": 0.3441597819328308, "learning_rate": 1.999798884983753e-05, "loss": 0.626, "step": 516 }, { "epoch": 0.01588179276871563, "grad_norm": 0.3360462188720703, "learning_rate": 1.9997979146196985e-05, "loss": 0.6315, "step": 517 }, { "epoch": 0.015912511903664793, "grad_norm": 2.5117130279541016, "learning_rate": 1.9997969419205458e-05, "loss": 0.6744, "step": 518 }, { "epoch": 0.01594323103861395, "grad_norm": 0.3741868734359741, "learning_rate": 1.9997959668862977e-05, "loss": 0.6908, "step": 519 }, { "epoch": 0.015973950173563112, "grad_norm": 0.3500064015388489, "learning_rate": 1.9997949895169567e-05, "loss": 0.4988, "step": 520 }, { "epoch": 0.016004669308512274, "grad_norm": 0.396871417760849, "learning_rate": 1.999794009812524e-05, "loss": 0.6251, "step": 521 }, { "epoch": 0.016035388443461432, "grad_norm": 0.380485862493515, "learning_rate": 1.999793027773003e-05, "loss": 0.5989, "step": 522 }, { "epoch": 0.016066107578410593, "grad_norm": 0.5101944804191589, "learning_rate": 1.9997920433983956e-05, "loss": 0.7179, "step": 523 }, { "epoch": 0.01609682671335975, "grad_norm": 0.42518511414527893, "learning_rate": 1.9997910566887037e-05, "loss": 0.5832, "step": 524 }, { "epoch": 0.016127545848308913, "grad_norm": 0.371922105550766, "learning_rate": 1.9997900676439303e-05, "loss": 0.6074, "step": 525 }, { "epoch": 0.01615826498325807, "grad_norm": 0.37057992815971375, "learning_rate": 1.9997890762640774e-05, "loss": 0.6276, "step": 526 }, { "epoch": 0.016188984118207232, "grad_norm": 0.3425723910331726, "learning_rate": 1.999788082549147e-05, "loss": 0.6603, "step": 527 }, { "epoch": 0.01621970325315639, "grad_norm": 0.3937438428401947, "learning_rate": 1.9997870864991416e-05, "loss": 0.648, "step": 528 }, { "epoch": 0.01625042238810555, "grad_norm": 0.42374852299690247, "learning_rate": 1.9997860881140644e-05, "loss": 0.5851, "step": 529 }, { "epoch": 0.01628114152305471, "grad_norm": 0.3818710446357727, "learning_rate": 1.9997850873939163e-05, "loss": 0.6358, "step": 530 }, { "epoch": 0.01631186065800387, "grad_norm": 0.34825989603996277, "learning_rate": 1.9997840843387005e-05, "loss": 0.696, "step": 531 }, { "epoch": 0.01634257979295303, "grad_norm": 0.5634368658065796, "learning_rate": 1.999783078948419e-05, "loss": 0.598, "step": 532 }, { "epoch": 0.01637329892790219, "grad_norm": 0.3791891038417816, "learning_rate": 1.9997820712230745e-05, "loss": 0.6235, "step": 533 }, { "epoch": 0.01640401806285135, "grad_norm": 0.3309478163719177, "learning_rate": 1.999781061162669e-05, "loss": 0.5736, "step": 534 }, { "epoch": 0.01643473719780051, "grad_norm": 0.41343048214912415, "learning_rate": 1.999780048767205e-05, "loss": 0.7395, "step": 535 }, { "epoch": 0.01646545633274967, "grad_norm": 0.4153267741203308, "learning_rate": 1.999779034036685e-05, "loss": 0.6646, "step": 536 }, { "epoch": 0.01649617546769883, "grad_norm": 0.39774152636528015, "learning_rate": 1.999778016971111e-05, "loss": 0.683, "step": 537 }, { "epoch": 0.01652689460264799, "grad_norm": 0.35458850860595703, "learning_rate": 1.999776997570486e-05, "loss": 0.6235, "step": 538 }, { "epoch": 0.01655761373759715, "grad_norm": 0.3556102514266968, "learning_rate": 1.9997759758348118e-05, "loss": 0.7218, "step": 539 }, { "epoch": 0.01658833287254631, "grad_norm": 0.4062642455101013, "learning_rate": 1.999774951764091e-05, "loss": 0.5745, "step": 540 }, { "epoch": 0.016619052007495468, "grad_norm": 0.3656878173351288, "learning_rate": 1.999773925358326e-05, "loss": 0.6394, "step": 541 }, { "epoch": 0.01664977114244463, "grad_norm": 0.3587823212146759, "learning_rate": 1.9997728966175192e-05, "loss": 0.6091, "step": 542 }, { "epoch": 0.016680490277393788, "grad_norm": 0.4865284562110901, "learning_rate": 1.9997718655416726e-05, "loss": 0.646, "step": 543 }, { "epoch": 0.01671120941234295, "grad_norm": 0.353466272354126, "learning_rate": 1.9997708321307896e-05, "loss": 0.6551, "step": 544 }, { "epoch": 0.016741928547292107, "grad_norm": 0.3422155976295471, "learning_rate": 1.9997697963848716e-05, "loss": 0.6432, "step": 545 }, { "epoch": 0.01677264768224127, "grad_norm": 0.35569289326667786, "learning_rate": 1.9997687583039216e-05, "loss": 0.6861, "step": 546 }, { "epoch": 0.016803366817190427, "grad_norm": 0.4082132875919342, "learning_rate": 1.9997677178879417e-05, "loss": 0.6735, "step": 547 }, { "epoch": 0.016834085952139588, "grad_norm": 0.37187737226486206, "learning_rate": 1.9997666751369345e-05, "loss": 0.7366, "step": 548 }, { "epoch": 0.016864805087088746, "grad_norm": 0.36215445399284363, "learning_rate": 1.9997656300509024e-05, "loss": 0.5643, "step": 549 }, { "epoch": 0.016895524222037907, "grad_norm": 0.3834013342857361, "learning_rate": 1.9997645826298478e-05, "loss": 0.7153, "step": 550 }, { "epoch": 0.01692624335698707, "grad_norm": 0.34866228699684143, "learning_rate": 1.9997635328737734e-05, "loss": 0.6419, "step": 551 }, { "epoch": 0.016956962491936227, "grad_norm": 0.37599804997444153, "learning_rate": 1.9997624807826813e-05, "loss": 0.6726, "step": 552 }, { "epoch": 0.01698768162688539, "grad_norm": 0.6900972127914429, "learning_rate": 1.999761426356574e-05, "loss": 0.6094, "step": 553 }, { "epoch": 0.017018400761834546, "grad_norm": 0.33605897426605225, "learning_rate": 1.9997603695954543e-05, "loss": 0.6587, "step": 554 }, { "epoch": 0.017049119896783708, "grad_norm": 0.34976935386657715, "learning_rate": 1.999759310499324e-05, "loss": 0.6358, "step": 555 }, { "epoch": 0.017079839031732866, "grad_norm": 0.3821108937263489, "learning_rate": 1.9997582490681867e-05, "loss": 0.6899, "step": 556 }, { "epoch": 0.017110558166682027, "grad_norm": 0.3232705891132355, "learning_rate": 1.9997571853020437e-05, "loss": 0.6831, "step": 557 }, { "epoch": 0.017141277301631185, "grad_norm": 0.3710523545742035, "learning_rate": 1.9997561192008982e-05, "loss": 0.6348, "step": 558 }, { "epoch": 0.017171996436580347, "grad_norm": 0.3424838185310364, "learning_rate": 1.999755050764752e-05, "loss": 0.6134, "step": 559 }, { "epoch": 0.017202715571529505, "grad_norm": 0.3667562007904053, "learning_rate": 1.999753979993609e-05, "loss": 0.6363, "step": 560 }, { "epoch": 0.017233434706478666, "grad_norm": 0.3751333951950073, "learning_rate": 1.9997529068874697e-05, "loss": 0.6536, "step": 561 }, { "epoch": 0.017264153841427824, "grad_norm": 0.3676891326904297, "learning_rate": 1.9997518314463383e-05, "loss": 0.7063, "step": 562 }, { "epoch": 0.017294872976376986, "grad_norm": 0.3435770571231842, "learning_rate": 1.9997507536702165e-05, "loss": 0.6254, "step": 563 }, { "epoch": 0.017325592111326144, "grad_norm": 0.37565135955810547, "learning_rate": 1.9997496735591067e-05, "loss": 0.6652, "step": 564 }, { "epoch": 0.017356311246275305, "grad_norm": 0.3480658531188965, "learning_rate": 1.9997485911130124e-05, "loss": 0.6396, "step": 565 }, { "epoch": 0.017387030381224466, "grad_norm": 0.34443679451942444, "learning_rate": 1.999747506331935e-05, "loss": 0.7708, "step": 566 }, { "epoch": 0.017417749516173624, "grad_norm": 0.34110283851623535, "learning_rate": 1.9997464192158773e-05, "loss": 0.6559, "step": 567 }, { "epoch": 0.017448468651122786, "grad_norm": 0.370608389377594, "learning_rate": 1.9997453297648422e-05, "loss": 0.6121, "step": 568 }, { "epoch": 0.017479187786071944, "grad_norm": 0.3690973222255707, "learning_rate": 1.9997442379788324e-05, "loss": 0.6774, "step": 569 }, { "epoch": 0.017509906921021105, "grad_norm": 0.3351481854915619, "learning_rate": 1.99974314385785e-05, "loss": 0.6453, "step": 570 }, { "epoch": 0.017540626055970263, "grad_norm": 0.33749860525131226, "learning_rate": 1.999742047401897e-05, "loss": 0.61, "step": 571 }, { "epoch": 0.017571345190919425, "grad_norm": 0.35784682631492615, "learning_rate": 1.9997409486109773e-05, "loss": 0.5679, "step": 572 }, { "epoch": 0.017602064325868583, "grad_norm": 0.4049821197986603, "learning_rate": 1.9997398474850926e-05, "loss": 0.5999, "step": 573 }, { "epoch": 0.017632783460817744, "grad_norm": 0.3599804639816284, "learning_rate": 1.9997387440242458e-05, "loss": 0.709, "step": 574 }, { "epoch": 0.017663502595766902, "grad_norm": 0.39153963327407837, "learning_rate": 1.9997376382284394e-05, "loss": 0.6476, "step": 575 }, { "epoch": 0.017694221730716064, "grad_norm": 0.36624693870544434, "learning_rate": 1.9997365300976753e-05, "loss": 0.6799, "step": 576 }, { "epoch": 0.01772494086566522, "grad_norm": 0.3636245131492615, "learning_rate": 1.999735419631957e-05, "loss": 0.6686, "step": 577 }, { "epoch": 0.017755660000614383, "grad_norm": 0.3377722501754761, "learning_rate": 1.9997343068312872e-05, "loss": 0.6324, "step": 578 }, { "epoch": 0.01778637913556354, "grad_norm": 0.37362775206565857, "learning_rate": 1.999733191695668e-05, "loss": 0.6422, "step": 579 }, { "epoch": 0.017817098270512702, "grad_norm": 0.39325955510139465, "learning_rate": 1.999732074225102e-05, "loss": 0.664, "step": 580 }, { "epoch": 0.01784781740546186, "grad_norm": 0.4099566638469696, "learning_rate": 1.999730954419592e-05, "loss": 0.7237, "step": 581 }, { "epoch": 0.017878536540411022, "grad_norm": 0.35113975405693054, "learning_rate": 1.9997298322791402e-05, "loss": 0.6564, "step": 582 }, { "epoch": 0.017909255675360183, "grad_norm": 0.379182904958725, "learning_rate": 1.9997287078037498e-05, "loss": 0.6373, "step": 583 }, { "epoch": 0.01793997481030934, "grad_norm": 0.3741150498390198, "learning_rate": 1.9997275809934233e-05, "loss": 0.6666, "step": 584 }, { "epoch": 0.017970693945258503, "grad_norm": 0.4382103383541107, "learning_rate": 1.999726451848163e-05, "loss": 0.6797, "step": 585 }, { "epoch": 0.01800141308020766, "grad_norm": 0.39685511589050293, "learning_rate": 1.9997253203679718e-05, "loss": 0.6729, "step": 586 }, { "epoch": 0.018032132215156822, "grad_norm": 0.3549593985080719, "learning_rate": 1.9997241865528526e-05, "loss": 0.5899, "step": 587 }, { "epoch": 0.01806285135010598, "grad_norm": 0.35407641530036926, "learning_rate": 1.9997230504028074e-05, "loss": 0.6364, "step": 588 }, { "epoch": 0.01809357048505514, "grad_norm": 0.349229097366333, "learning_rate": 1.9997219119178394e-05, "loss": 0.6009, "step": 589 }, { "epoch": 0.0181242896200043, "grad_norm": 0.31240102648735046, "learning_rate": 1.9997207710979505e-05, "loss": 0.6565, "step": 590 }, { "epoch": 0.01815500875495346, "grad_norm": 0.36765244603157043, "learning_rate": 1.9997196279431444e-05, "loss": 0.6724, "step": 591 }, { "epoch": 0.01818572788990262, "grad_norm": 0.38883981108665466, "learning_rate": 1.9997184824534233e-05, "loss": 0.703, "step": 592 }, { "epoch": 0.01821644702485178, "grad_norm": 0.3177252411842346, "learning_rate": 1.9997173346287897e-05, "loss": 0.5853, "step": 593 }, { "epoch": 0.01824716615980094, "grad_norm": 0.35797935724258423, "learning_rate": 1.9997161844692465e-05, "loss": 0.6474, "step": 594 }, { "epoch": 0.0182778852947501, "grad_norm": 0.40504375100135803, "learning_rate": 1.9997150319747964e-05, "loss": 0.6879, "step": 595 }, { "epoch": 0.018308604429699258, "grad_norm": 0.41175809502601624, "learning_rate": 1.999713877145442e-05, "loss": 0.6556, "step": 596 }, { "epoch": 0.01833932356464842, "grad_norm": 0.36256200075149536, "learning_rate": 1.999712719981186e-05, "loss": 0.6123, "step": 597 }, { "epoch": 0.01837004269959758, "grad_norm": 1.7380759716033936, "learning_rate": 1.999711560482031e-05, "loss": 0.5889, "step": 598 }, { "epoch": 0.01840076183454674, "grad_norm": 0.35388273000717163, "learning_rate": 1.9997103986479805e-05, "loss": 0.6453, "step": 599 }, { "epoch": 0.0184314809694959, "grad_norm": 0.3751313388347626, "learning_rate": 1.9997092344790355e-05, "loss": 0.6683, "step": 600 }, { "epoch": 0.01846220010444506, "grad_norm": 0.4096778333187103, "learning_rate": 1.9997080679752005e-05, "loss": 0.6361, "step": 601 }, { "epoch": 0.01849291923939422, "grad_norm": 0.3577617406845093, "learning_rate": 1.999706899136477e-05, "loss": 0.7152, "step": 602 }, { "epoch": 0.018523638374343378, "grad_norm": 0.3427104949951172, "learning_rate": 1.9997057279628687e-05, "loss": 0.6885, "step": 603 }, { "epoch": 0.01855435750929254, "grad_norm": 0.35069340467453003, "learning_rate": 1.999704554454378e-05, "loss": 0.6332, "step": 604 }, { "epoch": 0.018585076644241697, "grad_norm": 0.3404875099658966, "learning_rate": 1.999703378611007e-05, "loss": 0.6969, "step": 605 }, { "epoch": 0.01861579577919086, "grad_norm": 0.41629061102867126, "learning_rate": 1.999702200432759e-05, "loss": 0.6799, "step": 606 }, { "epoch": 0.018646514914140017, "grad_norm": 0.3499424159526825, "learning_rate": 1.9997010199196368e-05, "loss": 0.7034, "step": 607 }, { "epoch": 0.018677234049089178, "grad_norm": 0.4346235692501068, "learning_rate": 1.999699837071643e-05, "loss": 0.6681, "step": 608 }, { "epoch": 0.018707953184038336, "grad_norm": 0.319369375705719, "learning_rate": 1.9996986518887803e-05, "loss": 0.5927, "step": 609 }, { "epoch": 0.018738672318987497, "grad_norm": 0.34068673849105835, "learning_rate": 1.999697464371052e-05, "loss": 0.5987, "step": 610 }, { "epoch": 0.018769391453936655, "grad_norm": 0.3689308166503906, "learning_rate": 1.99969627451846e-05, "loss": 0.6897, "step": 611 }, { "epoch": 0.018800110588885817, "grad_norm": 0.3898228704929352, "learning_rate": 1.9996950823310077e-05, "loss": 0.7454, "step": 612 }, { "epoch": 0.01883082972383498, "grad_norm": 0.38592562079429626, "learning_rate": 1.9996938878086976e-05, "loss": 0.6769, "step": 613 }, { "epoch": 0.018861548858784136, "grad_norm": 0.307441383600235, "learning_rate": 1.9996926909515328e-05, "loss": 0.6565, "step": 614 }, { "epoch": 0.018892267993733298, "grad_norm": 0.36085936427116394, "learning_rate": 1.999691491759516e-05, "loss": 0.6423, "step": 615 }, { "epoch": 0.018922987128682456, "grad_norm": 0.3521069586277008, "learning_rate": 1.9996902902326496e-05, "loss": 0.5909, "step": 616 }, { "epoch": 0.018953706263631617, "grad_norm": 0.4399057924747467, "learning_rate": 1.9996890863709367e-05, "loss": 0.5927, "step": 617 }, { "epoch": 0.018984425398580775, "grad_norm": 0.3735198974609375, "learning_rate": 1.9996878801743805e-05, "loss": 0.7315, "step": 618 }, { "epoch": 0.019015144533529937, "grad_norm": 0.3371661901473999, "learning_rate": 1.9996866716429834e-05, "loss": 0.6963, "step": 619 }, { "epoch": 0.019045863668479095, "grad_norm": 0.3382458984851837, "learning_rate": 1.999685460776748e-05, "loss": 0.6525, "step": 620 }, { "epoch": 0.019076582803428256, "grad_norm": 0.3707584738731384, "learning_rate": 1.9996842475756773e-05, "loss": 0.7032, "step": 621 }, { "epoch": 0.019107301938377414, "grad_norm": 0.3761541545391083, "learning_rate": 1.999683032039774e-05, "loss": 0.6864, "step": 622 }, { "epoch": 0.019138021073326576, "grad_norm": 0.39501962065696716, "learning_rate": 1.999681814169042e-05, "loss": 0.7545, "step": 623 }, { "epoch": 0.019168740208275734, "grad_norm": 0.37704703211784363, "learning_rate": 1.9996805939634826e-05, "loss": 0.6846, "step": 624 }, { "epoch": 0.019199459343224895, "grad_norm": 0.3340139389038086, "learning_rate": 1.9996793714230997e-05, "loss": 0.6824, "step": 625 }, { "epoch": 0.019230178478174053, "grad_norm": 0.39260411262512207, "learning_rate": 1.9996781465478956e-05, "loss": 0.674, "step": 626 }, { "epoch": 0.019260897613123214, "grad_norm": 0.3635219931602478, "learning_rate": 1.9996769193378736e-05, "loss": 0.692, "step": 627 }, { "epoch": 0.019291616748072376, "grad_norm": 0.4429719150066376, "learning_rate": 1.9996756897930362e-05, "loss": 0.6444, "step": 628 }, { "epoch": 0.019322335883021534, "grad_norm": 0.5513244867324829, "learning_rate": 1.9996744579133863e-05, "loss": 0.6108, "step": 629 }, { "epoch": 0.019353055017970695, "grad_norm": 0.3571275472640991, "learning_rate": 1.999673223698927e-05, "loss": 0.6533, "step": 630 }, { "epoch": 0.019383774152919853, "grad_norm": 0.5451866984367371, "learning_rate": 1.9996719871496614e-05, "loss": 0.6953, "step": 631 }, { "epoch": 0.019414493287869015, "grad_norm": 0.3355654776096344, "learning_rate": 1.9996707482655915e-05, "loss": 0.6528, "step": 632 }, { "epoch": 0.019445212422818173, "grad_norm": 0.3886340260505676, "learning_rate": 1.9996695070467213e-05, "loss": 0.6084, "step": 633 }, { "epoch": 0.019475931557767334, "grad_norm": 0.3285294473171234, "learning_rate": 1.9996682634930526e-05, "loss": 0.6433, "step": 634 }, { "epoch": 0.019506650692716492, "grad_norm": 0.344137966632843, "learning_rate": 1.999667017604589e-05, "loss": 0.6041, "step": 635 }, { "epoch": 0.019537369827665654, "grad_norm": 0.38496875762939453, "learning_rate": 1.9996657693813334e-05, "loss": 0.6424, "step": 636 }, { "epoch": 0.01956808896261481, "grad_norm": 0.37379342317581177, "learning_rate": 1.9996645188232887e-05, "loss": 0.6142, "step": 637 }, { "epoch": 0.019598808097563973, "grad_norm": 0.35943636298179626, "learning_rate": 1.9996632659304573e-05, "loss": 0.642, "step": 638 }, { "epoch": 0.01962952723251313, "grad_norm": 0.383228063583374, "learning_rate": 1.999662010702843e-05, "loss": 0.6321, "step": 639 }, { "epoch": 0.019660246367462293, "grad_norm": 0.34772342443466187, "learning_rate": 1.999660753140448e-05, "loss": 0.6929, "step": 640 }, { "epoch": 0.01969096550241145, "grad_norm": 0.37511345744132996, "learning_rate": 1.9996594932432755e-05, "loss": 0.684, "step": 641 }, { "epoch": 0.019721684637360612, "grad_norm": 0.4366251826286316, "learning_rate": 1.9996582310113285e-05, "loss": 0.678, "step": 642 }, { "epoch": 0.019752403772309773, "grad_norm": 0.4729257822036743, "learning_rate": 1.9996569664446098e-05, "loss": 0.6917, "step": 643 }, { "epoch": 0.01978312290725893, "grad_norm": 0.338461309671402, "learning_rate": 1.9996556995431223e-05, "loss": 0.6788, "step": 644 }, { "epoch": 0.019813842042208093, "grad_norm": 0.4378059208393097, "learning_rate": 1.9996544303068695e-05, "loss": 0.732, "step": 645 }, { "epoch": 0.01984456117715725, "grad_norm": 0.38162198662757874, "learning_rate": 1.999653158735854e-05, "loss": 0.6496, "step": 646 }, { "epoch": 0.019875280312106412, "grad_norm": 0.34327518939971924, "learning_rate": 1.9996518848300783e-05, "loss": 0.5781, "step": 647 }, { "epoch": 0.01990599944705557, "grad_norm": 0.655466616153717, "learning_rate": 1.999650608589546e-05, "loss": 0.603, "step": 648 }, { "epoch": 0.01993671858200473, "grad_norm": 0.4052770733833313, "learning_rate": 1.99964933001426e-05, "loss": 0.633, "step": 649 }, { "epoch": 0.01996743771695389, "grad_norm": 0.35274645686149597, "learning_rate": 1.9996480491042228e-05, "loss": 0.642, "step": 650 }, { "epoch": 0.01999815685190305, "grad_norm": 0.3503863513469696, "learning_rate": 1.9996467658594382e-05, "loss": 0.6778, "step": 651 }, { "epoch": 0.02002887598685221, "grad_norm": 0.4011728763580322, "learning_rate": 1.9996454802799086e-05, "loss": 0.6183, "step": 652 }, { "epoch": 0.02005959512180137, "grad_norm": 0.35220634937286377, "learning_rate": 1.999644192365637e-05, "loss": 0.6796, "step": 653 }, { "epoch": 0.02009031425675053, "grad_norm": 0.35523271560668945, "learning_rate": 1.9996429021166266e-05, "loss": 0.6267, "step": 654 }, { "epoch": 0.02012103339169969, "grad_norm": 0.35560765862464905, "learning_rate": 1.999641609532881e-05, "loss": 0.6581, "step": 655 }, { "epoch": 0.020151752526648848, "grad_norm": 0.3332947790622711, "learning_rate": 1.9996403146144017e-05, "loss": 0.6577, "step": 656 }, { "epoch": 0.02018247166159801, "grad_norm": 0.4000426232814789, "learning_rate": 1.999639017361193e-05, "loss": 0.5903, "step": 657 }, { "epoch": 0.02021319079654717, "grad_norm": 0.3624047338962555, "learning_rate": 1.999637717773258e-05, "loss": 0.6296, "step": 658 }, { "epoch": 0.02024390993149633, "grad_norm": 0.3693142831325531, "learning_rate": 1.999636415850599e-05, "loss": 0.6395, "step": 659 }, { "epoch": 0.02027462906644549, "grad_norm": 0.3308466076850891, "learning_rate": 1.999635111593219e-05, "loss": 0.6513, "step": 660 }, { "epoch": 0.02030534820139465, "grad_norm": 0.4649997055530548, "learning_rate": 1.9996338050011218e-05, "loss": 0.6194, "step": 661 }, { "epoch": 0.02033606733634381, "grad_norm": 0.37187445163726807, "learning_rate": 1.9996324960743098e-05, "loss": 0.6349, "step": 662 }, { "epoch": 0.020366786471292968, "grad_norm": 0.3458600640296936, "learning_rate": 1.999631184812786e-05, "loss": 0.732, "step": 663 }, { "epoch": 0.02039750560624213, "grad_norm": 0.3875254690647125, "learning_rate": 1.9996298712165544e-05, "loss": 0.6638, "step": 664 }, { "epoch": 0.020428224741191287, "grad_norm": 0.33852434158325195, "learning_rate": 1.999628555285617e-05, "loss": 0.6338, "step": 665 }, { "epoch": 0.02045894387614045, "grad_norm": 0.3661525249481201, "learning_rate": 1.9996272370199775e-05, "loss": 0.6487, "step": 666 }, { "epoch": 0.020489663011089607, "grad_norm": 0.43532538414001465, "learning_rate": 1.9996259164196386e-05, "loss": 0.6447, "step": 667 }, { "epoch": 0.020520382146038768, "grad_norm": 0.34020864963531494, "learning_rate": 1.999624593484604e-05, "loss": 0.6866, "step": 668 }, { "epoch": 0.020551101280987926, "grad_norm": 0.34931477904319763, "learning_rate": 1.9996232682148756e-05, "loss": 0.631, "step": 669 }, { "epoch": 0.020581820415937088, "grad_norm": 0.35504257678985596, "learning_rate": 1.999621940610458e-05, "loss": 0.6354, "step": 670 }, { "epoch": 0.020612539550886246, "grad_norm": 0.3335193693637848, "learning_rate": 1.999620610671353e-05, "loss": 0.6088, "step": 671 }, { "epoch": 0.020643258685835407, "grad_norm": 0.3360497057437897, "learning_rate": 1.9996192783975642e-05, "loss": 0.5768, "step": 672 }, { "epoch": 0.02067397782078457, "grad_norm": 0.35713326930999756, "learning_rate": 1.9996179437890954e-05, "loss": 0.6847, "step": 673 }, { "epoch": 0.020704696955733726, "grad_norm": 0.5350825786590576, "learning_rate": 1.9996166068459483e-05, "loss": 0.6799, "step": 674 }, { "epoch": 0.020735416090682888, "grad_norm": 0.3860687017440796, "learning_rate": 1.9996152675681274e-05, "loss": 0.68, "step": 675 }, { "epoch": 0.020766135225632046, "grad_norm": 0.365662544965744, "learning_rate": 1.999613925955635e-05, "loss": 0.6335, "step": 676 }, { "epoch": 0.020796854360581207, "grad_norm": 0.36162814497947693, "learning_rate": 1.9996125820084745e-05, "loss": 0.6509, "step": 677 }, { "epoch": 0.020827573495530365, "grad_norm": 0.4481048882007599, "learning_rate": 1.999611235726649e-05, "loss": 0.6778, "step": 678 }, { "epoch": 0.020858292630479527, "grad_norm": 0.37087756395339966, "learning_rate": 1.9996098871101617e-05, "loss": 0.6782, "step": 679 }, { "epoch": 0.020889011765428685, "grad_norm": 0.43033871054649353, "learning_rate": 1.9996085361590155e-05, "loss": 0.6212, "step": 680 }, { "epoch": 0.020919730900377846, "grad_norm": 0.3877839744091034, "learning_rate": 1.999607182873214e-05, "loss": 0.6683, "step": 681 }, { "epoch": 0.020950450035327004, "grad_norm": 0.3336257338523865, "learning_rate": 1.99960582725276e-05, "loss": 0.6524, "step": 682 }, { "epoch": 0.020981169170276166, "grad_norm": 0.3160320222377777, "learning_rate": 1.9996044692976567e-05, "loss": 0.5913, "step": 683 }, { "epoch": 0.021011888305225324, "grad_norm": 0.33931928873062134, "learning_rate": 1.9996031090079075e-05, "loss": 0.5923, "step": 684 }, { "epoch": 0.021042607440174485, "grad_norm": 0.33188575506210327, "learning_rate": 1.9996017463835153e-05, "loss": 0.6683, "step": 685 }, { "epoch": 0.021073326575123643, "grad_norm": 0.35781964659690857, "learning_rate": 1.9996003814244835e-05, "loss": 0.6404, "step": 686 }, { "epoch": 0.021104045710072804, "grad_norm": 0.3614464998245239, "learning_rate": 1.9995990141308154e-05, "loss": 0.6202, "step": 687 }, { "epoch": 0.021134764845021962, "grad_norm": 0.358516663312912, "learning_rate": 1.9995976445025134e-05, "loss": 0.6329, "step": 688 }, { "epoch": 0.021165483979971124, "grad_norm": 0.361712783575058, "learning_rate": 1.9995962725395817e-05, "loss": 0.6538, "step": 689 }, { "epoch": 0.021196203114920285, "grad_norm": 0.37210845947265625, "learning_rate": 1.999594898242023e-05, "loss": 0.703, "step": 690 }, { "epoch": 0.021226922249869443, "grad_norm": 0.37681689858436584, "learning_rate": 1.999593521609841e-05, "loss": 0.6056, "step": 691 }, { "epoch": 0.021257641384818605, "grad_norm": 0.46939870715141296, "learning_rate": 1.999592142643038e-05, "loss": 0.7577, "step": 692 }, { "epoch": 0.021288360519767763, "grad_norm": 0.36975857615470886, "learning_rate": 1.999590761341618e-05, "loss": 0.7174, "step": 693 }, { "epoch": 0.021319079654716924, "grad_norm": 0.34185007214546204, "learning_rate": 1.9995893777055836e-05, "loss": 0.6457, "step": 694 }, { "epoch": 0.021349798789666082, "grad_norm": 0.3308317959308624, "learning_rate": 1.999587991734939e-05, "loss": 0.6848, "step": 695 }, { "epoch": 0.021380517924615244, "grad_norm": 0.3272716999053955, "learning_rate": 1.9995866034296862e-05, "loss": 0.6562, "step": 696 }, { "epoch": 0.0214112370595644, "grad_norm": 0.31899935007095337, "learning_rate": 1.9995852127898297e-05, "loss": 0.6303, "step": 697 }, { "epoch": 0.021441956194513563, "grad_norm": 0.343513160943985, "learning_rate": 1.9995838198153715e-05, "loss": 0.6489, "step": 698 }, { "epoch": 0.02147267532946272, "grad_norm": 0.3756782114505768, "learning_rate": 1.9995824245063157e-05, "loss": 0.6966, "step": 699 }, { "epoch": 0.021503394464411883, "grad_norm": 0.34475603699684143, "learning_rate": 1.9995810268626657e-05, "loss": 0.6059, "step": 700 }, { "epoch": 0.02153411359936104, "grad_norm": 0.3745661973953247, "learning_rate": 1.999579626884424e-05, "loss": 0.6981, "step": 701 }, { "epoch": 0.021564832734310202, "grad_norm": 0.39941927790641785, "learning_rate": 1.9995782245715943e-05, "loss": 0.6425, "step": 702 }, { "epoch": 0.02159555186925936, "grad_norm": 0.3506154417991638, "learning_rate": 1.9995768199241802e-05, "loss": 0.6518, "step": 703 }, { "epoch": 0.02162627100420852, "grad_norm": 0.36155733466148376, "learning_rate": 1.9995754129421844e-05, "loss": 0.7037, "step": 704 }, { "epoch": 0.021656990139157683, "grad_norm": 0.31318923830986023, "learning_rate": 1.9995740036256102e-05, "loss": 0.6144, "step": 705 }, { "epoch": 0.02168770927410684, "grad_norm": 0.36624830961227417, "learning_rate": 1.9995725919744614e-05, "loss": 0.6446, "step": 706 }, { "epoch": 0.021718428409056002, "grad_norm": 0.3366883397102356, "learning_rate": 1.9995711779887408e-05, "loss": 0.5609, "step": 707 }, { "epoch": 0.02174914754400516, "grad_norm": 0.34616565704345703, "learning_rate": 1.999569761668452e-05, "loss": 0.6338, "step": 708 }, { "epoch": 0.021779866678954322, "grad_norm": 0.3957144618034363, "learning_rate": 1.999568343013598e-05, "loss": 0.6372, "step": 709 }, { "epoch": 0.02181058581390348, "grad_norm": 0.33683693408966064, "learning_rate": 1.999566922024183e-05, "loss": 0.7073, "step": 710 }, { "epoch": 0.02184130494885264, "grad_norm": 0.3823423683643341, "learning_rate": 1.999565498700209e-05, "loss": 0.6907, "step": 711 }, { "epoch": 0.0218720240838018, "grad_norm": 0.4063527584075928, "learning_rate": 1.99956407304168e-05, "loss": 0.5691, "step": 712 }, { "epoch": 0.02190274321875096, "grad_norm": 0.3939085602760315, "learning_rate": 1.9995626450485996e-05, "loss": 0.6788, "step": 713 }, { "epoch": 0.02193346235370012, "grad_norm": 0.389740526676178, "learning_rate": 1.9995612147209706e-05, "loss": 0.6848, "step": 714 }, { "epoch": 0.02196418148864928, "grad_norm": 0.33408400416374207, "learning_rate": 1.9995597820587966e-05, "loss": 0.6911, "step": 715 }, { "epoch": 0.021994900623598438, "grad_norm": 0.5136900544166565, "learning_rate": 1.9995583470620808e-05, "loss": 0.68, "step": 716 }, { "epoch": 0.0220256197585476, "grad_norm": 0.34777143597602844, "learning_rate": 1.999556909730827e-05, "loss": 0.5712, "step": 717 }, { "epoch": 0.022056338893496757, "grad_norm": 0.44120004773139954, "learning_rate": 1.999555470065038e-05, "loss": 0.6556, "step": 718 }, { "epoch": 0.02208705802844592, "grad_norm": 0.34619706869125366, "learning_rate": 1.9995540280647177e-05, "loss": 0.5688, "step": 719 }, { "epoch": 0.02211777716339508, "grad_norm": 0.35979968309402466, "learning_rate": 1.999552583729869e-05, "loss": 0.6327, "step": 720 }, { "epoch": 0.02214849629834424, "grad_norm": 0.36372044682502747, "learning_rate": 1.9995511370604953e-05, "loss": 0.6747, "step": 721 }, { "epoch": 0.0221792154332934, "grad_norm": 0.3400021195411682, "learning_rate": 1.9995496880566e-05, "loss": 0.6644, "step": 722 }, { "epoch": 0.022209934568242558, "grad_norm": 0.3551691472530365, "learning_rate": 1.999548236718187e-05, "loss": 0.6554, "step": 723 }, { "epoch": 0.02224065370319172, "grad_norm": 0.5034741759300232, "learning_rate": 1.999546783045259e-05, "loss": 0.595, "step": 724 }, { "epoch": 0.022271372838140877, "grad_norm": 0.3290124237537384, "learning_rate": 1.9995453270378195e-05, "loss": 0.6337, "step": 725 }, { "epoch": 0.02230209197309004, "grad_norm": 0.32322415709495544, "learning_rate": 1.9995438686958725e-05, "loss": 0.6227, "step": 726 }, { "epoch": 0.022332811108039197, "grad_norm": 0.35580378770828247, "learning_rate": 1.9995424080194207e-05, "loss": 0.6709, "step": 727 }, { "epoch": 0.022363530242988358, "grad_norm": 0.35495415329933167, "learning_rate": 1.9995409450084678e-05, "loss": 0.6129, "step": 728 }, { "epoch": 0.022394249377937516, "grad_norm": 0.36138176918029785, "learning_rate": 1.9995394796630176e-05, "loss": 0.6723, "step": 729 }, { "epoch": 0.022424968512886678, "grad_norm": 0.41997915506362915, "learning_rate": 1.9995380119830727e-05, "loss": 0.6933, "step": 730 }, { "epoch": 0.022455687647835836, "grad_norm": 0.39947545528411865, "learning_rate": 1.9995365419686373e-05, "loss": 0.7338, "step": 731 }, { "epoch": 0.022486406782784997, "grad_norm": 0.34646695852279663, "learning_rate": 1.9995350696197143e-05, "loss": 0.6538, "step": 732 }, { "epoch": 0.022517125917734155, "grad_norm": 0.36460742354393005, "learning_rate": 1.9995335949363073e-05, "loss": 0.7014, "step": 733 }, { "epoch": 0.022547845052683316, "grad_norm": 0.3511771857738495, "learning_rate": 1.9995321179184198e-05, "loss": 0.5649, "step": 734 }, { "epoch": 0.022578564187632478, "grad_norm": 0.384949266910553, "learning_rate": 1.9995306385660553e-05, "loss": 0.5573, "step": 735 }, { "epoch": 0.022609283322581636, "grad_norm": 0.3244633972644806, "learning_rate": 1.9995291568792172e-05, "loss": 0.6293, "step": 736 }, { "epoch": 0.022640002457530797, "grad_norm": 0.3582436740398407, "learning_rate": 1.999527672857909e-05, "loss": 0.6812, "step": 737 }, { "epoch": 0.022670721592479955, "grad_norm": 0.3974528908729553, "learning_rate": 1.9995261865021337e-05, "loss": 0.6567, "step": 738 }, { "epoch": 0.022701440727429117, "grad_norm": 0.34583213925361633, "learning_rate": 1.9995246978118957e-05, "loss": 0.6519, "step": 739 }, { "epoch": 0.022732159862378275, "grad_norm": 0.35737287998199463, "learning_rate": 1.9995232067871976e-05, "loss": 0.6415, "step": 740 }, { "epoch": 0.022762878997327436, "grad_norm": 0.38277292251586914, "learning_rate": 1.9995217134280436e-05, "loss": 0.6042, "step": 741 }, { "epoch": 0.022793598132276594, "grad_norm": 0.37878045439720154, "learning_rate": 1.9995202177344366e-05, "loss": 0.6012, "step": 742 }, { "epoch": 0.022824317267225756, "grad_norm": 0.35402625799179077, "learning_rate": 1.9995187197063803e-05, "loss": 0.7019, "step": 743 }, { "epoch": 0.022855036402174914, "grad_norm": 0.34049639105796814, "learning_rate": 1.9995172193438782e-05, "loss": 0.5754, "step": 744 }, { "epoch": 0.022885755537124075, "grad_norm": 0.5275947451591492, "learning_rate": 1.999515716646934e-05, "loss": 0.6833, "step": 745 }, { "epoch": 0.022916474672073233, "grad_norm": 0.3617114722728729, "learning_rate": 1.999514211615551e-05, "loss": 0.6544, "step": 746 }, { "epoch": 0.022947193807022395, "grad_norm": 0.31442174315452576, "learning_rate": 1.9995127042497325e-05, "loss": 0.6007, "step": 747 }, { "epoch": 0.022977912941971552, "grad_norm": 0.3456467092037201, "learning_rate": 1.9995111945494826e-05, "loss": 0.6111, "step": 748 }, { "epoch": 0.023008632076920714, "grad_norm": 0.39414599537849426, "learning_rate": 1.9995096825148045e-05, "loss": 0.5804, "step": 749 }, { "epoch": 0.023039351211869875, "grad_norm": 0.4285268187522888, "learning_rate": 1.9995081681457016e-05, "loss": 0.6215, "step": 750 }, { "epoch": 0.023070070346819033, "grad_norm": 0.34765833616256714, "learning_rate": 1.9995066514421775e-05, "loss": 0.6209, "step": 751 }, { "epoch": 0.023100789481768195, "grad_norm": 0.3567025363445282, "learning_rate": 1.999505132404236e-05, "loss": 0.6387, "step": 752 }, { "epoch": 0.023131508616717353, "grad_norm": 0.3617381155490875, "learning_rate": 1.9995036110318804e-05, "loss": 0.6759, "step": 753 }, { "epoch": 0.023162227751666514, "grad_norm": 0.3386414051055908, "learning_rate": 1.9995020873251143e-05, "loss": 0.6702, "step": 754 }, { "epoch": 0.023192946886615672, "grad_norm": 0.5528104305267334, "learning_rate": 1.999500561283941e-05, "loss": 0.6896, "step": 755 }, { "epoch": 0.023223666021564834, "grad_norm": 0.375530868768692, "learning_rate": 1.9994990329083647e-05, "loss": 0.7641, "step": 756 }, { "epoch": 0.02325438515651399, "grad_norm": 0.38734525442123413, "learning_rate": 1.9994975021983885e-05, "loss": 0.5741, "step": 757 }, { "epoch": 0.023285104291463153, "grad_norm": 0.353128045797348, "learning_rate": 1.9994959691540163e-05, "loss": 0.6586, "step": 758 }, { "epoch": 0.02331582342641231, "grad_norm": 0.4162643551826477, "learning_rate": 1.999494433775251e-05, "loss": 0.6727, "step": 759 }, { "epoch": 0.023346542561361473, "grad_norm": 0.38830530643463135, "learning_rate": 1.999492896062097e-05, "loss": 0.6702, "step": 760 }, { "epoch": 0.02337726169631063, "grad_norm": 0.3813096284866333, "learning_rate": 1.9994913560145574e-05, "loss": 0.6763, "step": 761 }, { "epoch": 0.023407980831259792, "grad_norm": 0.35217440128326416, "learning_rate": 1.999489813632636e-05, "loss": 0.652, "step": 762 }, { "epoch": 0.02343869996620895, "grad_norm": 0.3833211064338684, "learning_rate": 1.999488268916336e-05, "loss": 0.6241, "step": 763 }, { "epoch": 0.02346941910115811, "grad_norm": 0.3450303375720978, "learning_rate": 1.9994867218656616e-05, "loss": 0.5678, "step": 764 }, { "epoch": 0.023500138236107273, "grad_norm": 0.401212215423584, "learning_rate": 1.9994851724806164e-05, "loss": 0.7116, "step": 765 }, { "epoch": 0.02353085737105643, "grad_norm": 0.36074939370155334, "learning_rate": 1.9994836207612036e-05, "loss": 0.717, "step": 766 }, { "epoch": 0.023561576506005592, "grad_norm": 0.35319191217422485, "learning_rate": 1.9994820667074267e-05, "loss": 0.7284, "step": 767 }, { "epoch": 0.02359229564095475, "grad_norm": 0.36707571148872375, "learning_rate": 1.9994805103192903e-05, "loss": 0.6625, "step": 768 }, { "epoch": 0.023623014775903912, "grad_norm": 0.3228600025177002, "learning_rate": 1.999478951596797e-05, "loss": 0.5961, "step": 769 }, { "epoch": 0.02365373391085307, "grad_norm": 0.3475819230079651, "learning_rate": 1.9994773905399504e-05, "loss": 0.5508, "step": 770 }, { "epoch": 0.02368445304580223, "grad_norm": 0.40725216269493103, "learning_rate": 1.999475827148755e-05, "loss": 0.6972, "step": 771 }, { "epoch": 0.02371517218075139, "grad_norm": 0.35891595482826233, "learning_rate": 1.999474261423214e-05, "loss": 0.5908, "step": 772 }, { "epoch": 0.02374589131570055, "grad_norm": 0.3445408046245575, "learning_rate": 1.999472693363331e-05, "loss": 0.5806, "step": 773 }, { "epoch": 0.02377661045064971, "grad_norm": 0.3231574296951294, "learning_rate": 1.9994711229691093e-05, "loss": 0.6987, "step": 774 }, { "epoch": 0.02380732958559887, "grad_norm": 0.34939518570899963, "learning_rate": 1.9994695502405538e-05, "loss": 0.5971, "step": 775 }, { "epoch": 0.023838048720548028, "grad_norm": 0.3990110754966736, "learning_rate": 1.999467975177667e-05, "loss": 0.6759, "step": 776 }, { "epoch": 0.02386876785549719, "grad_norm": 0.3214031457901001, "learning_rate": 1.9994663977804527e-05, "loss": 0.6156, "step": 777 }, { "epoch": 0.023899486990446348, "grad_norm": 0.34276875853538513, "learning_rate": 1.9994648180489153e-05, "loss": 0.615, "step": 778 }, { "epoch": 0.02393020612539551, "grad_norm": 0.3451148569583893, "learning_rate": 1.9994632359830574e-05, "loss": 0.6537, "step": 779 }, { "epoch": 0.02396092526034467, "grad_norm": 0.624241292476654, "learning_rate": 1.999461651582884e-05, "loss": 0.6242, "step": 780 }, { "epoch": 0.02399164439529383, "grad_norm": 0.3776779770851135, "learning_rate": 1.9994600648483976e-05, "loss": 0.6412, "step": 781 }, { "epoch": 0.02402236353024299, "grad_norm": 0.3751891851425171, "learning_rate": 1.9994584757796023e-05, "loss": 0.645, "step": 782 }, { "epoch": 0.024053082665192148, "grad_norm": 0.34398311376571655, "learning_rate": 1.9994568843765022e-05, "loss": 0.6562, "step": 783 }, { "epoch": 0.02408380180014131, "grad_norm": 0.4200153648853302, "learning_rate": 1.999455290639101e-05, "loss": 0.7578, "step": 784 }, { "epoch": 0.024114520935090467, "grad_norm": 0.37069088220596313, "learning_rate": 1.9994536945674018e-05, "loss": 0.6435, "step": 785 }, { "epoch": 0.02414524007003963, "grad_norm": 0.34032025933265686, "learning_rate": 1.9994520961614087e-05, "loss": 0.603, "step": 786 }, { "epoch": 0.024175959204988787, "grad_norm": 0.3333638906478882, "learning_rate": 1.9994504954211256e-05, "loss": 0.6867, "step": 787 }, { "epoch": 0.024206678339937948, "grad_norm": 0.341990202665329, "learning_rate": 1.9994488923465562e-05, "loss": 0.6199, "step": 788 }, { "epoch": 0.024237397474887106, "grad_norm": 0.42287757992744446, "learning_rate": 1.9994472869377036e-05, "loss": 0.668, "step": 789 }, { "epoch": 0.024268116609836268, "grad_norm": 0.31478404998779297, "learning_rate": 1.9994456791945725e-05, "loss": 0.6067, "step": 790 }, { "epoch": 0.024298835744785426, "grad_norm": 0.3145131468772888, "learning_rate": 1.999444069117166e-05, "loss": 0.5834, "step": 791 }, { "epoch": 0.024329554879734587, "grad_norm": 0.3533474802970886, "learning_rate": 1.9994424567054885e-05, "loss": 0.6195, "step": 792 }, { "epoch": 0.024360274014683745, "grad_norm": 0.3833659291267395, "learning_rate": 1.9994408419595427e-05, "loss": 0.645, "step": 793 }, { "epoch": 0.024390993149632906, "grad_norm": 0.3944745659828186, "learning_rate": 1.9994392248793334e-05, "loss": 0.7117, "step": 794 }, { "epoch": 0.024421712284582064, "grad_norm": 0.368807852268219, "learning_rate": 1.9994376054648642e-05, "loss": 0.6191, "step": 795 }, { "epoch": 0.024452431419531226, "grad_norm": 0.4665679335594177, "learning_rate": 1.9994359837161382e-05, "loss": 0.6531, "step": 796 }, { "epoch": 0.024483150554480387, "grad_norm": 0.34524843096733093, "learning_rate": 1.99943435963316e-05, "loss": 0.6637, "step": 797 }, { "epoch": 0.024513869689429545, "grad_norm": 0.3749580681324005, "learning_rate": 1.999432733215933e-05, "loss": 0.6151, "step": 798 }, { "epoch": 0.024544588824378707, "grad_norm": 0.32194986939430237, "learning_rate": 1.999431104464461e-05, "loss": 0.5592, "step": 799 }, { "epoch": 0.024575307959327865, "grad_norm": 0.3510719835758209, "learning_rate": 1.9994294733787474e-05, "loss": 0.6076, "step": 800 }, { "epoch": 0.024606027094277026, "grad_norm": 0.36355942487716675, "learning_rate": 1.9994278399587968e-05, "loss": 0.6031, "step": 801 }, { "epoch": 0.024636746229226184, "grad_norm": 0.39445173740386963, "learning_rate": 1.9994262042046128e-05, "loss": 0.6225, "step": 802 }, { "epoch": 0.024667465364175346, "grad_norm": 0.3646094501018524, "learning_rate": 1.999424566116199e-05, "loss": 0.5859, "step": 803 }, { "epoch": 0.024698184499124504, "grad_norm": 0.3574069142341614, "learning_rate": 1.9994229256935595e-05, "loss": 0.6482, "step": 804 }, { "epoch": 0.024728903634073665, "grad_norm": 0.3421379327774048, "learning_rate": 1.9994212829366976e-05, "loss": 0.645, "step": 805 }, { "epoch": 0.024759622769022823, "grad_norm": 0.34817564487457275, "learning_rate": 1.999419637845618e-05, "loss": 0.7169, "step": 806 }, { "epoch": 0.024790341903971985, "grad_norm": 0.4101845920085907, "learning_rate": 1.9994179904203238e-05, "loss": 0.6419, "step": 807 }, { "epoch": 0.024821061038921143, "grad_norm": 0.32005947828292847, "learning_rate": 1.999416340660819e-05, "loss": 0.599, "step": 808 }, { "epoch": 0.024851780173870304, "grad_norm": 0.36604833602905273, "learning_rate": 1.9994146885671078e-05, "loss": 0.6331, "step": 809 }, { "epoch": 0.024882499308819462, "grad_norm": 0.3340514600276947, "learning_rate": 1.9994130341391934e-05, "loss": 0.6004, "step": 810 }, { "epoch": 0.024913218443768623, "grad_norm": 0.36938372254371643, "learning_rate": 1.9994113773770802e-05, "loss": 0.6308, "step": 811 }, { "epoch": 0.024943937578717785, "grad_norm": 0.33159640431404114, "learning_rate": 1.999409718280772e-05, "loss": 0.6067, "step": 812 }, { "epoch": 0.024974656713666943, "grad_norm": 0.5651868581771851, "learning_rate": 1.9994080568502725e-05, "loss": 0.6208, "step": 813 }, { "epoch": 0.025005375848616104, "grad_norm": 0.3704703450202942, "learning_rate": 1.9994063930855857e-05, "loss": 0.7532, "step": 814 }, { "epoch": 0.025036094983565262, "grad_norm": 0.35838672518730164, "learning_rate": 1.9994047269867157e-05, "loss": 0.5827, "step": 815 }, { "epoch": 0.025066814118514424, "grad_norm": 0.4924098253250122, "learning_rate": 1.9994030585536662e-05, "loss": 0.6933, "step": 816 }, { "epoch": 0.02509753325346358, "grad_norm": 0.47129446268081665, "learning_rate": 1.9994013877864405e-05, "loss": 0.5615, "step": 817 }, { "epoch": 0.025128252388412743, "grad_norm": 0.3338177502155304, "learning_rate": 1.9993997146850438e-05, "loss": 0.5392, "step": 818 }, { "epoch": 0.0251589715233619, "grad_norm": 0.3910692632198334, "learning_rate": 1.9993980392494787e-05, "loss": 0.6248, "step": 819 }, { "epoch": 0.025189690658311063, "grad_norm": 0.3745579719543457, "learning_rate": 1.9993963614797497e-05, "loss": 0.6016, "step": 820 }, { "epoch": 0.02522040979326022, "grad_norm": 0.40972140431404114, "learning_rate": 1.9993946813758607e-05, "loss": 0.6921, "step": 821 }, { "epoch": 0.025251128928209382, "grad_norm": 0.32721611857414246, "learning_rate": 1.999392998937816e-05, "loss": 0.5915, "step": 822 }, { "epoch": 0.02528184806315854, "grad_norm": 0.3533724546432495, "learning_rate": 1.999391314165619e-05, "loss": 0.6254, "step": 823 }, { "epoch": 0.0253125671981077, "grad_norm": 0.360741525888443, "learning_rate": 1.999389627059274e-05, "loss": 0.6552, "step": 824 }, { "epoch": 0.02534328633305686, "grad_norm": 0.35991933941841125, "learning_rate": 1.9993879376187845e-05, "loss": 0.636, "step": 825 }, { "epoch": 0.02537400546800602, "grad_norm": 0.3693443238735199, "learning_rate": 1.9993862458441544e-05, "loss": 0.6132, "step": 826 }, { "epoch": 0.025404724602955182, "grad_norm": 0.42027372121810913, "learning_rate": 1.999384551735388e-05, "loss": 0.5749, "step": 827 }, { "epoch": 0.02543544373790434, "grad_norm": 0.3683750629425049, "learning_rate": 1.9993828552924895e-05, "loss": 0.5891, "step": 828 }, { "epoch": 0.025466162872853502, "grad_norm": 0.3447107672691345, "learning_rate": 1.9993811565154624e-05, "loss": 0.6319, "step": 829 }, { "epoch": 0.02549688200780266, "grad_norm": 0.3316881060600281, "learning_rate": 1.9993794554043107e-05, "loss": 0.6595, "step": 830 }, { "epoch": 0.02552760114275182, "grad_norm": 0.3164406716823578, "learning_rate": 1.9993777519590388e-05, "loss": 0.5373, "step": 831 }, { "epoch": 0.02555832027770098, "grad_norm": 0.35530632734298706, "learning_rate": 1.9993760461796504e-05, "loss": 0.6145, "step": 832 }, { "epoch": 0.02558903941265014, "grad_norm": 0.34377995133399963, "learning_rate": 1.999374338066149e-05, "loss": 0.7133, "step": 833 }, { "epoch": 0.0256197585475993, "grad_norm": 0.34587007761001587, "learning_rate": 1.999372627618539e-05, "loss": 0.6881, "step": 834 }, { "epoch": 0.02565047768254846, "grad_norm": 0.3629341423511505, "learning_rate": 1.999370914836825e-05, "loss": 0.6366, "step": 835 }, { "epoch": 0.025681196817497618, "grad_norm": 0.34330689907073975, "learning_rate": 1.99936919972101e-05, "loss": 0.6038, "step": 836 }, { "epoch": 0.02571191595244678, "grad_norm": 0.3261229991912842, "learning_rate": 1.9993674822710986e-05, "loss": 0.5594, "step": 837 }, { "epoch": 0.025742635087395938, "grad_norm": 0.36150410771369934, "learning_rate": 1.9993657624870947e-05, "loss": 0.6053, "step": 838 }, { "epoch": 0.0257733542223451, "grad_norm": 0.3431285321712494, "learning_rate": 1.999364040369002e-05, "loss": 0.6636, "step": 839 }, { "epoch": 0.025804073357294257, "grad_norm": 0.354445219039917, "learning_rate": 1.999362315916825e-05, "loss": 0.6036, "step": 840 }, { "epoch": 0.02583479249224342, "grad_norm": 0.3431447148323059, "learning_rate": 1.9993605891305677e-05, "loss": 0.6243, "step": 841 }, { "epoch": 0.02586551162719258, "grad_norm": 0.30395689606666565, "learning_rate": 1.9993588600102336e-05, "loss": 0.5895, "step": 842 }, { "epoch": 0.025896230762141738, "grad_norm": 0.31611546874046326, "learning_rate": 1.9993571285558275e-05, "loss": 0.5428, "step": 843 }, { "epoch": 0.0259269498970909, "grad_norm": 0.33272844552993774, "learning_rate": 1.9993553947673525e-05, "loss": 0.6215, "step": 844 }, { "epoch": 0.025957669032040057, "grad_norm": 0.3784993886947632, "learning_rate": 1.9993536586448136e-05, "loss": 0.6762, "step": 845 }, { "epoch": 0.02598838816698922, "grad_norm": 0.35857489705085754, "learning_rate": 1.999351920188214e-05, "loss": 0.6659, "step": 846 }, { "epoch": 0.026019107301938377, "grad_norm": 0.3249307870864868, "learning_rate": 1.9993501793975586e-05, "loss": 0.6286, "step": 847 }, { "epoch": 0.026049826436887538, "grad_norm": 0.349250465631485, "learning_rate": 1.9993484362728508e-05, "loss": 0.7103, "step": 848 }, { "epoch": 0.026080545571836696, "grad_norm": 0.33687305450439453, "learning_rate": 1.999346690814095e-05, "loss": 0.5929, "step": 849 }, { "epoch": 0.026111264706785858, "grad_norm": 0.3357529938220978, "learning_rate": 1.9993449430212955e-05, "loss": 0.645, "step": 850 }, { "epoch": 0.026141983841735016, "grad_norm": 0.3701726794242859, "learning_rate": 1.999343192894456e-05, "loss": 0.6337, "step": 851 }, { "epoch": 0.026172702976684177, "grad_norm": 0.3886323571205139, "learning_rate": 1.99934144043358e-05, "loss": 0.6362, "step": 852 }, { "epoch": 0.026203422111633335, "grad_norm": 0.33953696489334106, "learning_rate": 1.9993396856386732e-05, "loss": 0.6392, "step": 853 }, { "epoch": 0.026234141246582496, "grad_norm": 0.327877938747406, "learning_rate": 1.999337928509738e-05, "loss": 0.6176, "step": 854 }, { "epoch": 0.026264860381531654, "grad_norm": 0.3984313905239105, "learning_rate": 1.9993361690467798e-05, "loss": 0.6304, "step": 855 }, { "epoch": 0.026295579516480816, "grad_norm": 0.3199312686920166, "learning_rate": 1.9993344072498022e-05, "loss": 0.6465, "step": 856 }, { "epoch": 0.026326298651429977, "grad_norm": 0.36998212337493896, "learning_rate": 1.999332643118809e-05, "loss": 0.7073, "step": 857 }, { "epoch": 0.026357017786379135, "grad_norm": 0.370624840259552, "learning_rate": 1.999330876653805e-05, "loss": 0.6967, "step": 858 }, { "epoch": 0.026387736921328297, "grad_norm": 0.319754034280777, "learning_rate": 1.9993291078547936e-05, "loss": 0.611, "step": 859 }, { "epoch": 0.026418456056277455, "grad_norm": 0.3540596067905426, "learning_rate": 1.9993273367217795e-05, "loss": 0.6786, "step": 860 }, { "epoch": 0.026449175191226616, "grad_norm": 0.3391339182853699, "learning_rate": 1.9993255632547665e-05, "loss": 0.5998, "step": 861 }, { "epoch": 0.026479894326175774, "grad_norm": 0.33535754680633545, "learning_rate": 1.999323787453759e-05, "loss": 0.6467, "step": 862 }, { "epoch": 0.026510613461124936, "grad_norm": 0.403915673494339, "learning_rate": 1.9993220093187605e-05, "loss": 0.6947, "step": 863 }, { "epoch": 0.026541332596074094, "grad_norm": 0.3407231867313385, "learning_rate": 1.999320228849776e-05, "loss": 0.6535, "step": 864 }, { "epoch": 0.026572051731023255, "grad_norm": 0.34311601519584656, "learning_rate": 1.9993184460468096e-05, "loss": 0.6544, "step": 865 }, { "epoch": 0.026602770865972413, "grad_norm": 0.3273365795612335, "learning_rate": 1.9993166609098645e-05, "loss": 0.5689, "step": 866 }, { "epoch": 0.026633490000921575, "grad_norm": 0.30608561635017395, "learning_rate": 1.9993148734389462e-05, "loss": 0.6108, "step": 867 }, { "epoch": 0.026664209135870733, "grad_norm": 0.320281058549881, "learning_rate": 1.9993130836340578e-05, "loss": 0.6337, "step": 868 }, { "epoch": 0.026694928270819894, "grad_norm": 0.43544432520866394, "learning_rate": 1.9993112914952043e-05, "loss": 0.6234, "step": 869 }, { "epoch": 0.026725647405769052, "grad_norm": 0.379352867603302, "learning_rate": 1.9993094970223895e-05, "loss": 0.5824, "step": 870 }, { "epoch": 0.026756366540718213, "grad_norm": 0.34356755018234253, "learning_rate": 1.999307700215617e-05, "loss": 0.5832, "step": 871 }, { "epoch": 0.026787085675667375, "grad_norm": 0.36516669392585754, "learning_rate": 1.9993059010748917e-05, "loss": 0.6431, "step": 872 }, { "epoch": 0.026817804810616533, "grad_norm": 0.3460361361503601, "learning_rate": 1.9993040996002183e-05, "loss": 0.6466, "step": 873 }, { "epoch": 0.026848523945565694, "grad_norm": 0.32557782530784607, "learning_rate": 1.9993022957916e-05, "loss": 0.6117, "step": 874 }, { "epoch": 0.026879243080514852, "grad_norm": 0.3667413294315338, "learning_rate": 1.9993004896490412e-05, "loss": 0.6783, "step": 875 }, { "epoch": 0.026909962215464014, "grad_norm": 0.3549968898296356, "learning_rate": 1.9992986811725465e-05, "loss": 0.6018, "step": 876 }, { "epoch": 0.026940681350413172, "grad_norm": 0.3632084131240845, "learning_rate": 1.99929687036212e-05, "loss": 0.6765, "step": 877 }, { "epoch": 0.026971400485362333, "grad_norm": 0.45459064841270447, "learning_rate": 1.9992950572177657e-05, "loss": 0.6686, "step": 878 }, { "epoch": 0.02700211962031149, "grad_norm": 0.3806736171245575, "learning_rate": 1.9992932417394882e-05, "loss": 0.6493, "step": 879 }, { "epoch": 0.027032838755260653, "grad_norm": 0.3853391706943512, "learning_rate": 1.9992914239272918e-05, "loss": 0.6816, "step": 880 }, { "epoch": 0.02706355789020981, "grad_norm": 0.3224543631076813, "learning_rate": 1.9992896037811797e-05, "loss": 0.6645, "step": 881 }, { "epoch": 0.027094277025158972, "grad_norm": 0.3574940860271454, "learning_rate": 1.9992877813011574e-05, "loss": 0.6623, "step": 882 }, { "epoch": 0.02712499616010813, "grad_norm": 0.3445385694503784, "learning_rate": 1.9992859564872287e-05, "loss": 0.6362, "step": 883 }, { "epoch": 0.02715571529505729, "grad_norm": 0.3310242295265198, "learning_rate": 1.999284129339398e-05, "loss": 0.6039, "step": 884 }, { "epoch": 0.02718643443000645, "grad_norm": 0.34740573167800903, "learning_rate": 1.9992822998576692e-05, "loss": 0.6253, "step": 885 }, { "epoch": 0.02721715356495561, "grad_norm": 0.3663191497325897, "learning_rate": 1.999280468042047e-05, "loss": 0.6713, "step": 886 }, { "epoch": 0.027247872699904772, "grad_norm": 0.5052487850189209, "learning_rate": 1.999278633892535e-05, "loss": 0.6952, "step": 887 }, { "epoch": 0.02727859183485393, "grad_norm": 0.3309720456600189, "learning_rate": 1.9992767974091385e-05, "loss": 0.6636, "step": 888 }, { "epoch": 0.027309310969803092, "grad_norm": 0.34106194972991943, "learning_rate": 1.999274958591861e-05, "loss": 0.6298, "step": 889 }, { "epoch": 0.02734003010475225, "grad_norm": 0.37429702281951904, "learning_rate": 1.9992731174407073e-05, "loss": 0.6412, "step": 890 }, { "epoch": 0.02737074923970141, "grad_norm": 0.589742124080658, "learning_rate": 1.999271273955681e-05, "loss": 0.6488, "step": 891 }, { "epoch": 0.02740146837465057, "grad_norm": 0.34701046347618103, "learning_rate": 1.999269428136787e-05, "loss": 0.7622, "step": 892 }, { "epoch": 0.02743218750959973, "grad_norm": 0.3558473587036133, "learning_rate": 1.9992675799840294e-05, "loss": 0.72, "step": 893 }, { "epoch": 0.02746290664454889, "grad_norm": 0.4320787191390991, "learning_rate": 1.9992657294974133e-05, "loss": 0.6344, "step": 894 }, { "epoch": 0.02749362577949805, "grad_norm": 0.3307746946811676, "learning_rate": 1.9992638766769416e-05, "loss": 0.6672, "step": 895 }, { "epoch": 0.027524344914447208, "grad_norm": 0.33231550455093384, "learning_rate": 1.9992620215226193e-05, "loss": 0.6256, "step": 896 }, { "epoch": 0.02755506404939637, "grad_norm": 0.6375876069068909, "learning_rate": 1.9992601640344513e-05, "loss": 0.6433, "step": 897 }, { "epoch": 0.027585783184345528, "grad_norm": 0.356007844209671, "learning_rate": 1.9992583042124408e-05, "loss": 0.6657, "step": 898 }, { "epoch": 0.02761650231929469, "grad_norm": 0.3650149703025818, "learning_rate": 1.9992564420565926e-05, "loss": 0.693, "step": 899 }, { "epoch": 0.027647221454243847, "grad_norm": 0.4083043932914734, "learning_rate": 1.999254577566912e-05, "loss": 0.6994, "step": 900 }, { "epoch": 0.02767794058919301, "grad_norm": 0.3430173993110657, "learning_rate": 1.999252710743402e-05, "loss": 0.6392, "step": 901 }, { "epoch": 0.027708659724142166, "grad_norm": 0.3918071985244751, "learning_rate": 1.9992508415860675e-05, "loss": 0.6068, "step": 902 }, { "epoch": 0.027739378859091328, "grad_norm": 0.30533114075660706, "learning_rate": 1.999248970094913e-05, "loss": 0.6058, "step": 903 }, { "epoch": 0.02777009799404049, "grad_norm": 0.32990822196006775, "learning_rate": 1.999247096269943e-05, "loss": 0.6189, "step": 904 }, { "epoch": 0.027800817128989647, "grad_norm": 0.37144070863723755, "learning_rate": 1.9992452201111613e-05, "loss": 0.7093, "step": 905 }, { "epoch": 0.02783153626393881, "grad_norm": 0.33553409576416016, "learning_rate": 1.9992433416185728e-05, "loss": 0.6329, "step": 906 }, { "epoch": 0.027862255398887967, "grad_norm": 0.34402376413345337, "learning_rate": 1.999241460792181e-05, "loss": 0.5957, "step": 907 }, { "epoch": 0.027892974533837128, "grad_norm": 0.3575018644332886, "learning_rate": 1.9992395776319918e-05, "loss": 0.7001, "step": 908 }, { "epoch": 0.027923693668786286, "grad_norm": 0.5423919558525085, "learning_rate": 1.9992376921380085e-05, "loss": 0.6312, "step": 909 }, { "epoch": 0.027954412803735448, "grad_norm": 0.36307886242866516, "learning_rate": 1.999235804310236e-05, "loss": 0.6899, "step": 910 }, { "epoch": 0.027985131938684606, "grad_norm": 0.3432862460613251, "learning_rate": 1.999233914148678e-05, "loss": 0.6332, "step": 911 }, { "epoch": 0.028015851073633767, "grad_norm": 0.3484271466732025, "learning_rate": 1.99923202165334e-05, "loss": 0.6826, "step": 912 }, { "epoch": 0.028046570208582925, "grad_norm": 0.3533470928668976, "learning_rate": 1.9992301268242254e-05, "loss": 0.6419, "step": 913 }, { "epoch": 0.028077289343532087, "grad_norm": 0.34541529417037964, "learning_rate": 1.999228229661339e-05, "loss": 0.6058, "step": 914 }, { "epoch": 0.028108008478481245, "grad_norm": 0.36948898434638977, "learning_rate": 1.9992263301646854e-05, "loss": 0.6531, "step": 915 }, { "epoch": 0.028138727613430406, "grad_norm": 0.3919905126094818, "learning_rate": 1.999224428334269e-05, "loss": 0.6754, "step": 916 }, { "epoch": 0.028169446748379564, "grad_norm": 0.37425386905670166, "learning_rate": 1.9992225241700943e-05, "loss": 0.6725, "step": 917 }, { "epoch": 0.028200165883328725, "grad_norm": 0.33044975996017456, "learning_rate": 1.9992206176721652e-05, "loss": 0.6737, "step": 918 }, { "epoch": 0.028230885018277887, "grad_norm": 0.3141992390155792, "learning_rate": 1.999218708840487e-05, "loss": 0.6525, "step": 919 }, { "epoch": 0.028261604153227045, "grad_norm": 0.360970675945282, "learning_rate": 1.9992167976750637e-05, "loss": 0.721, "step": 920 }, { "epoch": 0.028292323288176206, "grad_norm": 0.3297577202320099, "learning_rate": 1.9992148841758994e-05, "loss": 0.6428, "step": 921 }, { "epoch": 0.028323042423125364, "grad_norm": 0.3619009256362915, "learning_rate": 1.9992129683429994e-05, "loss": 0.6031, "step": 922 }, { "epoch": 0.028353761558074526, "grad_norm": 0.3066405653953552, "learning_rate": 1.999211050176367e-05, "loss": 0.5744, "step": 923 }, { "epoch": 0.028384480693023684, "grad_norm": 0.35686203837394714, "learning_rate": 1.9992091296760083e-05, "loss": 0.6865, "step": 924 }, { "epoch": 0.028415199827972845, "grad_norm": 0.4501313269138336, "learning_rate": 1.9992072068419264e-05, "loss": 0.5582, "step": 925 }, { "epoch": 0.028445918962922003, "grad_norm": 0.3558783531188965, "learning_rate": 1.9992052816741263e-05, "loss": 0.614, "step": 926 }, { "epoch": 0.028476638097871165, "grad_norm": 0.3103170096874237, "learning_rate": 1.9992033541726126e-05, "loss": 0.591, "step": 927 }, { "epoch": 0.028507357232820323, "grad_norm": 0.3640974462032318, "learning_rate": 1.9992014243373897e-05, "loss": 0.5335, "step": 928 }, { "epoch": 0.028538076367769484, "grad_norm": 0.34118396043777466, "learning_rate": 1.9991994921684622e-05, "loss": 0.5728, "step": 929 }, { "epoch": 0.028568795502718642, "grad_norm": 0.37271228432655334, "learning_rate": 1.9991975576658342e-05, "loss": 0.6489, "step": 930 }, { "epoch": 0.028599514637667803, "grad_norm": 0.32607826590538025, "learning_rate": 1.9991956208295106e-05, "loss": 0.6328, "step": 931 }, { "epoch": 0.02863023377261696, "grad_norm": 0.4843723475933075, "learning_rate": 1.999193681659496e-05, "loss": 0.6742, "step": 932 }, { "epoch": 0.028660952907566123, "grad_norm": 0.34481140971183777, "learning_rate": 1.9991917401557948e-05, "loss": 0.658, "step": 933 }, { "epoch": 0.028691672042515284, "grad_norm": 0.35286298394203186, "learning_rate": 1.999189796318411e-05, "loss": 0.6628, "step": 934 }, { "epoch": 0.028722391177464442, "grad_norm": 0.3482709228992462, "learning_rate": 1.9991878501473504e-05, "loss": 0.7318, "step": 935 }, { "epoch": 0.028753110312413604, "grad_norm": 0.35179585218429565, "learning_rate": 1.9991859016426163e-05, "loss": 0.638, "step": 936 }, { "epoch": 0.028783829447362762, "grad_norm": 0.371429443359375, "learning_rate": 1.9991839508042136e-05, "loss": 0.7079, "step": 937 }, { "epoch": 0.028814548582311923, "grad_norm": 0.3398880958557129, "learning_rate": 1.9991819976321474e-05, "loss": 0.5865, "step": 938 }, { "epoch": 0.02884526771726108, "grad_norm": 0.37276071310043335, "learning_rate": 1.9991800421264217e-05, "loss": 0.704, "step": 939 }, { "epoch": 0.028875986852210243, "grad_norm": 0.35747987031936646, "learning_rate": 1.9991780842870413e-05, "loss": 0.622, "step": 940 }, { "epoch": 0.0289067059871594, "grad_norm": 0.48391324281692505, "learning_rate": 1.999176124114011e-05, "loss": 0.6132, "step": 941 }, { "epoch": 0.028937425122108562, "grad_norm": 0.349644273519516, "learning_rate": 1.9991741616073347e-05, "loss": 0.6095, "step": 942 }, { "epoch": 0.02896814425705772, "grad_norm": 0.33420729637145996, "learning_rate": 1.9991721967670173e-05, "loss": 0.6068, "step": 943 }, { "epoch": 0.02899886339200688, "grad_norm": 0.3364189565181732, "learning_rate": 1.9991702295930634e-05, "loss": 0.6172, "step": 944 }, { "epoch": 0.02902958252695604, "grad_norm": 0.3340352773666382, "learning_rate": 1.999168260085478e-05, "loss": 0.6584, "step": 945 }, { "epoch": 0.0290603016619052, "grad_norm": 0.3347535729408264, "learning_rate": 1.999166288244265e-05, "loss": 0.5865, "step": 946 }, { "epoch": 0.02909102079685436, "grad_norm": 0.35337284207344055, "learning_rate": 1.9991643140694298e-05, "loss": 0.6618, "step": 947 }, { "epoch": 0.02912173993180352, "grad_norm": 0.3245721161365509, "learning_rate": 1.999162337560976e-05, "loss": 0.5904, "step": 948 }, { "epoch": 0.029152459066752682, "grad_norm": 0.36671164631843567, "learning_rate": 1.9991603587189092e-05, "loss": 0.5839, "step": 949 }, { "epoch": 0.02918317820170184, "grad_norm": 0.34232187271118164, "learning_rate": 1.9991583775432334e-05, "loss": 0.5626, "step": 950 }, { "epoch": 0.029213897336651, "grad_norm": 0.3917906880378723, "learning_rate": 1.9991563940339534e-05, "loss": 0.6784, "step": 951 }, { "epoch": 0.02924461647160016, "grad_norm": 0.4486294984817505, "learning_rate": 1.9991544081910743e-05, "loss": 0.6921, "step": 952 }, { "epoch": 0.02927533560654932, "grad_norm": 0.4337921142578125, "learning_rate": 1.9991524200145997e-05, "loss": 0.6889, "step": 953 }, { "epoch": 0.02930605474149848, "grad_norm": 0.41638249158859253, "learning_rate": 1.999150429504535e-05, "loss": 0.6286, "step": 954 }, { "epoch": 0.02933677387644764, "grad_norm": 0.32168474793434143, "learning_rate": 1.999148436660885e-05, "loss": 0.627, "step": 955 }, { "epoch": 0.029367493011396798, "grad_norm": 0.34400907158851624, "learning_rate": 1.999146441483654e-05, "loss": 0.5621, "step": 956 }, { "epoch": 0.02939821214634596, "grad_norm": 0.3773561716079712, "learning_rate": 1.999144443972846e-05, "loss": 0.6157, "step": 957 }, { "epoch": 0.029428931281295118, "grad_norm": 0.3841872215270996, "learning_rate": 1.999142444128467e-05, "loss": 0.6361, "step": 958 }, { "epoch": 0.02945965041624428, "grad_norm": 0.3586106598377228, "learning_rate": 1.999140441950521e-05, "loss": 0.6932, "step": 959 }, { "epoch": 0.029490369551193437, "grad_norm": 0.3461854159832001, "learning_rate": 1.9991384374390127e-05, "loss": 0.6584, "step": 960 }, { "epoch": 0.0295210886861426, "grad_norm": 0.48548853397369385, "learning_rate": 1.999136430593947e-05, "loss": 0.644, "step": 961 }, { "epoch": 0.029551807821091756, "grad_norm": 0.3546416461467743, "learning_rate": 1.999134421415328e-05, "loss": 0.6752, "step": 962 }, { "epoch": 0.029582526956040918, "grad_norm": 0.3347010910511017, "learning_rate": 1.999132409903161e-05, "loss": 0.5618, "step": 963 }, { "epoch": 0.02961324609099008, "grad_norm": 0.35680463910102844, "learning_rate": 1.9991303960574503e-05, "loss": 0.663, "step": 964 }, { "epoch": 0.029643965225939237, "grad_norm": 0.36602479219436646, "learning_rate": 1.999128379878201e-05, "loss": 0.6647, "step": 965 }, { "epoch": 0.0296746843608884, "grad_norm": 0.3482474088668823, "learning_rate": 1.9991263613654176e-05, "loss": 0.632, "step": 966 }, { "epoch": 0.029705403495837557, "grad_norm": 0.36395663022994995, "learning_rate": 1.9991243405191045e-05, "loss": 0.6093, "step": 967 }, { "epoch": 0.029736122630786718, "grad_norm": 0.4002844989299774, "learning_rate": 1.9991223173392668e-05, "loss": 0.676, "step": 968 }, { "epoch": 0.029766841765735876, "grad_norm": 0.3626173436641693, "learning_rate": 1.9991202918259095e-05, "loss": 0.677, "step": 969 }, { "epoch": 0.029797560900685038, "grad_norm": 0.3409412205219269, "learning_rate": 1.999118263979037e-05, "loss": 0.6178, "step": 970 }, { "epoch": 0.029828280035634196, "grad_norm": 0.3712298572063446, "learning_rate": 1.9991162337986534e-05, "loss": 0.6617, "step": 971 }, { "epoch": 0.029858999170583357, "grad_norm": 0.35912397503852844, "learning_rate": 1.9991142012847647e-05, "loss": 0.6982, "step": 972 }, { "epoch": 0.029889718305532515, "grad_norm": 0.3603551387786865, "learning_rate": 1.9991121664373745e-05, "loss": 0.6532, "step": 973 }, { "epoch": 0.029920437440481677, "grad_norm": 0.3421851098537445, "learning_rate": 1.9991101292564884e-05, "loss": 0.6465, "step": 974 }, { "epoch": 0.029951156575430835, "grad_norm": 0.4020642042160034, "learning_rate": 1.9991080897421105e-05, "loss": 0.6809, "step": 975 }, { "epoch": 0.029981875710379996, "grad_norm": 0.4122118651866913, "learning_rate": 1.9991060478942464e-05, "loss": 0.6071, "step": 976 }, { "epoch": 0.030012594845329154, "grad_norm": 0.3102165460586548, "learning_rate": 1.9991040037129e-05, "loss": 0.6066, "step": 977 }, { "epoch": 0.030043313980278315, "grad_norm": 0.3728201687335968, "learning_rate": 1.9991019571980764e-05, "loss": 0.5976, "step": 978 }, { "epoch": 0.030074033115227477, "grad_norm": 0.36967578530311584, "learning_rate": 1.9990999083497804e-05, "loss": 0.6243, "step": 979 }, { "epoch": 0.030104752250176635, "grad_norm": 0.35314464569091797, "learning_rate": 1.999097857168017e-05, "loss": 0.6259, "step": 980 }, { "epoch": 0.030135471385125796, "grad_norm": 0.34373918175697327, "learning_rate": 1.9990958036527903e-05, "loss": 0.6841, "step": 981 }, { "epoch": 0.030166190520074954, "grad_norm": 0.32499226927757263, "learning_rate": 1.9990937478041062e-05, "loss": 0.6508, "step": 982 }, { "epoch": 0.030196909655024116, "grad_norm": 0.3431761562824249, "learning_rate": 1.9990916896219685e-05, "loss": 0.6711, "step": 983 }, { "epoch": 0.030227628789973274, "grad_norm": 0.33923274278640747, "learning_rate": 1.9990896291063825e-05, "loss": 0.5533, "step": 984 }, { "epoch": 0.030258347924922435, "grad_norm": 0.3480401635169983, "learning_rate": 1.999087566257353e-05, "loss": 0.6545, "step": 985 }, { "epoch": 0.030289067059871593, "grad_norm": 0.35932230949401855, "learning_rate": 1.9990855010748844e-05, "loss": 0.5466, "step": 986 }, { "epoch": 0.030319786194820755, "grad_norm": 0.39288341999053955, "learning_rate": 1.999083433558982e-05, "loss": 0.6442, "step": 987 }, { "epoch": 0.030350505329769913, "grad_norm": 0.3493782579898834, "learning_rate": 1.9990813637096503e-05, "loss": 0.6954, "step": 988 }, { "epoch": 0.030381224464719074, "grad_norm": 0.3798350989818573, "learning_rate": 1.9990792915268946e-05, "loss": 0.5755, "step": 989 }, { "epoch": 0.030411943599668232, "grad_norm": 0.36563122272491455, "learning_rate": 1.9990772170107193e-05, "loss": 0.5911, "step": 990 }, { "epoch": 0.030442662734617393, "grad_norm": 0.36792153120040894, "learning_rate": 1.9990751401611295e-05, "loss": 0.6221, "step": 991 }, { "epoch": 0.03047338186956655, "grad_norm": 0.3902142345905304, "learning_rate": 1.9990730609781296e-05, "loss": 0.6217, "step": 992 }, { "epoch": 0.030504101004515713, "grad_norm": 0.3285065293312073, "learning_rate": 1.999070979461725e-05, "loss": 0.6514, "step": 993 }, { "epoch": 0.030534820139464874, "grad_norm": 0.35930368304252625, "learning_rate": 1.9990688956119206e-05, "loss": 0.647, "step": 994 }, { "epoch": 0.030565539274414032, "grad_norm": 0.34278202056884766, "learning_rate": 1.9990668094287206e-05, "loss": 0.704, "step": 995 }, { "epoch": 0.030596258409363194, "grad_norm": 0.3557632267475128, "learning_rate": 1.9990647209121305e-05, "loss": 0.6233, "step": 996 }, { "epoch": 0.030626977544312352, "grad_norm": 0.4094691276550293, "learning_rate": 1.999062630062155e-05, "loss": 0.6053, "step": 997 }, { "epoch": 0.030657696679261513, "grad_norm": 0.5025925040245056, "learning_rate": 1.999060536878799e-05, "loss": 0.7051, "step": 998 }, { "epoch": 0.03068841581421067, "grad_norm": 0.3355008661746979, "learning_rate": 1.9990584413620673e-05, "loss": 0.5396, "step": 999 }, { "epoch": 0.030719134949159833, "grad_norm": 0.36267465353012085, "learning_rate": 1.9990563435119646e-05, "loss": 0.6351, "step": 1000 }, { "epoch": 0.03074985408410899, "grad_norm": 0.34036964178085327, "learning_rate": 1.9990542433284962e-05, "loss": 0.6418, "step": 1001 }, { "epoch": 0.030780573219058152, "grad_norm": 0.3541405200958252, "learning_rate": 1.999052140811667e-05, "loss": 0.629, "step": 1002 }, { "epoch": 0.03081129235400731, "grad_norm": 0.36128905415534973, "learning_rate": 1.9990500359614815e-05, "loss": 0.6617, "step": 1003 }, { "epoch": 0.03084201148895647, "grad_norm": 0.3653033673763275, "learning_rate": 1.999047928777945e-05, "loss": 0.6218, "step": 1004 }, { "epoch": 0.03087273062390563, "grad_norm": 0.37695032358169556, "learning_rate": 1.9990458192610623e-05, "loss": 0.6505, "step": 1005 }, { "epoch": 0.03090344975885479, "grad_norm": 0.3268383741378784, "learning_rate": 1.9990437074108385e-05, "loss": 0.6213, "step": 1006 }, { "epoch": 0.03093416889380395, "grad_norm": 0.345052570104599, "learning_rate": 1.999041593227278e-05, "loss": 0.6649, "step": 1007 }, { "epoch": 0.03096488802875311, "grad_norm": 0.35672691464424133, "learning_rate": 1.9990394767103863e-05, "loss": 0.6848, "step": 1008 }, { "epoch": 0.03099560716370227, "grad_norm": 0.3338707983493805, "learning_rate": 1.9990373578601678e-05, "loss": 0.619, "step": 1009 }, { "epoch": 0.03102632629865143, "grad_norm": 0.3253258466720581, "learning_rate": 1.9990352366766282e-05, "loss": 0.5523, "step": 1010 }, { "epoch": 0.03105704543360059, "grad_norm": 0.3613380491733551, "learning_rate": 1.9990331131597717e-05, "loss": 0.5879, "step": 1011 }, { "epoch": 0.03108776456854975, "grad_norm": 0.33052125573158264, "learning_rate": 1.9990309873096038e-05, "loss": 0.5923, "step": 1012 }, { "epoch": 0.03111848370349891, "grad_norm": 0.3249853551387787, "learning_rate": 1.9990288591261294e-05, "loss": 0.6219, "step": 1013 }, { "epoch": 0.03114920283844807, "grad_norm": 0.4121742248535156, "learning_rate": 1.9990267286093534e-05, "loss": 0.6363, "step": 1014 }, { "epoch": 0.03117992197339723, "grad_norm": 0.3123716711997986, "learning_rate": 1.99902459575928e-05, "loss": 0.5666, "step": 1015 }, { "epoch": 0.031210641108346388, "grad_norm": 0.35195404291152954, "learning_rate": 1.9990224605759156e-05, "loss": 0.5756, "step": 1016 }, { "epoch": 0.03124136024329555, "grad_norm": 0.812134325504303, "learning_rate": 1.9990203230592643e-05, "loss": 0.5645, "step": 1017 }, { "epoch": 0.03127207937824471, "grad_norm": 0.3393416404724121, "learning_rate": 1.999018183209331e-05, "loss": 0.5099, "step": 1018 }, { "epoch": 0.03130279851319387, "grad_norm": 0.3292752802371979, "learning_rate": 1.999016041026121e-05, "loss": 0.6746, "step": 1019 }, { "epoch": 0.03133351764814303, "grad_norm": 0.3554602265357971, "learning_rate": 1.99901389650964e-05, "loss": 0.6105, "step": 1020 }, { "epoch": 0.031364236783092185, "grad_norm": 0.3844868838787079, "learning_rate": 1.9990117496598913e-05, "loss": 0.6186, "step": 1021 }, { "epoch": 0.031394955918041346, "grad_norm": 0.38540178537368774, "learning_rate": 1.9990096004768812e-05, "loss": 0.7329, "step": 1022 }, { "epoch": 0.03142567505299051, "grad_norm": 0.37618565559387207, "learning_rate": 1.9990074489606144e-05, "loss": 0.6144, "step": 1023 }, { "epoch": 0.03145639418793967, "grad_norm": 0.3909849524497986, "learning_rate": 1.999005295111096e-05, "loss": 0.6905, "step": 1024 }, { "epoch": 0.03148711332288883, "grad_norm": 0.43792369961738586, "learning_rate": 1.999003138928331e-05, "loss": 0.6473, "step": 1025 }, { "epoch": 0.031517832457837985, "grad_norm": 0.3736238479614258, "learning_rate": 1.9990009804123245e-05, "loss": 0.679, "step": 1026 }, { "epoch": 0.03154855159278715, "grad_norm": 0.3628404140472412, "learning_rate": 1.9989988195630815e-05, "loss": 0.6373, "step": 1027 }, { "epoch": 0.03157927072773631, "grad_norm": 0.3571937084197998, "learning_rate": 1.9989966563806065e-05, "loss": 0.6805, "step": 1028 }, { "epoch": 0.03160998986268547, "grad_norm": 0.35794422030448914, "learning_rate": 1.9989944908649055e-05, "loss": 0.6166, "step": 1029 }, { "epoch": 0.031640708997634624, "grad_norm": 0.36259719729423523, "learning_rate": 1.998992323015983e-05, "loss": 0.6653, "step": 1030 }, { "epoch": 0.031671428132583786, "grad_norm": 0.3342472314834595, "learning_rate": 1.998990152833844e-05, "loss": 0.6321, "step": 1031 }, { "epoch": 0.03170214726753295, "grad_norm": 0.3626025915145874, "learning_rate": 1.9989879803184937e-05, "loss": 0.6433, "step": 1032 }, { "epoch": 0.03173286640248211, "grad_norm": 0.33758050203323364, "learning_rate": 1.9989858054699373e-05, "loss": 0.6284, "step": 1033 }, { "epoch": 0.03176358553743126, "grad_norm": 0.40521734952926636, "learning_rate": 1.99898362828818e-05, "loss": 0.7224, "step": 1034 }, { "epoch": 0.031794304672380425, "grad_norm": 0.35888582468032837, "learning_rate": 1.9989814487732265e-05, "loss": 0.6287, "step": 1035 }, { "epoch": 0.031825023807329586, "grad_norm": 0.33606529235839844, "learning_rate": 1.998979266925082e-05, "loss": 0.6271, "step": 1036 }, { "epoch": 0.03185574294227875, "grad_norm": 0.45442995429039, "learning_rate": 1.9989770827437515e-05, "loss": 0.5186, "step": 1037 }, { "epoch": 0.0318864620772279, "grad_norm": 0.3163524270057678, "learning_rate": 1.9989748962292407e-05, "loss": 0.5533, "step": 1038 }, { "epoch": 0.03191718121217706, "grad_norm": 0.33671003580093384, "learning_rate": 1.998972707381554e-05, "loss": 0.6325, "step": 1039 }, { "epoch": 0.031947900347126225, "grad_norm": 0.36726388335227966, "learning_rate": 1.9989705162006967e-05, "loss": 0.5739, "step": 1040 }, { "epoch": 0.031978619482075386, "grad_norm": 0.372280478477478, "learning_rate": 1.998968322686674e-05, "loss": 0.59, "step": 1041 }, { "epoch": 0.03200933861702455, "grad_norm": 0.33566126227378845, "learning_rate": 1.9989661268394915e-05, "loss": 0.5694, "step": 1042 }, { "epoch": 0.0320400577519737, "grad_norm": 0.36734333634376526, "learning_rate": 1.998963928659153e-05, "loss": 0.6471, "step": 1043 }, { "epoch": 0.032070776886922864, "grad_norm": 0.3498312830924988, "learning_rate": 1.998961728145665e-05, "loss": 0.6134, "step": 1044 }, { "epoch": 0.032101496021872025, "grad_norm": 0.3177320957183838, "learning_rate": 1.9989595252990322e-05, "loss": 0.5913, "step": 1045 }, { "epoch": 0.03213221515682119, "grad_norm": 0.33551815152168274, "learning_rate": 1.9989573201192594e-05, "loss": 0.6877, "step": 1046 }, { "epoch": 0.03216293429177034, "grad_norm": 0.368248850107193, "learning_rate": 1.998955112606352e-05, "loss": 0.6127, "step": 1047 }, { "epoch": 0.0321936534267195, "grad_norm": 0.32530200481414795, "learning_rate": 1.9989529027603155e-05, "loss": 0.5903, "step": 1048 }, { "epoch": 0.032224372561668664, "grad_norm": 0.3521789312362671, "learning_rate": 1.9989506905811544e-05, "loss": 0.6469, "step": 1049 }, { "epoch": 0.032255091696617826, "grad_norm": 0.3289661407470703, "learning_rate": 1.998948476068874e-05, "loss": 0.6371, "step": 1050 }, { "epoch": 0.03228581083156698, "grad_norm": 0.34857144951820374, "learning_rate": 1.9989462592234803e-05, "loss": 0.6517, "step": 1051 }, { "epoch": 0.03231652996651614, "grad_norm": 0.3379706144332886, "learning_rate": 1.9989440400449776e-05, "loss": 0.6029, "step": 1052 }, { "epoch": 0.0323472491014653, "grad_norm": 0.3823551535606384, "learning_rate": 1.998941818533371e-05, "loss": 0.6207, "step": 1053 }, { "epoch": 0.032377968236414464, "grad_norm": 0.3838878870010376, "learning_rate": 1.9989395946886665e-05, "loss": 0.5994, "step": 1054 }, { "epoch": 0.03240868737136362, "grad_norm": 0.335249662399292, "learning_rate": 1.9989373685108683e-05, "loss": 0.6793, "step": 1055 }, { "epoch": 0.03243940650631278, "grad_norm": 0.33423668146133423, "learning_rate": 1.9989351399999825e-05, "loss": 0.5696, "step": 1056 }, { "epoch": 0.03247012564126194, "grad_norm": 0.4110044836997986, "learning_rate": 1.9989329091560136e-05, "loss": 0.5768, "step": 1057 }, { "epoch": 0.0325008447762111, "grad_norm": 0.38590484857559204, "learning_rate": 1.9989306759789673e-05, "loss": 0.6829, "step": 1058 }, { "epoch": 0.032531563911160265, "grad_norm": 0.3750787377357483, "learning_rate": 1.9989284404688488e-05, "loss": 0.6269, "step": 1059 }, { "epoch": 0.03256228304610942, "grad_norm": 0.4510061740875244, "learning_rate": 1.998926202625663e-05, "loss": 0.594, "step": 1060 }, { "epoch": 0.03259300218105858, "grad_norm": 0.476958692073822, "learning_rate": 1.9989239624494155e-05, "loss": 0.682, "step": 1061 }, { "epoch": 0.03262372131600774, "grad_norm": 0.33054396510124207, "learning_rate": 1.998921719940111e-05, "loss": 0.6032, "step": 1062 }, { "epoch": 0.032654440450956904, "grad_norm": 0.3750171363353729, "learning_rate": 1.9989194750977553e-05, "loss": 0.6736, "step": 1063 }, { "epoch": 0.03268515958590606, "grad_norm": 0.38967016339302063, "learning_rate": 1.9989172279223532e-05, "loss": 0.6868, "step": 1064 }, { "epoch": 0.03271587872085522, "grad_norm": 0.3347996473312378, "learning_rate": 1.9989149784139103e-05, "loss": 0.5866, "step": 1065 }, { "epoch": 0.03274659785580438, "grad_norm": 0.3479015827178955, "learning_rate": 1.9989127265724317e-05, "loss": 0.6203, "step": 1066 }, { "epoch": 0.03277731699075354, "grad_norm": 0.3617999255657196, "learning_rate": 1.9989104723979225e-05, "loss": 0.6017, "step": 1067 }, { "epoch": 0.0328080361257027, "grad_norm": 0.42498594522476196, "learning_rate": 1.9989082158903885e-05, "loss": 0.5863, "step": 1068 }, { "epoch": 0.03283875526065186, "grad_norm": 0.32549619674682617, "learning_rate": 1.9989059570498343e-05, "loss": 0.6573, "step": 1069 }, { "epoch": 0.03286947439560102, "grad_norm": 0.3284909129142761, "learning_rate": 1.9989036958762654e-05, "loss": 0.6017, "step": 1070 }, { "epoch": 0.03290019353055018, "grad_norm": 0.8762412071228027, "learning_rate": 1.9989014323696873e-05, "loss": 0.6019, "step": 1071 }, { "epoch": 0.03293091266549934, "grad_norm": 0.3486361801624298, "learning_rate": 1.9988991665301053e-05, "loss": 0.7148, "step": 1072 }, { "epoch": 0.0329616318004485, "grad_norm": 0.3163992166519165, "learning_rate": 1.9988968983575245e-05, "loss": 0.6407, "step": 1073 }, { "epoch": 0.03299235093539766, "grad_norm": 0.46429967880249023, "learning_rate": 1.99889462785195e-05, "loss": 0.7093, "step": 1074 }, { "epoch": 0.03302307007034682, "grad_norm": 0.34972095489501953, "learning_rate": 1.9988923550133873e-05, "loss": 0.648, "step": 1075 }, { "epoch": 0.03305378920529598, "grad_norm": 0.33597803115844727, "learning_rate": 1.9988900798418417e-05, "loss": 0.6691, "step": 1076 }, { "epoch": 0.033084508340245136, "grad_norm": 0.4052893817424774, "learning_rate": 1.9988878023373187e-05, "loss": 0.6522, "step": 1077 }, { "epoch": 0.0331152274751943, "grad_norm": 0.33047980070114136, "learning_rate": 1.9988855224998237e-05, "loss": 0.6507, "step": 1078 }, { "epoch": 0.03314594661014346, "grad_norm": 0.3311249017715454, "learning_rate": 1.9988832403293618e-05, "loss": 0.6329, "step": 1079 }, { "epoch": 0.03317666574509262, "grad_norm": 0.3254491090774536, "learning_rate": 1.998880955825938e-05, "loss": 0.6163, "step": 1080 }, { "epoch": 0.033207384880041775, "grad_norm": 0.39359042048454285, "learning_rate": 1.9988786689895582e-05, "loss": 0.6223, "step": 1081 }, { "epoch": 0.033238104014990937, "grad_norm": 0.32798635959625244, "learning_rate": 1.998876379820227e-05, "loss": 0.6064, "step": 1082 }, { "epoch": 0.0332688231499401, "grad_norm": 0.3486401438713074, "learning_rate": 1.998874088317951e-05, "loss": 0.6426, "step": 1083 }, { "epoch": 0.03329954228488926, "grad_norm": 0.35808810591697693, "learning_rate": 1.9988717944827343e-05, "loss": 0.6659, "step": 1084 }, { "epoch": 0.033330261419838414, "grad_norm": 0.38490602374076843, "learning_rate": 1.998869498314583e-05, "loss": 0.6362, "step": 1085 }, { "epoch": 0.033360980554787575, "grad_norm": 0.32536956667900085, "learning_rate": 1.998867199813502e-05, "loss": 0.6341, "step": 1086 }, { "epoch": 0.03339169968973674, "grad_norm": 0.3295762240886688, "learning_rate": 1.9988648989794972e-05, "loss": 0.6103, "step": 1087 }, { "epoch": 0.0334224188246859, "grad_norm": 0.328519731760025, "learning_rate": 1.9988625958125735e-05, "loss": 0.6089, "step": 1088 }, { "epoch": 0.03345313795963506, "grad_norm": 0.3444724678993225, "learning_rate": 1.9988602903127365e-05, "loss": 0.6724, "step": 1089 }, { "epoch": 0.033483857094584214, "grad_norm": 0.3777585029602051, "learning_rate": 1.9988579824799914e-05, "loss": 0.6476, "step": 1090 }, { "epoch": 0.033514576229533376, "grad_norm": 0.32039839029312134, "learning_rate": 1.998855672314344e-05, "loss": 0.5772, "step": 1091 }, { "epoch": 0.03354529536448254, "grad_norm": 0.346576988697052, "learning_rate": 1.998853359815799e-05, "loss": 0.6687, "step": 1092 }, { "epoch": 0.0335760144994317, "grad_norm": 0.3695865571498871, "learning_rate": 1.9988510449843624e-05, "loss": 0.6032, "step": 1093 }, { "epoch": 0.03360673363438085, "grad_norm": 0.3498518466949463, "learning_rate": 1.9988487278200397e-05, "loss": 0.6022, "step": 1094 }, { "epoch": 0.033637452769330015, "grad_norm": 0.4260594844818115, "learning_rate": 1.9988464083228358e-05, "loss": 0.6013, "step": 1095 }, { "epoch": 0.033668171904279176, "grad_norm": 0.33936771750450134, "learning_rate": 1.9988440864927565e-05, "loss": 0.6827, "step": 1096 }, { "epoch": 0.03369889103922834, "grad_norm": 0.3518911898136139, "learning_rate": 1.9988417623298068e-05, "loss": 0.6752, "step": 1097 }, { "epoch": 0.03372961017417749, "grad_norm": 0.33924180269241333, "learning_rate": 1.9988394358339928e-05, "loss": 0.711, "step": 1098 }, { "epoch": 0.033760329309126653, "grad_norm": 0.3468928933143616, "learning_rate": 1.998837107005319e-05, "loss": 0.6432, "step": 1099 }, { "epoch": 0.033791048444075815, "grad_norm": 0.3473166227340698, "learning_rate": 1.9988347758437917e-05, "loss": 0.6069, "step": 1100 }, { "epoch": 0.033821767579024976, "grad_norm": 0.33215394616127014, "learning_rate": 1.998832442349416e-05, "loss": 0.5644, "step": 1101 }, { "epoch": 0.03385248671397414, "grad_norm": 0.33224251866340637, "learning_rate": 1.9988301065221973e-05, "loss": 0.6538, "step": 1102 }, { "epoch": 0.03388320584892329, "grad_norm": 0.42301124334335327, "learning_rate": 1.9988277683621413e-05, "loss": 0.6977, "step": 1103 }, { "epoch": 0.033913924983872454, "grad_norm": 0.3450161814689636, "learning_rate": 1.9988254278692533e-05, "loss": 0.6426, "step": 1104 }, { "epoch": 0.033944644118821615, "grad_norm": 0.33796852827072144, "learning_rate": 1.9988230850435385e-05, "loss": 0.6752, "step": 1105 }, { "epoch": 0.03397536325377078, "grad_norm": 0.36280930042266846, "learning_rate": 1.998820739885003e-05, "loss": 0.7304, "step": 1106 }, { "epoch": 0.03400608238871993, "grad_norm": 0.3155999481678009, "learning_rate": 1.9988183923936515e-05, "loss": 0.645, "step": 1107 }, { "epoch": 0.03403680152366909, "grad_norm": 0.39948561787605286, "learning_rate": 1.99881604256949e-05, "loss": 0.6919, "step": 1108 }, { "epoch": 0.034067520658618254, "grad_norm": 0.3763177692890167, "learning_rate": 1.998813690412524e-05, "loss": 0.5657, "step": 1109 }, { "epoch": 0.034098239793567416, "grad_norm": 0.3410244286060333, "learning_rate": 1.998811335922759e-05, "loss": 0.6499, "step": 1110 }, { "epoch": 0.03412895892851657, "grad_norm": 0.4885820746421814, "learning_rate": 1.9988089791002e-05, "loss": 0.619, "step": 1111 }, { "epoch": 0.03415967806346573, "grad_norm": 0.36203864216804504, "learning_rate": 1.998806619944853e-05, "loss": 0.6533, "step": 1112 }, { "epoch": 0.03419039719841489, "grad_norm": 0.337883323431015, "learning_rate": 1.9988042584567236e-05, "loss": 0.5459, "step": 1113 }, { "epoch": 0.034221116333364054, "grad_norm": 0.3576014041900635, "learning_rate": 1.998801894635817e-05, "loss": 0.6642, "step": 1114 }, { "epoch": 0.03425183546831321, "grad_norm": 0.35070136189460754, "learning_rate": 1.9987995284821387e-05, "loss": 0.6656, "step": 1115 }, { "epoch": 0.03428255460326237, "grad_norm": 0.36463361978530884, "learning_rate": 1.9987971599956943e-05, "loss": 0.6276, "step": 1116 }, { "epoch": 0.03431327373821153, "grad_norm": 0.33819615840911865, "learning_rate": 1.9987947891764897e-05, "loss": 0.6317, "step": 1117 }, { "epoch": 0.03434399287316069, "grad_norm": 0.3403265178203583, "learning_rate": 1.99879241602453e-05, "loss": 0.6562, "step": 1118 }, { "epoch": 0.034374712008109855, "grad_norm": 0.35242852568626404, "learning_rate": 1.9987900405398207e-05, "loss": 0.679, "step": 1119 }, { "epoch": 0.03440543114305901, "grad_norm": 0.5605168342590332, "learning_rate": 1.9987876627223676e-05, "loss": 0.587, "step": 1120 }, { "epoch": 0.03443615027800817, "grad_norm": 0.6877058744430542, "learning_rate": 1.998785282572176e-05, "loss": 0.6493, "step": 1121 }, { "epoch": 0.03446686941295733, "grad_norm": 0.33031851053237915, "learning_rate": 1.9987829000892517e-05, "loss": 0.5746, "step": 1122 }, { "epoch": 0.034497588547906494, "grad_norm": 0.351340651512146, "learning_rate": 1.9987805152736e-05, "loss": 0.6415, "step": 1123 }, { "epoch": 0.03452830768285565, "grad_norm": 0.3523685038089752, "learning_rate": 1.9987781281252272e-05, "loss": 0.6289, "step": 1124 }, { "epoch": 0.03455902681780481, "grad_norm": 0.35850510001182556, "learning_rate": 1.998775738644138e-05, "loss": 0.6245, "step": 1125 }, { "epoch": 0.03458974595275397, "grad_norm": 0.3614632785320282, "learning_rate": 1.9987733468303383e-05, "loss": 0.6177, "step": 1126 }, { "epoch": 0.03462046508770313, "grad_norm": 0.5139718055725098, "learning_rate": 1.9987709526838338e-05, "loss": 0.6196, "step": 1127 }, { "epoch": 0.03465118422265229, "grad_norm": 0.3231205940246582, "learning_rate": 1.9987685562046297e-05, "loss": 0.5182, "step": 1128 }, { "epoch": 0.03468190335760145, "grad_norm": 0.3981630802154541, "learning_rate": 1.998766157392732e-05, "loss": 0.6017, "step": 1129 }, { "epoch": 0.03471262249255061, "grad_norm": 0.3593992590904236, "learning_rate": 1.9987637562481464e-05, "loss": 0.6839, "step": 1130 }, { "epoch": 0.03474334162749977, "grad_norm": 0.34997332096099854, "learning_rate": 1.998761352770878e-05, "loss": 0.6096, "step": 1131 }, { "epoch": 0.03477406076244893, "grad_norm": 0.3157402276992798, "learning_rate": 1.998758946960933e-05, "loss": 0.6551, "step": 1132 }, { "epoch": 0.03480477989739809, "grad_norm": 0.33537814021110535, "learning_rate": 1.9987565388183164e-05, "loss": 0.5809, "step": 1133 }, { "epoch": 0.03483549903234725, "grad_norm": 0.38313373923301697, "learning_rate": 1.9987541283430347e-05, "loss": 0.6214, "step": 1134 }, { "epoch": 0.03486621816729641, "grad_norm": 0.36110833287239075, "learning_rate": 1.9987517155350923e-05, "loss": 0.7066, "step": 1135 }, { "epoch": 0.03489693730224557, "grad_norm": 0.3341173827648163, "learning_rate": 1.9987493003944957e-05, "loss": 0.6129, "step": 1136 }, { "epoch": 0.034927656437194726, "grad_norm": 0.5185731649398804, "learning_rate": 1.9987468829212506e-05, "loss": 0.5703, "step": 1137 }, { "epoch": 0.03495837557214389, "grad_norm": 0.48208731412887573, "learning_rate": 1.9987444631153626e-05, "loss": 0.6604, "step": 1138 }, { "epoch": 0.03498909470709305, "grad_norm": 0.3346957564353943, "learning_rate": 1.998742040976837e-05, "loss": 0.6467, "step": 1139 }, { "epoch": 0.03501981384204221, "grad_norm": 0.40110841393470764, "learning_rate": 1.998739616505679e-05, "loss": 0.6154, "step": 1140 }, { "epoch": 0.035050532976991365, "grad_norm": 0.3996373116970062, "learning_rate": 1.9987371897018955e-05, "loss": 0.6254, "step": 1141 }, { "epoch": 0.03508125211194053, "grad_norm": 0.3233530819416046, "learning_rate": 1.9987347605654913e-05, "loss": 0.663, "step": 1142 }, { "epoch": 0.03511197124688969, "grad_norm": 0.3848903179168701, "learning_rate": 1.9987323290964725e-05, "loss": 0.5948, "step": 1143 }, { "epoch": 0.03514269038183885, "grad_norm": 0.3306058645248413, "learning_rate": 1.9987298952948446e-05, "loss": 0.6513, "step": 1144 }, { "epoch": 0.035173409516788004, "grad_norm": 0.37019258737564087, "learning_rate": 1.998727459160613e-05, "loss": 0.5442, "step": 1145 }, { "epoch": 0.035204128651737165, "grad_norm": 0.3646225333213806, "learning_rate": 1.9987250206937837e-05, "loss": 0.6951, "step": 1146 }, { "epoch": 0.03523484778668633, "grad_norm": 0.34737294912338257, "learning_rate": 1.9987225798943626e-05, "loss": 0.6929, "step": 1147 }, { "epoch": 0.03526556692163549, "grad_norm": 0.3375813364982605, "learning_rate": 1.998720136762355e-05, "loss": 0.6828, "step": 1148 }, { "epoch": 0.03529628605658465, "grad_norm": 0.32294291257858276, "learning_rate": 1.998717691297767e-05, "loss": 0.6447, "step": 1149 }, { "epoch": 0.035327005191533804, "grad_norm": 0.3601262867450714, "learning_rate": 1.998715243500604e-05, "loss": 0.6343, "step": 1150 }, { "epoch": 0.035357724326482966, "grad_norm": 0.3922213315963745, "learning_rate": 1.9987127933708715e-05, "loss": 0.6946, "step": 1151 }, { "epoch": 0.03538844346143213, "grad_norm": 0.34392884373664856, "learning_rate": 1.9987103409085758e-05, "loss": 0.6455, "step": 1152 }, { "epoch": 0.03541916259638129, "grad_norm": 0.32858920097351074, "learning_rate": 1.9987078861137225e-05, "loss": 0.6681, "step": 1153 }, { "epoch": 0.03544988173133044, "grad_norm": 0.33385196328163147, "learning_rate": 1.998705428986317e-05, "loss": 0.559, "step": 1154 }, { "epoch": 0.035480600866279605, "grad_norm": 0.3941601514816284, "learning_rate": 1.998702969526365e-05, "loss": 0.6134, "step": 1155 }, { "epoch": 0.035511320001228766, "grad_norm": 1.1655091047286987, "learning_rate": 1.9987005077338727e-05, "loss": 0.6687, "step": 1156 }, { "epoch": 0.03554203913617793, "grad_norm": 0.329611599445343, "learning_rate": 1.9986980436088457e-05, "loss": 0.629, "step": 1157 }, { "epoch": 0.03557275827112708, "grad_norm": 0.3688400685787201, "learning_rate": 1.9986955771512896e-05, "loss": 0.6032, "step": 1158 }, { "epoch": 0.035603477406076244, "grad_norm": 0.3553326427936554, "learning_rate": 1.99869310836121e-05, "loss": 0.7169, "step": 1159 }, { "epoch": 0.035634196541025405, "grad_norm": 0.3208097815513611, "learning_rate": 1.998690637238613e-05, "loss": 0.6528, "step": 1160 }, { "epoch": 0.035664915675974566, "grad_norm": 0.3430224359035492, "learning_rate": 1.9986881637835045e-05, "loss": 0.5713, "step": 1161 }, { "epoch": 0.03569563481092372, "grad_norm": 0.3426739573478699, "learning_rate": 1.9986856879958896e-05, "loss": 0.6446, "step": 1162 }, { "epoch": 0.03572635394587288, "grad_norm": 0.33965155482292175, "learning_rate": 1.998683209875775e-05, "loss": 0.6827, "step": 1163 }, { "epoch": 0.035757073080822044, "grad_norm": 1.1401686668395996, "learning_rate": 1.998680729423166e-05, "loss": 0.6602, "step": 1164 }, { "epoch": 0.035787792215771205, "grad_norm": 0.45226195454597473, "learning_rate": 1.998678246638068e-05, "loss": 0.5895, "step": 1165 }, { "epoch": 0.03581851135072037, "grad_norm": 0.33200159668922424, "learning_rate": 1.9986757615204877e-05, "loss": 0.6269, "step": 1166 }, { "epoch": 0.03584923048566952, "grad_norm": 0.39806219935417175, "learning_rate": 1.9986732740704302e-05, "loss": 0.5095, "step": 1167 }, { "epoch": 0.03587994962061868, "grad_norm": 0.32058918476104736, "learning_rate": 1.9986707842879015e-05, "loss": 0.6019, "step": 1168 }, { "epoch": 0.035910668755567844, "grad_norm": 0.35292816162109375, "learning_rate": 1.9986682921729073e-05, "loss": 0.6797, "step": 1169 }, { "epoch": 0.035941387890517006, "grad_norm": 0.33664676547050476, "learning_rate": 1.9986657977254538e-05, "loss": 0.6342, "step": 1170 }, { "epoch": 0.03597210702546616, "grad_norm": 0.3467969298362732, "learning_rate": 1.9986633009455466e-05, "loss": 0.5918, "step": 1171 }, { "epoch": 0.03600282616041532, "grad_norm": 0.3353050947189331, "learning_rate": 1.9986608018331912e-05, "loss": 0.647, "step": 1172 }, { "epoch": 0.03603354529536448, "grad_norm": 0.365765780210495, "learning_rate": 1.998658300388394e-05, "loss": 0.7645, "step": 1173 }, { "epoch": 0.036064264430313644, "grad_norm": 0.3525621294975281, "learning_rate": 1.9986557966111605e-05, "loss": 0.6484, "step": 1174 }, { "epoch": 0.0360949835652628, "grad_norm": 0.35533735156059265, "learning_rate": 1.9986532905014965e-05, "loss": 0.6002, "step": 1175 }, { "epoch": 0.03612570270021196, "grad_norm": 0.34509676694869995, "learning_rate": 1.9986507820594083e-05, "loss": 0.5839, "step": 1176 }, { "epoch": 0.03615642183516112, "grad_norm": 0.3451254367828369, "learning_rate": 1.9986482712849013e-05, "loss": 0.6817, "step": 1177 }, { "epoch": 0.03618714097011028, "grad_norm": 0.3528457283973694, "learning_rate": 1.9986457581779818e-05, "loss": 0.6283, "step": 1178 }, { "epoch": 0.036217860105059445, "grad_norm": 0.34501558542251587, "learning_rate": 1.998643242738655e-05, "loss": 0.6239, "step": 1179 }, { "epoch": 0.0362485792400086, "grad_norm": 0.3607727289199829, "learning_rate": 1.998640724966927e-05, "loss": 0.681, "step": 1180 }, { "epoch": 0.03627929837495776, "grad_norm": 0.36807841062545776, "learning_rate": 1.9986382048628043e-05, "loss": 0.6561, "step": 1181 }, { "epoch": 0.03631001750990692, "grad_norm": 0.30417513847351074, "learning_rate": 1.998635682426292e-05, "loss": 0.5269, "step": 1182 }, { "epoch": 0.036340736644856084, "grad_norm": 0.3167667090892792, "learning_rate": 1.9986331576573965e-05, "loss": 0.6324, "step": 1183 }, { "epoch": 0.03637145577980524, "grad_norm": 0.3423810601234436, "learning_rate": 1.9986306305561235e-05, "loss": 0.5935, "step": 1184 }, { "epoch": 0.0364021749147544, "grad_norm": 0.36603233218193054, "learning_rate": 1.998628101122479e-05, "loss": 0.6519, "step": 1185 }, { "epoch": 0.03643289404970356, "grad_norm": 0.3313036262989044, "learning_rate": 1.9986255693564688e-05, "loss": 0.6593, "step": 1186 }, { "epoch": 0.03646361318465272, "grad_norm": 0.39743563532829285, "learning_rate": 1.9986230352580985e-05, "loss": 0.6745, "step": 1187 }, { "epoch": 0.03649433231960188, "grad_norm": 0.34477776288986206, "learning_rate": 1.998620498827375e-05, "loss": 0.653, "step": 1188 }, { "epoch": 0.03652505145455104, "grad_norm": 0.40885576605796814, "learning_rate": 1.998617960064303e-05, "loss": 0.7001, "step": 1189 }, { "epoch": 0.0365557705895002, "grad_norm": 0.36339232325553894, "learning_rate": 1.9986154189688894e-05, "loss": 0.5629, "step": 1190 }, { "epoch": 0.03658648972444936, "grad_norm": 0.3135628402233124, "learning_rate": 1.9986128755411397e-05, "loss": 0.6855, "step": 1191 }, { "epoch": 0.036617208859398516, "grad_norm": 0.3271785080432892, "learning_rate": 1.9986103297810597e-05, "loss": 0.5927, "step": 1192 }, { "epoch": 0.03664792799434768, "grad_norm": 0.3556221127510071, "learning_rate": 1.9986077816886555e-05, "loss": 0.6973, "step": 1193 }, { "epoch": 0.03667864712929684, "grad_norm": 0.3333517611026764, "learning_rate": 1.998605231263933e-05, "loss": 0.633, "step": 1194 }, { "epoch": 0.036709366264246, "grad_norm": 0.38100293278694153, "learning_rate": 1.9986026785068986e-05, "loss": 0.6813, "step": 1195 }, { "epoch": 0.03674008539919516, "grad_norm": 0.3444337248802185, "learning_rate": 1.9986001234175578e-05, "loss": 0.6261, "step": 1196 }, { "epoch": 0.036770804534144316, "grad_norm": 0.362163245677948, "learning_rate": 1.9985975659959165e-05, "loss": 0.5683, "step": 1197 }, { "epoch": 0.03680152366909348, "grad_norm": 0.337381511926651, "learning_rate": 1.998595006241981e-05, "loss": 0.6431, "step": 1198 }, { "epoch": 0.03683224280404264, "grad_norm": 0.36392587423324585, "learning_rate": 1.998592444155757e-05, "loss": 0.6649, "step": 1199 }, { "epoch": 0.0368629619389918, "grad_norm": 0.393775999546051, "learning_rate": 1.9985898797372506e-05, "loss": 0.668, "step": 1200 }, { "epoch": 0.036893681073940955, "grad_norm": 0.35453563928604126, "learning_rate": 1.998587312986468e-05, "loss": 0.6413, "step": 1201 }, { "epoch": 0.03692440020889012, "grad_norm": 0.41804438829421997, "learning_rate": 1.9985847439034147e-05, "loss": 0.665, "step": 1202 }, { "epoch": 0.03695511934383928, "grad_norm": 0.34605711698532104, "learning_rate": 1.9985821724880972e-05, "loss": 0.5767, "step": 1203 }, { "epoch": 0.03698583847878844, "grad_norm": 0.4035540819168091, "learning_rate": 1.9985795987405213e-05, "loss": 0.6547, "step": 1204 }, { "epoch": 0.037016557613737594, "grad_norm": 0.36091601848602295, "learning_rate": 1.998577022660693e-05, "loss": 0.599, "step": 1205 }, { "epoch": 0.037047276748686755, "grad_norm": 0.33805254101753235, "learning_rate": 1.998574444248618e-05, "loss": 0.619, "step": 1206 }, { "epoch": 0.03707799588363592, "grad_norm": 0.3593480587005615, "learning_rate": 1.998571863504303e-05, "loss": 0.6414, "step": 1207 }, { "epoch": 0.03710871501858508, "grad_norm": 0.35659322142601013, "learning_rate": 1.9985692804277535e-05, "loss": 0.6121, "step": 1208 }, { "epoch": 0.03713943415353424, "grad_norm": 0.34227725863456726, "learning_rate": 1.998566695018976e-05, "loss": 0.7017, "step": 1209 }, { "epoch": 0.037170153288483394, "grad_norm": 0.34966111183166504, "learning_rate": 1.998564107277976e-05, "loss": 0.576, "step": 1210 }, { "epoch": 0.037200872423432556, "grad_norm": 0.3284471333026886, "learning_rate": 1.99856151720476e-05, "loss": 0.6127, "step": 1211 }, { "epoch": 0.03723159155838172, "grad_norm": 0.38893088698387146, "learning_rate": 1.9985589247993334e-05, "loss": 0.647, "step": 1212 }, { "epoch": 0.03726231069333088, "grad_norm": 0.3554762601852417, "learning_rate": 1.9985563300617034e-05, "loss": 0.619, "step": 1213 }, { "epoch": 0.03729302982828003, "grad_norm": 0.3509194850921631, "learning_rate": 1.998553732991875e-05, "loss": 0.6253, "step": 1214 }, { "epoch": 0.037323748963229195, "grad_norm": 0.3803483843803406, "learning_rate": 1.998551133589854e-05, "loss": 0.5794, "step": 1215 }, { "epoch": 0.037354468098178356, "grad_norm": 0.3450569212436676, "learning_rate": 1.9985485318556477e-05, "loss": 0.6154, "step": 1216 }, { "epoch": 0.03738518723312752, "grad_norm": 0.34689244627952576, "learning_rate": 1.9985459277892616e-05, "loss": 0.654, "step": 1217 }, { "epoch": 0.03741590636807667, "grad_norm": 1.2905505895614624, "learning_rate": 1.9985433213907014e-05, "loss": 0.6252, "step": 1218 }, { "epoch": 0.037446625503025834, "grad_norm": 0.3544299602508545, "learning_rate": 1.998540712659974e-05, "loss": 0.5893, "step": 1219 }, { "epoch": 0.037477344637974995, "grad_norm": 0.3478759527206421, "learning_rate": 1.9985381015970845e-05, "loss": 0.6743, "step": 1220 }, { "epoch": 0.037508063772924156, "grad_norm": 0.3556002676486969, "learning_rate": 1.99853548820204e-05, "loss": 0.6095, "step": 1221 }, { "epoch": 0.03753878290787331, "grad_norm": 0.380756139755249, "learning_rate": 1.9985328724748457e-05, "loss": 0.6336, "step": 1222 }, { "epoch": 0.03756950204282247, "grad_norm": 1.2980037927627563, "learning_rate": 1.998530254415509e-05, "loss": 0.6716, "step": 1223 }, { "epoch": 0.037600221177771634, "grad_norm": 0.35682734847068787, "learning_rate": 1.9985276340240342e-05, "loss": 0.6459, "step": 1224 }, { "epoch": 0.037630940312720795, "grad_norm": 0.35538023710250854, "learning_rate": 1.9985250113004285e-05, "loss": 0.5214, "step": 1225 }, { "epoch": 0.03766165944766996, "grad_norm": 0.3652734160423279, "learning_rate": 1.9985223862446984e-05, "loss": 0.6394, "step": 1226 }, { "epoch": 0.03769237858261911, "grad_norm": 0.33052316308021545, "learning_rate": 1.9985197588568492e-05, "loss": 0.5973, "step": 1227 }, { "epoch": 0.03772309771756827, "grad_norm": 0.3559899628162384, "learning_rate": 1.9985171291368875e-05, "loss": 0.6623, "step": 1228 }, { "epoch": 0.037753816852517434, "grad_norm": 0.31526419520378113, "learning_rate": 1.998514497084819e-05, "loss": 0.5926, "step": 1229 }, { "epoch": 0.037784535987466596, "grad_norm": 0.2998439371585846, "learning_rate": 1.9985118627006505e-05, "loss": 0.593, "step": 1230 }, { "epoch": 0.03781525512241575, "grad_norm": 0.3625462055206299, "learning_rate": 1.9985092259843878e-05, "loss": 0.669, "step": 1231 }, { "epoch": 0.03784597425736491, "grad_norm": 0.34353965520858765, "learning_rate": 1.998506586936037e-05, "loss": 0.6884, "step": 1232 }, { "epoch": 0.03787669339231407, "grad_norm": 0.34745660424232483, "learning_rate": 1.998503945555604e-05, "loss": 0.577, "step": 1233 }, { "epoch": 0.037907412527263235, "grad_norm": 0.3209642767906189, "learning_rate": 1.9985013018430957e-05, "loss": 0.6665, "step": 1234 }, { "epoch": 0.03793813166221239, "grad_norm": 0.42744413018226624, "learning_rate": 1.998498655798518e-05, "loss": 0.6684, "step": 1235 }, { "epoch": 0.03796885079716155, "grad_norm": 0.3818417191505432, "learning_rate": 1.9984960074218768e-05, "loss": 0.6862, "step": 1236 }, { "epoch": 0.03799956993211071, "grad_norm": 0.3814997375011444, "learning_rate": 1.998493356713178e-05, "loss": 0.666, "step": 1237 }, { "epoch": 0.03803028906705987, "grad_norm": 0.3493185341358185, "learning_rate": 1.9984907036724287e-05, "loss": 0.5934, "step": 1238 }, { "epoch": 0.038061008202009035, "grad_norm": 0.37946295738220215, "learning_rate": 1.9984880482996348e-05, "loss": 0.6088, "step": 1239 }, { "epoch": 0.03809172733695819, "grad_norm": 0.3961479663848877, "learning_rate": 1.998485390594802e-05, "loss": 0.5218, "step": 1240 }, { "epoch": 0.03812244647190735, "grad_norm": 0.39373499155044556, "learning_rate": 1.9984827305579366e-05, "loss": 0.7145, "step": 1241 }, { "epoch": 0.03815316560685651, "grad_norm": 0.9995068311691284, "learning_rate": 1.9984800681890456e-05, "loss": 0.5873, "step": 1242 }, { "epoch": 0.038183884741805674, "grad_norm": 2.5733673572540283, "learning_rate": 1.9984774034881343e-05, "loss": 0.5964, "step": 1243 }, { "epoch": 0.03821460387675483, "grad_norm": 0.39437639713287354, "learning_rate": 1.998474736455209e-05, "loss": 0.5413, "step": 1244 }, { "epoch": 0.03824532301170399, "grad_norm": 0.3916095793247223, "learning_rate": 1.998472067090277e-05, "loss": 0.6366, "step": 1245 }, { "epoch": 0.03827604214665315, "grad_norm": 0.3696109652519226, "learning_rate": 1.9984693953933434e-05, "loss": 0.6416, "step": 1246 }, { "epoch": 0.03830676128160231, "grad_norm": 0.46778205037117004, "learning_rate": 1.9984667213644147e-05, "loss": 0.6534, "step": 1247 }, { "epoch": 0.03833748041655147, "grad_norm": 0.3470768630504608, "learning_rate": 1.9984640450034972e-05, "loss": 0.6319, "step": 1248 }, { "epoch": 0.03836819955150063, "grad_norm": 0.33619794249534607, "learning_rate": 1.9984613663105974e-05, "loss": 0.6064, "step": 1249 }, { "epoch": 0.03839891868644979, "grad_norm": 0.3386102318763733, "learning_rate": 1.9984586852857214e-05, "loss": 0.6615, "step": 1250 }, { "epoch": 0.03842963782139895, "grad_norm": 0.3598247170448303, "learning_rate": 1.998456001928875e-05, "loss": 0.6221, "step": 1251 }, { "epoch": 0.038460356956348106, "grad_norm": 0.32594582438468933, "learning_rate": 1.9984533162400653e-05, "loss": 0.665, "step": 1252 }, { "epoch": 0.03849107609129727, "grad_norm": 0.5428548455238342, "learning_rate": 1.998450628219298e-05, "loss": 0.5848, "step": 1253 }, { "epoch": 0.03852179522624643, "grad_norm": 0.40160736441612244, "learning_rate": 1.9984479378665795e-05, "loss": 0.5992, "step": 1254 }, { "epoch": 0.03855251436119559, "grad_norm": 0.5563382506370544, "learning_rate": 1.998445245181916e-05, "loss": 0.7348, "step": 1255 }, { "epoch": 0.03858323349614475, "grad_norm": 0.3989725112915039, "learning_rate": 1.9984425501653142e-05, "loss": 0.6279, "step": 1256 }, { "epoch": 0.038613952631093906, "grad_norm": 0.3328264653682709, "learning_rate": 1.99843985281678e-05, "loss": 0.6831, "step": 1257 }, { "epoch": 0.03864467176604307, "grad_norm": 0.4014774262905121, "learning_rate": 1.9984371531363197e-05, "loss": 0.7586, "step": 1258 }, { "epoch": 0.03867539090099223, "grad_norm": 0.3358435034751892, "learning_rate": 1.9984344511239395e-05, "loss": 0.689, "step": 1259 }, { "epoch": 0.03870611003594139, "grad_norm": 0.43563520908355713, "learning_rate": 1.998431746779646e-05, "loss": 0.6152, "step": 1260 }, { "epoch": 0.038736829170890545, "grad_norm": 0.3358006775379181, "learning_rate": 1.9984290401034458e-05, "loss": 0.621, "step": 1261 }, { "epoch": 0.03876754830583971, "grad_norm": 0.3727252781391144, "learning_rate": 1.9984263310953445e-05, "loss": 0.7041, "step": 1262 }, { "epoch": 0.03879826744078887, "grad_norm": 0.4676095247268677, "learning_rate": 1.9984236197553487e-05, "loss": 0.6208, "step": 1263 }, { "epoch": 0.03882898657573803, "grad_norm": 0.43992066383361816, "learning_rate": 1.998420906083465e-05, "loss": 0.695, "step": 1264 }, { "epoch": 0.038859705710687184, "grad_norm": 0.3355921804904938, "learning_rate": 1.9984181900796997e-05, "loss": 0.6561, "step": 1265 }, { "epoch": 0.038890424845636345, "grad_norm": 0.42735740542411804, "learning_rate": 1.9984154717440588e-05, "loss": 0.6101, "step": 1266 }, { "epoch": 0.03892114398058551, "grad_norm": 0.34911462664604187, "learning_rate": 1.9984127510765486e-05, "loss": 0.5908, "step": 1267 }, { "epoch": 0.03895186311553467, "grad_norm": 0.3263993263244629, "learning_rate": 1.998410028077176e-05, "loss": 0.5888, "step": 1268 }, { "epoch": 0.03898258225048382, "grad_norm": 0.35734114050865173, "learning_rate": 1.998407302745947e-05, "loss": 0.6248, "step": 1269 }, { "epoch": 0.039013301385432984, "grad_norm": 0.3556295931339264, "learning_rate": 1.998404575082868e-05, "loss": 0.657, "step": 1270 }, { "epoch": 0.039044020520382146, "grad_norm": 0.3525039255619049, "learning_rate": 1.9984018450879456e-05, "loss": 0.5289, "step": 1271 }, { "epoch": 0.03907473965533131, "grad_norm": 0.3490462303161621, "learning_rate": 1.9983991127611855e-05, "loss": 0.62, "step": 1272 }, { "epoch": 0.03910545879028047, "grad_norm": 0.32000699639320374, "learning_rate": 1.998396378102595e-05, "loss": 0.5396, "step": 1273 }, { "epoch": 0.03913617792522962, "grad_norm": 0.36176151037216187, "learning_rate": 1.9983936411121795e-05, "loss": 0.6796, "step": 1274 }, { "epoch": 0.039166897060178785, "grad_norm": 0.5960634350776672, "learning_rate": 1.9983909017899464e-05, "loss": 0.5856, "step": 1275 }, { "epoch": 0.039197616195127946, "grad_norm": 0.34495842456817627, "learning_rate": 1.9983881601359015e-05, "loss": 0.6505, "step": 1276 }, { "epoch": 0.03922833533007711, "grad_norm": 0.33138707280158997, "learning_rate": 1.9983854161500514e-05, "loss": 0.5956, "step": 1277 }, { "epoch": 0.03925905446502626, "grad_norm": 0.35902804136276245, "learning_rate": 1.9983826698324022e-05, "loss": 0.6064, "step": 1278 }, { "epoch": 0.039289773599975424, "grad_norm": 0.3806832432746887, "learning_rate": 1.9983799211829605e-05, "loss": 0.642, "step": 1279 }, { "epoch": 0.039320492734924585, "grad_norm": 0.36414170265197754, "learning_rate": 1.9983771702017333e-05, "loss": 0.6582, "step": 1280 }, { "epoch": 0.039351211869873746, "grad_norm": 0.3456595540046692, "learning_rate": 1.998374416888726e-05, "loss": 0.6926, "step": 1281 }, { "epoch": 0.0393819310048229, "grad_norm": 0.331032931804657, "learning_rate": 1.9983716612439458e-05, "loss": 0.5856, "step": 1282 }, { "epoch": 0.03941265013977206, "grad_norm": 0.3527127504348755, "learning_rate": 1.9983689032673984e-05, "loss": 0.7143, "step": 1283 }, { "epoch": 0.039443369274721224, "grad_norm": 0.3437643051147461, "learning_rate": 1.9983661429590913e-05, "loss": 0.7129, "step": 1284 }, { "epoch": 0.039474088409670385, "grad_norm": 0.38078218698501587, "learning_rate": 1.99836338031903e-05, "loss": 0.6534, "step": 1285 }, { "epoch": 0.03950480754461955, "grad_norm": 0.44422438740730286, "learning_rate": 1.9983606153472212e-05, "loss": 0.8031, "step": 1286 }, { "epoch": 0.0395355266795687, "grad_norm": 0.37358132004737854, "learning_rate": 1.9983578480436718e-05, "loss": 0.6237, "step": 1287 }, { "epoch": 0.03956624581451786, "grad_norm": 0.34754785895347595, "learning_rate": 1.9983550784083877e-05, "loss": 0.6246, "step": 1288 }, { "epoch": 0.039596964949467024, "grad_norm": 0.37022921442985535, "learning_rate": 1.9983523064413758e-05, "loss": 0.6402, "step": 1289 }, { "epoch": 0.039627684084416186, "grad_norm": 0.8107430934906006, "learning_rate": 1.9983495321426424e-05, "loss": 0.6298, "step": 1290 }, { "epoch": 0.03965840321936534, "grad_norm": 0.41078561544418335, "learning_rate": 1.9983467555121937e-05, "loss": 0.6476, "step": 1291 }, { "epoch": 0.0396891223543145, "grad_norm": 0.4743632972240448, "learning_rate": 1.9983439765500364e-05, "loss": 0.6304, "step": 1292 }, { "epoch": 0.03971984148926366, "grad_norm": 0.346250057220459, "learning_rate": 1.998341195256177e-05, "loss": 0.6441, "step": 1293 }, { "epoch": 0.039750560624212825, "grad_norm": 0.37644070386886597, "learning_rate": 1.998338411630622e-05, "loss": 0.6204, "step": 1294 }, { "epoch": 0.03978127975916198, "grad_norm": 0.43439558148384094, "learning_rate": 1.998335625673378e-05, "loss": 0.657, "step": 1295 }, { "epoch": 0.03981199889411114, "grad_norm": 0.3847845196723938, "learning_rate": 1.9983328373844516e-05, "loss": 0.5702, "step": 1296 }, { "epoch": 0.0398427180290603, "grad_norm": 0.3555111885070801, "learning_rate": 1.998330046763849e-05, "loss": 0.575, "step": 1297 }, { "epoch": 0.03987343716400946, "grad_norm": 0.35155341029167175, "learning_rate": 1.9983272538115768e-05, "loss": 0.6301, "step": 1298 }, { "epoch": 0.03990415629895862, "grad_norm": 0.4040221869945526, "learning_rate": 1.9983244585276415e-05, "loss": 0.6567, "step": 1299 }, { "epoch": 0.03993487543390778, "grad_norm": 0.33256182074546814, "learning_rate": 1.99832166091205e-05, "loss": 0.5542, "step": 1300 }, { "epoch": 0.03996559456885694, "grad_norm": 0.4011290967464447, "learning_rate": 1.9983188609648084e-05, "loss": 0.657, "step": 1301 }, { "epoch": 0.0399963137038061, "grad_norm": 0.3467792272567749, "learning_rate": 1.998316058685923e-05, "loss": 0.6367, "step": 1302 }, { "epoch": 0.040027032838755264, "grad_norm": 0.3791506886482239, "learning_rate": 1.9983132540754012e-05, "loss": 0.6649, "step": 1303 }, { "epoch": 0.04005775197370442, "grad_norm": 0.3354448974132538, "learning_rate": 1.998310447133249e-05, "loss": 0.6354, "step": 1304 }, { "epoch": 0.04008847110865358, "grad_norm": 0.3629630208015442, "learning_rate": 1.998307637859473e-05, "loss": 0.685, "step": 1305 }, { "epoch": 0.04011919024360274, "grad_norm": 0.3527342975139618, "learning_rate": 1.9983048262540796e-05, "loss": 0.6809, "step": 1306 }, { "epoch": 0.0401499093785519, "grad_norm": 0.3353877365589142, "learning_rate": 1.9983020123170757e-05, "loss": 0.6398, "step": 1307 }, { "epoch": 0.04018062851350106, "grad_norm": 0.43226921558380127, "learning_rate": 1.998299196048468e-05, "loss": 0.5849, "step": 1308 }, { "epoch": 0.04021134764845022, "grad_norm": 0.39846912026405334, "learning_rate": 1.9982963774482624e-05, "loss": 0.5311, "step": 1309 }, { "epoch": 0.04024206678339938, "grad_norm": 0.4026656746864319, "learning_rate": 1.9982935565164662e-05, "loss": 0.6953, "step": 1310 }, { "epoch": 0.04027278591834854, "grad_norm": 0.46269744634628296, "learning_rate": 1.9982907332530855e-05, "loss": 0.7117, "step": 1311 }, { "epoch": 0.040303505053297696, "grad_norm": 0.333202987909317, "learning_rate": 1.998287907658127e-05, "loss": 0.6078, "step": 1312 }, { "epoch": 0.04033422418824686, "grad_norm": 0.8267314434051514, "learning_rate": 1.9982850797315977e-05, "loss": 0.5873, "step": 1313 }, { "epoch": 0.04036494332319602, "grad_norm": 0.32276901602745056, "learning_rate": 1.9982822494735036e-05, "loss": 0.6274, "step": 1314 }, { "epoch": 0.04039566245814518, "grad_norm": 0.4270782768726349, "learning_rate": 1.9982794168838513e-05, "loss": 0.6155, "step": 1315 }, { "epoch": 0.04042638159309434, "grad_norm": 0.36716511845588684, "learning_rate": 1.998276581962648e-05, "loss": 0.6267, "step": 1316 }, { "epoch": 0.040457100728043496, "grad_norm": 0.3697338402271271, "learning_rate": 1.9982737447099005e-05, "loss": 0.6471, "step": 1317 }, { "epoch": 0.04048781986299266, "grad_norm": 0.3661898076534271, "learning_rate": 1.998270905125614e-05, "loss": 0.6449, "step": 1318 }, { "epoch": 0.04051853899794182, "grad_norm": 0.32294878363609314, "learning_rate": 1.9982680632097967e-05, "loss": 0.6529, "step": 1319 }, { "epoch": 0.04054925813289098, "grad_norm": 0.34810781478881836, "learning_rate": 1.9982652189624544e-05, "loss": 0.6789, "step": 1320 }, { "epoch": 0.040579977267840135, "grad_norm": 0.32672739028930664, "learning_rate": 1.9982623723835942e-05, "loss": 0.6444, "step": 1321 }, { "epoch": 0.0406106964027893, "grad_norm": 0.36555731296539307, "learning_rate": 1.9982595234732224e-05, "loss": 0.659, "step": 1322 }, { "epoch": 0.04064141553773846, "grad_norm": 0.3887861371040344, "learning_rate": 1.998256672231346e-05, "loss": 0.5944, "step": 1323 }, { "epoch": 0.04067213467268762, "grad_norm": 0.3768608570098877, "learning_rate": 1.998253818657971e-05, "loss": 0.6287, "step": 1324 }, { "epoch": 0.040702853807636774, "grad_norm": 0.3339996039867401, "learning_rate": 1.9982509627531046e-05, "loss": 0.582, "step": 1325 }, { "epoch": 0.040733572942585936, "grad_norm": 0.33874088525772095, "learning_rate": 1.9982481045167536e-05, "loss": 0.6492, "step": 1326 }, { "epoch": 0.0407642920775351, "grad_norm": 0.43983194231987, "learning_rate": 1.998245243948924e-05, "loss": 0.5989, "step": 1327 }, { "epoch": 0.04079501121248426, "grad_norm": 0.3397119343280792, "learning_rate": 1.9982423810496235e-05, "loss": 0.591, "step": 1328 }, { "epoch": 0.04082573034743341, "grad_norm": 0.3484622538089752, "learning_rate": 1.9982395158188577e-05, "loss": 0.6904, "step": 1329 }, { "epoch": 0.040856449482382574, "grad_norm": 0.3267402648925781, "learning_rate": 1.998236648256634e-05, "loss": 0.6489, "step": 1330 }, { "epoch": 0.040887168617331736, "grad_norm": 0.36569881439208984, "learning_rate": 1.9982337783629588e-05, "loss": 0.6858, "step": 1331 }, { "epoch": 0.0409178877522809, "grad_norm": 0.33135324716567993, "learning_rate": 1.998230906137839e-05, "loss": 0.6024, "step": 1332 }, { "epoch": 0.04094860688723006, "grad_norm": 0.3579927384853363, "learning_rate": 1.998228031581281e-05, "loss": 0.6235, "step": 1333 }, { "epoch": 0.04097932602217921, "grad_norm": 0.554391622543335, "learning_rate": 1.998225154693292e-05, "loss": 0.7404, "step": 1334 }, { "epoch": 0.041010045157128375, "grad_norm": 0.34545570611953735, "learning_rate": 1.9982222754738784e-05, "loss": 0.7289, "step": 1335 }, { "epoch": 0.041040764292077536, "grad_norm": 0.35289642214775085, "learning_rate": 1.998219393923047e-05, "loss": 0.6505, "step": 1336 }, { "epoch": 0.0410714834270267, "grad_norm": 0.35704490542411804, "learning_rate": 1.9982165100408042e-05, "loss": 0.6668, "step": 1337 }, { "epoch": 0.04110220256197585, "grad_norm": 0.3648998439311981, "learning_rate": 1.998213623827157e-05, "loss": 0.5093, "step": 1338 }, { "epoch": 0.041132921696925014, "grad_norm": 0.5972452163696289, "learning_rate": 1.9982107352821124e-05, "loss": 0.6392, "step": 1339 }, { "epoch": 0.041163640831874175, "grad_norm": 0.34235504269599915, "learning_rate": 1.9982078444056768e-05, "loss": 0.6845, "step": 1340 }, { "epoch": 0.041194359966823337, "grad_norm": 0.40513327717781067, "learning_rate": 1.9982049511978575e-05, "loss": 0.6188, "step": 1341 }, { "epoch": 0.04122507910177249, "grad_norm": 0.3554416298866272, "learning_rate": 1.9982020556586604e-05, "loss": 0.5828, "step": 1342 }, { "epoch": 0.04125579823672165, "grad_norm": 0.35149258375167847, "learning_rate": 1.9981991577880925e-05, "loss": 0.631, "step": 1343 }, { "epoch": 0.041286517371670814, "grad_norm": 0.31266769766807556, "learning_rate": 1.998196257586161e-05, "loss": 0.7006, "step": 1344 }, { "epoch": 0.041317236506619975, "grad_norm": 0.33909475803375244, "learning_rate": 1.9981933550528728e-05, "loss": 0.6174, "step": 1345 }, { "epoch": 0.04134795564156914, "grad_norm": 0.36623668670654297, "learning_rate": 1.9981904501882336e-05, "loss": 0.6313, "step": 1346 }, { "epoch": 0.04137867477651829, "grad_norm": 0.3229750394821167, "learning_rate": 1.9981875429922514e-05, "loss": 0.582, "step": 1347 }, { "epoch": 0.04140939391146745, "grad_norm": 0.3288065791130066, "learning_rate": 1.9981846334649322e-05, "loss": 0.6085, "step": 1348 }, { "epoch": 0.041440113046416614, "grad_norm": 0.3143009841442108, "learning_rate": 1.998181721606283e-05, "loss": 0.5698, "step": 1349 }, { "epoch": 0.041470832181365776, "grad_norm": 0.3113131523132324, "learning_rate": 1.998178807416311e-05, "loss": 0.6169, "step": 1350 }, { "epoch": 0.04150155131631493, "grad_norm": 0.3403845429420471, "learning_rate": 1.9981758908950226e-05, "loss": 0.6312, "step": 1351 }, { "epoch": 0.04153227045126409, "grad_norm": 0.37382861971855164, "learning_rate": 1.9981729720424247e-05, "loss": 0.7344, "step": 1352 }, { "epoch": 0.04156298958621325, "grad_norm": 0.4893076419830322, "learning_rate": 1.998170050858524e-05, "loss": 0.6182, "step": 1353 }, { "epoch": 0.041593708721162415, "grad_norm": 0.3977046310901642, "learning_rate": 1.9981671273433273e-05, "loss": 0.6471, "step": 1354 }, { "epoch": 0.04162442785611157, "grad_norm": 0.3733634948730469, "learning_rate": 1.9981642014968417e-05, "loss": 0.6244, "step": 1355 }, { "epoch": 0.04165514699106073, "grad_norm": 0.40803149342536926, "learning_rate": 1.998161273319074e-05, "loss": 0.5487, "step": 1356 }, { "epoch": 0.04168586612600989, "grad_norm": 0.340304434299469, "learning_rate": 1.998158342810031e-05, "loss": 0.643, "step": 1357 }, { "epoch": 0.04171658526095905, "grad_norm": 0.3310611844062805, "learning_rate": 1.9981554099697194e-05, "loss": 0.6541, "step": 1358 }, { "epoch": 0.04174730439590821, "grad_norm": 0.34450089931488037, "learning_rate": 1.998152474798146e-05, "loss": 0.6408, "step": 1359 }, { "epoch": 0.04177802353085737, "grad_norm": 0.33022892475128174, "learning_rate": 1.998149537295318e-05, "loss": 0.6216, "step": 1360 }, { "epoch": 0.04180874266580653, "grad_norm": 0.3132493793964386, "learning_rate": 1.998146597461242e-05, "loss": 0.509, "step": 1361 }, { "epoch": 0.04183946180075569, "grad_norm": 0.38062700629234314, "learning_rate": 1.9981436552959246e-05, "loss": 0.6868, "step": 1362 }, { "epoch": 0.041870180935704854, "grad_norm": 0.469441294670105, "learning_rate": 1.9981407107993733e-05, "loss": 0.6549, "step": 1363 }, { "epoch": 0.04190090007065401, "grad_norm": 0.3415747880935669, "learning_rate": 1.9981377639715943e-05, "loss": 0.65, "step": 1364 }, { "epoch": 0.04193161920560317, "grad_norm": 0.31847482919692993, "learning_rate": 1.9981348148125953e-05, "loss": 0.7101, "step": 1365 }, { "epoch": 0.04196233834055233, "grad_norm": 0.32223549485206604, "learning_rate": 1.9981318633223824e-05, "loss": 0.6363, "step": 1366 }, { "epoch": 0.04199305747550149, "grad_norm": 0.3406137526035309, "learning_rate": 1.998128909500963e-05, "loss": 0.6027, "step": 1367 }, { "epoch": 0.04202377661045065, "grad_norm": 0.38347071409225464, "learning_rate": 1.998125953348344e-05, "loss": 0.5966, "step": 1368 }, { "epoch": 0.04205449574539981, "grad_norm": 0.35691577196121216, "learning_rate": 1.9981229948645317e-05, "loss": 0.5651, "step": 1369 }, { "epoch": 0.04208521488034897, "grad_norm": 0.3855243921279907, "learning_rate": 1.998120034049534e-05, "loss": 0.6293, "step": 1370 }, { "epoch": 0.04211593401529813, "grad_norm": 0.34988829493522644, "learning_rate": 1.998117070903357e-05, "loss": 0.5896, "step": 1371 }, { "epoch": 0.042146653150247286, "grad_norm": 0.3378292918205261, "learning_rate": 1.9981141054260076e-05, "loss": 0.6267, "step": 1372 }, { "epoch": 0.04217737228519645, "grad_norm": 0.3438066840171814, "learning_rate": 1.9981111376174932e-05, "loss": 0.5228, "step": 1373 }, { "epoch": 0.04220809142014561, "grad_norm": 0.34191304445266724, "learning_rate": 1.9981081674778205e-05, "loss": 0.6482, "step": 1374 }, { "epoch": 0.04223881055509477, "grad_norm": 0.34952497482299805, "learning_rate": 1.9981051950069962e-05, "loss": 0.6408, "step": 1375 }, { "epoch": 0.042269529690043925, "grad_norm": 0.3141106367111206, "learning_rate": 1.998102220205028e-05, "loss": 0.6703, "step": 1376 }, { "epoch": 0.042300248824993086, "grad_norm": 0.39198148250579834, "learning_rate": 1.998099243071922e-05, "loss": 0.6121, "step": 1377 }, { "epoch": 0.04233096795994225, "grad_norm": 0.5472900867462158, "learning_rate": 1.998096263607686e-05, "loss": 0.6347, "step": 1378 }, { "epoch": 0.04236168709489141, "grad_norm": 0.3278624415397644, "learning_rate": 1.998093281812326e-05, "loss": 0.6486, "step": 1379 }, { "epoch": 0.04239240622984057, "grad_norm": 0.33763113617897034, "learning_rate": 1.9980902976858496e-05, "loss": 0.5879, "step": 1380 }, { "epoch": 0.042423125364789725, "grad_norm": 0.40945038199424744, "learning_rate": 1.9980873112282637e-05, "loss": 0.6476, "step": 1381 }, { "epoch": 0.04245384449973889, "grad_norm": 0.3597598373889923, "learning_rate": 1.9980843224395746e-05, "loss": 0.6091, "step": 1382 }, { "epoch": 0.04248456363468805, "grad_norm": 0.35325631499290466, "learning_rate": 1.9980813313197906e-05, "loss": 0.6006, "step": 1383 }, { "epoch": 0.04251528276963721, "grad_norm": 0.3527797758579254, "learning_rate": 1.9980783378689176e-05, "loss": 0.6606, "step": 1384 }, { "epoch": 0.042546001904586364, "grad_norm": 0.3115088641643524, "learning_rate": 1.998075342086963e-05, "loss": 0.5569, "step": 1385 }, { "epoch": 0.042576721039535526, "grad_norm": 0.3726767599582672, "learning_rate": 1.998072343973934e-05, "loss": 0.6297, "step": 1386 }, { "epoch": 0.04260744017448469, "grad_norm": 0.3506571054458618, "learning_rate": 1.9980693435298367e-05, "loss": 0.5663, "step": 1387 }, { "epoch": 0.04263815930943385, "grad_norm": 0.3337964415550232, "learning_rate": 1.998066340754679e-05, "loss": 0.6094, "step": 1388 }, { "epoch": 0.042668878444383, "grad_norm": 0.31702691316604614, "learning_rate": 1.998063335648468e-05, "loss": 0.5518, "step": 1389 }, { "epoch": 0.042699597579332164, "grad_norm": 0.41954272985458374, "learning_rate": 1.99806032821121e-05, "loss": 0.6333, "step": 1390 }, { "epoch": 0.042730316714281326, "grad_norm": 0.33430716395378113, "learning_rate": 1.9980573184429125e-05, "loss": 0.5633, "step": 1391 }, { "epoch": 0.04276103584923049, "grad_norm": 0.33189457654953003, "learning_rate": 1.9980543063435826e-05, "loss": 0.6754, "step": 1392 }, { "epoch": 0.04279175498417965, "grad_norm": 0.34805548191070557, "learning_rate": 1.9980512919132272e-05, "loss": 0.5565, "step": 1393 }, { "epoch": 0.0428224741191288, "grad_norm": 0.3961887061595917, "learning_rate": 1.9980482751518527e-05, "loss": 0.6858, "step": 1394 }, { "epoch": 0.042853193254077965, "grad_norm": 0.36680492758750916, "learning_rate": 1.9980452560594673e-05, "loss": 0.5789, "step": 1395 }, { "epoch": 0.042883912389027126, "grad_norm": 0.43212810158729553, "learning_rate": 1.9980422346360777e-05, "loss": 0.6794, "step": 1396 }, { "epoch": 0.04291463152397629, "grad_norm": 0.3258790671825409, "learning_rate": 1.9980392108816904e-05, "loss": 0.6235, "step": 1397 }, { "epoch": 0.04294535065892544, "grad_norm": 0.33876070380210876, "learning_rate": 1.9980361847963128e-05, "loss": 0.574, "step": 1398 }, { "epoch": 0.042976069793874604, "grad_norm": 0.3398304879665375, "learning_rate": 1.9980331563799523e-05, "loss": 0.6843, "step": 1399 }, { "epoch": 0.043006788928823765, "grad_norm": 0.37226569652557373, "learning_rate": 1.998030125632615e-05, "loss": 0.5872, "step": 1400 }, { "epoch": 0.04303750806377293, "grad_norm": 0.3657737374305725, "learning_rate": 1.9980270925543093e-05, "loss": 0.6204, "step": 1401 }, { "epoch": 0.04306822719872208, "grad_norm": 0.7144231200218201, "learning_rate": 1.9980240571450414e-05, "loss": 0.641, "step": 1402 }, { "epoch": 0.04309894633367124, "grad_norm": 0.3397647738456726, "learning_rate": 1.9980210194048184e-05, "loss": 0.6162, "step": 1403 }, { "epoch": 0.043129665468620404, "grad_norm": 0.3834957480430603, "learning_rate": 1.998017979333648e-05, "loss": 0.5438, "step": 1404 }, { "epoch": 0.043160384603569565, "grad_norm": 0.3419320583343506, "learning_rate": 1.9980149369315363e-05, "loss": 0.5891, "step": 1405 }, { "epoch": 0.04319110373851872, "grad_norm": 0.36903810501098633, "learning_rate": 1.9980118921984915e-05, "loss": 0.6397, "step": 1406 }, { "epoch": 0.04322182287346788, "grad_norm": 0.32629451155662537, "learning_rate": 1.99800884513452e-05, "loss": 0.639, "step": 1407 }, { "epoch": 0.04325254200841704, "grad_norm": 0.37720727920532227, "learning_rate": 1.9980057957396296e-05, "loss": 0.6481, "step": 1408 }, { "epoch": 0.043283261143366204, "grad_norm": 0.3799186050891876, "learning_rate": 1.9980027440138268e-05, "loss": 0.6832, "step": 1409 }, { "epoch": 0.043313980278315366, "grad_norm": 0.3101624548435211, "learning_rate": 1.9979996899571187e-05, "loss": 0.6249, "step": 1410 }, { "epoch": 0.04334469941326452, "grad_norm": 0.3461057245731354, "learning_rate": 1.9979966335695127e-05, "loss": 0.6777, "step": 1411 }, { "epoch": 0.04337541854821368, "grad_norm": 0.328911691904068, "learning_rate": 1.9979935748510158e-05, "loss": 0.7588, "step": 1412 }, { "epoch": 0.04340613768316284, "grad_norm": 0.34436020255088806, "learning_rate": 1.9979905138016353e-05, "loss": 0.6467, "step": 1413 }, { "epoch": 0.043436856818112005, "grad_norm": 0.36390817165374756, "learning_rate": 1.997987450421378e-05, "loss": 0.6275, "step": 1414 }, { "epoch": 0.04346757595306116, "grad_norm": 0.38903191685676575, "learning_rate": 1.9979843847102515e-05, "loss": 0.6571, "step": 1415 }, { "epoch": 0.04349829508801032, "grad_norm": 0.33003270626068115, "learning_rate": 1.997981316668263e-05, "loss": 0.6261, "step": 1416 }, { "epoch": 0.04352901422295948, "grad_norm": 0.31904131174087524, "learning_rate": 1.997978246295419e-05, "loss": 0.551, "step": 1417 }, { "epoch": 0.043559733357908643, "grad_norm": 0.4109552800655365, "learning_rate": 1.9979751735917275e-05, "loss": 0.6741, "step": 1418 }, { "epoch": 0.0435904524928578, "grad_norm": 0.6238108277320862, "learning_rate": 1.997972098557195e-05, "loss": 0.6126, "step": 1419 }, { "epoch": 0.04362117162780696, "grad_norm": 0.3267028331756592, "learning_rate": 1.9979690211918293e-05, "loss": 0.5529, "step": 1420 }, { "epoch": 0.04365189076275612, "grad_norm": 0.33773040771484375, "learning_rate": 1.9979659414956368e-05, "loss": 0.5857, "step": 1421 }, { "epoch": 0.04368260989770528, "grad_norm": 0.3527114987373352, "learning_rate": 1.9979628594686255e-05, "loss": 0.5598, "step": 1422 }, { "epoch": 0.043713329032654444, "grad_norm": 0.3142206072807312, "learning_rate": 1.997959775110802e-05, "loss": 0.6842, "step": 1423 }, { "epoch": 0.0437440481676036, "grad_norm": 0.3595605790615082, "learning_rate": 1.997956688422174e-05, "loss": 0.6865, "step": 1424 }, { "epoch": 0.04377476730255276, "grad_norm": 0.36609315872192383, "learning_rate": 1.9979535994027485e-05, "loss": 0.5621, "step": 1425 }, { "epoch": 0.04380548643750192, "grad_norm": 0.3660450577735901, "learning_rate": 1.9979505080525323e-05, "loss": 0.6267, "step": 1426 }, { "epoch": 0.04383620557245108, "grad_norm": 0.33784806728363037, "learning_rate": 1.997947414371533e-05, "loss": 0.6512, "step": 1427 }, { "epoch": 0.04386692470740024, "grad_norm": 0.32937437295913696, "learning_rate": 1.997944318359758e-05, "loss": 0.672, "step": 1428 }, { "epoch": 0.0438976438423494, "grad_norm": 0.3826597332954407, "learning_rate": 1.9979412200172147e-05, "loss": 0.5954, "step": 1429 }, { "epoch": 0.04392836297729856, "grad_norm": 0.3578501045703888, "learning_rate": 1.9979381193439095e-05, "loss": 0.7192, "step": 1430 }, { "epoch": 0.04395908211224772, "grad_norm": 2.2008795738220215, "learning_rate": 1.9979350163398504e-05, "loss": 0.6604, "step": 1431 }, { "epoch": 0.043989801247196876, "grad_norm": 0.32416272163391113, "learning_rate": 1.9979319110050443e-05, "loss": 0.6595, "step": 1432 }, { "epoch": 0.04402052038214604, "grad_norm": 0.39766889810562134, "learning_rate": 1.9979288033394983e-05, "loss": 0.6021, "step": 1433 }, { "epoch": 0.0440512395170952, "grad_norm": 0.4091089367866516, "learning_rate": 1.9979256933432202e-05, "loss": 0.6592, "step": 1434 }, { "epoch": 0.04408195865204436, "grad_norm": 0.42348575592041016, "learning_rate": 1.9979225810162167e-05, "loss": 0.5446, "step": 1435 }, { "epoch": 0.044112677786993515, "grad_norm": 0.3059660792350769, "learning_rate": 1.9979194663584954e-05, "loss": 0.6734, "step": 1436 }, { "epoch": 0.044143396921942676, "grad_norm": 0.30541545152664185, "learning_rate": 1.9979163493700637e-05, "loss": 0.5623, "step": 1437 }, { "epoch": 0.04417411605689184, "grad_norm": 0.3510250747203827, "learning_rate": 1.9979132300509283e-05, "loss": 0.6416, "step": 1438 }, { "epoch": 0.044204835191841, "grad_norm": 0.31188827753067017, "learning_rate": 1.997910108401097e-05, "loss": 0.5719, "step": 1439 }, { "epoch": 0.04423555432679016, "grad_norm": 0.3400861620903015, "learning_rate": 1.9979069844205773e-05, "loss": 0.6733, "step": 1440 }, { "epoch": 0.044266273461739315, "grad_norm": 0.3580463230609894, "learning_rate": 1.9979038581093757e-05, "loss": 0.6159, "step": 1441 }, { "epoch": 0.04429699259668848, "grad_norm": 0.3390311300754547, "learning_rate": 1.9979007294675e-05, "loss": 0.6727, "step": 1442 }, { "epoch": 0.04432771173163764, "grad_norm": 0.3564579486846924, "learning_rate": 1.9978975984949574e-05, "loss": 0.6026, "step": 1443 }, { "epoch": 0.0443584308665868, "grad_norm": 0.35155758261680603, "learning_rate": 1.9978944651917556e-05, "loss": 0.6054, "step": 1444 }, { "epoch": 0.044389150001535954, "grad_norm": 0.34290528297424316, "learning_rate": 1.9978913295579013e-05, "loss": 0.6391, "step": 1445 }, { "epoch": 0.044419869136485116, "grad_norm": 0.3204345107078552, "learning_rate": 1.9978881915934023e-05, "loss": 0.5447, "step": 1446 }, { "epoch": 0.04445058827143428, "grad_norm": 0.34730637073516846, "learning_rate": 1.9978850512982654e-05, "loss": 0.5865, "step": 1447 }, { "epoch": 0.04448130740638344, "grad_norm": 0.36416298151016235, "learning_rate": 1.9978819086724987e-05, "loss": 0.6315, "step": 1448 }, { "epoch": 0.04451202654133259, "grad_norm": 0.32476168870925903, "learning_rate": 1.9978787637161087e-05, "loss": 0.6705, "step": 1449 }, { "epoch": 0.044542745676281754, "grad_norm": 0.3341827094554901, "learning_rate": 1.9978756164291034e-05, "loss": 0.586, "step": 1450 }, { "epoch": 0.044573464811230916, "grad_norm": 0.35912227630615234, "learning_rate": 1.99787246681149e-05, "loss": 0.5861, "step": 1451 }, { "epoch": 0.04460418394618008, "grad_norm": 0.34328487515449524, "learning_rate": 1.9978693148632757e-05, "loss": 0.6699, "step": 1452 }, { "epoch": 0.04463490308112924, "grad_norm": 0.3288676142692566, "learning_rate": 1.997866160584468e-05, "loss": 0.6401, "step": 1453 }, { "epoch": 0.04466562221607839, "grad_norm": 0.32186517119407654, "learning_rate": 1.997863003975074e-05, "loss": 0.6666, "step": 1454 }, { "epoch": 0.044696341351027555, "grad_norm": 0.3401786983013153, "learning_rate": 1.9978598450351013e-05, "loss": 0.6003, "step": 1455 }, { "epoch": 0.044727060485976716, "grad_norm": 0.3354983627796173, "learning_rate": 1.997856683764557e-05, "loss": 0.6607, "step": 1456 }, { "epoch": 0.04475777962092588, "grad_norm": 0.36971738934516907, "learning_rate": 1.997853520163449e-05, "loss": 0.6413, "step": 1457 }, { "epoch": 0.04478849875587503, "grad_norm": 0.4143010675907135, "learning_rate": 1.9978503542317845e-05, "loss": 0.6348, "step": 1458 }, { "epoch": 0.044819217890824194, "grad_norm": 0.46499133110046387, "learning_rate": 1.9978471859695706e-05, "loss": 0.6554, "step": 1459 }, { "epoch": 0.044849937025773355, "grad_norm": 0.5254920125007629, "learning_rate": 1.9978440153768153e-05, "loss": 0.6116, "step": 1460 }, { "epoch": 0.04488065616072252, "grad_norm": 0.38139066100120544, "learning_rate": 1.9978408424535255e-05, "loss": 0.5671, "step": 1461 }, { "epoch": 0.04491137529567167, "grad_norm": 0.31457269191741943, "learning_rate": 1.997837667199708e-05, "loss": 0.6146, "step": 1462 }, { "epoch": 0.04494209443062083, "grad_norm": 0.3491078019142151, "learning_rate": 1.9978344896153718e-05, "loss": 0.5766, "step": 1463 }, { "epoch": 0.044972813565569994, "grad_norm": 0.36150890588760376, "learning_rate": 1.9978313097005232e-05, "loss": 0.6061, "step": 1464 }, { "epoch": 0.045003532700519155, "grad_norm": 0.37765732407569885, "learning_rate": 1.99782812745517e-05, "loss": 0.6203, "step": 1465 }, { "epoch": 0.04503425183546831, "grad_norm": 0.3623577952384949, "learning_rate": 1.9978249428793193e-05, "loss": 0.5941, "step": 1466 }, { "epoch": 0.04506497097041747, "grad_norm": 0.36662980914115906, "learning_rate": 1.9978217559729788e-05, "loss": 0.6235, "step": 1467 }, { "epoch": 0.04509569010536663, "grad_norm": 0.33251652121543884, "learning_rate": 1.997818566736156e-05, "loss": 0.5661, "step": 1468 }, { "epoch": 0.045126409240315794, "grad_norm": 0.3359599709510803, "learning_rate": 1.997815375168858e-05, "loss": 0.5544, "step": 1469 }, { "epoch": 0.045157128375264956, "grad_norm": 0.32382041215896606, "learning_rate": 1.9978121812710927e-05, "loss": 0.5927, "step": 1470 }, { "epoch": 0.04518784751021411, "grad_norm": 0.3252922296524048, "learning_rate": 1.9978089850428676e-05, "loss": 0.4875, "step": 1471 }, { "epoch": 0.04521856664516327, "grad_norm": 0.33955198526382446, "learning_rate": 1.9978057864841896e-05, "loss": 0.5927, "step": 1472 }, { "epoch": 0.04524928578011243, "grad_norm": 0.3254089653491974, "learning_rate": 1.9978025855950665e-05, "loss": 0.5663, "step": 1473 }, { "epoch": 0.045280004915061595, "grad_norm": 0.3900725841522217, "learning_rate": 1.9977993823755057e-05, "loss": 0.6608, "step": 1474 }, { "epoch": 0.04531072405001075, "grad_norm": 0.31601065397262573, "learning_rate": 1.997796176825515e-05, "loss": 0.5711, "step": 1475 }, { "epoch": 0.04534144318495991, "grad_norm": 0.34350335597991943, "learning_rate": 1.9977929689451016e-05, "loss": 0.585, "step": 1476 }, { "epoch": 0.04537216231990907, "grad_norm": 0.33578652143478394, "learning_rate": 1.9977897587342728e-05, "loss": 0.6119, "step": 1477 }, { "epoch": 0.045402881454858234, "grad_norm": 0.340859979391098, "learning_rate": 1.9977865461930368e-05, "loss": 0.5741, "step": 1478 }, { "epoch": 0.04543360058980739, "grad_norm": 0.435679167509079, "learning_rate": 1.9977833313214004e-05, "loss": 0.5694, "step": 1479 }, { "epoch": 0.04546431972475655, "grad_norm": 0.3523082137107849, "learning_rate": 1.997780114119371e-05, "loss": 0.6662, "step": 1480 }, { "epoch": 0.04549503885970571, "grad_norm": 0.38861361145973206, "learning_rate": 1.9977768945869572e-05, "loss": 0.6694, "step": 1481 }, { "epoch": 0.04552575799465487, "grad_norm": 0.35401690006256104, "learning_rate": 1.997773672724165e-05, "loss": 0.6435, "step": 1482 }, { "epoch": 0.04555647712960403, "grad_norm": 0.5448992252349854, "learning_rate": 1.9977704485310032e-05, "loss": 0.6583, "step": 1483 }, { "epoch": 0.04558719626455319, "grad_norm": 0.367035448551178, "learning_rate": 1.9977672220074786e-05, "loss": 0.6472, "step": 1484 }, { "epoch": 0.04561791539950235, "grad_norm": 0.30278944969177246, "learning_rate": 1.997763993153599e-05, "loss": 0.6311, "step": 1485 }, { "epoch": 0.04564863453445151, "grad_norm": 0.3472312390804291, "learning_rate": 1.997760761969372e-05, "loss": 0.6651, "step": 1486 }, { "epoch": 0.04567935366940067, "grad_norm": 0.40772393345832825, "learning_rate": 1.997757528454805e-05, "loss": 0.5877, "step": 1487 }, { "epoch": 0.04571007280434983, "grad_norm": 0.3308549225330353, "learning_rate": 1.997754292609906e-05, "loss": 0.7004, "step": 1488 }, { "epoch": 0.04574079193929899, "grad_norm": 0.3354812264442444, "learning_rate": 1.9977510544346815e-05, "loss": 0.6888, "step": 1489 }, { "epoch": 0.04577151107424815, "grad_norm": 0.37962910532951355, "learning_rate": 1.9977478139291403e-05, "loss": 0.6213, "step": 1490 }, { "epoch": 0.04580223020919731, "grad_norm": 0.31664037704467773, "learning_rate": 1.997744571093289e-05, "loss": 0.63, "step": 1491 }, { "epoch": 0.045832949344146466, "grad_norm": 0.31045863032341003, "learning_rate": 1.9977413259271356e-05, "loss": 0.5555, "step": 1492 }, { "epoch": 0.04586366847909563, "grad_norm": 0.3578523099422455, "learning_rate": 1.997738078430688e-05, "loss": 0.6412, "step": 1493 }, { "epoch": 0.04589438761404479, "grad_norm": 0.31463494896888733, "learning_rate": 1.997734828603953e-05, "loss": 0.5666, "step": 1494 }, { "epoch": 0.04592510674899395, "grad_norm": 0.34017422795295715, "learning_rate": 1.9977315764469387e-05, "loss": 0.6019, "step": 1495 }, { "epoch": 0.045955825883943105, "grad_norm": 0.3557920455932617, "learning_rate": 1.9977283219596527e-05, "loss": 0.6685, "step": 1496 }, { "epoch": 0.045986545018892266, "grad_norm": 0.3509226441383362, "learning_rate": 1.9977250651421028e-05, "loss": 0.6637, "step": 1497 }, { "epoch": 0.04601726415384143, "grad_norm": 0.3282249867916107, "learning_rate": 1.997721805994296e-05, "loss": 0.5905, "step": 1498 }, { "epoch": 0.04604798328879059, "grad_norm": 0.3344109058380127, "learning_rate": 1.9977185445162402e-05, "loss": 0.5815, "step": 1499 }, { "epoch": 0.04607870242373975, "grad_norm": 0.3479396402835846, "learning_rate": 1.997715280707943e-05, "loss": 0.6773, "step": 1500 }, { "epoch": 0.046109421558688905, "grad_norm": 0.4208886921405792, "learning_rate": 1.9977120145694126e-05, "loss": 0.5612, "step": 1501 }, { "epoch": 0.04614014069363807, "grad_norm": 0.40888920426368713, "learning_rate": 1.997708746100656e-05, "loss": 0.6609, "step": 1502 }, { "epoch": 0.04617085982858723, "grad_norm": 0.3395008146762848, "learning_rate": 1.9977054753016806e-05, "loss": 0.5936, "step": 1503 }, { "epoch": 0.04620157896353639, "grad_norm": 0.3486707806587219, "learning_rate": 1.9977022021724944e-05, "loss": 0.6374, "step": 1504 }, { "epoch": 0.046232298098485544, "grad_norm": 0.32248854637145996, "learning_rate": 1.9976989267131053e-05, "loss": 0.5666, "step": 1505 }, { "epoch": 0.046263017233434706, "grad_norm": 0.3303990662097931, "learning_rate": 1.9976956489235204e-05, "loss": 0.5912, "step": 1506 }, { "epoch": 0.04629373636838387, "grad_norm": 0.36752432584762573, "learning_rate": 1.997692368803748e-05, "loss": 0.5867, "step": 1507 }, { "epoch": 0.04632445550333303, "grad_norm": 0.3922474980354309, "learning_rate": 1.9976890863537948e-05, "loss": 0.6185, "step": 1508 }, { "epoch": 0.04635517463828218, "grad_norm": 0.3277745842933655, "learning_rate": 1.9976858015736697e-05, "loss": 0.6353, "step": 1509 }, { "epoch": 0.046385893773231344, "grad_norm": 0.33256980776786804, "learning_rate": 1.9976825144633793e-05, "loss": 0.6097, "step": 1510 }, { "epoch": 0.046416612908180506, "grad_norm": 0.359630823135376, "learning_rate": 1.9976792250229318e-05, "loss": 0.6131, "step": 1511 }, { "epoch": 0.04644733204312967, "grad_norm": 0.5256035923957825, "learning_rate": 1.997675933252335e-05, "loss": 0.5965, "step": 1512 }, { "epoch": 0.04647805117807882, "grad_norm": 0.41148775815963745, "learning_rate": 1.9976726391515958e-05, "loss": 0.7274, "step": 1513 }, { "epoch": 0.04650877031302798, "grad_norm": 0.3620133697986603, "learning_rate": 1.9976693427207232e-05, "loss": 0.5982, "step": 1514 }, { "epoch": 0.046539489447977145, "grad_norm": 0.33658626675605774, "learning_rate": 1.9976660439597237e-05, "loss": 0.6405, "step": 1515 }, { "epoch": 0.046570208582926306, "grad_norm": 0.3355967700481415, "learning_rate": 1.997662742868606e-05, "loss": 0.7334, "step": 1516 }, { "epoch": 0.04660092771787547, "grad_norm": 0.3297376036643982, "learning_rate": 1.997659439447377e-05, "loss": 0.6044, "step": 1517 }, { "epoch": 0.04663164685282462, "grad_norm": 0.3391709625720978, "learning_rate": 1.997656133696044e-05, "loss": 0.6688, "step": 1518 }, { "epoch": 0.046662365987773784, "grad_norm": 0.3304705023765564, "learning_rate": 1.9976528256146162e-05, "loss": 0.6482, "step": 1519 }, { "epoch": 0.046693085122722945, "grad_norm": 0.3371427059173584, "learning_rate": 1.9976495152031007e-05, "loss": 0.5933, "step": 1520 }, { "epoch": 0.04672380425767211, "grad_norm": 0.3329218327999115, "learning_rate": 1.9976462024615045e-05, "loss": 0.6615, "step": 1521 }, { "epoch": 0.04675452339262126, "grad_norm": 0.3517970144748688, "learning_rate": 1.9976428873898366e-05, "loss": 0.7042, "step": 1522 }, { "epoch": 0.04678524252757042, "grad_norm": 0.32224196195602417, "learning_rate": 1.9976395699881034e-05, "loss": 0.5852, "step": 1523 }, { "epoch": 0.046815961662519584, "grad_norm": 0.33342045545578003, "learning_rate": 1.9976362502563137e-05, "loss": 0.6849, "step": 1524 }, { "epoch": 0.046846680797468745, "grad_norm": 0.32047998905181885, "learning_rate": 1.997632928194475e-05, "loss": 0.6463, "step": 1525 }, { "epoch": 0.0468773999324179, "grad_norm": 0.3257700800895691, "learning_rate": 1.9976296038025948e-05, "loss": 0.6344, "step": 1526 }, { "epoch": 0.04690811906736706, "grad_norm": 0.35298335552215576, "learning_rate": 1.9976262770806806e-05, "loss": 0.5705, "step": 1527 }, { "epoch": 0.04693883820231622, "grad_norm": 0.3183249831199646, "learning_rate": 1.9976229480287406e-05, "loss": 0.6058, "step": 1528 }, { "epoch": 0.046969557337265384, "grad_norm": 0.31748098134994507, "learning_rate": 1.9976196166467828e-05, "loss": 0.5766, "step": 1529 }, { "epoch": 0.047000276472214546, "grad_norm": 0.3378646969795227, "learning_rate": 1.997616282934815e-05, "loss": 0.6195, "step": 1530 }, { "epoch": 0.0470309956071637, "grad_norm": 0.33636677265167236, "learning_rate": 1.9976129468928444e-05, "loss": 0.5486, "step": 1531 }, { "epoch": 0.04706171474211286, "grad_norm": 0.346770703792572, "learning_rate": 1.997609608520879e-05, "loss": 0.6155, "step": 1532 }, { "epoch": 0.04709243387706202, "grad_norm": 0.3523155450820923, "learning_rate": 1.9976062678189265e-05, "loss": 0.701, "step": 1533 }, { "epoch": 0.047123153012011185, "grad_norm": 0.33391839265823364, "learning_rate": 1.997602924786995e-05, "loss": 0.5706, "step": 1534 }, { "epoch": 0.04715387214696034, "grad_norm": 0.3638920187950134, "learning_rate": 1.9975995794250925e-05, "loss": 0.5597, "step": 1535 }, { "epoch": 0.0471845912819095, "grad_norm": 0.3479497730731964, "learning_rate": 1.9975962317332263e-05, "loss": 0.5867, "step": 1536 }, { "epoch": 0.04721531041685866, "grad_norm": 0.3112768530845642, "learning_rate": 1.9975928817114046e-05, "loss": 0.6336, "step": 1537 }, { "epoch": 0.047246029551807824, "grad_norm": 0.3886876702308655, "learning_rate": 1.997589529359635e-05, "loss": 0.6129, "step": 1538 }, { "epoch": 0.04727674868675698, "grad_norm": 0.4790090024471283, "learning_rate": 1.9975861746779252e-05, "loss": 0.6039, "step": 1539 }, { "epoch": 0.04730746782170614, "grad_norm": 0.3459738492965698, "learning_rate": 1.9975828176662832e-05, "loss": 0.6329, "step": 1540 }, { "epoch": 0.0473381869566553, "grad_norm": 0.3439803719520569, "learning_rate": 1.997579458324717e-05, "loss": 0.6069, "step": 1541 }, { "epoch": 0.04736890609160446, "grad_norm": 0.3112871050834656, "learning_rate": 1.9975760966532345e-05, "loss": 0.5899, "step": 1542 }, { "epoch": 0.04739962522655362, "grad_norm": 0.33760762214660645, "learning_rate": 1.997572732651843e-05, "loss": 0.6275, "step": 1543 }, { "epoch": 0.04743034436150278, "grad_norm": 0.32981356978416443, "learning_rate": 1.997569366320551e-05, "loss": 0.6024, "step": 1544 }, { "epoch": 0.04746106349645194, "grad_norm": 0.4020029902458191, "learning_rate": 1.9975659976593657e-05, "loss": 0.6039, "step": 1545 }, { "epoch": 0.0474917826314011, "grad_norm": 0.3561566174030304, "learning_rate": 1.9975626266682957e-05, "loss": 0.7257, "step": 1546 }, { "epoch": 0.04752250176635026, "grad_norm": 0.33351147174835205, "learning_rate": 1.9975592533473484e-05, "loss": 0.6535, "step": 1547 }, { "epoch": 0.04755322090129942, "grad_norm": 0.32751715183258057, "learning_rate": 1.997555877696532e-05, "loss": 0.5961, "step": 1548 }, { "epoch": 0.04758394003624858, "grad_norm": 0.6751850247383118, "learning_rate": 1.997552499715854e-05, "loss": 0.6425, "step": 1549 }, { "epoch": 0.04761465917119774, "grad_norm": 0.33825552463531494, "learning_rate": 1.9975491194053223e-05, "loss": 0.6531, "step": 1550 }, { "epoch": 0.0476453783061469, "grad_norm": 0.33470311760902405, "learning_rate": 1.9975457367649448e-05, "loss": 0.5947, "step": 1551 }, { "epoch": 0.047676097441096056, "grad_norm": 0.37409093976020813, "learning_rate": 1.99754235179473e-05, "loss": 0.5729, "step": 1552 }, { "epoch": 0.04770681657604522, "grad_norm": 0.34536245465278625, "learning_rate": 1.9975389644946853e-05, "loss": 0.6385, "step": 1553 }, { "epoch": 0.04773753571099438, "grad_norm": 0.3762564957141876, "learning_rate": 1.9975355748648185e-05, "loss": 0.6596, "step": 1554 }, { "epoch": 0.04776825484594354, "grad_norm": 0.3271220624446869, "learning_rate": 1.997532182905138e-05, "loss": 0.6539, "step": 1555 }, { "epoch": 0.047798973980892695, "grad_norm": 0.3872227370738983, "learning_rate": 1.997528788615651e-05, "loss": 0.7396, "step": 1556 }, { "epoch": 0.047829693115841856, "grad_norm": 0.31722933053970337, "learning_rate": 1.9975253919963663e-05, "loss": 0.6518, "step": 1557 }, { "epoch": 0.04786041225079102, "grad_norm": 0.3098757266998291, "learning_rate": 1.9975219930472913e-05, "loss": 0.6431, "step": 1558 }, { "epoch": 0.04789113138574018, "grad_norm": 0.3384096622467041, "learning_rate": 1.997518591768434e-05, "loss": 0.6132, "step": 1559 }, { "epoch": 0.04792185052068934, "grad_norm": 0.4054543673992157, "learning_rate": 1.997515188159802e-05, "loss": 0.5855, "step": 1560 }, { "epoch": 0.047952569655638495, "grad_norm": 0.35157856345176697, "learning_rate": 1.997511782221404e-05, "loss": 0.6538, "step": 1561 }, { "epoch": 0.04798328879058766, "grad_norm": 0.37904587388038635, "learning_rate": 1.9975083739532474e-05, "loss": 0.5607, "step": 1562 }, { "epoch": 0.04801400792553682, "grad_norm": 0.3330012261867523, "learning_rate": 1.9975049633553402e-05, "loss": 0.6413, "step": 1563 }, { "epoch": 0.04804472706048598, "grad_norm": 0.3533659875392914, "learning_rate": 1.9975015504276907e-05, "loss": 0.6096, "step": 1564 }, { "epoch": 0.048075446195435134, "grad_norm": 0.316693514585495, "learning_rate": 1.9974981351703064e-05, "loss": 0.6011, "step": 1565 }, { "epoch": 0.048106165330384296, "grad_norm": 0.36805659532546997, "learning_rate": 1.9974947175831957e-05, "loss": 0.7139, "step": 1566 }, { "epoch": 0.04813688446533346, "grad_norm": 0.35783979296684265, "learning_rate": 1.9974912976663666e-05, "loss": 0.5848, "step": 1567 }, { "epoch": 0.04816760360028262, "grad_norm": 0.3219115138053894, "learning_rate": 1.997487875419827e-05, "loss": 0.5772, "step": 1568 }, { "epoch": 0.04819832273523177, "grad_norm": 0.3617928624153137, "learning_rate": 1.9974844508435844e-05, "loss": 0.6541, "step": 1569 }, { "epoch": 0.048229041870180935, "grad_norm": 0.3374931514263153, "learning_rate": 1.9974810239376476e-05, "loss": 0.6277, "step": 1570 }, { "epoch": 0.048259761005130096, "grad_norm": 0.32027724385261536, "learning_rate": 1.997477594702024e-05, "loss": 0.6263, "step": 1571 }, { "epoch": 0.04829048014007926, "grad_norm": 0.3411986529827118, "learning_rate": 1.9974741631367214e-05, "loss": 0.6527, "step": 1572 }, { "epoch": 0.04832119927502841, "grad_norm": 0.3594294488430023, "learning_rate": 1.9974707292417487e-05, "loss": 0.6526, "step": 1573 }, { "epoch": 0.04835191840997757, "grad_norm": 0.37739384174346924, "learning_rate": 1.9974672930171136e-05, "loss": 0.6564, "step": 1574 }, { "epoch": 0.048382637544926735, "grad_norm": 0.4125100076198578, "learning_rate": 1.9974638544628237e-05, "loss": 0.6811, "step": 1575 }, { "epoch": 0.048413356679875896, "grad_norm": 0.4873066842556, "learning_rate": 1.9974604135788873e-05, "loss": 0.6472, "step": 1576 }, { "epoch": 0.04844407581482506, "grad_norm": 0.3150804340839386, "learning_rate": 1.9974569703653128e-05, "loss": 0.5568, "step": 1577 }, { "epoch": 0.04847479494977421, "grad_norm": 0.3466322422027588, "learning_rate": 1.9974535248221073e-05, "loss": 0.6358, "step": 1578 }, { "epoch": 0.048505514084723374, "grad_norm": 0.3265446722507477, "learning_rate": 1.9974500769492797e-05, "loss": 0.6912, "step": 1579 }, { "epoch": 0.048536233219672535, "grad_norm": 0.37474381923675537, "learning_rate": 1.9974466267468377e-05, "loss": 0.5628, "step": 1580 }, { "epoch": 0.0485669523546217, "grad_norm": 0.6572012901306152, "learning_rate": 1.9974431742147898e-05, "loss": 0.6057, "step": 1581 }, { "epoch": 0.04859767148957085, "grad_norm": 0.3566594123840332, "learning_rate": 1.9974397193531434e-05, "loss": 0.6826, "step": 1582 }, { "epoch": 0.04862839062452001, "grad_norm": 0.5015448927879333, "learning_rate": 1.997436262161907e-05, "loss": 0.5952, "step": 1583 }, { "epoch": 0.048659109759469174, "grad_norm": 0.33665817975997925, "learning_rate": 1.9974328026410883e-05, "loss": 0.586, "step": 1584 }, { "epoch": 0.048689828894418336, "grad_norm": 0.3693210482597351, "learning_rate": 1.9974293407906958e-05, "loss": 0.6243, "step": 1585 }, { "epoch": 0.04872054802936749, "grad_norm": 0.36246031522750854, "learning_rate": 1.9974258766107373e-05, "loss": 0.6524, "step": 1586 }, { "epoch": 0.04875126716431665, "grad_norm": 0.4176059365272522, "learning_rate": 1.9974224101012214e-05, "loss": 0.6688, "step": 1587 }, { "epoch": 0.04878198629926581, "grad_norm": 0.3751685917377472, "learning_rate": 1.9974189412621556e-05, "loss": 0.6376, "step": 1588 }, { "epoch": 0.048812705434214974, "grad_norm": 0.37172165513038635, "learning_rate": 1.9974154700935478e-05, "loss": 0.6676, "step": 1589 }, { "epoch": 0.04884342456916413, "grad_norm": 0.37124791741371155, "learning_rate": 1.997411996595407e-05, "loss": 0.5672, "step": 1590 }, { "epoch": 0.04887414370411329, "grad_norm": 0.3678755760192871, "learning_rate": 1.9974085207677406e-05, "loss": 0.624, "step": 1591 }, { "epoch": 0.04890486283906245, "grad_norm": 0.3361211121082306, "learning_rate": 1.997405042610557e-05, "loss": 0.5884, "step": 1592 }, { "epoch": 0.04893558197401161, "grad_norm": 0.35418471693992615, "learning_rate": 1.9974015621238646e-05, "loss": 0.633, "step": 1593 }, { "epoch": 0.048966301108960775, "grad_norm": 0.3511559069156647, "learning_rate": 1.997398079307671e-05, "loss": 0.6021, "step": 1594 }, { "epoch": 0.04899702024390993, "grad_norm": 0.3330918252468109, "learning_rate": 1.997394594161984e-05, "loss": 0.6972, "step": 1595 }, { "epoch": 0.04902773937885909, "grad_norm": 0.3222728967666626, "learning_rate": 1.9973911066868127e-05, "loss": 0.5819, "step": 1596 }, { "epoch": 0.04905845851380825, "grad_norm": 0.31858038902282715, "learning_rate": 1.997387616882165e-05, "loss": 0.5669, "step": 1597 }, { "epoch": 0.049089177648757414, "grad_norm": 0.33087068796157837, "learning_rate": 1.9973841247480487e-05, "loss": 0.5517, "step": 1598 }, { "epoch": 0.04911989678370657, "grad_norm": 0.33781805634498596, "learning_rate": 1.997380630284472e-05, "loss": 0.636, "step": 1599 }, { "epoch": 0.04915061591865573, "grad_norm": 0.4171277582645416, "learning_rate": 1.997377133491443e-05, "loss": 0.5651, "step": 1600 }, { "epoch": 0.04918133505360489, "grad_norm": 0.3270309269428253, "learning_rate": 1.9973736343689703e-05, "loss": 0.61, "step": 1601 }, { "epoch": 0.04921205418855405, "grad_norm": 0.32625848054885864, "learning_rate": 1.997370132917062e-05, "loss": 0.6293, "step": 1602 }, { "epoch": 0.04924277332350321, "grad_norm": 0.4175812900066376, "learning_rate": 1.997366629135726e-05, "loss": 0.6014, "step": 1603 }, { "epoch": 0.04927349245845237, "grad_norm": 0.6452588438987732, "learning_rate": 1.9973631230249704e-05, "loss": 0.5702, "step": 1604 }, { "epoch": 0.04930421159340153, "grad_norm": 0.30845406651496887, "learning_rate": 1.9973596145848035e-05, "loss": 0.6482, "step": 1605 }, { "epoch": 0.04933493072835069, "grad_norm": 0.3601158857345581, "learning_rate": 1.9973561038152337e-05, "loss": 0.5936, "step": 1606 }, { "epoch": 0.04936564986329985, "grad_norm": 0.38791725039482117, "learning_rate": 1.997352590716269e-05, "loss": 0.7201, "step": 1607 }, { "epoch": 0.04939636899824901, "grad_norm": 0.31345584988594055, "learning_rate": 1.997349075287918e-05, "loss": 0.6502, "step": 1608 }, { "epoch": 0.04942708813319817, "grad_norm": 0.3697737157344818, "learning_rate": 1.9973455575301886e-05, "loss": 0.6035, "step": 1609 }, { "epoch": 0.04945780726814733, "grad_norm": 0.32363954186439514, "learning_rate": 1.9973420374430886e-05, "loss": 0.7158, "step": 1610 }, { "epoch": 0.04948852640309649, "grad_norm": 0.3198433220386505, "learning_rate": 1.9973385150266267e-05, "loss": 0.6393, "step": 1611 }, { "epoch": 0.049519245538045646, "grad_norm": 0.3102641701698303, "learning_rate": 1.9973349902808113e-05, "loss": 0.6113, "step": 1612 }, { "epoch": 0.04954996467299481, "grad_norm": 0.34853553771972656, "learning_rate": 1.99733146320565e-05, "loss": 0.6422, "step": 1613 }, { "epoch": 0.04958068380794397, "grad_norm": 0.31733274459838867, "learning_rate": 1.9973279338011516e-05, "loss": 0.638, "step": 1614 }, { "epoch": 0.04961140294289313, "grad_norm": 0.314637154340744, "learning_rate": 1.9973244020673243e-05, "loss": 0.6516, "step": 1615 }, { "epoch": 0.049642122077842285, "grad_norm": 0.4583761394023895, "learning_rate": 1.997320868004176e-05, "loss": 0.6295, "step": 1616 }, { "epoch": 0.049672841212791446, "grad_norm": 0.31452253460884094, "learning_rate": 1.997317331611715e-05, "loss": 0.6754, "step": 1617 }, { "epoch": 0.04970356034774061, "grad_norm": 0.34458106756210327, "learning_rate": 1.99731379288995e-05, "loss": 0.5799, "step": 1618 }, { "epoch": 0.04973427948268977, "grad_norm": 0.3168652057647705, "learning_rate": 1.9973102518388893e-05, "loss": 0.6147, "step": 1619 }, { "epoch": 0.049764998617638924, "grad_norm": 0.31987428665161133, "learning_rate": 1.99730670845854e-05, "loss": 0.7054, "step": 1620 }, { "epoch": 0.049795717752588085, "grad_norm": 0.3057861626148224, "learning_rate": 1.997303162748912e-05, "loss": 0.5597, "step": 1621 }, { "epoch": 0.04982643688753725, "grad_norm": 0.4233693480491638, "learning_rate": 1.9972996147100125e-05, "loss": 0.6706, "step": 1622 }, { "epoch": 0.04985715602248641, "grad_norm": 0.3018213212490082, "learning_rate": 1.9972960643418503e-05, "loss": 0.6036, "step": 1623 }, { "epoch": 0.04988787515743557, "grad_norm": 0.33124321699142456, "learning_rate": 1.997292511644433e-05, "loss": 0.5172, "step": 1624 }, { "epoch": 0.049918594292384724, "grad_norm": 0.30949515104293823, "learning_rate": 1.99728895661777e-05, "loss": 0.6224, "step": 1625 }, { "epoch": 0.049949313427333886, "grad_norm": 0.380548894405365, "learning_rate": 1.9972853992618685e-05, "loss": 0.6887, "step": 1626 }, { "epoch": 0.04998003256228305, "grad_norm": 0.38368430733680725, "learning_rate": 1.997281839576738e-05, "loss": 0.6189, "step": 1627 }, { "epoch": 0.05001075169723221, "grad_norm": 0.43819427490234375, "learning_rate": 1.9972782775623852e-05, "loss": 0.5916, "step": 1628 }, { "epoch": 0.05004147083218136, "grad_norm": 0.41229456663131714, "learning_rate": 1.9972747132188197e-05, "loss": 0.5813, "step": 1629 }, { "epoch": 0.050072189967130525, "grad_norm": 0.3749625086784363, "learning_rate": 1.9972711465460494e-05, "loss": 0.6286, "step": 1630 }, { "epoch": 0.050102909102079686, "grad_norm": 0.32124078273773193, "learning_rate": 1.997267577544083e-05, "loss": 0.598, "step": 1631 }, { "epoch": 0.05013362823702885, "grad_norm": 0.3479023873806, "learning_rate": 1.9972640062129282e-05, "loss": 0.6067, "step": 1632 }, { "epoch": 0.050164347371978, "grad_norm": 0.48171094059944153, "learning_rate": 1.9972604325525936e-05, "loss": 0.6425, "step": 1633 }, { "epoch": 0.05019506650692716, "grad_norm": 0.330768883228302, "learning_rate": 1.9972568565630877e-05, "loss": 0.6233, "step": 1634 }, { "epoch": 0.050225785641876325, "grad_norm": 0.32223352789878845, "learning_rate": 1.997253278244419e-05, "loss": 0.5836, "step": 1635 }, { "epoch": 0.050256504776825486, "grad_norm": 0.341009259223938, "learning_rate": 1.997249697596596e-05, "loss": 0.6334, "step": 1636 }, { "epoch": 0.05028722391177465, "grad_norm": 0.34071841835975647, "learning_rate": 1.997246114619626e-05, "loss": 0.6203, "step": 1637 }, { "epoch": 0.0503179430467238, "grad_norm": 0.3425106406211853, "learning_rate": 1.997242529313518e-05, "loss": 0.6273, "step": 1638 }, { "epoch": 0.050348662181672964, "grad_norm": 0.3437378406524658, "learning_rate": 1.9972389416782808e-05, "loss": 0.5916, "step": 1639 }, { "epoch": 0.050379381316622125, "grad_norm": 0.32230859994888306, "learning_rate": 1.997235351713922e-05, "loss": 0.602, "step": 1640 }, { "epoch": 0.05041010045157129, "grad_norm": 0.3632746934890747, "learning_rate": 1.997231759420451e-05, "loss": 0.6859, "step": 1641 }, { "epoch": 0.05044081958652044, "grad_norm": 0.3984330892562866, "learning_rate": 1.997228164797875e-05, "loss": 0.6805, "step": 1642 }, { "epoch": 0.0504715387214696, "grad_norm": 0.31781619787216187, "learning_rate": 1.9972245678462032e-05, "loss": 0.5995, "step": 1643 }, { "epoch": 0.050502257856418764, "grad_norm": 0.3396299183368683, "learning_rate": 1.9972209685654437e-05, "loss": 0.5892, "step": 1644 }, { "epoch": 0.050532976991367926, "grad_norm": 0.3552519679069519, "learning_rate": 1.9972173669556053e-05, "loss": 0.6061, "step": 1645 }, { "epoch": 0.05056369612631708, "grad_norm": 0.33940160274505615, "learning_rate": 1.9972137630166955e-05, "loss": 0.593, "step": 1646 }, { "epoch": 0.05059441526126624, "grad_norm": 0.411429226398468, "learning_rate": 1.9972101567487237e-05, "loss": 0.6938, "step": 1647 }, { "epoch": 0.0506251343962154, "grad_norm": 0.34179040789604187, "learning_rate": 1.9972065481516983e-05, "loss": 0.647, "step": 1648 }, { "epoch": 0.050655853531164564, "grad_norm": 0.363298237323761, "learning_rate": 1.9972029372256266e-05, "loss": 0.6203, "step": 1649 }, { "epoch": 0.05068657266611372, "grad_norm": 0.35668376088142395, "learning_rate": 1.9971993239705183e-05, "loss": 0.6778, "step": 1650 }, { "epoch": 0.05071729180106288, "grad_norm": 0.3233514726161957, "learning_rate": 1.9971957083863814e-05, "loss": 0.6426, "step": 1651 }, { "epoch": 0.05074801093601204, "grad_norm": 0.38480865955352783, "learning_rate": 1.9971920904732238e-05, "loss": 0.5662, "step": 1652 }, { "epoch": 0.0507787300709612, "grad_norm": 0.33321690559387207, "learning_rate": 1.9971884702310545e-05, "loss": 0.6974, "step": 1653 }, { "epoch": 0.050809449205910365, "grad_norm": 0.33720824122428894, "learning_rate": 1.9971848476598824e-05, "loss": 0.5444, "step": 1654 }, { "epoch": 0.05084016834085952, "grad_norm": 0.3110780119895935, "learning_rate": 1.997181222759715e-05, "loss": 0.666, "step": 1655 }, { "epoch": 0.05087088747580868, "grad_norm": 0.36159563064575195, "learning_rate": 1.9971775955305614e-05, "loss": 0.5475, "step": 1656 }, { "epoch": 0.05090160661075784, "grad_norm": 0.3299364149570465, "learning_rate": 1.99717396597243e-05, "loss": 0.617, "step": 1657 }, { "epoch": 0.050932325745707004, "grad_norm": 0.3755226135253906, "learning_rate": 1.997170334085329e-05, "loss": 0.611, "step": 1658 }, { "epoch": 0.05096304488065616, "grad_norm": 0.31369540095329285, "learning_rate": 1.997166699869267e-05, "loss": 0.668, "step": 1659 }, { "epoch": 0.05099376401560532, "grad_norm": 0.33271852135658264, "learning_rate": 1.9971630633242525e-05, "loss": 0.7082, "step": 1660 }, { "epoch": 0.05102448315055448, "grad_norm": 0.3282442092895508, "learning_rate": 1.9971594244502942e-05, "loss": 0.5718, "step": 1661 }, { "epoch": 0.05105520228550364, "grad_norm": 0.38441911339759827, "learning_rate": 1.9971557832474002e-05, "loss": 0.6738, "step": 1662 }, { "epoch": 0.0510859214204528, "grad_norm": 0.351683646440506, "learning_rate": 1.9971521397155793e-05, "loss": 0.5522, "step": 1663 }, { "epoch": 0.05111664055540196, "grad_norm": 0.40883177518844604, "learning_rate": 1.99714849385484e-05, "loss": 0.6556, "step": 1664 }, { "epoch": 0.05114735969035112, "grad_norm": 0.3257603347301483, "learning_rate": 1.9971448456651906e-05, "loss": 0.6419, "step": 1665 }, { "epoch": 0.05117807882530028, "grad_norm": 0.4220370352268219, "learning_rate": 1.99714119514664e-05, "loss": 0.6404, "step": 1666 }, { "epoch": 0.05120879796024944, "grad_norm": 0.3198326826095581, "learning_rate": 1.9971375422991962e-05, "loss": 0.6203, "step": 1667 }, { "epoch": 0.0512395170951986, "grad_norm": 0.3326724171638489, "learning_rate": 1.9971338871228684e-05, "loss": 0.6021, "step": 1668 }, { "epoch": 0.05127023623014776, "grad_norm": 0.35600051283836365, "learning_rate": 1.9971302296176646e-05, "loss": 0.5888, "step": 1669 }, { "epoch": 0.05130095536509692, "grad_norm": 0.3490760922431946, "learning_rate": 1.9971265697835934e-05, "loss": 0.5674, "step": 1670 }, { "epoch": 0.05133167450004608, "grad_norm": 0.3330647051334381, "learning_rate": 1.9971229076206638e-05, "loss": 0.5826, "step": 1671 }, { "epoch": 0.051362393634995236, "grad_norm": 0.3673471212387085, "learning_rate": 1.997119243128884e-05, "loss": 0.6201, "step": 1672 }, { "epoch": 0.0513931127699444, "grad_norm": 0.34637609124183655, "learning_rate": 1.997115576308262e-05, "loss": 0.5956, "step": 1673 }, { "epoch": 0.05142383190489356, "grad_norm": 0.32922089099884033, "learning_rate": 1.997111907158807e-05, "loss": 0.6585, "step": 1674 }, { "epoch": 0.05145455103984272, "grad_norm": 0.36187902092933655, "learning_rate": 1.997108235680528e-05, "loss": 0.6389, "step": 1675 }, { "epoch": 0.051485270174791875, "grad_norm": 0.3523949086666107, "learning_rate": 1.997104561873433e-05, "loss": 0.5296, "step": 1676 }, { "epoch": 0.051515989309741037, "grad_norm": 0.5153689980506897, "learning_rate": 1.9971008857375305e-05, "loss": 0.634, "step": 1677 }, { "epoch": 0.0515467084446902, "grad_norm": 0.41411033272743225, "learning_rate": 1.9970972072728294e-05, "loss": 0.6311, "step": 1678 }, { "epoch": 0.05157742757963936, "grad_norm": 0.3246355354785919, "learning_rate": 1.997093526479338e-05, "loss": 0.5896, "step": 1679 }, { "epoch": 0.051608146714588514, "grad_norm": 0.46703168749809265, "learning_rate": 1.997089843357065e-05, "loss": 0.5243, "step": 1680 }, { "epoch": 0.051638865849537675, "grad_norm": 0.4030819535255432, "learning_rate": 1.997086157906019e-05, "loss": 0.6053, "step": 1681 }, { "epoch": 0.05166958498448684, "grad_norm": 0.3761485517024994, "learning_rate": 1.9970824701262085e-05, "loss": 0.6481, "step": 1682 }, { "epoch": 0.051700304119436, "grad_norm": 0.7228783369064331, "learning_rate": 1.9970787800176426e-05, "loss": 0.5928, "step": 1683 }, { "epoch": 0.05173102325438516, "grad_norm": 0.3218024969100952, "learning_rate": 1.9970750875803294e-05, "loss": 0.6075, "step": 1684 }, { "epoch": 0.051761742389334314, "grad_norm": 0.410461962223053, "learning_rate": 1.9970713928142776e-05, "loss": 0.6254, "step": 1685 }, { "epoch": 0.051792461524283476, "grad_norm": 0.31899458169937134, "learning_rate": 1.9970676957194962e-05, "loss": 0.6254, "step": 1686 }, { "epoch": 0.05182318065923264, "grad_norm": 0.3944074809551239, "learning_rate": 1.9970639962959938e-05, "loss": 0.6313, "step": 1687 }, { "epoch": 0.0518538997941818, "grad_norm": 0.957227885723114, "learning_rate": 1.9970602945437783e-05, "loss": 0.628, "step": 1688 }, { "epoch": 0.05188461892913095, "grad_norm": 0.32219892740249634, "learning_rate": 1.9970565904628588e-05, "loss": 0.6311, "step": 1689 }, { "epoch": 0.051915338064080115, "grad_norm": 0.3373182415962219, "learning_rate": 1.997052884053244e-05, "loss": 0.6165, "step": 1690 }, { "epoch": 0.051946057199029276, "grad_norm": 0.35779833793640137, "learning_rate": 1.997049175314943e-05, "loss": 0.6559, "step": 1691 }, { "epoch": 0.05197677633397844, "grad_norm": 0.3302483558654785, "learning_rate": 1.9970454642479636e-05, "loss": 0.5413, "step": 1692 }, { "epoch": 0.05200749546892759, "grad_norm": 0.3374185562133789, "learning_rate": 1.9970417508523153e-05, "loss": 0.6166, "step": 1693 }, { "epoch": 0.05203821460387675, "grad_norm": 0.3562318980693817, "learning_rate": 1.997038035128006e-05, "loss": 0.6394, "step": 1694 }, { "epoch": 0.052068933738825915, "grad_norm": 0.30988162755966187, "learning_rate": 1.9970343170750446e-05, "loss": 0.6422, "step": 1695 }, { "epoch": 0.052099652873775076, "grad_norm": 0.3686848282814026, "learning_rate": 1.9970305966934403e-05, "loss": 0.7171, "step": 1696 }, { "epoch": 0.05213037200872423, "grad_norm": 0.3901228606700897, "learning_rate": 1.9970268739832014e-05, "loss": 0.7016, "step": 1697 }, { "epoch": 0.05216109114367339, "grad_norm": 0.3580358028411865, "learning_rate": 1.9970231489443364e-05, "loss": 0.5929, "step": 1698 }, { "epoch": 0.052191810278622554, "grad_norm": 0.32933008670806885, "learning_rate": 1.9970194215768542e-05, "loss": 0.5992, "step": 1699 }, { "epoch": 0.052222529413571715, "grad_norm": 0.2849575877189636, "learning_rate": 1.9970156918807636e-05, "loss": 0.5287, "step": 1700 }, { "epoch": 0.05225324854852088, "grad_norm": 0.3584528863430023, "learning_rate": 1.9970119598560734e-05, "loss": 0.6353, "step": 1701 }, { "epoch": 0.05228396768347003, "grad_norm": 0.3351663649082184, "learning_rate": 1.9970082255027917e-05, "loss": 0.619, "step": 1702 }, { "epoch": 0.05231468681841919, "grad_norm": 0.4637466371059418, "learning_rate": 1.997004488820928e-05, "loss": 0.6933, "step": 1703 }, { "epoch": 0.052345405953368354, "grad_norm": 0.3082723617553711, "learning_rate": 1.9970007498104907e-05, "loss": 0.6118, "step": 1704 }, { "epoch": 0.052376125088317516, "grad_norm": 0.3228706121444702, "learning_rate": 1.9969970084714884e-05, "loss": 0.5609, "step": 1705 }, { "epoch": 0.05240684422326667, "grad_norm": 0.459017813205719, "learning_rate": 1.9969932648039298e-05, "loss": 0.5658, "step": 1706 }, { "epoch": 0.05243756335821583, "grad_norm": 0.4038344919681549, "learning_rate": 1.996989518807824e-05, "loss": 0.6269, "step": 1707 }, { "epoch": 0.05246828249316499, "grad_norm": 0.3404841721057892, "learning_rate": 1.9969857704831798e-05, "loss": 0.6568, "step": 1708 }, { "epoch": 0.052499001628114154, "grad_norm": 0.35428181290626526, "learning_rate": 1.9969820198300053e-05, "loss": 0.5812, "step": 1709 }, { "epoch": 0.05252972076306331, "grad_norm": 0.32055121660232544, "learning_rate": 1.99697826684831e-05, "loss": 0.6316, "step": 1710 }, { "epoch": 0.05256043989801247, "grad_norm": 0.31095001101493835, "learning_rate": 1.9969745115381015e-05, "loss": 0.5626, "step": 1711 }, { "epoch": 0.05259115903296163, "grad_norm": 0.41241323947906494, "learning_rate": 1.9969707538993904e-05, "loss": 0.6062, "step": 1712 }, { "epoch": 0.05262187816791079, "grad_norm": 0.3453739881515503, "learning_rate": 1.9969669939321837e-05, "loss": 0.7103, "step": 1713 }, { "epoch": 0.052652597302859955, "grad_norm": 0.35108473896980286, "learning_rate": 1.9969632316364915e-05, "loss": 0.5966, "step": 1714 }, { "epoch": 0.05268331643780911, "grad_norm": 0.3071790635585785, "learning_rate": 1.996959467012322e-05, "loss": 0.5721, "step": 1715 }, { "epoch": 0.05271403557275827, "grad_norm": 0.342948317527771, "learning_rate": 1.9969557000596838e-05, "loss": 0.6325, "step": 1716 }, { "epoch": 0.05274475470770743, "grad_norm": 0.36018943786621094, "learning_rate": 1.9969519307785862e-05, "loss": 0.7135, "step": 1717 }, { "epoch": 0.052775473842656594, "grad_norm": 0.4865421950817108, "learning_rate": 1.9969481591690377e-05, "loss": 0.6305, "step": 1718 }, { "epoch": 0.05280619297760575, "grad_norm": 0.3245712220668793, "learning_rate": 1.9969443852310467e-05, "loss": 0.5766, "step": 1719 }, { "epoch": 0.05283691211255491, "grad_norm": 0.3359595835208893, "learning_rate": 1.9969406089646232e-05, "loss": 0.6175, "step": 1720 }, { "epoch": 0.05286763124750407, "grad_norm": 0.33772316575050354, "learning_rate": 1.9969368303697746e-05, "loss": 0.6443, "step": 1721 }, { "epoch": 0.05289835038245323, "grad_norm": 0.37347254157066345, "learning_rate": 1.996933049446511e-05, "loss": 0.6089, "step": 1722 }, { "epoch": 0.05292906951740239, "grad_norm": 0.3811996579170227, "learning_rate": 1.99692926619484e-05, "loss": 0.5863, "step": 1723 }, { "epoch": 0.05295978865235155, "grad_norm": 0.364454984664917, "learning_rate": 1.9969254806147717e-05, "loss": 0.5847, "step": 1724 }, { "epoch": 0.05299050778730071, "grad_norm": 0.3238917291164398, "learning_rate": 1.996921692706314e-05, "loss": 0.5748, "step": 1725 }, { "epoch": 0.05302122692224987, "grad_norm": 0.3327987492084503, "learning_rate": 1.996917902469476e-05, "loss": 0.6356, "step": 1726 }, { "epoch": 0.053051946057199026, "grad_norm": 0.3554466962814331, "learning_rate": 1.996914109904267e-05, "loss": 0.6154, "step": 1727 }, { "epoch": 0.05308266519214819, "grad_norm": 0.33200323581695557, "learning_rate": 1.996910315010695e-05, "loss": 0.6428, "step": 1728 }, { "epoch": 0.05311338432709735, "grad_norm": 0.3314322233200073, "learning_rate": 1.9969065177887696e-05, "loss": 0.5371, "step": 1729 }, { "epoch": 0.05314410346204651, "grad_norm": 0.33848699927330017, "learning_rate": 1.9969027182384993e-05, "loss": 0.65, "step": 1730 }, { "epoch": 0.05317482259699567, "grad_norm": 0.3499906063079834, "learning_rate": 1.996898916359893e-05, "loss": 0.603, "step": 1731 }, { "epoch": 0.053205541731944826, "grad_norm": 0.3205356299877167, "learning_rate": 1.9968951121529598e-05, "loss": 0.6111, "step": 1732 }, { "epoch": 0.05323626086689399, "grad_norm": 0.34398016333580017, "learning_rate": 1.9968913056177085e-05, "loss": 0.6106, "step": 1733 }, { "epoch": 0.05326698000184315, "grad_norm": 0.332608699798584, "learning_rate": 1.9968874967541477e-05, "loss": 0.5559, "step": 1734 }, { "epoch": 0.05329769913679231, "grad_norm": 0.3016985356807709, "learning_rate": 1.9968836855622867e-05, "loss": 0.531, "step": 1735 }, { "epoch": 0.053328418271741465, "grad_norm": 0.3576187193393707, "learning_rate": 1.996879872042134e-05, "loss": 0.6087, "step": 1736 }, { "epoch": 0.05335913740669063, "grad_norm": 0.32663413882255554, "learning_rate": 1.996876056193699e-05, "loss": 0.6651, "step": 1737 }, { "epoch": 0.05338985654163979, "grad_norm": 0.416313111782074, "learning_rate": 1.9968722380169906e-05, "loss": 0.5658, "step": 1738 }, { "epoch": 0.05342057567658895, "grad_norm": 0.3951456546783447, "learning_rate": 1.996868417512017e-05, "loss": 0.6773, "step": 1739 }, { "epoch": 0.053451294811538104, "grad_norm": 0.48338189721107483, "learning_rate": 1.9968645946787875e-05, "loss": 0.7154, "step": 1740 }, { "epoch": 0.053482013946487265, "grad_norm": 0.34798914194107056, "learning_rate": 1.9968607695173116e-05, "loss": 0.6022, "step": 1741 }, { "epoch": 0.05351273308143643, "grad_norm": 0.40057680010795593, "learning_rate": 1.9968569420275975e-05, "loss": 0.6754, "step": 1742 }, { "epoch": 0.05354345221638559, "grad_norm": 0.3306162357330322, "learning_rate": 1.9968531122096545e-05, "loss": 0.568, "step": 1743 }, { "epoch": 0.05357417135133475, "grad_norm": 0.3748723566532135, "learning_rate": 1.9968492800634913e-05, "loss": 0.618, "step": 1744 }, { "epoch": 0.053604890486283904, "grad_norm": 0.30966079235076904, "learning_rate": 1.996845445589117e-05, "loss": 0.6878, "step": 1745 }, { "epoch": 0.053635609621233066, "grad_norm": 0.3186560273170471, "learning_rate": 1.9968416087865405e-05, "loss": 0.6537, "step": 1746 }, { "epoch": 0.05366632875618223, "grad_norm": 0.3570191562175751, "learning_rate": 1.996837769655771e-05, "loss": 0.7086, "step": 1747 }, { "epoch": 0.05369704789113139, "grad_norm": 0.3589688837528229, "learning_rate": 1.996833928196817e-05, "loss": 0.6437, "step": 1748 }, { "epoch": 0.05372776702608054, "grad_norm": 0.3615206778049469, "learning_rate": 1.996830084409688e-05, "loss": 0.5804, "step": 1749 }, { "epoch": 0.053758486161029705, "grad_norm": 0.3345549404621124, "learning_rate": 1.9968262382943927e-05, "loss": 0.7215, "step": 1750 }, { "epoch": 0.053789205295978866, "grad_norm": 0.3486518859863281, "learning_rate": 1.99682238985094e-05, "loss": 0.7676, "step": 1751 }, { "epoch": 0.05381992443092803, "grad_norm": 0.30950355529785156, "learning_rate": 1.9968185390793387e-05, "loss": 0.6181, "step": 1752 }, { "epoch": 0.05385064356587718, "grad_norm": 0.3626912832260132, "learning_rate": 1.9968146859795987e-05, "loss": 0.7019, "step": 1753 }, { "epoch": 0.053881362700826343, "grad_norm": 0.3275771141052246, "learning_rate": 1.996810830551728e-05, "loss": 0.6282, "step": 1754 }, { "epoch": 0.053912081835775505, "grad_norm": 0.4009876549243927, "learning_rate": 1.996806972795736e-05, "loss": 0.65, "step": 1755 }, { "epoch": 0.053942800970724666, "grad_norm": 0.3567888140678406, "learning_rate": 1.9968031127116315e-05, "loss": 0.583, "step": 1756 }, { "epoch": 0.05397352010567382, "grad_norm": 0.3097861409187317, "learning_rate": 1.9967992502994243e-05, "loss": 0.6725, "step": 1757 }, { "epoch": 0.05400423924062298, "grad_norm": 0.33761295676231384, "learning_rate": 1.9967953855591223e-05, "loss": 0.599, "step": 1758 }, { "epoch": 0.054034958375572144, "grad_norm": 0.344808965921402, "learning_rate": 1.9967915184907353e-05, "loss": 0.5943, "step": 1759 }, { "epoch": 0.054065677510521305, "grad_norm": 0.32958218455314636, "learning_rate": 1.9967876490942722e-05, "loss": 0.6732, "step": 1760 }, { "epoch": 0.05409639664547047, "grad_norm": 0.32065731287002563, "learning_rate": 1.9967837773697417e-05, "loss": 0.5628, "step": 1761 }, { "epoch": 0.05412711578041962, "grad_norm": 0.3384649157524109, "learning_rate": 1.9967799033171534e-05, "loss": 0.5858, "step": 1762 }, { "epoch": 0.05415783491536878, "grad_norm": 0.38876229524612427, "learning_rate": 1.9967760269365157e-05, "loss": 0.5885, "step": 1763 }, { "epoch": 0.054188554050317944, "grad_norm": 0.4863390624523163, "learning_rate": 1.996772148227838e-05, "loss": 0.5833, "step": 1764 }, { "epoch": 0.054219273185267106, "grad_norm": 0.352750301361084, "learning_rate": 1.9967682671911294e-05, "loss": 0.594, "step": 1765 }, { "epoch": 0.05424999232021626, "grad_norm": 0.3516665995121002, "learning_rate": 1.996764383826399e-05, "loss": 0.6692, "step": 1766 }, { "epoch": 0.05428071145516542, "grad_norm": 0.3806980848312378, "learning_rate": 1.9967604981336556e-05, "loss": 0.6181, "step": 1767 }, { "epoch": 0.05431143059011458, "grad_norm": 0.3783700466156006, "learning_rate": 1.9967566101129085e-05, "loss": 0.5741, "step": 1768 }, { "epoch": 0.054342149725063744, "grad_norm": 0.4128999412059784, "learning_rate": 1.9967527197641666e-05, "loss": 0.5392, "step": 1769 }, { "epoch": 0.0543728688600129, "grad_norm": 0.9719181060791016, "learning_rate": 1.9967488270874393e-05, "loss": 0.685, "step": 1770 }, { "epoch": 0.05440358799496206, "grad_norm": 0.33161112666130066, "learning_rate": 1.9967449320827358e-05, "loss": 0.5686, "step": 1771 }, { "epoch": 0.05443430712991122, "grad_norm": 0.3345581293106079, "learning_rate": 1.9967410347500646e-05, "loss": 0.6247, "step": 1772 }, { "epoch": 0.05446502626486038, "grad_norm": 0.34420591592788696, "learning_rate": 1.9967371350894347e-05, "loss": 0.6516, "step": 1773 }, { "epoch": 0.054495745399809545, "grad_norm": 0.3308345675468445, "learning_rate": 1.996733233100856e-05, "loss": 0.5793, "step": 1774 }, { "epoch": 0.0545264645347587, "grad_norm": 0.396537721157074, "learning_rate": 1.9967293287843373e-05, "loss": 0.622, "step": 1775 }, { "epoch": 0.05455718366970786, "grad_norm": 0.32373932003974915, "learning_rate": 1.9967254221398873e-05, "loss": 0.5834, "step": 1776 }, { "epoch": 0.05458790280465702, "grad_norm": 0.5956937074661255, "learning_rate": 1.9967215131675156e-05, "loss": 0.6589, "step": 1777 }, { "epoch": 0.054618621939606184, "grad_norm": 0.33745530247688293, "learning_rate": 1.9967176018672315e-05, "loss": 0.6526, "step": 1778 }, { "epoch": 0.05464934107455534, "grad_norm": 0.3374810218811035, "learning_rate": 1.9967136882390436e-05, "loss": 0.5269, "step": 1779 }, { "epoch": 0.0546800602095045, "grad_norm": 0.3114989697933197, "learning_rate": 1.9967097722829615e-05, "loss": 0.6264, "step": 1780 }, { "epoch": 0.05471077934445366, "grad_norm": 0.3103391230106354, "learning_rate": 1.9967058539989937e-05, "loss": 0.6085, "step": 1781 }, { "epoch": 0.05474149847940282, "grad_norm": 0.3351426422595978, "learning_rate": 1.99670193338715e-05, "loss": 0.6653, "step": 1782 }, { "epoch": 0.05477221761435198, "grad_norm": 0.333210289478302, "learning_rate": 1.9966980104474395e-05, "loss": 0.5896, "step": 1783 }, { "epoch": 0.05480293674930114, "grad_norm": 0.3972318470478058, "learning_rate": 1.996694085179871e-05, "loss": 0.582, "step": 1784 }, { "epoch": 0.0548336558842503, "grad_norm": 0.3368469476699829, "learning_rate": 1.9966901575844537e-05, "loss": 0.6089, "step": 1785 }, { "epoch": 0.05486437501919946, "grad_norm": 0.3137270212173462, "learning_rate": 1.996686227661197e-05, "loss": 0.6137, "step": 1786 }, { "epoch": 0.054895094154148616, "grad_norm": 0.3790385127067566, "learning_rate": 1.99668229541011e-05, "loss": 0.6153, "step": 1787 }, { "epoch": 0.05492581328909778, "grad_norm": 0.3525201976299286, "learning_rate": 1.9966783608312022e-05, "loss": 0.6533, "step": 1788 }, { "epoch": 0.05495653242404694, "grad_norm": 0.4979594349861145, "learning_rate": 1.996674423924482e-05, "loss": 0.6987, "step": 1789 }, { "epoch": 0.0549872515589961, "grad_norm": 0.3671897053718567, "learning_rate": 1.9966704846899595e-05, "loss": 0.5983, "step": 1790 }, { "epoch": 0.05501797069394526, "grad_norm": 0.3712370693683624, "learning_rate": 1.9966665431276435e-05, "loss": 0.6116, "step": 1791 }, { "epoch": 0.055048689828894416, "grad_norm": 0.38204923272132874, "learning_rate": 1.9966625992375427e-05, "loss": 0.6007, "step": 1792 }, { "epoch": 0.05507940896384358, "grad_norm": 0.3614477515220642, "learning_rate": 1.996658653019667e-05, "loss": 0.5865, "step": 1793 }, { "epoch": 0.05511012809879274, "grad_norm": 0.33504560589790344, "learning_rate": 1.9966547044740258e-05, "loss": 0.6174, "step": 1794 }, { "epoch": 0.0551408472337419, "grad_norm": 0.3353455364704132, "learning_rate": 1.9966507536006273e-05, "loss": 0.6599, "step": 1795 }, { "epoch": 0.055171566368691055, "grad_norm": 0.37101733684539795, "learning_rate": 1.996646800399482e-05, "loss": 0.5846, "step": 1796 }, { "epoch": 0.05520228550364022, "grad_norm": 0.415584921836853, "learning_rate": 1.9966428448705982e-05, "loss": 0.6294, "step": 1797 }, { "epoch": 0.05523300463858938, "grad_norm": 0.3864404857158661, "learning_rate": 1.9966388870139853e-05, "loss": 0.6278, "step": 1798 }, { "epoch": 0.05526372377353854, "grad_norm": 0.6925176382064819, "learning_rate": 1.9966349268296527e-05, "loss": 0.545, "step": 1799 }, { "epoch": 0.055294442908487694, "grad_norm": 0.32420557737350464, "learning_rate": 1.9966309643176095e-05, "loss": 0.6888, "step": 1800 }, { "epoch": 0.055325162043436855, "grad_norm": 0.32321715354919434, "learning_rate": 1.9966269994778655e-05, "loss": 0.5997, "step": 1801 }, { "epoch": 0.05535588117838602, "grad_norm": 0.3477127254009247, "learning_rate": 1.9966230323104293e-05, "loss": 0.6304, "step": 1802 }, { "epoch": 0.05538660031333518, "grad_norm": 0.37694552540779114, "learning_rate": 1.9966190628153103e-05, "loss": 0.5925, "step": 1803 }, { "epoch": 0.05541731944828433, "grad_norm": 0.5167793035507202, "learning_rate": 1.9966150909925183e-05, "loss": 0.5952, "step": 1804 }, { "epoch": 0.055448038583233494, "grad_norm": 0.3316456079483032, "learning_rate": 1.9966111168420618e-05, "loss": 0.6407, "step": 1805 }, { "epoch": 0.055478757718182656, "grad_norm": 0.390786737203598, "learning_rate": 1.9966071403639503e-05, "loss": 0.6364, "step": 1806 }, { "epoch": 0.05550947685313182, "grad_norm": 0.3137774169445038, "learning_rate": 1.9966031615581936e-05, "loss": 0.6109, "step": 1807 }, { "epoch": 0.05554019598808098, "grad_norm": 0.3803066313266754, "learning_rate": 1.9965991804248002e-05, "loss": 0.5329, "step": 1808 }, { "epoch": 0.05557091512303013, "grad_norm": 0.36643847823143005, "learning_rate": 1.9965951969637803e-05, "loss": 0.6092, "step": 1809 }, { "epoch": 0.055601634257979295, "grad_norm": 0.34362462162971497, "learning_rate": 1.9965912111751426e-05, "loss": 0.686, "step": 1810 }, { "epoch": 0.055632353392928456, "grad_norm": 0.32659971714019775, "learning_rate": 1.996587223058896e-05, "loss": 0.6673, "step": 1811 }, { "epoch": 0.05566307252787762, "grad_norm": 0.326425701379776, "learning_rate": 1.9965832326150506e-05, "loss": 0.6497, "step": 1812 }, { "epoch": 0.05569379166282677, "grad_norm": 0.3291676938533783, "learning_rate": 1.996579239843616e-05, "loss": 0.6723, "step": 1813 }, { "epoch": 0.055724510797775934, "grad_norm": 0.34406304359436035, "learning_rate": 1.9965752447446e-05, "loss": 0.6455, "step": 1814 }, { "epoch": 0.055755229932725095, "grad_norm": 0.31995972990989685, "learning_rate": 1.996571247318014e-05, "loss": 0.5241, "step": 1815 }, { "epoch": 0.055785949067674256, "grad_norm": 0.35008370876312256, "learning_rate": 1.9965672475638654e-05, "loss": 0.7469, "step": 1816 }, { "epoch": 0.05581666820262341, "grad_norm": 0.3419457674026489, "learning_rate": 1.996563245482165e-05, "loss": 0.602, "step": 1817 }, { "epoch": 0.05584738733757257, "grad_norm": 0.3613872230052948, "learning_rate": 1.9965592410729212e-05, "loss": 0.6162, "step": 1818 }, { "epoch": 0.055878106472521734, "grad_norm": 0.3541260063648224, "learning_rate": 1.9965552343361437e-05, "loss": 0.6228, "step": 1819 }, { "epoch": 0.055908825607470895, "grad_norm": 0.35104671120643616, "learning_rate": 1.996551225271842e-05, "loss": 0.7081, "step": 1820 }, { "epoch": 0.05593954474242006, "grad_norm": 0.397500216960907, "learning_rate": 1.996547213880025e-05, "loss": 0.6632, "step": 1821 }, { "epoch": 0.05597026387736921, "grad_norm": 0.3174766004085541, "learning_rate": 1.996543200160703e-05, "loss": 0.6176, "step": 1822 }, { "epoch": 0.05600098301231837, "grad_norm": 0.36102616786956787, "learning_rate": 1.996539184113884e-05, "loss": 0.5808, "step": 1823 }, { "epoch": 0.056031702147267534, "grad_norm": 0.3269558548927307, "learning_rate": 1.996535165739579e-05, "loss": 0.606, "step": 1824 }, { "epoch": 0.056062421282216696, "grad_norm": 0.3468816578388214, "learning_rate": 1.996531145037796e-05, "loss": 0.6293, "step": 1825 }, { "epoch": 0.05609314041716585, "grad_norm": 0.3510095179080963, "learning_rate": 1.9965271220085446e-05, "loss": 0.5887, "step": 1826 }, { "epoch": 0.05612385955211501, "grad_norm": 0.3229139745235443, "learning_rate": 1.996523096651835e-05, "loss": 0.6503, "step": 1827 }, { "epoch": 0.05615457868706417, "grad_norm": 0.3386416435241699, "learning_rate": 1.996519068967676e-05, "loss": 0.6425, "step": 1828 }, { "epoch": 0.056185297822013335, "grad_norm": 0.3360848128795624, "learning_rate": 1.996515038956077e-05, "loss": 0.6006, "step": 1829 }, { "epoch": 0.05621601695696249, "grad_norm": 0.4475609064102173, "learning_rate": 1.9965110066170475e-05, "loss": 0.6766, "step": 1830 }, { "epoch": 0.05624673609191165, "grad_norm": 0.36392685770988464, "learning_rate": 1.9965069719505972e-05, "loss": 0.6182, "step": 1831 }, { "epoch": 0.05627745522686081, "grad_norm": 0.7619168758392334, "learning_rate": 1.9965029349567352e-05, "loss": 0.6581, "step": 1832 }, { "epoch": 0.05630817436180997, "grad_norm": 0.3269537687301636, "learning_rate": 1.9964988956354706e-05, "loss": 0.6013, "step": 1833 }, { "epoch": 0.05633889349675913, "grad_norm": 0.3960472047328949, "learning_rate": 1.9964948539868135e-05, "loss": 0.5932, "step": 1834 }, { "epoch": 0.05636961263170829, "grad_norm": 0.38066351413726807, "learning_rate": 1.9964908100107735e-05, "loss": 0.6471, "step": 1835 }, { "epoch": 0.05640033176665745, "grad_norm": 0.3433813154697418, "learning_rate": 1.9964867637073594e-05, "loss": 0.6981, "step": 1836 }, { "epoch": 0.05643105090160661, "grad_norm": 0.3386608362197876, "learning_rate": 1.9964827150765806e-05, "loss": 0.6089, "step": 1837 }, { "epoch": 0.056461770036555774, "grad_norm": 0.31819021701812744, "learning_rate": 1.9964786641184467e-05, "loss": 0.5727, "step": 1838 }, { "epoch": 0.05649248917150493, "grad_norm": 0.3429846465587616, "learning_rate": 1.9964746108329678e-05, "loss": 0.6361, "step": 1839 }, { "epoch": 0.05652320830645409, "grad_norm": 0.35714486241340637, "learning_rate": 1.9964705552201526e-05, "loss": 0.661, "step": 1840 }, { "epoch": 0.05655392744140325, "grad_norm": 0.32667797803878784, "learning_rate": 1.9964664972800108e-05, "loss": 0.5749, "step": 1841 }, { "epoch": 0.05658464657635241, "grad_norm": 0.4207698106765747, "learning_rate": 1.9964624370125518e-05, "loss": 0.5503, "step": 1842 }, { "epoch": 0.05661536571130157, "grad_norm": 0.4215468764305115, "learning_rate": 1.9964583744177854e-05, "loss": 0.7191, "step": 1843 }, { "epoch": 0.05664608484625073, "grad_norm": 0.5196759104728699, "learning_rate": 1.9964543094957207e-05, "loss": 0.6739, "step": 1844 }, { "epoch": 0.05667680398119989, "grad_norm": 0.3433223068714142, "learning_rate": 1.9964502422463673e-05, "loss": 0.6302, "step": 1845 }, { "epoch": 0.05670752311614905, "grad_norm": 0.3245305120944977, "learning_rate": 1.9964461726697353e-05, "loss": 0.6519, "step": 1846 }, { "epoch": 0.056738242251098206, "grad_norm": 0.33324727416038513, "learning_rate": 1.9964421007658332e-05, "loss": 0.6084, "step": 1847 }, { "epoch": 0.05676896138604737, "grad_norm": 0.32655832171440125, "learning_rate": 1.996438026534671e-05, "loss": 0.6613, "step": 1848 }, { "epoch": 0.05679968052099653, "grad_norm": 0.4154995083808899, "learning_rate": 1.9964339499762583e-05, "loss": 0.6961, "step": 1849 }, { "epoch": 0.05683039965594569, "grad_norm": 0.3287869989871979, "learning_rate": 1.9964298710906048e-05, "loss": 0.6156, "step": 1850 }, { "epoch": 0.05686111879089485, "grad_norm": 0.3565879166126251, "learning_rate": 1.9964257898777194e-05, "loss": 0.6149, "step": 1851 }, { "epoch": 0.056891837925844006, "grad_norm": 0.3523060977458954, "learning_rate": 1.9964217063376117e-05, "loss": 0.5629, "step": 1852 }, { "epoch": 0.05692255706079317, "grad_norm": 0.3587956428527832, "learning_rate": 1.996417620470292e-05, "loss": 0.6421, "step": 1853 }, { "epoch": 0.05695327619574233, "grad_norm": 0.3773297965526581, "learning_rate": 1.9964135322757693e-05, "loss": 0.6417, "step": 1854 }, { "epoch": 0.05698399533069149, "grad_norm": 0.3549673855304718, "learning_rate": 1.996409441754053e-05, "loss": 0.6191, "step": 1855 }, { "epoch": 0.057014714465640645, "grad_norm": 0.31769442558288574, "learning_rate": 1.996405348905153e-05, "loss": 0.6062, "step": 1856 }, { "epoch": 0.05704543360058981, "grad_norm": 0.37908053398132324, "learning_rate": 1.9964012537290783e-05, "loss": 0.7115, "step": 1857 }, { "epoch": 0.05707615273553897, "grad_norm": 0.33274707198143005, "learning_rate": 1.9963971562258395e-05, "loss": 0.6494, "step": 1858 }, { "epoch": 0.05710687187048813, "grad_norm": 0.3254900574684143, "learning_rate": 1.9963930563954454e-05, "loss": 0.6296, "step": 1859 }, { "epoch": 0.057137591005437284, "grad_norm": 0.3304414749145508, "learning_rate": 1.996388954237906e-05, "loss": 0.5853, "step": 1860 }, { "epoch": 0.057168310140386445, "grad_norm": 0.45191147923469543, "learning_rate": 1.9963848497532298e-05, "loss": 0.6332, "step": 1861 }, { "epoch": 0.05719902927533561, "grad_norm": 0.34929314255714417, "learning_rate": 1.996380742941428e-05, "loss": 0.6308, "step": 1862 }, { "epoch": 0.05722974841028477, "grad_norm": 0.44320791959762573, "learning_rate": 1.9963766338025086e-05, "loss": 0.5749, "step": 1863 }, { "epoch": 0.05726046754523392, "grad_norm": 0.38186830282211304, "learning_rate": 1.9963725223364826e-05, "loss": 0.7038, "step": 1864 }, { "epoch": 0.057291186680183084, "grad_norm": 0.35562124848365784, "learning_rate": 1.996368408543359e-05, "loss": 0.6504, "step": 1865 }, { "epoch": 0.057321905815132246, "grad_norm": 0.4203442335128784, "learning_rate": 1.996364292423147e-05, "loss": 0.5932, "step": 1866 }, { "epoch": 0.05735262495008141, "grad_norm": 0.32171741127967834, "learning_rate": 1.9963601739758568e-05, "loss": 0.6459, "step": 1867 }, { "epoch": 0.05738334408503057, "grad_norm": 0.3384965658187866, "learning_rate": 1.996356053201498e-05, "loss": 0.6221, "step": 1868 }, { "epoch": 0.05741406321997972, "grad_norm": 0.35755300521850586, "learning_rate": 1.99635193010008e-05, "loss": 0.6074, "step": 1869 }, { "epoch": 0.057444782354928885, "grad_norm": 0.35449323058128357, "learning_rate": 1.9963478046716123e-05, "loss": 0.6477, "step": 1870 }, { "epoch": 0.057475501489878046, "grad_norm": 0.36527958512306213, "learning_rate": 1.9963436769161046e-05, "loss": 0.6173, "step": 1871 }, { "epoch": 0.05750622062482721, "grad_norm": 0.3522111177444458, "learning_rate": 1.996339546833567e-05, "loss": 0.6596, "step": 1872 }, { "epoch": 0.05753693975977636, "grad_norm": 0.364387571811676, "learning_rate": 1.996335414424009e-05, "loss": 0.6656, "step": 1873 }, { "epoch": 0.057567658894725524, "grad_norm": 0.3396987020969391, "learning_rate": 1.9963312796874396e-05, "loss": 0.6133, "step": 1874 }, { "epoch": 0.057598378029674685, "grad_norm": 0.4459451138973236, "learning_rate": 1.9963271426238688e-05, "loss": 0.544, "step": 1875 }, { "epoch": 0.057629097164623846, "grad_norm": 0.3512122333049774, "learning_rate": 1.9963230032333068e-05, "loss": 0.7005, "step": 1876 }, { "epoch": 0.057659816299573, "grad_norm": 0.3311053216457367, "learning_rate": 1.9963188615157627e-05, "loss": 0.6254, "step": 1877 }, { "epoch": 0.05769053543452216, "grad_norm": 0.36754411458969116, "learning_rate": 1.9963147174712462e-05, "loss": 0.614, "step": 1878 }, { "epoch": 0.057721254569471324, "grad_norm": 0.3394148051738739, "learning_rate": 1.996310571099767e-05, "loss": 0.5888, "step": 1879 }, { "epoch": 0.057751973704420485, "grad_norm": 0.6702133417129517, "learning_rate": 1.996306422401335e-05, "loss": 0.6552, "step": 1880 }, { "epoch": 0.05778269283936965, "grad_norm": 0.336073637008667, "learning_rate": 1.99630227137596e-05, "loss": 0.6284, "step": 1881 }, { "epoch": 0.0578134119743188, "grad_norm": 0.333565890789032, "learning_rate": 1.9962981180236512e-05, "loss": 0.6309, "step": 1882 }, { "epoch": 0.05784413110926796, "grad_norm": 0.328255832195282, "learning_rate": 1.9962939623444186e-05, "loss": 0.5806, "step": 1883 }, { "epoch": 0.057874850244217124, "grad_norm": 0.33637914061546326, "learning_rate": 1.996289804338272e-05, "loss": 0.6502, "step": 1884 }, { "epoch": 0.057905569379166286, "grad_norm": 0.38039854168891907, "learning_rate": 1.9962856440052208e-05, "loss": 0.5823, "step": 1885 }, { "epoch": 0.05793628851411544, "grad_norm": 0.35690635442733765, "learning_rate": 1.9962814813452753e-05, "loss": 0.5955, "step": 1886 }, { "epoch": 0.0579670076490646, "grad_norm": 0.3070641756057739, "learning_rate": 1.9962773163584445e-05, "loss": 0.6028, "step": 1887 }, { "epoch": 0.05799772678401376, "grad_norm": 0.35995957255363464, "learning_rate": 1.9962731490447383e-05, "loss": 0.7271, "step": 1888 }, { "epoch": 0.058028445918962925, "grad_norm": 0.3890112638473511, "learning_rate": 1.996268979404167e-05, "loss": 0.6347, "step": 1889 }, { "epoch": 0.05805916505391208, "grad_norm": 0.3295291066169739, "learning_rate": 1.9962648074367394e-05, "loss": 0.6074, "step": 1890 }, { "epoch": 0.05808988418886124, "grad_norm": 0.361087828874588, "learning_rate": 1.996260633142466e-05, "loss": 0.6289, "step": 1891 }, { "epoch": 0.0581206033238104, "grad_norm": 0.34797289967536926, "learning_rate": 1.9962564565213565e-05, "loss": 0.5912, "step": 1892 }, { "epoch": 0.05815132245875956, "grad_norm": 0.3457486033439636, "learning_rate": 1.9962522775734204e-05, "loss": 0.6287, "step": 1893 }, { "epoch": 0.05818204159370872, "grad_norm": 0.3835926651954651, "learning_rate": 1.9962480962986675e-05, "loss": 0.6334, "step": 1894 }, { "epoch": 0.05821276072865788, "grad_norm": 0.46173426508903503, "learning_rate": 1.9962439126971073e-05, "loss": 0.5848, "step": 1895 }, { "epoch": 0.05824347986360704, "grad_norm": 0.5842630863189697, "learning_rate": 1.9962397267687503e-05, "loss": 0.6981, "step": 1896 }, { "epoch": 0.0582741989985562, "grad_norm": 0.31368395686149597, "learning_rate": 1.9962355385136053e-05, "loss": 0.6539, "step": 1897 }, { "epoch": 0.058304918133505364, "grad_norm": 0.36070555448532104, "learning_rate": 1.996231347931683e-05, "loss": 0.5601, "step": 1898 }, { "epoch": 0.05833563726845452, "grad_norm": 0.3700317144393921, "learning_rate": 1.996227155022993e-05, "loss": 0.6711, "step": 1899 }, { "epoch": 0.05836635640340368, "grad_norm": 0.37846943736076355, "learning_rate": 1.9962229597875447e-05, "loss": 0.7282, "step": 1900 }, { "epoch": 0.05839707553835284, "grad_norm": 0.3402721583843231, "learning_rate": 1.996218762225348e-05, "loss": 0.581, "step": 1901 }, { "epoch": 0.058427794673302, "grad_norm": 0.34083840250968933, "learning_rate": 1.996214562336413e-05, "loss": 0.6463, "step": 1902 }, { "epoch": 0.05845851380825116, "grad_norm": 0.3564232885837555, "learning_rate": 1.996210360120749e-05, "loss": 0.6563, "step": 1903 }, { "epoch": 0.05848923294320032, "grad_norm": 0.4010450541973114, "learning_rate": 1.9962061555783667e-05, "loss": 0.6782, "step": 1904 }, { "epoch": 0.05851995207814948, "grad_norm": 0.3669174015522003, "learning_rate": 1.996201948709275e-05, "loss": 0.718, "step": 1905 }, { "epoch": 0.05855067121309864, "grad_norm": 0.32990458607673645, "learning_rate": 1.996197739513484e-05, "loss": 0.5863, "step": 1906 }, { "epoch": 0.058581390348047796, "grad_norm": 0.34461724758148193, "learning_rate": 1.9961935279910035e-05, "loss": 0.6445, "step": 1907 }, { "epoch": 0.05861210948299696, "grad_norm": 0.33632513880729675, "learning_rate": 1.9961893141418436e-05, "loss": 0.6195, "step": 1908 }, { "epoch": 0.05864282861794612, "grad_norm": 0.33710768818855286, "learning_rate": 1.9961850979660142e-05, "loss": 0.6783, "step": 1909 }, { "epoch": 0.05867354775289528, "grad_norm": 0.3746880888938904, "learning_rate": 1.9961808794635246e-05, "loss": 0.5438, "step": 1910 }, { "epoch": 0.058704266887844435, "grad_norm": 0.43682464957237244, "learning_rate": 1.9961766586343853e-05, "loss": 0.5646, "step": 1911 }, { "epoch": 0.058734986022793596, "grad_norm": 0.34957990050315857, "learning_rate": 1.9961724354786053e-05, "loss": 0.6046, "step": 1912 }, { "epoch": 0.05876570515774276, "grad_norm": 0.4093162715435028, "learning_rate": 1.9961682099961956e-05, "loss": 0.6684, "step": 1913 }, { "epoch": 0.05879642429269192, "grad_norm": 0.3118473291397095, "learning_rate": 1.9961639821871653e-05, "loss": 0.6549, "step": 1914 }, { "epoch": 0.05882714342764108, "grad_norm": 0.325076699256897, "learning_rate": 1.9961597520515245e-05, "loss": 0.6655, "step": 1915 }, { "epoch": 0.058857862562590235, "grad_norm": 0.31546851992607117, "learning_rate": 1.9961555195892828e-05, "loss": 0.635, "step": 1916 }, { "epoch": 0.0588885816975394, "grad_norm": 0.3521716594696045, "learning_rate": 1.9961512848004502e-05, "loss": 0.6021, "step": 1917 }, { "epoch": 0.05891930083248856, "grad_norm": 0.294404000043869, "learning_rate": 1.996147047685037e-05, "loss": 0.5636, "step": 1918 }, { "epoch": 0.05895001996743772, "grad_norm": 0.33375808596611023, "learning_rate": 1.9961428082430527e-05, "loss": 0.5356, "step": 1919 }, { "epoch": 0.058980739102386874, "grad_norm": 0.3853186368942261, "learning_rate": 1.996138566474507e-05, "loss": 0.7362, "step": 1920 }, { "epoch": 0.059011458237336036, "grad_norm": 0.3215197026729584, "learning_rate": 1.9961343223794107e-05, "loss": 0.5889, "step": 1921 }, { "epoch": 0.0590421773722852, "grad_norm": 0.3713286817073822, "learning_rate": 1.996130075957773e-05, "loss": 0.6738, "step": 1922 }, { "epoch": 0.05907289650723436, "grad_norm": 0.516392171382904, "learning_rate": 1.996125827209604e-05, "loss": 0.6039, "step": 1923 }, { "epoch": 0.05910361564218351, "grad_norm": 0.3217048645019531, "learning_rate": 1.996121576134913e-05, "loss": 0.6858, "step": 1924 }, { "epoch": 0.059134334777132674, "grad_norm": 0.33400759100914, "learning_rate": 1.9961173227337108e-05, "loss": 0.5944, "step": 1925 }, { "epoch": 0.059165053912081836, "grad_norm": 0.32303401827812195, "learning_rate": 1.9961130670060073e-05, "loss": 0.6157, "step": 1926 }, { "epoch": 0.059195773047031, "grad_norm": 0.3906562328338623, "learning_rate": 1.996108808951812e-05, "loss": 0.6809, "step": 1927 }, { "epoch": 0.05922649218198016, "grad_norm": 0.3516508936882019, "learning_rate": 1.996104548571135e-05, "loss": 0.6279, "step": 1928 }, { "epoch": 0.05925721131692931, "grad_norm": 0.3840316832065582, "learning_rate": 1.996100285863986e-05, "loss": 0.7507, "step": 1929 }, { "epoch": 0.059287930451878475, "grad_norm": 0.33520829677581787, "learning_rate": 1.9960960208303753e-05, "loss": 0.4881, "step": 1930 }, { "epoch": 0.059318649586827636, "grad_norm": 0.36315110325813293, "learning_rate": 1.9960917534703128e-05, "loss": 0.677, "step": 1931 }, { "epoch": 0.0593493687217768, "grad_norm": 0.3510785400867462, "learning_rate": 1.9960874837838086e-05, "loss": 0.6949, "step": 1932 }, { "epoch": 0.05938008785672595, "grad_norm": 0.3370154798030853, "learning_rate": 1.9960832117708728e-05, "loss": 0.6104, "step": 1933 }, { "epoch": 0.059410806991675114, "grad_norm": 0.33316150307655334, "learning_rate": 1.9960789374315146e-05, "loss": 0.6694, "step": 1934 }, { "epoch": 0.059441526126624275, "grad_norm": 0.3218884766101837, "learning_rate": 1.9960746607657448e-05, "loss": 0.6159, "step": 1935 }, { "epoch": 0.059472245261573436, "grad_norm": 0.37028834223747253, "learning_rate": 1.9960703817735726e-05, "loss": 0.6328, "step": 1936 }, { "epoch": 0.05950296439652259, "grad_norm": 0.3951369524002075, "learning_rate": 1.996066100455009e-05, "loss": 0.5957, "step": 1937 }, { "epoch": 0.05953368353147175, "grad_norm": 0.2833153307437897, "learning_rate": 1.996061816810063e-05, "loss": 0.6079, "step": 1938 }, { "epoch": 0.059564402666420914, "grad_norm": 0.30723920464515686, "learning_rate": 1.9960575308387452e-05, "loss": 0.5969, "step": 1939 }, { "epoch": 0.059595121801370075, "grad_norm": 0.3215668201446533, "learning_rate": 1.9960532425410653e-05, "loss": 0.5895, "step": 1940 }, { "epoch": 0.05962584093631923, "grad_norm": 0.3341771364212036, "learning_rate": 1.996048951917034e-05, "loss": 0.5528, "step": 1941 }, { "epoch": 0.05965656007126839, "grad_norm": 0.32899585366249084, "learning_rate": 1.9960446589666603e-05, "loss": 0.5894, "step": 1942 }, { "epoch": 0.05968727920621755, "grad_norm": 0.33236971497535706, "learning_rate": 1.996040363689955e-05, "loss": 0.5846, "step": 1943 }, { "epoch": 0.059717998341166714, "grad_norm": 0.37765568494796753, "learning_rate": 1.996036066086928e-05, "loss": 0.5184, "step": 1944 }, { "epoch": 0.059748717476115876, "grad_norm": 0.5443404912948608, "learning_rate": 1.996031766157589e-05, "loss": 0.61, "step": 1945 }, { "epoch": 0.05977943661106503, "grad_norm": 1.7714771032333374, "learning_rate": 1.9960274639019482e-05, "loss": 0.5958, "step": 1946 }, { "epoch": 0.05981015574601419, "grad_norm": 0.32788190245628357, "learning_rate": 1.9960231593200162e-05, "loss": 0.5766, "step": 1947 }, { "epoch": 0.05984087488096335, "grad_norm": 0.3337060809135437, "learning_rate": 1.996018852411802e-05, "loss": 0.6893, "step": 1948 }, { "epoch": 0.059871594015912515, "grad_norm": 0.34407633543014526, "learning_rate": 1.9960145431773163e-05, "loss": 0.657, "step": 1949 }, { "epoch": 0.05990231315086167, "grad_norm": 0.357710063457489, "learning_rate": 1.9960102316165694e-05, "loss": 0.6068, "step": 1950 }, { "epoch": 0.05993303228581083, "grad_norm": 0.36166834831237793, "learning_rate": 1.9960059177295705e-05, "loss": 0.6723, "step": 1951 }, { "epoch": 0.05996375142075999, "grad_norm": 0.3370208740234375, "learning_rate": 1.9960016015163305e-05, "loss": 0.593, "step": 1952 }, { "epoch": 0.05999447055570915, "grad_norm": 0.3309382200241089, "learning_rate": 1.995997282976859e-05, "loss": 0.5448, "step": 1953 }, { "epoch": 0.06002518969065831, "grad_norm": 0.3434211015701294, "learning_rate": 1.9959929621111667e-05, "loss": 0.6144, "step": 1954 }, { "epoch": 0.06005590882560747, "grad_norm": 0.33767828345298767, "learning_rate": 1.9959886389192632e-05, "loss": 0.6763, "step": 1955 }, { "epoch": 0.06008662796055663, "grad_norm": 0.3372674882411957, "learning_rate": 1.9959843134011582e-05, "loss": 0.6525, "step": 1956 }, { "epoch": 0.06011734709550579, "grad_norm": 0.42414262890815735, "learning_rate": 1.9959799855568626e-05, "loss": 0.5901, "step": 1957 }, { "epoch": 0.060148066230454954, "grad_norm": 0.32659855484962463, "learning_rate": 1.995975655386386e-05, "loss": 0.7091, "step": 1958 }, { "epoch": 0.06017878536540411, "grad_norm": 0.3550529181957245, "learning_rate": 1.995971322889739e-05, "loss": 0.5484, "step": 1959 }, { "epoch": 0.06020950450035327, "grad_norm": 0.3221299648284912, "learning_rate": 1.9959669880669313e-05, "loss": 0.6626, "step": 1960 }, { "epoch": 0.06024022363530243, "grad_norm": 0.38942691683769226, "learning_rate": 1.9959626509179733e-05, "loss": 0.5829, "step": 1961 }, { "epoch": 0.06027094277025159, "grad_norm": 0.40637823939323425, "learning_rate": 1.995958311442875e-05, "loss": 0.7706, "step": 1962 }, { "epoch": 0.06030166190520075, "grad_norm": 0.35358142852783203, "learning_rate": 1.995953969641646e-05, "loss": 0.6262, "step": 1963 }, { "epoch": 0.06033238104014991, "grad_norm": 0.3889586329460144, "learning_rate": 1.9959496255142973e-05, "loss": 0.6075, "step": 1964 }, { "epoch": 0.06036310017509907, "grad_norm": 0.3544589877128601, "learning_rate": 1.995945279060839e-05, "loss": 0.5939, "step": 1965 }, { "epoch": 0.06039381931004823, "grad_norm": 0.47478851675987244, "learning_rate": 1.9959409302812803e-05, "loss": 0.684, "step": 1966 }, { "epoch": 0.060424538444997386, "grad_norm": 0.3987491726875305, "learning_rate": 1.9959365791756324e-05, "loss": 0.6862, "step": 1967 }, { "epoch": 0.06045525757994655, "grad_norm": 0.33788686990737915, "learning_rate": 1.995932225743905e-05, "loss": 0.6017, "step": 1968 }, { "epoch": 0.06048597671489571, "grad_norm": 0.35874420404434204, "learning_rate": 1.995927869986108e-05, "loss": 0.6155, "step": 1969 }, { "epoch": 0.06051669584984487, "grad_norm": 0.30768996477127075, "learning_rate": 1.9959235119022528e-05, "loss": 0.6335, "step": 1970 }, { "epoch": 0.060547414984794025, "grad_norm": 0.3538748621940613, "learning_rate": 1.995919151492348e-05, "loss": 0.7285, "step": 1971 }, { "epoch": 0.060578134119743186, "grad_norm": 0.32906243205070496, "learning_rate": 1.995914788756404e-05, "loss": 0.6362, "step": 1972 }, { "epoch": 0.06060885325469235, "grad_norm": 0.7795906662940979, "learning_rate": 1.9959104236944325e-05, "loss": 0.627, "step": 1973 }, { "epoch": 0.06063957238964151, "grad_norm": 0.3739781081676483, "learning_rate": 1.995906056306442e-05, "loss": 0.6602, "step": 1974 }, { "epoch": 0.06067029152459067, "grad_norm": 0.33342576026916504, "learning_rate": 1.9959016865924435e-05, "loss": 0.6336, "step": 1975 }, { "epoch": 0.060701010659539825, "grad_norm": 0.3325606882572174, "learning_rate": 1.995897314552447e-05, "loss": 0.6575, "step": 1976 }, { "epoch": 0.06073172979448899, "grad_norm": 0.363676518201828, "learning_rate": 1.995892940186463e-05, "loss": 0.6119, "step": 1977 }, { "epoch": 0.06076244892943815, "grad_norm": 0.3407360315322876, "learning_rate": 1.9958885634945013e-05, "loss": 0.5727, "step": 1978 }, { "epoch": 0.06079316806438731, "grad_norm": 0.34714728593826294, "learning_rate": 1.9958841844765723e-05, "loss": 0.5528, "step": 1979 }, { "epoch": 0.060823887199336464, "grad_norm": 0.3789319694042206, "learning_rate": 1.9958798031326864e-05, "loss": 0.6909, "step": 1980 }, { "epoch": 0.060854606334285626, "grad_norm": 0.3390599489212036, "learning_rate": 1.9958754194628537e-05, "loss": 0.6981, "step": 1981 }, { "epoch": 0.06088532546923479, "grad_norm": 0.3374008536338806, "learning_rate": 1.9958710334670842e-05, "loss": 0.6679, "step": 1982 }, { "epoch": 0.06091604460418395, "grad_norm": 0.3059338629245758, "learning_rate": 1.9958666451453883e-05, "loss": 0.5899, "step": 1983 }, { "epoch": 0.0609467637391331, "grad_norm": 0.32157742977142334, "learning_rate": 1.9958622544977765e-05, "loss": 0.6735, "step": 1984 }, { "epoch": 0.060977482874082264, "grad_norm": 0.35762548446655273, "learning_rate": 1.9958578615242587e-05, "loss": 0.6715, "step": 1985 }, { "epoch": 0.061008202009031426, "grad_norm": 0.4238846004009247, "learning_rate": 1.9958534662248453e-05, "loss": 0.5757, "step": 1986 }, { "epoch": 0.06103892114398059, "grad_norm": 0.5919464230537415, "learning_rate": 1.9958490685995465e-05, "loss": 0.6297, "step": 1987 }, { "epoch": 0.06106964027892975, "grad_norm": 0.36047831177711487, "learning_rate": 1.995844668648373e-05, "loss": 0.6197, "step": 1988 }, { "epoch": 0.0611003594138789, "grad_norm": 0.4208289086818695, "learning_rate": 1.9958402663713344e-05, "loss": 0.5795, "step": 1989 }, { "epoch": 0.061131078548828065, "grad_norm": 0.3347480297088623, "learning_rate": 1.9958358617684418e-05, "loss": 0.6772, "step": 1990 }, { "epoch": 0.061161797683777226, "grad_norm": 0.36140525341033936, "learning_rate": 1.9958314548397045e-05, "loss": 0.5645, "step": 1991 }, { "epoch": 0.06119251681872639, "grad_norm": 0.3452851176261902, "learning_rate": 1.9958270455851333e-05, "loss": 0.7188, "step": 1992 }, { "epoch": 0.06122323595367554, "grad_norm": 0.3738088011741638, "learning_rate": 1.9958226340047386e-05, "loss": 0.6284, "step": 1993 }, { "epoch": 0.061253955088624704, "grad_norm": 0.36979958415031433, "learning_rate": 1.9958182200985305e-05, "loss": 0.6323, "step": 1994 }, { "epoch": 0.061284674223573865, "grad_norm": 0.3399437963962555, "learning_rate": 1.99581380386652e-05, "loss": 0.5732, "step": 1995 }, { "epoch": 0.061315393358523027, "grad_norm": 0.3622000515460968, "learning_rate": 1.9958093853087158e-05, "loss": 0.6186, "step": 1996 }, { "epoch": 0.06134611249347218, "grad_norm": 0.3338692784309387, "learning_rate": 1.99580496442513e-05, "loss": 0.6405, "step": 1997 }, { "epoch": 0.06137683162842134, "grad_norm": 0.3561590313911438, "learning_rate": 1.995800541215772e-05, "loss": 0.583, "step": 1998 }, { "epoch": 0.061407550763370504, "grad_norm": 0.39040496945381165, "learning_rate": 1.995796115680652e-05, "loss": 0.6663, "step": 1999 }, { "epoch": 0.061438269898319665, "grad_norm": 0.34152212738990784, "learning_rate": 1.9957916878197808e-05, "loss": 0.5743, "step": 2000 }, { "epoch": 0.06146898903326882, "grad_norm": 0.32716041803359985, "learning_rate": 1.9957872576331688e-05, "loss": 0.5898, "step": 2001 }, { "epoch": 0.06149970816821798, "grad_norm": 0.3220956027507782, "learning_rate": 1.9957828251208258e-05, "loss": 0.6262, "step": 2002 }, { "epoch": 0.06153042730316714, "grad_norm": 0.35508161783218384, "learning_rate": 1.9957783902827625e-05, "loss": 0.6901, "step": 2003 }, { "epoch": 0.061561146438116304, "grad_norm": 0.3355950117111206, "learning_rate": 1.9957739531189896e-05, "loss": 0.5956, "step": 2004 }, { "epoch": 0.061591865573065466, "grad_norm": 0.32397258281707764, "learning_rate": 1.9957695136295167e-05, "loss": 0.6232, "step": 2005 }, { "epoch": 0.06162258470801462, "grad_norm": 0.3622901141643524, "learning_rate": 1.9957650718143546e-05, "loss": 0.581, "step": 2006 }, { "epoch": 0.06165330384296378, "grad_norm": 0.3379761874675751, "learning_rate": 1.9957606276735137e-05, "loss": 0.7019, "step": 2007 }, { "epoch": 0.06168402297791294, "grad_norm": 0.37739211320877075, "learning_rate": 1.9957561812070043e-05, "loss": 0.5951, "step": 2008 }, { "epoch": 0.061714742112862105, "grad_norm": 0.31355494260787964, "learning_rate": 1.995751732414837e-05, "loss": 0.6131, "step": 2009 }, { "epoch": 0.06174546124781126, "grad_norm": 0.47889384627342224, "learning_rate": 1.9957472812970215e-05, "loss": 0.6291, "step": 2010 }, { "epoch": 0.06177618038276042, "grad_norm": 0.3297981023788452, "learning_rate": 1.9957428278535688e-05, "loss": 0.5747, "step": 2011 }, { "epoch": 0.06180689951770958, "grad_norm": 0.4766994118690491, "learning_rate": 1.9957383720844892e-05, "loss": 0.5804, "step": 2012 }, { "epoch": 0.06183761865265874, "grad_norm": 0.33408433198928833, "learning_rate": 1.9957339139897933e-05, "loss": 0.6645, "step": 2013 }, { "epoch": 0.0618683377876079, "grad_norm": 0.34987059235572815, "learning_rate": 1.9957294535694915e-05, "loss": 0.6459, "step": 2014 }, { "epoch": 0.06189905692255706, "grad_norm": 0.35812458395957947, "learning_rate": 1.9957249908235936e-05, "loss": 0.6221, "step": 2015 }, { "epoch": 0.06192977605750622, "grad_norm": 0.3241878151893616, "learning_rate": 1.9957205257521106e-05, "loss": 0.6355, "step": 2016 }, { "epoch": 0.06196049519245538, "grad_norm": 0.3208383023738861, "learning_rate": 1.9957160583550528e-05, "loss": 0.6038, "step": 2017 }, { "epoch": 0.06199121432740454, "grad_norm": 0.3224657475948334, "learning_rate": 1.9957115886324305e-05, "loss": 0.652, "step": 2018 }, { "epoch": 0.0620219334623537, "grad_norm": 0.3631000220775604, "learning_rate": 1.9957071165842542e-05, "loss": 0.644, "step": 2019 }, { "epoch": 0.06205265259730286, "grad_norm": 0.45039403438568115, "learning_rate": 1.9957026422105347e-05, "loss": 0.6209, "step": 2020 }, { "epoch": 0.06208337173225202, "grad_norm": 0.33488887548446655, "learning_rate": 1.9956981655112822e-05, "loss": 0.6399, "step": 2021 }, { "epoch": 0.06211409086720118, "grad_norm": 0.37546268105506897, "learning_rate": 1.9956936864865065e-05, "loss": 0.6463, "step": 2022 }, { "epoch": 0.06214481000215034, "grad_norm": 0.314593642950058, "learning_rate": 1.995689205136219e-05, "loss": 0.6604, "step": 2023 }, { "epoch": 0.0621755291370995, "grad_norm": 0.3480203151702881, "learning_rate": 1.9956847214604302e-05, "loss": 0.5937, "step": 2024 }, { "epoch": 0.06220624827204866, "grad_norm": 0.343314528465271, "learning_rate": 1.9956802354591496e-05, "loss": 0.6662, "step": 2025 }, { "epoch": 0.06223696740699782, "grad_norm": 0.3652275502681732, "learning_rate": 1.9956757471323885e-05, "loss": 0.6379, "step": 2026 }, { "epoch": 0.062267686541946976, "grad_norm": 0.3650220036506653, "learning_rate": 1.995671256480157e-05, "loss": 0.5957, "step": 2027 }, { "epoch": 0.06229840567689614, "grad_norm": 0.35878440737724304, "learning_rate": 1.995666763502466e-05, "loss": 0.6018, "step": 2028 }, { "epoch": 0.0623291248118453, "grad_norm": 0.3804531693458557, "learning_rate": 1.9956622681993255e-05, "loss": 0.6474, "step": 2029 }, { "epoch": 0.06235984394679446, "grad_norm": 0.33783742785453796, "learning_rate": 1.9956577705707463e-05, "loss": 0.604, "step": 2030 }, { "epoch": 0.062390563081743615, "grad_norm": 0.3206551671028137, "learning_rate": 1.9956532706167392e-05, "loss": 0.6501, "step": 2031 }, { "epoch": 0.062421282216692776, "grad_norm": 0.31462362408638, "learning_rate": 1.995648768337314e-05, "loss": 0.6115, "step": 2032 }, { "epoch": 0.06245200135164194, "grad_norm": 0.3736492395401001, "learning_rate": 1.9956442637324814e-05, "loss": 0.5779, "step": 2033 }, { "epoch": 0.0624827204865911, "grad_norm": 0.34888580441474915, "learning_rate": 1.9956397568022527e-05, "loss": 0.6153, "step": 2034 }, { "epoch": 0.06251343962154025, "grad_norm": 0.3450947403907776, "learning_rate": 1.9956352475466373e-05, "loss": 0.5892, "step": 2035 }, { "epoch": 0.06254415875648942, "grad_norm": 0.3257576823234558, "learning_rate": 1.995630735965646e-05, "loss": 0.5703, "step": 2036 }, { "epoch": 0.06257487789143858, "grad_norm": 0.4108649492263794, "learning_rate": 1.9956262220592902e-05, "loss": 0.6349, "step": 2037 }, { "epoch": 0.06260559702638774, "grad_norm": 0.3595578074455261, "learning_rate": 1.9956217058275796e-05, "loss": 0.689, "step": 2038 }, { "epoch": 0.0626363161613369, "grad_norm": 0.37989306449890137, "learning_rate": 1.995617187270525e-05, "loss": 0.5531, "step": 2039 }, { "epoch": 0.06266703529628606, "grad_norm": 0.3185305893421173, "learning_rate": 1.995612666388137e-05, "loss": 0.6399, "step": 2040 }, { "epoch": 0.06269775443123522, "grad_norm": 0.3371380567550659, "learning_rate": 1.995608143180426e-05, "loss": 0.6269, "step": 2041 }, { "epoch": 0.06272847356618437, "grad_norm": 0.32660025358200073, "learning_rate": 1.9956036176474024e-05, "loss": 0.6373, "step": 2042 }, { "epoch": 0.06275919270113353, "grad_norm": 0.4529273211956024, "learning_rate": 1.995599089789077e-05, "loss": 0.7178, "step": 2043 }, { "epoch": 0.06278991183608269, "grad_norm": 0.3270372152328491, "learning_rate": 1.9955945596054608e-05, "loss": 0.6177, "step": 2044 }, { "epoch": 0.06282063097103185, "grad_norm": 0.35735514760017395, "learning_rate": 1.9955900270965636e-05, "loss": 0.6139, "step": 2045 }, { "epoch": 0.06285135010598102, "grad_norm": 0.42778611183166504, "learning_rate": 1.995585492262397e-05, "loss": 0.5606, "step": 2046 }, { "epoch": 0.06288206924093018, "grad_norm": 0.33018603920936584, "learning_rate": 1.9955809551029702e-05, "loss": 0.7136, "step": 2047 }, { "epoch": 0.06291278837587934, "grad_norm": 0.4222186505794525, "learning_rate": 1.995576415618295e-05, "loss": 0.5416, "step": 2048 }, { "epoch": 0.0629435075108285, "grad_norm": 0.3369393050670624, "learning_rate": 1.9955718738083814e-05, "loss": 0.6799, "step": 2049 }, { "epoch": 0.06297422664577766, "grad_norm": 0.33394452929496765, "learning_rate": 1.99556732967324e-05, "loss": 0.7366, "step": 2050 }, { "epoch": 0.06300494578072681, "grad_norm": 0.32370445132255554, "learning_rate": 1.995562783212882e-05, "loss": 0.6478, "step": 2051 }, { "epoch": 0.06303566491567597, "grad_norm": 0.3291032314300537, "learning_rate": 1.9955582344273174e-05, "loss": 0.5856, "step": 2052 }, { "epoch": 0.06306638405062513, "grad_norm": 0.35613158345222473, "learning_rate": 1.995553683316557e-05, "loss": 0.6358, "step": 2053 }, { "epoch": 0.0630971031855743, "grad_norm": 0.41023147106170654, "learning_rate": 1.995549129880611e-05, "loss": 0.5944, "step": 2054 }, { "epoch": 0.06312782232052346, "grad_norm": 0.35625216364860535, "learning_rate": 1.995544574119491e-05, "loss": 0.6607, "step": 2055 }, { "epoch": 0.06315854145547262, "grad_norm": 0.6555358171463013, "learning_rate": 1.995540016033207e-05, "loss": 0.699, "step": 2056 }, { "epoch": 0.06318926059042178, "grad_norm": 0.31461217999458313, "learning_rate": 1.9955354556217696e-05, "loss": 0.5648, "step": 2057 }, { "epoch": 0.06321997972537094, "grad_norm": 0.35454753041267395, "learning_rate": 1.99553089288519e-05, "loss": 0.623, "step": 2058 }, { "epoch": 0.06325069886032009, "grad_norm": 0.3599033057689667, "learning_rate": 1.9955263278234782e-05, "loss": 0.6192, "step": 2059 }, { "epoch": 0.06328141799526925, "grad_norm": 0.3375680148601532, "learning_rate": 1.995521760436645e-05, "loss": 0.6223, "step": 2060 }, { "epoch": 0.06331213713021841, "grad_norm": 0.3603276312351227, "learning_rate": 1.9955171907247014e-05, "loss": 0.6708, "step": 2061 }, { "epoch": 0.06334285626516757, "grad_norm": 0.36193907260894775, "learning_rate": 1.995512618687658e-05, "loss": 0.6489, "step": 2062 }, { "epoch": 0.06337357540011673, "grad_norm": 0.42661309242248535, "learning_rate": 1.9955080443255252e-05, "loss": 0.5622, "step": 2063 }, { "epoch": 0.0634042945350659, "grad_norm": 0.3199162483215332, "learning_rate": 1.9955034676383138e-05, "loss": 0.5712, "step": 2064 }, { "epoch": 0.06343501367001506, "grad_norm": 0.32310736179351807, "learning_rate": 1.9954988886260346e-05, "loss": 0.5701, "step": 2065 }, { "epoch": 0.06346573280496422, "grad_norm": 0.3517463803291321, "learning_rate": 1.995494307288698e-05, "loss": 0.6471, "step": 2066 }, { "epoch": 0.06349645193991338, "grad_norm": 0.3411223888397217, "learning_rate": 1.9954897236263154e-05, "loss": 0.6022, "step": 2067 }, { "epoch": 0.06352717107486253, "grad_norm": 0.33211588859558105, "learning_rate": 1.9954851376388967e-05, "loss": 0.5958, "step": 2068 }, { "epoch": 0.06355789020981169, "grad_norm": 0.3234077990055084, "learning_rate": 1.995480549326453e-05, "loss": 0.5383, "step": 2069 }, { "epoch": 0.06358860934476085, "grad_norm": 0.31032007932662964, "learning_rate": 1.9954759586889948e-05, "loss": 0.6081, "step": 2070 }, { "epoch": 0.06361932847971001, "grad_norm": 0.3390188217163086, "learning_rate": 1.995471365726533e-05, "loss": 0.5522, "step": 2071 }, { "epoch": 0.06365004761465917, "grad_norm": 0.3211539387702942, "learning_rate": 1.9954667704390783e-05, "loss": 0.6225, "step": 2072 }, { "epoch": 0.06368076674960833, "grad_norm": 0.3500007092952728, "learning_rate": 1.995462172826642e-05, "loss": 0.5669, "step": 2073 }, { "epoch": 0.0637114858845575, "grad_norm": 0.3570995330810547, "learning_rate": 1.9954575728892334e-05, "loss": 0.6177, "step": 2074 }, { "epoch": 0.06374220501950666, "grad_norm": 0.31760019063949585, "learning_rate": 1.9954529706268648e-05, "loss": 0.5468, "step": 2075 }, { "epoch": 0.0637729241544558, "grad_norm": 0.3548451066017151, "learning_rate": 1.9954483660395458e-05, "loss": 0.6281, "step": 2076 }, { "epoch": 0.06380364328940497, "grad_norm": 0.3533082902431488, "learning_rate": 1.9954437591272878e-05, "loss": 0.6018, "step": 2077 }, { "epoch": 0.06383436242435413, "grad_norm": 0.316623330116272, "learning_rate": 1.9954391498901015e-05, "loss": 0.6324, "step": 2078 }, { "epoch": 0.06386508155930329, "grad_norm": 0.8445413708686829, "learning_rate": 1.9954345383279975e-05, "loss": 0.5649, "step": 2079 }, { "epoch": 0.06389580069425245, "grad_norm": 0.3582322895526886, "learning_rate": 1.9954299244409866e-05, "loss": 0.6857, "step": 2080 }, { "epoch": 0.06392651982920161, "grad_norm": 0.38850030303001404, "learning_rate": 1.9954253082290794e-05, "loss": 0.5548, "step": 2081 }, { "epoch": 0.06395723896415077, "grad_norm": 0.34235695004463196, "learning_rate": 1.9954206896922872e-05, "loss": 0.5873, "step": 2082 }, { "epoch": 0.06398795809909993, "grad_norm": 0.3226953148841858, "learning_rate": 1.99541606883062e-05, "loss": 0.6257, "step": 2083 }, { "epoch": 0.0640186772340491, "grad_norm": 0.3573955297470093, "learning_rate": 1.9954114456440893e-05, "loss": 0.6359, "step": 2084 }, { "epoch": 0.06404939636899824, "grad_norm": 0.3314650058746338, "learning_rate": 1.995406820132706e-05, "loss": 0.646, "step": 2085 }, { "epoch": 0.0640801155039474, "grad_norm": 0.3612213134765625, "learning_rate": 1.9954021922964802e-05, "loss": 0.5611, "step": 2086 }, { "epoch": 0.06411083463889657, "grad_norm": 0.37518632411956787, "learning_rate": 1.995397562135423e-05, "loss": 0.6228, "step": 2087 }, { "epoch": 0.06414155377384573, "grad_norm": 0.3344673812389374, "learning_rate": 1.9953929296495455e-05, "loss": 0.6411, "step": 2088 }, { "epoch": 0.06417227290879489, "grad_norm": 0.32260623574256897, "learning_rate": 1.995388294838858e-05, "loss": 0.5879, "step": 2089 }, { "epoch": 0.06420299204374405, "grad_norm": 0.38375124335289, "learning_rate": 1.9953836577033718e-05, "loss": 0.5695, "step": 2090 }, { "epoch": 0.06423371117869321, "grad_norm": 0.34943363070487976, "learning_rate": 1.9953790182430976e-05, "loss": 0.5646, "step": 2091 }, { "epoch": 0.06426443031364237, "grad_norm": 0.3503745496273041, "learning_rate": 1.995374376458046e-05, "loss": 0.5949, "step": 2092 }, { "epoch": 0.06429514944859152, "grad_norm": 0.3671824336051941, "learning_rate": 1.9953697323482283e-05, "loss": 0.5476, "step": 2093 }, { "epoch": 0.06432586858354068, "grad_norm": 0.3636349141597748, "learning_rate": 1.9953650859136548e-05, "loss": 0.6286, "step": 2094 }, { "epoch": 0.06435658771848984, "grad_norm": 0.372244268655777, "learning_rate": 1.9953604371543368e-05, "loss": 0.6271, "step": 2095 }, { "epoch": 0.064387306853439, "grad_norm": 0.34529027342796326, "learning_rate": 1.995355786070285e-05, "loss": 0.6282, "step": 2096 }, { "epoch": 0.06441802598838817, "grad_norm": 0.32633864879608154, "learning_rate": 1.99535113266151e-05, "loss": 0.6553, "step": 2097 }, { "epoch": 0.06444874512333733, "grad_norm": 0.33246153593063354, "learning_rate": 1.995346476928023e-05, "loss": 0.606, "step": 2098 }, { "epoch": 0.06447946425828649, "grad_norm": 0.330211877822876, "learning_rate": 1.995341818869835e-05, "loss": 0.6395, "step": 2099 }, { "epoch": 0.06451018339323565, "grad_norm": 0.33483198285102844, "learning_rate": 1.9953371584869567e-05, "loss": 0.547, "step": 2100 }, { "epoch": 0.06454090252818481, "grad_norm": 0.33768245577812195, "learning_rate": 1.9953324957793987e-05, "loss": 0.648, "step": 2101 }, { "epoch": 0.06457162166313396, "grad_norm": 0.3206245005130768, "learning_rate": 1.995327830747172e-05, "loss": 0.6001, "step": 2102 }, { "epoch": 0.06460234079808312, "grad_norm": 0.40937384963035583, "learning_rate": 1.9953231633902882e-05, "loss": 0.5764, "step": 2103 }, { "epoch": 0.06463305993303228, "grad_norm": 0.3958721458911896, "learning_rate": 1.9953184937087573e-05, "loss": 0.6037, "step": 2104 }, { "epoch": 0.06466377906798144, "grad_norm": 0.3331458270549774, "learning_rate": 1.9953138217025905e-05, "loss": 0.6549, "step": 2105 }, { "epoch": 0.0646944982029306, "grad_norm": 0.3487420678138733, "learning_rate": 1.9953091473717988e-05, "loss": 0.6022, "step": 2106 }, { "epoch": 0.06472521733787977, "grad_norm": 0.3472624719142914, "learning_rate": 1.9953044707163932e-05, "loss": 0.647, "step": 2107 }, { "epoch": 0.06475593647282893, "grad_norm": 0.3412119746208191, "learning_rate": 1.9952997917363843e-05, "loss": 0.5626, "step": 2108 }, { "epoch": 0.06478665560777809, "grad_norm": 0.3638586103916168, "learning_rate": 1.9952951104317836e-05, "loss": 0.645, "step": 2109 }, { "epoch": 0.06481737474272724, "grad_norm": 0.3402235507965088, "learning_rate": 1.9952904268026013e-05, "loss": 0.7019, "step": 2110 }, { "epoch": 0.0648480938776764, "grad_norm": 0.3330906927585602, "learning_rate": 1.9952857408488485e-05, "loss": 0.6554, "step": 2111 }, { "epoch": 0.06487881301262556, "grad_norm": 0.37610435485839844, "learning_rate": 1.9952810525705368e-05, "loss": 0.5456, "step": 2112 }, { "epoch": 0.06490953214757472, "grad_norm": 0.32916611433029175, "learning_rate": 1.9952763619676763e-05, "loss": 0.7011, "step": 2113 }, { "epoch": 0.06494025128252388, "grad_norm": 0.35643884539604187, "learning_rate": 1.9952716690402786e-05, "loss": 0.637, "step": 2114 }, { "epoch": 0.06497097041747305, "grad_norm": 0.3439774215221405, "learning_rate": 1.9952669737883545e-05, "loss": 0.6282, "step": 2115 }, { "epoch": 0.0650016895524222, "grad_norm": 0.3599433898925781, "learning_rate": 1.9952622762119147e-05, "loss": 0.6712, "step": 2116 }, { "epoch": 0.06503240868737137, "grad_norm": 0.3816385567188263, "learning_rate": 1.9952575763109706e-05, "loss": 0.5633, "step": 2117 }, { "epoch": 0.06506312782232053, "grad_norm": 0.46776336431503296, "learning_rate": 1.9952528740855327e-05, "loss": 0.5629, "step": 2118 }, { "epoch": 0.06509384695726968, "grad_norm": 0.3818588852882385, "learning_rate": 1.995248169535612e-05, "loss": 0.6649, "step": 2119 }, { "epoch": 0.06512456609221884, "grad_norm": 0.32152682542800903, "learning_rate": 1.99524346266122e-05, "loss": 0.5904, "step": 2120 }, { "epoch": 0.065155285227168, "grad_norm": 0.3332715332508087, "learning_rate": 1.9952387534623673e-05, "loss": 0.695, "step": 2121 }, { "epoch": 0.06518600436211716, "grad_norm": 0.32111167907714844, "learning_rate": 1.995234041939065e-05, "loss": 0.6337, "step": 2122 }, { "epoch": 0.06521672349706632, "grad_norm": 0.3152373433113098, "learning_rate": 1.9952293280913242e-05, "loss": 0.6186, "step": 2123 }, { "epoch": 0.06524744263201548, "grad_norm": 0.3914032578468323, "learning_rate": 1.9952246119191557e-05, "loss": 0.6375, "step": 2124 }, { "epoch": 0.06527816176696465, "grad_norm": 0.3522087335586548, "learning_rate": 1.9952198934225705e-05, "loss": 0.6225, "step": 2125 }, { "epoch": 0.06530888090191381, "grad_norm": 0.33481672406196594, "learning_rate": 1.99521517260158e-05, "loss": 0.6261, "step": 2126 }, { "epoch": 0.06533960003686297, "grad_norm": 0.3173021376132965, "learning_rate": 1.9952104494561947e-05, "loss": 0.6228, "step": 2127 }, { "epoch": 0.06537031917181212, "grad_norm": 0.3427012860774994, "learning_rate": 1.995205723986426e-05, "loss": 0.6566, "step": 2128 }, { "epoch": 0.06540103830676128, "grad_norm": 0.3746190369129181, "learning_rate": 1.9952009961922846e-05, "loss": 0.6358, "step": 2129 }, { "epoch": 0.06543175744171044, "grad_norm": 0.3233788311481476, "learning_rate": 1.995196266073782e-05, "loss": 0.6889, "step": 2130 }, { "epoch": 0.0654624765766596, "grad_norm": 0.6477028131484985, "learning_rate": 1.995191533630929e-05, "loss": 0.6384, "step": 2131 }, { "epoch": 0.06549319571160876, "grad_norm": 0.45228806138038635, "learning_rate": 1.9951867988637365e-05, "loss": 0.6398, "step": 2132 }, { "epoch": 0.06552391484655792, "grad_norm": 0.3472476005554199, "learning_rate": 1.995182061772216e-05, "loss": 0.6323, "step": 2133 }, { "epoch": 0.06555463398150708, "grad_norm": 0.3458273410797119, "learning_rate": 1.9951773223563783e-05, "loss": 0.5658, "step": 2134 }, { "epoch": 0.06558535311645625, "grad_norm": 0.33673110604286194, "learning_rate": 1.995172580616234e-05, "loss": 0.5645, "step": 2135 }, { "epoch": 0.0656160722514054, "grad_norm": 0.36029210686683655, "learning_rate": 1.9951678365517953e-05, "loss": 0.5992, "step": 2136 }, { "epoch": 0.06564679138635456, "grad_norm": 0.3110905885696411, "learning_rate": 1.9951630901630722e-05, "loss": 0.5349, "step": 2137 }, { "epoch": 0.06567751052130372, "grad_norm": 0.3103884160518646, "learning_rate": 1.9951583414500764e-05, "loss": 0.5824, "step": 2138 }, { "epoch": 0.06570822965625288, "grad_norm": 0.3443349599838257, "learning_rate": 1.9951535904128185e-05, "loss": 0.6294, "step": 2139 }, { "epoch": 0.06573894879120204, "grad_norm": 0.3701552450656891, "learning_rate": 1.99514883705131e-05, "loss": 0.4986, "step": 2140 }, { "epoch": 0.0657696679261512, "grad_norm": 0.2963980436325073, "learning_rate": 1.995144081365562e-05, "loss": 0.5955, "step": 2141 }, { "epoch": 0.06580038706110036, "grad_norm": 0.3400505781173706, "learning_rate": 1.9951393233555855e-05, "loss": 0.527, "step": 2142 }, { "epoch": 0.06583110619604952, "grad_norm": 0.3331143260002136, "learning_rate": 1.9951345630213915e-05, "loss": 0.6365, "step": 2143 }, { "epoch": 0.06586182533099869, "grad_norm": 0.3495553135871887, "learning_rate": 1.9951298003629917e-05, "loss": 0.5282, "step": 2144 }, { "epoch": 0.06589254446594783, "grad_norm": 0.32302752137184143, "learning_rate": 1.9951250353803958e-05, "loss": 0.6029, "step": 2145 }, { "epoch": 0.065923263600897, "grad_norm": 0.32959187030792236, "learning_rate": 1.9951202680736167e-05, "loss": 0.5879, "step": 2146 }, { "epoch": 0.06595398273584616, "grad_norm": 0.3058619499206543, "learning_rate": 1.9951154984426645e-05, "loss": 0.5665, "step": 2147 }, { "epoch": 0.06598470187079532, "grad_norm": 0.3739256262779236, "learning_rate": 1.9951107264875505e-05, "loss": 0.6685, "step": 2148 }, { "epoch": 0.06601542100574448, "grad_norm": 0.33411556482315063, "learning_rate": 1.995105952208286e-05, "loss": 0.6673, "step": 2149 }, { "epoch": 0.06604614014069364, "grad_norm": 0.32287248969078064, "learning_rate": 1.9951011756048818e-05, "loss": 0.6358, "step": 2150 }, { "epoch": 0.0660768592756428, "grad_norm": 0.3576965928077698, "learning_rate": 1.9950963966773497e-05, "loss": 0.5938, "step": 2151 }, { "epoch": 0.06610757841059196, "grad_norm": 0.3274233043193817, "learning_rate": 1.9950916154256998e-05, "loss": 0.5995, "step": 2152 }, { "epoch": 0.06613829754554111, "grad_norm": 0.3117865324020386, "learning_rate": 1.9950868318499447e-05, "loss": 0.5917, "step": 2153 }, { "epoch": 0.06616901668049027, "grad_norm": 0.3457801938056946, "learning_rate": 1.995082045950094e-05, "loss": 0.6459, "step": 2154 }, { "epoch": 0.06619973581543943, "grad_norm": 0.3461821973323822, "learning_rate": 1.9950772577261602e-05, "loss": 0.5878, "step": 2155 }, { "epoch": 0.0662304549503886, "grad_norm": 0.3079778850078583, "learning_rate": 1.995072467178154e-05, "loss": 0.6572, "step": 2156 }, { "epoch": 0.06626117408533776, "grad_norm": 0.3415226638317108, "learning_rate": 1.9950676743060866e-05, "loss": 0.6231, "step": 2157 }, { "epoch": 0.06629189322028692, "grad_norm": 0.33916139602661133, "learning_rate": 1.995062879109969e-05, "loss": 0.5889, "step": 2158 }, { "epoch": 0.06632261235523608, "grad_norm": 0.3025778830051422, "learning_rate": 1.9950580815898127e-05, "loss": 0.5918, "step": 2159 }, { "epoch": 0.06635333149018524, "grad_norm": 0.3727894127368927, "learning_rate": 1.9950532817456287e-05, "loss": 0.6332, "step": 2160 }, { "epoch": 0.0663840506251344, "grad_norm": 0.3237680494785309, "learning_rate": 1.9950484795774282e-05, "loss": 0.5384, "step": 2161 }, { "epoch": 0.06641476976008355, "grad_norm": 0.33803215622901917, "learning_rate": 1.9950436750852224e-05, "loss": 0.6623, "step": 2162 }, { "epoch": 0.06644548889503271, "grad_norm": 0.36024630069732666, "learning_rate": 1.9950388682690228e-05, "loss": 0.6538, "step": 2163 }, { "epoch": 0.06647620802998187, "grad_norm": 0.38099226355552673, "learning_rate": 1.9950340591288402e-05, "loss": 0.6681, "step": 2164 }, { "epoch": 0.06650692716493103, "grad_norm": 0.3577551245689392, "learning_rate": 1.9950292476646865e-05, "loss": 0.6139, "step": 2165 }, { "epoch": 0.0665376462998802, "grad_norm": 0.3705735504627228, "learning_rate": 1.995024433876572e-05, "loss": 0.6357, "step": 2166 }, { "epoch": 0.06656836543482936, "grad_norm": 0.38249045610427856, "learning_rate": 1.9950196177645086e-05, "loss": 0.6819, "step": 2167 }, { "epoch": 0.06659908456977852, "grad_norm": 0.33235689997673035, "learning_rate": 1.9950147993285074e-05, "loss": 0.5917, "step": 2168 }, { "epoch": 0.06662980370472768, "grad_norm": 0.34283697605133057, "learning_rate": 1.9950099785685798e-05, "loss": 0.5834, "step": 2169 }, { "epoch": 0.06666052283967683, "grad_norm": 0.35009679198265076, "learning_rate": 1.9950051554847367e-05, "loss": 0.5781, "step": 2170 }, { "epoch": 0.06669124197462599, "grad_norm": 0.3518453538417816, "learning_rate": 1.9950003300769895e-05, "loss": 0.5908, "step": 2171 }, { "epoch": 0.06672196110957515, "grad_norm": 0.3643420934677124, "learning_rate": 1.99499550234535e-05, "loss": 0.6281, "step": 2172 }, { "epoch": 0.06675268024452431, "grad_norm": 0.3717498481273651, "learning_rate": 1.9949906722898283e-05, "loss": 0.5998, "step": 2173 }, { "epoch": 0.06678339937947347, "grad_norm": 0.33876290917396545, "learning_rate": 1.994985839910437e-05, "loss": 0.6156, "step": 2174 }, { "epoch": 0.06681411851442264, "grad_norm": 0.30981507897377014, "learning_rate": 1.9949810052071866e-05, "loss": 0.6361, "step": 2175 }, { "epoch": 0.0668448376493718, "grad_norm": 0.3320583403110504, "learning_rate": 1.9949761681800885e-05, "loss": 0.6606, "step": 2176 }, { "epoch": 0.06687555678432096, "grad_norm": 0.35287216305732727, "learning_rate": 1.994971328829154e-05, "loss": 0.6165, "step": 2177 }, { "epoch": 0.06690627591927012, "grad_norm": 0.3695656359195709, "learning_rate": 1.9949664871543944e-05, "loss": 0.6199, "step": 2178 }, { "epoch": 0.06693699505421927, "grad_norm": 0.35671326518058777, "learning_rate": 1.9949616431558218e-05, "loss": 0.7255, "step": 2179 }, { "epoch": 0.06696771418916843, "grad_norm": 0.32923802733421326, "learning_rate": 1.994956796833446e-05, "loss": 0.6272, "step": 2180 }, { "epoch": 0.06699843332411759, "grad_norm": 0.3100261390209198, "learning_rate": 1.9949519481872793e-05, "loss": 0.5415, "step": 2181 }, { "epoch": 0.06702915245906675, "grad_norm": 0.35098424553871155, "learning_rate": 1.994947097217333e-05, "loss": 0.5633, "step": 2182 }, { "epoch": 0.06705987159401591, "grad_norm": 0.3194218575954437, "learning_rate": 1.9949422439236185e-05, "loss": 0.5973, "step": 2183 }, { "epoch": 0.06709059072896507, "grad_norm": 0.4437752068042755, "learning_rate": 1.9949373883061462e-05, "loss": 0.7611, "step": 2184 }, { "epoch": 0.06712130986391424, "grad_norm": 0.30451834201812744, "learning_rate": 1.9949325303649284e-05, "loss": 0.5577, "step": 2185 }, { "epoch": 0.0671520289988634, "grad_norm": 0.3236548900604248, "learning_rate": 1.9949276700999768e-05, "loss": 0.6526, "step": 2186 }, { "epoch": 0.06718274813381256, "grad_norm": 0.34792137145996094, "learning_rate": 1.9949228075113013e-05, "loss": 0.6377, "step": 2187 }, { "epoch": 0.0672134672687617, "grad_norm": 0.37232786417007446, "learning_rate": 1.9949179425989147e-05, "loss": 0.6068, "step": 2188 }, { "epoch": 0.06724418640371087, "grad_norm": 0.36086997389793396, "learning_rate": 1.9949130753628273e-05, "loss": 0.5905, "step": 2189 }, { "epoch": 0.06727490553866003, "grad_norm": 0.3678576350212097, "learning_rate": 1.9949082058030514e-05, "loss": 0.6277, "step": 2190 }, { "epoch": 0.06730562467360919, "grad_norm": 0.3111893832683563, "learning_rate": 1.9949033339195975e-05, "loss": 0.6299, "step": 2191 }, { "epoch": 0.06733634380855835, "grad_norm": 0.3356234133243561, "learning_rate": 1.9948984597124778e-05, "loss": 0.6017, "step": 2192 }, { "epoch": 0.06736706294350751, "grad_norm": 0.3536360561847687, "learning_rate": 1.9948935831817027e-05, "loss": 0.6462, "step": 2193 }, { "epoch": 0.06739778207845668, "grad_norm": 0.3472273051738739, "learning_rate": 1.9948887043272846e-05, "loss": 0.687, "step": 2194 }, { "epoch": 0.06742850121340584, "grad_norm": 0.3442753851413727, "learning_rate": 1.994883823149234e-05, "loss": 0.6947, "step": 2195 }, { "epoch": 0.06745922034835498, "grad_norm": 0.34007900953292847, "learning_rate": 1.9948789396475636e-05, "loss": 0.579, "step": 2196 }, { "epoch": 0.06748993948330415, "grad_norm": 0.3170677423477173, "learning_rate": 1.994874053822283e-05, "loss": 0.5501, "step": 2197 }, { "epoch": 0.06752065861825331, "grad_norm": 0.32207638025283813, "learning_rate": 1.9948691656734053e-05, "loss": 0.6087, "step": 2198 }, { "epoch": 0.06755137775320247, "grad_norm": 0.32594001293182373, "learning_rate": 1.9948642752009405e-05, "loss": 0.5433, "step": 2199 }, { "epoch": 0.06758209688815163, "grad_norm": 0.3319441080093384, "learning_rate": 1.994859382404901e-05, "loss": 0.6014, "step": 2200 }, { "epoch": 0.06761281602310079, "grad_norm": 0.325753390789032, "learning_rate": 1.9948544872852982e-05, "loss": 0.5819, "step": 2201 }, { "epoch": 0.06764353515804995, "grad_norm": 0.2985192835330963, "learning_rate": 1.994849589842143e-05, "loss": 0.6178, "step": 2202 }, { "epoch": 0.06767425429299911, "grad_norm": 0.32980191707611084, "learning_rate": 1.9948446900754473e-05, "loss": 0.6096, "step": 2203 }, { "epoch": 0.06770497342794828, "grad_norm": 0.32874277234077454, "learning_rate": 1.994839787985222e-05, "loss": 0.6337, "step": 2204 }, { "epoch": 0.06773569256289742, "grad_norm": 0.3212631046772003, "learning_rate": 1.9948348835714793e-05, "loss": 0.6087, "step": 2205 }, { "epoch": 0.06776641169784658, "grad_norm": 0.350879967212677, "learning_rate": 1.99482997683423e-05, "loss": 0.5664, "step": 2206 }, { "epoch": 0.06779713083279575, "grad_norm": 0.3025473952293396, "learning_rate": 1.994825067773486e-05, "loss": 0.5442, "step": 2207 }, { "epoch": 0.06782784996774491, "grad_norm": 0.3510275185108185, "learning_rate": 1.9948201563892584e-05, "loss": 0.5291, "step": 2208 }, { "epoch": 0.06785856910269407, "grad_norm": 0.3357773423194885, "learning_rate": 1.9948152426815588e-05, "loss": 0.6192, "step": 2209 }, { "epoch": 0.06788928823764323, "grad_norm": 0.39369672536849976, "learning_rate": 1.994810326650399e-05, "loss": 0.7361, "step": 2210 }, { "epoch": 0.06792000737259239, "grad_norm": 0.36984783411026, "learning_rate": 1.9948054082957896e-05, "loss": 0.5873, "step": 2211 }, { "epoch": 0.06795072650754155, "grad_norm": 0.43243589997291565, "learning_rate": 1.9948004876177434e-05, "loss": 0.6249, "step": 2212 }, { "epoch": 0.0679814456424907, "grad_norm": 0.3033086955547333, "learning_rate": 1.994795564616271e-05, "loss": 0.6024, "step": 2213 }, { "epoch": 0.06801216477743986, "grad_norm": 0.3519750237464905, "learning_rate": 1.994790639291384e-05, "loss": 0.5912, "step": 2214 }, { "epoch": 0.06804288391238902, "grad_norm": 0.3444477319717407, "learning_rate": 1.9947857116430938e-05, "loss": 0.6537, "step": 2215 }, { "epoch": 0.06807360304733819, "grad_norm": 0.32625457644462585, "learning_rate": 1.9947807816714123e-05, "loss": 0.6347, "step": 2216 }, { "epoch": 0.06810432218228735, "grad_norm": 0.3351273834705353, "learning_rate": 1.994775849376351e-05, "loss": 0.5991, "step": 2217 }, { "epoch": 0.06813504131723651, "grad_norm": 0.4342171847820282, "learning_rate": 1.9947709147579208e-05, "loss": 0.767, "step": 2218 }, { "epoch": 0.06816576045218567, "grad_norm": 0.349337100982666, "learning_rate": 1.994765977816134e-05, "loss": 0.6768, "step": 2219 }, { "epoch": 0.06819647958713483, "grad_norm": 0.3772926926612854, "learning_rate": 1.9947610385510013e-05, "loss": 0.7065, "step": 2220 }, { "epoch": 0.06822719872208399, "grad_norm": 0.3229788541793823, "learning_rate": 1.9947560969625353e-05, "loss": 0.5053, "step": 2221 }, { "epoch": 0.06825791785703314, "grad_norm": 0.33497437834739685, "learning_rate": 1.9947511530507467e-05, "loss": 0.6534, "step": 2222 }, { "epoch": 0.0682886369919823, "grad_norm": 0.3466551899909973, "learning_rate": 1.9947462068156473e-05, "loss": 0.6401, "step": 2223 }, { "epoch": 0.06831935612693146, "grad_norm": 0.35122209787368774, "learning_rate": 1.9947412582572487e-05, "loss": 0.616, "step": 2224 }, { "epoch": 0.06835007526188062, "grad_norm": 0.31898894906044006, "learning_rate": 1.9947363073755625e-05, "loss": 0.6504, "step": 2225 }, { "epoch": 0.06838079439682979, "grad_norm": 0.7081322073936462, "learning_rate": 1.9947313541706e-05, "loss": 0.6698, "step": 2226 }, { "epoch": 0.06841151353177895, "grad_norm": 0.33141574263572693, "learning_rate": 1.994726398642373e-05, "loss": 0.5923, "step": 2227 }, { "epoch": 0.06844223266672811, "grad_norm": 0.36821237206459045, "learning_rate": 1.994721440790893e-05, "loss": 0.6302, "step": 2228 }, { "epoch": 0.06847295180167727, "grad_norm": 0.3279934823513031, "learning_rate": 1.994716480616172e-05, "loss": 0.6228, "step": 2229 }, { "epoch": 0.06850367093662642, "grad_norm": 0.34491825103759766, "learning_rate": 1.9947115181182206e-05, "loss": 0.6311, "step": 2230 }, { "epoch": 0.06853439007157558, "grad_norm": 0.3310839533805847, "learning_rate": 1.9947065532970513e-05, "loss": 0.5552, "step": 2231 }, { "epoch": 0.06856510920652474, "grad_norm": 0.35003000497817993, "learning_rate": 1.9947015861526757e-05, "loss": 0.6093, "step": 2232 }, { "epoch": 0.0685958283414739, "grad_norm": 0.32708999514579773, "learning_rate": 1.994696616685104e-05, "loss": 0.6409, "step": 2233 }, { "epoch": 0.06862654747642306, "grad_norm": 0.3440236449241638, "learning_rate": 1.9946916448943502e-05, "loss": 0.6195, "step": 2234 }, { "epoch": 0.06865726661137223, "grad_norm": 0.3515322804450989, "learning_rate": 1.994686670780424e-05, "loss": 0.7262, "step": 2235 }, { "epoch": 0.06868798574632139, "grad_norm": 0.35663193464279175, "learning_rate": 1.9946816943433375e-05, "loss": 0.5967, "step": 2236 }, { "epoch": 0.06871870488127055, "grad_norm": 0.2946612238883972, "learning_rate": 1.9946767155831023e-05, "loss": 0.6521, "step": 2237 }, { "epoch": 0.06874942401621971, "grad_norm": 0.4454301595687866, "learning_rate": 1.9946717344997305e-05, "loss": 0.6523, "step": 2238 }, { "epoch": 0.06878014315116886, "grad_norm": 0.3690214157104492, "learning_rate": 1.994666751093233e-05, "loss": 0.6302, "step": 2239 }, { "epoch": 0.06881086228611802, "grad_norm": 0.29992982745170593, "learning_rate": 1.9946617653636223e-05, "loss": 0.5953, "step": 2240 }, { "epoch": 0.06884158142106718, "grad_norm": 0.33487552404403687, "learning_rate": 1.9946567773109095e-05, "loss": 0.6012, "step": 2241 }, { "epoch": 0.06887230055601634, "grad_norm": 0.3518930971622467, "learning_rate": 1.9946517869351062e-05, "loss": 0.5544, "step": 2242 }, { "epoch": 0.0689030196909655, "grad_norm": 0.32719117403030396, "learning_rate": 1.9946467942362242e-05, "loss": 0.541, "step": 2243 }, { "epoch": 0.06893373882591466, "grad_norm": 0.3556510806083679, "learning_rate": 1.994641799214275e-05, "loss": 0.661, "step": 2244 }, { "epoch": 0.06896445796086383, "grad_norm": 0.3943074643611908, "learning_rate": 1.9946368018692708e-05, "loss": 0.6122, "step": 2245 }, { "epoch": 0.06899517709581299, "grad_norm": 0.35023805499076843, "learning_rate": 1.9946318022012226e-05, "loss": 0.6368, "step": 2246 }, { "epoch": 0.06902589623076213, "grad_norm": 0.35899481177330017, "learning_rate": 1.9946268002101426e-05, "loss": 0.5897, "step": 2247 }, { "epoch": 0.0690566153657113, "grad_norm": 0.3251500427722931, "learning_rate": 1.9946217958960422e-05, "loss": 0.6765, "step": 2248 }, { "epoch": 0.06908733450066046, "grad_norm": 0.36872854828834534, "learning_rate": 1.994616789258933e-05, "loss": 0.6119, "step": 2249 }, { "epoch": 0.06911805363560962, "grad_norm": 0.33494773507118225, "learning_rate": 1.9946117802988266e-05, "loss": 0.6515, "step": 2250 }, { "epoch": 0.06914877277055878, "grad_norm": 0.30779212713241577, "learning_rate": 1.994606769015735e-05, "loss": 0.6315, "step": 2251 }, { "epoch": 0.06917949190550794, "grad_norm": 0.32273975014686584, "learning_rate": 1.99460175540967e-05, "loss": 0.6472, "step": 2252 }, { "epoch": 0.0692102110404571, "grad_norm": 0.41840842366218567, "learning_rate": 1.9945967394806432e-05, "loss": 0.6202, "step": 2253 }, { "epoch": 0.06924093017540627, "grad_norm": 0.355185329914093, "learning_rate": 1.994591721228666e-05, "loss": 0.6333, "step": 2254 }, { "epoch": 0.06927164931035543, "grad_norm": 0.3869832158088684, "learning_rate": 1.9945867006537507e-05, "loss": 0.6168, "step": 2255 }, { "epoch": 0.06930236844530457, "grad_norm": 0.34259966015815735, "learning_rate": 1.9945816777559087e-05, "loss": 0.6816, "step": 2256 }, { "epoch": 0.06933308758025374, "grad_norm": 0.430733323097229, "learning_rate": 1.9945766525351516e-05, "loss": 0.667, "step": 2257 }, { "epoch": 0.0693638067152029, "grad_norm": 0.33048900961875916, "learning_rate": 1.994571624991491e-05, "loss": 0.5609, "step": 2258 }, { "epoch": 0.06939452585015206, "grad_norm": 0.3378935158252716, "learning_rate": 1.9945665951249393e-05, "loss": 0.629, "step": 2259 }, { "epoch": 0.06942524498510122, "grad_norm": 0.36652514338493347, "learning_rate": 1.9945615629355077e-05, "loss": 0.5554, "step": 2260 }, { "epoch": 0.06945596412005038, "grad_norm": 0.32435449957847595, "learning_rate": 1.9945565284232082e-05, "loss": 0.7183, "step": 2261 }, { "epoch": 0.06948668325499954, "grad_norm": 0.3089374303817749, "learning_rate": 1.9945514915880523e-05, "loss": 0.5832, "step": 2262 }, { "epoch": 0.0695174023899487, "grad_norm": 0.3346021771430969, "learning_rate": 1.994546452430052e-05, "loss": 0.63, "step": 2263 }, { "epoch": 0.06954812152489787, "grad_norm": 0.41662389039993286, "learning_rate": 1.994541410949219e-05, "loss": 0.7098, "step": 2264 }, { "epoch": 0.06957884065984701, "grad_norm": 0.35666322708129883, "learning_rate": 1.9945363671455654e-05, "loss": 0.6115, "step": 2265 }, { "epoch": 0.06960955979479617, "grad_norm": 0.32230043411254883, "learning_rate": 1.994531321019102e-05, "loss": 0.5534, "step": 2266 }, { "epoch": 0.06964027892974534, "grad_norm": 0.4277683198451996, "learning_rate": 1.9945262725698415e-05, "loss": 0.559, "step": 2267 }, { "epoch": 0.0696709980646945, "grad_norm": 0.35958540439605713, "learning_rate": 1.9945212217977955e-05, "loss": 0.6841, "step": 2268 }, { "epoch": 0.06970171719964366, "grad_norm": 0.37720420956611633, "learning_rate": 1.9945161687029762e-05, "loss": 0.5834, "step": 2269 }, { "epoch": 0.06973243633459282, "grad_norm": 0.37763404846191406, "learning_rate": 1.9945111132853943e-05, "loss": 0.6331, "step": 2270 }, { "epoch": 0.06976315546954198, "grad_norm": 0.3571881651878357, "learning_rate": 1.9945060555450624e-05, "loss": 0.6178, "step": 2271 }, { "epoch": 0.06979387460449114, "grad_norm": 0.3198193609714508, "learning_rate": 1.9945009954819924e-05, "loss": 0.5818, "step": 2272 }, { "epoch": 0.06982459373944029, "grad_norm": 0.5529232621192932, "learning_rate": 1.9944959330961953e-05, "loss": 0.6286, "step": 2273 }, { "epoch": 0.06985531287438945, "grad_norm": 0.36903703212738037, "learning_rate": 1.994490868387684e-05, "loss": 0.6199, "step": 2274 }, { "epoch": 0.06988603200933861, "grad_norm": 0.35598793625831604, "learning_rate": 1.99448580135647e-05, "loss": 0.6665, "step": 2275 }, { "epoch": 0.06991675114428778, "grad_norm": 0.3389105796813965, "learning_rate": 1.9944807320025644e-05, "loss": 0.5844, "step": 2276 }, { "epoch": 0.06994747027923694, "grad_norm": 0.5125939249992371, "learning_rate": 1.9944756603259798e-05, "loss": 0.6452, "step": 2277 }, { "epoch": 0.0699781894141861, "grad_norm": 0.3350171446800232, "learning_rate": 1.9944705863267278e-05, "loss": 0.5952, "step": 2278 }, { "epoch": 0.07000890854913526, "grad_norm": 0.3530266284942627, "learning_rate": 1.9944655100048204e-05, "loss": 0.6312, "step": 2279 }, { "epoch": 0.07003962768408442, "grad_norm": 0.34833967685699463, "learning_rate": 1.9944604313602693e-05, "loss": 0.6096, "step": 2280 }, { "epoch": 0.07007034681903358, "grad_norm": 0.34933724999427795, "learning_rate": 1.9944553503930865e-05, "loss": 0.5973, "step": 2281 }, { "epoch": 0.07010106595398273, "grad_norm": 0.3486560881137848, "learning_rate": 1.9944502671032837e-05, "loss": 0.6277, "step": 2282 }, { "epoch": 0.07013178508893189, "grad_norm": 0.34632110595703125, "learning_rate": 1.9944451814908726e-05, "loss": 0.6534, "step": 2283 }, { "epoch": 0.07016250422388105, "grad_norm": 0.3313484489917755, "learning_rate": 1.994440093555866e-05, "loss": 0.682, "step": 2284 }, { "epoch": 0.07019322335883021, "grad_norm": 0.32189467549324036, "learning_rate": 1.9944350032982743e-05, "loss": 0.6427, "step": 2285 }, { "epoch": 0.07022394249377938, "grad_norm": 0.41524431109428406, "learning_rate": 1.9944299107181105e-05, "loss": 0.5632, "step": 2286 }, { "epoch": 0.07025466162872854, "grad_norm": 0.35633614659309387, "learning_rate": 1.9944248158153864e-05, "loss": 0.6101, "step": 2287 }, { "epoch": 0.0702853807636777, "grad_norm": 0.3450487554073334, "learning_rate": 1.9944197185901136e-05, "loss": 0.6219, "step": 2288 }, { "epoch": 0.07031609989862686, "grad_norm": 0.3279600143432617, "learning_rate": 1.9944146190423042e-05, "loss": 0.5961, "step": 2289 }, { "epoch": 0.07034681903357601, "grad_norm": 0.33049142360687256, "learning_rate": 1.9944095171719698e-05, "loss": 0.6571, "step": 2290 }, { "epoch": 0.07037753816852517, "grad_norm": 0.3888467848300934, "learning_rate": 1.9944044129791226e-05, "loss": 0.5651, "step": 2291 }, { "epoch": 0.07040825730347433, "grad_norm": 0.316631555557251, "learning_rate": 1.9943993064637747e-05, "loss": 0.6095, "step": 2292 }, { "epoch": 0.07043897643842349, "grad_norm": 0.3150646388530731, "learning_rate": 1.9943941976259373e-05, "loss": 0.5393, "step": 2293 }, { "epoch": 0.07046969557337265, "grad_norm": 0.3414592146873474, "learning_rate": 1.994389086465623e-05, "loss": 0.5886, "step": 2294 }, { "epoch": 0.07050041470832182, "grad_norm": 0.45348697900772095, "learning_rate": 1.9943839729828433e-05, "loss": 0.6592, "step": 2295 }, { "epoch": 0.07053113384327098, "grad_norm": 0.3132486045360565, "learning_rate": 1.994378857177611e-05, "loss": 0.662, "step": 2296 }, { "epoch": 0.07056185297822014, "grad_norm": 0.3697911202907562, "learning_rate": 1.9943737390499368e-05, "loss": 0.6458, "step": 2297 }, { "epoch": 0.0705925721131693, "grad_norm": 0.317377507686615, "learning_rate": 1.9943686185998337e-05, "loss": 0.6278, "step": 2298 }, { "epoch": 0.07062329124811845, "grad_norm": 0.37609127163887024, "learning_rate": 1.994363495827313e-05, "loss": 0.5128, "step": 2299 }, { "epoch": 0.07065401038306761, "grad_norm": 0.34507983922958374, "learning_rate": 1.9943583707323872e-05, "loss": 0.6882, "step": 2300 }, { "epoch": 0.07068472951801677, "grad_norm": 0.32441240549087524, "learning_rate": 1.9943532433150678e-05, "loss": 0.6271, "step": 2301 }, { "epoch": 0.07071544865296593, "grad_norm": 0.375387966632843, "learning_rate": 1.994348113575367e-05, "loss": 0.5773, "step": 2302 }, { "epoch": 0.07074616778791509, "grad_norm": 0.33356645703315735, "learning_rate": 1.9943429815132967e-05, "loss": 0.6013, "step": 2303 }, { "epoch": 0.07077688692286425, "grad_norm": 0.3719983696937561, "learning_rate": 1.9943378471288687e-05, "loss": 0.6801, "step": 2304 }, { "epoch": 0.07080760605781342, "grad_norm": 0.3404647707939148, "learning_rate": 1.9943327104220956e-05, "loss": 0.6471, "step": 2305 }, { "epoch": 0.07083832519276258, "grad_norm": 0.3903762400150299, "learning_rate": 1.994327571392989e-05, "loss": 0.6995, "step": 2306 }, { "epoch": 0.07086904432771172, "grad_norm": 0.3524881601333618, "learning_rate": 1.9943224300415608e-05, "loss": 0.5735, "step": 2307 }, { "epoch": 0.07089976346266089, "grad_norm": 0.3640892505645752, "learning_rate": 1.9943172863678228e-05, "loss": 0.6708, "step": 2308 }, { "epoch": 0.07093048259761005, "grad_norm": 0.34076252579689026, "learning_rate": 1.9943121403717874e-05, "loss": 0.6152, "step": 2309 }, { "epoch": 0.07096120173255921, "grad_norm": 0.34372198581695557, "learning_rate": 1.994306992053467e-05, "loss": 0.6042, "step": 2310 }, { "epoch": 0.07099192086750837, "grad_norm": 0.4769997000694275, "learning_rate": 1.994301841412873e-05, "loss": 0.622, "step": 2311 }, { "epoch": 0.07102264000245753, "grad_norm": 0.3170176148414612, "learning_rate": 1.9942966884500175e-05, "loss": 0.5337, "step": 2312 }, { "epoch": 0.0710533591374067, "grad_norm": 0.35010942816734314, "learning_rate": 1.9942915331649126e-05, "loss": 0.6023, "step": 2313 }, { "epoch": 0.07108407827235586, "grad_norm": 0.3587970733642578, "learning_rate": 1.9942863755575704e-05, "loss": 0.5152, "step": 2314 }, { "epoch": 0.07111479740730502, "grad_norm": 0.34760305285453796, "learning_rate": 1.9942812156280033e-05, "loss": 0.5594, "step": 2315 }, { "epoch": 0.07114551654225416, "grad_norm": 0.3742259442806244, "learning_rate": 1.9942760533762225e-05, "loss": 0.5659, "step": 2316 }, { "epoch": 0.07117623567720333, "grad_norm": 0.36210277676582336, "learning_rate": 1.9942708888022406e-05, "loss": 0.6614, "step": 2317 }, { "epoch": 0.07120695481215249, "grad_norm": 0.3308568596839905, "learning_rate": 1.9942657219060698e-05, "loss": 0.6233, "step": 2318 }, { "epoch": 0.07123767394710165, "grad_norm": 0.3983585834503174, "learning_rate": 1.9942605526877222e-05, "loss": 0.5789, "step": 2319 }, { "epoch": 0.07126839308205081, "grad_norm": 0.3281286358833313, "learning_rate": 1.994255381147209e-05, "loss": 0.5178, "step": 2320 }, { "epoch": 0.07129911221699997, "grad_norm": 0.3261682987213135, "learning_rate": 1.9942502072845433e-05, "loss": 0.5969, "step": 2321 }, { "epoch": 0.07132983135194913, "grad_norm": 0.3409619927406311, "learning_rate": 1.994245031099737e-05, "loss": 0.5368, "step": 2322 }, { "epoch": 0.0713605504868983, "grad_norm": 0.4014226198196411, "learning_rate": 1.9942398525928017e-05, "loss": 0.6459, "step": 2323 }, { "epoch": 0.07139126962184744, "grad_norm": 0.33894264698028564, "learning_rate": 1.9942346717637497e-05, "loss": 0.6673, "step": 2324 }, { "epoch": 0.0714219887567966, "grad_norm": 0.343667596578598, "learning_rate": 1.9942294886125934e-05, "loss": 0.5949, "step": 2325 }, { "epoch": 0.07145270789174576, "grad_norm": 0.3785068690776825, "learning_rate": 1.9942243031393444e-05, "loss": 0.6453, "step": 2326 }, { "epoch": 0.07148342702669493, "grad_norm": 0.35941192507743835, "learning_rate": 1.9942191153440156e-05, "loss": 0.6209, "step": 2327 }, { "epoch": 0.07151414616164409, "grad_norm": 0.35576051473617554, "learning_rate": 1.9942139252266185e-05, "loss": 0.5594, "step": 2328 }, { "epoch": 0.07154486529659325, "grad_norm": 0.35480037331581116, "learning_rate": 1.994208732787165e-05, "loss": 0.6243, "step": 2329 }, { "epoch": 0.07157558443154241, "grad_norm": 0.3774089813232422, "learning_rate": 1.9942035380256677e-05, "loss": 0.5906, "step": 2330 }, { "epoch": 0.07160630356649157, "grad_norm": 0.33675557374954224, "learning_rate": 1.9941983409421385e-05, "loss": 0.6886, "step": 2331 }, { "epoch": 0.07163702270144073, "grad_norm": 0.3092780113220215, "learning_rate": 1.9941931415365897e-05, "loss": 0.5213, "step": 2332 }, { "epoch": 0.07166774183638988, "grad_norm": 0.3463011384010315, "learning_rate": 1.9941879398090336e-05, "loss": 0.6377, "step": 2333 }, { "epoch": 0.07169846097133904, "grad_norm": 0.3536158800125122, "learning_rate": 1.9941827357594823e-05, "loss": 0.6058, "step": 2334 }, { "epoch": 0.0717291801062882, "grad_norm": 0.3352678418159485, "learning_rate": 1.994177529387947e-05, "loss": 0.6406, "step": 2335 }, { "epoch": 0.07175989924123737, "grad_norm": 0.3921270966529846, "learning_rate": 1.9941723206944415e-05, "loss": 0.5709, "step": 2336 }, { "epoch": 0.07179061837618653, "grad_norm": 0.36226755380630493, "learning_rate": 1.9941671096789767e-05, "loss": 0.6557, "step": 2337 }, { "epoch": 0.07182133751113569, "grad_norm": 0.3200473189353943, "learning_rate": 1.9941618963415652e-05, "loss": 0.6685, "step": 2338 }, { "epoch": 0.07185205664608485, "grad_norm": 0.32420361042022705, "learning_rate": 1.9941566806822195e-05, "loss": 0.5278, "step": 2339 }, { "epoch": 0.07188277578103401, "grad_norm": 0.4029267728328705, "learning_rate": 1.994151462700951e-05, "loss": 0.651, "step": 2340 }, { "epoch": 0.07191349491598317, "grad_norm": 0.3506752550601959, "learning_rate": 1.9941462423977725e-05, "loss": 0.5895, "step": 2341 }, { "epoch": 0.07194421405093232, "grad_norm": 0.3415878713130951, "learning_rate": 1.994141019772696e-05, "loss": 0.627, "step": 2342 }, { "epoch": 0.07197493318588148, "grad_norm": 0.31377658247947693, "learning_rate": 1.9941357948257336e-05, "loss": 0.5209, "step": 2343 }, { "epoch": 0.07200565232083064, "grad_norm": 0.4006168842315674, "learning_rate": 1.9941305675568978e-05, "loss": 0.627, "step": 2344 }, { "epoch": 0.0720363714557798, "grad_norm": 0.36485832929611206, "learning_rate": 1.9941253379662003e-05, "loss": 0.5889, "step": 2345 }, { "epoch": 0.07206709059072897, "grad_norm": 0.35250672698020935, "learning_rate": 1.994120106053654e-05, "loss": 0.5875, "step": 2346 }, { "epoch": 0.07209780972567813, "grad_norm": 0.33126139640808105, "learning_rate": 1.994114871819271e-05, "loss": 0.6719, "step": 2347 }, { "epoch": 0.07212852886062729, "grad_norm": 0.3363138437271118, "learning_rate": 1.9941096352630628e-05, "loss": 0.5875, "step": 2348 }, { "epoch": 0.07215924799557645, "grad_norm": 0.4345424473285675, "learning_rate": 1.9941043963850424e-05, "loss": 0.6054, "step": 2349 }, { "epoch": 0.0721899671305256, "grad_norm": 0.3654957711696625, "learning_rate": 1.9940991551852217e-05, "loss": 0.6151, "step": 2350 }, { "epoch": 0.07222068626547476, "grad_norm": 0.355685830116272, "learning_rate": 1.9940939116636126e-05, "loss": 0.6425, "step": 2351 }, { "epoch": 0.07225140540042392, "grad_norm": 0.35184353590011597, "learning_rate": 1.9940886658202282e-05, "loss": 0.596, "step": 2352 }, { "epoch": 0.07228212453537308, "grad_norm": 0.6204710006713867, "learning_rate": 1.9940834176550804e-05, "loss": 0.5397, "step": 2353 }, { "epoch": 0.07231284367032224, "grad_norm": 0.36159420013427734, "learning_rate": 1.9940781671681808e-05, "loss": 0.6805, "step": 2354 }, { "epoch": 0.0723435628052714, "grad_norm": 0.34708112478256226, "learning_rate": 1.9940729143595425e-05, "loss": 0.6081, "step": 2355 }, { "epoch": 0.07237428194022057, "grad_norm": 0.3197517693042755, "learning_rate": 1.9940676592291778e-05, "loss": 0.5547, "step": 2356 }, { "epoch": 0.07240500107516973, "grad_norm": 0.3518645763397217, "learning_rate": 1.9940624017770984e-05, "loss": 0.6578, "step": 2357 }, { "epoch": 0.07243572021011889, "grad_norm": 0.35610949993133545, "learning_rate": 1.9940571420033167e-05, "loss": 0.651, "step": 2358 }, { "epoch": 0.07246643934506804, "grad_norm": 0.4015142619609833, "learning_rate": 1.994051879907845e-05, "loss": 0.6297, "step": 2359 }, { "epoch": 0.0724971584800172, "grad_norm": 0.3155009150505066, "learning_rate": 1.9940466154906965e-05, "loss": 0.5167, "step": 2360 }, { "epoch": 0.07252787761496636, "grad_norm": 0.3717946410179138, "learning_rate": 1.994041348751882e-05, "loss": 0.6334, "step": 2361 }, { "epoch": 0.07255859674991552, "grad_norm": 0.3706805408000946, "learning_rate": 1.9940360796914144e-05, "loss": 0.6089, "step": 2362 }, { "epoch": 0.07258931588486468, "grad_norm": 0.3583068251609802, "learning_rate": 1.9940308083093066e-05, "loss": 0.6208, "step": 2363 }, { "epoch": 0.07262003501981384, "grad_norm": 0.3196122348308563, "learning_rate": 1.99402553460557e-05, "loss": 0.5309, "step": 2364 }, { "epoch": 0.072650754154763, "grad_norm": 0.3746194541454315, "learning_rate": 1.9940202585802177e-05, "loss": 0.6055, "step": 2365 }, { "epoch": 0.07268147328971217, "grad_norm": 0.37152329087257385, "learning_rate": 1.9940149802332613e-05, "loss": 0.6555, "step": 2366 }, { "epoch": 0.07271219242466131, "grad_norm": 0.3165590465068817, "learning_rate": 1.994009699564714e-05, "loss": 0.6072, "step": 2367 }, { "epoch": 0.07274291155961048, "grad_norm": 0.3682546615600586, "learning_rate": 1.994004416574587e-05, "loss": 0.621, "step": 2368 }, { "epoch": 0.07277363069455964, "grad_norm": 0.334236741065979, "learning_rate": 1.993999131262894e-05, "loss": 0.6649, "step": 2369 }, { "epoch": 0.0728043498295088, "grad_norm": 0.31573131680488586, "learning_rate": 1.993993843629646e-05, "loss": 0.5988, "step": 2370 }, { "epoch": 0.07283506896445796, "grad_norm": 0.43167757987976074, "learning_rate": 1.993988553674856e-05, "loss": 0.6602, "step": 2371 }, { "epoch": 0.07286578809940712, "grad_norm": 0.3301078975200653, "learning_rate": 1.9939832613985363e-05, "loss": 0.7288, "step": 2372 }, { "epoch": 0.07289650723435628, "grad_norm": 0.3498932719230652, "learning_rate": 1.9939779668007e-05, "loss": 0.6893, "step": 2373 }, { "epoch": 0.07292722636930545, "grad_norm": 0.3463471531867981, "learning_rate": 1.993972669881358e-05, "loss": 0.622, "step": 2374 }, { "epoch": 0.0729579455042546, "grad_norm": 0.514157772064209, "learning_rate": 1.9939673706405235e-05, "loss": 0.5697, "step": 2375 }, { "epoch": 0.07298866463920375, "grad_norm": 0.32319241762161255, "learning_rate": 1.993962069078209e-05, "loss": 0.5567, "step": 2376 }, { "epoch": 0.07301938377415292, "grad_norm": 0.3460410535335541, "learning_rate": 1.9939567651944266e-05, "loss": 0.5902, "step": 2377 }, { "epoch": 0.07305010290910208, "grad_norm": 0.36054372787475586, "learning_rate": 1.9939514589891887e-05, "loss": 0.6386, "step": 2378 }, { "epoch": 0.07308082204405124, "grad_norm": 0.3617940843105316, "learning_rate": 1.9939461504625073e-05, "loss": 0.6332, "step": 2379 }, { "epoch": 0.0731115411790004, "grad_norm": 0.3256889581680298, "learning_rate": 1.993940839614396e-05, "loss": 0.5449, "step": 2380 }, { "epoch": 0.07314226031394956, "grad_norm": 0.36186841130256653, "learning_rate": 1.993935526444866e-05, "loss": 0.6813, "step": 2381 }, { "epoch": 0.07317297944889872, "grad_norm": 0.4319345951080322, "learning_rate": 1.9939302109539303e-05, "loss": 0.6801, "step": 2382 }, { "epoch": 0.07320369858384788, "grad_norm": 0.32260259985923767, "learning_rate": 1.9939248931416014e-05, "loss": 0.5786, "step": 2383 }, { "epoch": 0.07323441771879703, "grad_norm": 0.33656251430511475, "learning_rate": 1.993919573007891e-05, "loss": 0.7092, "step": 2384 }, { "epoch": 0.0732651368537462, "grad_norm": 0.3366798460483551, "learning_rate": 1.9939142505528126e-05, "loss": 0.5323, "step": 2385 }, { "epoch": 0.07329585598869535, "grad_norm": 0.3359346389770508, "learning_rate": 1.9939089257763778e-05, "loss": 0.664, "step": 2386 }, { "epoch": 0.07332657512364452, "grad_norm": 0.8740454316139221, "learning_rate": 1.9939035986785993e-05, "loss": 0.6494, "step": 2387 }, { "epoch": 0.07335729425859368, "grad_norm": 0.3123961389064789, "learning_rate": 1.993898269259489e-05, "loss": 0.6727, "step": 2388 }, { "epoch": 0.07338801339354284, "grad_norm": 0.3513924181461334, "learning_rate": 1.9938929375190607e-05, "loss": 0.6359, "step": 2389 }, { "epoch": 0.073418732528492, "grad_norm": 0.34043487906455994, "learning_rate": 1.9938876034573257e-05, "loss": 0.6134, "step": 2390 }, { "epoch": 0.07344945166344116, "grad_norm": 0.3346777856349945, "learning_rate": 1.9938822670742968e-05, "loss": 0.6867, "step": 2391 }, { "epoch": 0.07348017079839032, "grad_norm": 0.3113040328025818, "learning_rate": 1.9938769283699867e-05, "loss": 0.652, "step": 2392 }, { "epoch": 0.07351088993333947, "grad_norm": 0.3436412811279297, "learning_rate": 1.9938715873444072e-05, "loss": 0.6135, "step": 2393 }, { "epoch": 0.07354160906828863, "grad_norm": 0.3263019621372223, "learning_rate": 1.9938662439975715e-05, "loss": 0.6095, "step": 2394 }, { "epoch": 0.0735723282032378, "grad_norm": 0.3352179229259491, "learning_rate": 1.9938608983294915e-05, "loss": 0.575, "step": 2395 }, { "epoch": 0.07360304733818696, "grad_norm": 0.3300243020057678, "learning_rate": 1.99385555034018e-05, "loss": 0.6393, "step": 2396 }, { "epoch": 0.07363376647313612, "grad_norm": 0.3261314332485199, "learning_rate": 1.9938502000296497e-05, "loss": 0.5757, "step": 2397 }, { "epoch": 0.07366448560808528, "grad_norm": 0.3490402400493622, "learning_rate": 1.9938448473979126e-05, "loss": 0.5504, "step": 2398 }, { "epoch": 0.07369520474303444, "grad_norm": 0.3098176419734955, "learning_rate": 1.9938394924449816e-05, "loss": 0.5768, "step": 2399 }, { "epoch": 0.0737259238779836, "grad_norm": 0.34357723593711853, "learning_rate": 1.993834135170869e-05, "loss": 0.6161, "step": 2400 }, { "epoch": 0.07375664301293276, "grad_norm": 0.31747958064079285, "learning_rate": 1.9938287755755873e-05, "loss": 0.6478, "step": 2401 }, { "epoch": 0.07378736214788191, "grad_norm": 0.3904709815979004, "learning_rate": 1.993823413659149e-05, "loss": 0.6443, "step": 2402 }, { "epoch": 0.07381808128283107, "grad_norm": 0.3252991735935211, "learning_rate": 1.993818049421567e-05, "loss": 0.6151, "step": 2403 }, { "epoch": 0.07384880041778023, "grad_norm": 0.33654919266700745, "learning_rate": 1.9938126828628537e-05, "loss": 0.6489, "step": 2404 }, { "epoch": 0.0738795195527294, "grad_norm": 0.38823971152305603, "learning_rate": 1.993807313983021e-05, "loss": 0.5497, "step": 2405 }, { "epoch": 0.07391023868767856, "grad_norm": 0.32952508330345154, "learning_rate": 1.9938019427820824e-05, "loss": 0.6854, "step": 2406 }, { "epoch": 0.07394095782262772, "grad_norm": 0.5573270320892334, "learning_rate": 1.9937965692600496e-05, "loss": 0.5863, "step": 2407 }, { "epoch": 0.07397167695757688, "grad_norm": 0.3821525573730469, "learning_rate": 1.9937911934169355e-05, "loss": 0.6139, "step": 2408 }, { "epoch": 0.07400239609252604, "grad_norm": 0.3163740932941437, "learning_rate": 1.9937858152527526e-05, "loss": 0.6567, "step": 2409 }, { "epoch": 0.07403311522747519, "grad_norm": 0.3503473997116089, "learning_rate": 1.9937804347675137e-05, "loss": 0.6365, "step": 2410 }, { "epoch": 0.07406383436242435, "grad_norm": 0.31511732935905457, "learning_rate": 1.993775051961231e-05, "loss": 0.6185, "step": 2411 }, { "epoch": 0.07409455349737351, "grad_norm": 0.3281126618385315, "learning_rate": 1.9937696668339176e-05, "loss": 0.6693, "step": 2412 }, { "epoch": 0.07412527263232267, "grad_norm": 0.8676486611366272, "learning_rate": 1.9937642793855855e-05, "loss": 0.6166, "step": 2413 }, { "epoch": 0.07415599176727183, "grad_norm": 0.3348962068557739, "learning_rate": 1.9937588896162475e-05, "loss": 0.633, "step": 2414 }, { "epoch": 0.074186710902221, "grad_norm": 0.3614314794540405, "learning_rate": 1.9937534975259163e-05, "loss": 0.6006, "step": 2415 }, { "epoch": 0.07421743003717016, "grad_norm": 0.32203245162963867, "learning_rate": 1.9937481031146043e-05, "loss": 0.5114, "step": 2416 }, { "epoch": 0.07424814917211932, "grad_norm": 0.3887616991996765, "learning_rate": 1.9937427063823245e-05, "loss": 0.6093, "step": 2417 }, { "epoch": 0.07427886830706848, "grad_norm": 0.32728227972984314, "learning_rate": 1.9937373073290887e-05, "loss": 0.5381, "step": 2418 }, { "epoch": 0.07430958744201763, "grad_norm": 0.3311820328235626, "learning_rate": 1.99373190595491e-05, "loss": 0.6365, "step": 2419 }, { "epoch": 0.07434030657696679, "grad_norm": 0.3788667619228363, "learning_rate": 1.9937265022598016e-05, "loss": 0.6056, "step": 2420 }, { "epoch": 0.07437102571191595, "grad_norm": 0.3504379391670227, "learning_rate": 1.9937210962437754e-05, "loss": 0.6617, "step": 2421 }, { "epoch": 0.07440174484686511, "grad_norm": 0.34318187832832336, "learning_rate": 1.993715687906844e-05, "loss": 0.6231, "step": 2422 }, { "epoch": 0.07443246398181427, "grad_norm": 0.38719552755355835, "learning_rate": 1.9937102772490204e-05, "loss": 0.6133, "step": 2423 }, { "epoch": 0.07446318311676343, "grad_norm": 0.37958914041519165, "learning_rate": 1.993704864270317e-05, "loss": 0.6737, "step": 2424 }, { "epoch": 0.0744939022517126, "grad_norm": 0.33352354168891907, "learning_rate": 1.9936994489707463e-05, "loss": 0.5844, "step": 2425 }, { "epoch": 0.07452462138666176, "grad_norm": 0.36835914850234985, "learning_rate": 1.9936940313503213e-05, "loss": 0.6407, "step": 2426 }, { "epoch": 0.0745553405216109, "grad_norm": 0.3791012465953827, "learning_rate": 1.9936886114090544e-05, "loss": 0.4855, "step": 2427 }, { "epoch": 0.07458605965656007, "grad_norm": 0.3943319618701935, "learning_rate": 1.9936831891469583e-05, "loss": 0.6123, "step": 2428 }, { "epoch": 0.07461677879150923, "grad_norm": 0.3538697063922882, "learning_rate": 1.9936777645640458e-05, "loss": 0.6664, "step": 2429 }, { "epoch": 0.07464749792645839, "grad_norm": 0.32536080479621887, "learning_rate": 1.9936723376603298e-05, "loss": 0.649, "step": 2430 }, { "epoch": 0.07467821706140755, "grad_norm": 0.4689594805240631, "learning_rate": 1.993666908435822e-05, "loss": 0.6474, "step": 2431 }, { "epoch": 0.07470893619635671, "grad_norm": 0.3708209693431854, "learning_rate": 1.9936614768905362e-05, "loss": 0.6121, "step": 2432 }, { "epoch": 0.07473965533130587, "grad_norm": 0.33430150151252747, "learning_rate": 1.9936560430244848e-05, "loss": 0.5542, "step": 2433 }, { "epoch": 0.07477037446625504, "grad_norm": 0.32218194007873535, "learning_rate": 1.9936506068376797e-05, "loss": 0.6173, "step": 2434 }, { "epoch": 0.0748010936012042, "grad_norm": 0.3490361273288727, "learning_rate": 1.9936451683301347e-05, "loss": 0.6056, "step": 2435 }, { "epoch": 0.07483181273615334, "grad_norm": 0.31083083152770996, "learning_rate": 1.9936397275018618e-05, "loss": 0.6352, "step": 2436 }, { "epoch": 0.0748625318711025, "grad_norm": 0.3655105531215668, "learning_rate": 1.993634284352874e-05, "loss": 0.5831, "step": 2437 }, { "epoch": 0.07489325100605167, "grad_norm": 0.37039968371391296, "learning_rate": 1.9936288388831838e-05, "loss": 0.6676, "step": 2438 }, { "epoch": 0.07492397014100083, "grad_norm": 0.3468782901763916, "learning_rate": 1.993623391092804e-05, "loss": 0.5885, "step": 2439 }, { "epoch": 0.07495468927594999, "grad_norm": 0.37939924001693726, "learning_rate": 1.9936179409817477e-05, "loss": 0.6516, "step": 2440 }, { "epoch": 0.07498540841089915, "grad_norm": 0.36011841893196106, "learning_rate": 1.9936124885500274e-05, "loss": 0.5899, "step": 2441 }, { "epoch": 0.07501612754584831, "grad_norm": 0.37312963604927063, "learning_rate": 1.9936070337976553e-05, "loss": 0.594, "step": 2442 }, { "epoch": 0.07504684668079747, "grad_norm": 0.31900861859321594, "learning_rate": 1.9936015767246446e-05, "loss": 0.5896, "step": 2443 }, { "epoch": 0.07507756581574662, "grad_norm": 0.32950448989868164, "learning_rate": 1.9935961173310083e-05, "loss": 0.5911, "step": 2444 }, { "epoch": 0.07510828495069578, "grad_norm": 0.30742791295051575, "learning_rate": 1.9935906556167586e-05, "loss": 0.5484, "step": 2445 }, { "epoch": 0.07513900408564494, "grad_norm": 0.36050426959991455, "learning_rate": 1.993585191581909e-05, "loss": 0.6279, "step": 2446 }, { "epoch": 0.0751697232205941, "grad_norm": 0.3534471094608307, "learning_rate": 1.9935797252264716e-05, "loss": 0.5827, "step": 2447 }, { "epoch": 0.07520044235554327, "grad_norm": 0.33655068278312683, "learning_rate": 1.993574256550459e-05, "loss": 0.5961, "step": 2448 }, { "epoch": 0.07523116149049243, "grad_norm": 0.35631951689720154, "learning_rate": 1.9935687855538846e-05, "loss": 0.5183, "step": 2449 }, { "epoch": 0.07526188062544159, "grad_norm": 0.3675488531589508, "learning_rate": 1.9935633122367607e-05, "loss": 0.6121, "step": 2450 }, { "epoch": 0.07529259976039075, "grad_norm": 0.35517418384552, "learning_rate": 1.9935578365991006e-05, "loss": 0.6596, "step": 2451 }, { "epoch": 0.07532331889533991, "grad_norm": 0.3383217453956604, "learning_rate": 1.993552358640917e-05, "loss": 0.5894, "step": 2452 }, { "epoch": 0.07535403803028906, "grad_norm": 0.341969758272171, "learning_rate": 1.993546878362222e-05, "loss": 0.5671, "step": 2453 }, { "epoch": 0.07538475716523822, "grad_norm": 0.4088347256183624, "learning_rate": 1.9935413957630288e-05, "loss": 0.6735, "step": 2454 }, { "epoch": 0.07541547630018738, "grad_norm": 0.36870071291923523, "learning_rate": 1.993535910843351e-05, "loss": 0.6339, "step": 2455 }, { "epoch": 0.07544619543513655, "grad_norm": 0.32350388169288635, "learning_rate": 1.9935304236032e-05, "loss": 0.5366, "step": 2456 }, { "epoch": 0.0754769145700857, "grad_norm": 0.3748690187931061, "learning_rate": 1.993524934042589e-05, "loss": 0.6513, "step": 2457 }, { "epoch": 0.07550763370503487, "grad_norm": 0.32318809628486633, "learning_rate": 1.993519442161532e-05, "loss": 0.6239, "step": 2458 }, { "epoch": 0.07553835283998403, "grad_norm": 0.29844483733177185, "learning_rate": 1.9935139479600404e-05, "loss": 0.628, "step": 2459 }, { "epoch": 0.07556907197493319, "grad_norm": 0.3190557658672333, "learning_rate": 1.9935084514381277e-05, "loss": 0.6204, "step": 2460 }, { "epoch": 0.07559979110988234, "grad_norm": 0.3238297402858734, "learning_rate": 1.993502952595807e-05, "loss": 0.5993, "step": 2461 }, { "epoch": 0.0756305102448315, "grad_norm": 0.3271649479866028, "learning_rate": 1.99349745143309e-05, "loss": 0.6096, "step": 2462 }, { "epoch": 0.07566122937978066, "grad_norm": 0.34880128502845764, "learning_rate": 1.9934919479499913e-05, "loss": 0.5984, "step": 2463 }, { "epoch": 0.07569194851472982, "grad_norm": 0.37039124965667725, "learning_rate": 1.993486442146522e-05, "loss": 0.5636, "step": 2464 }, { "epoch": 0.07572266764967898, "grad_norm": 0.33419129252433777, "learning_rate": 1.993480934022696e-05, "loss": 0.6413, "step": 2465 }, { "epoch": 0.07575338678462815, "grad_norm": 0.3515890836715698, "learning_rate": 1.993475423578526e-05, "loss": 0.6676, "step": 2466 }, { "epoch": 0.07578410591957731, "grad_norm": 0.32323887944221497, "learning_rate": 1.9934699108140244e-05, "loss": 0.6137, "step": 2467 }, { "epoch": 0.07581482505452647, "grad_norm": 0.3376542329788208, "learning_rate": 1.993464395729205e-05, "loss": 0.5625, "step": 2468 }, { "epoch": 0.07584554418947563, "grad_norm": 0.3449452221393585, "learning_rate": 1.9934588783240796e-05, "loss": 0.5811, "step": 2469 }, { "epoch": 0.07587626332442478, "grad_norm": 0.3311859369277954, "learning_rate": 1.9934533585986623e-05, "loss": 0.6436, "step": 2470 }, { "epoch": 0.07590698245937394, "grad_norm": 0.4326750636100769, "learning_rate": 1.9934478365529645e-05, "loss": 0.6297, "step": 2471 }, { "epoch": 0.0759377015943231, "grad_norm": 0.3386635184288025, "learning_rate": 1.9934423121870002e-05, "loss": 0.5267, "step": 2472 }, { "epoch": 0.07596842072927226, "grad_norm": 0.435200035572052, "learning_rate": 1.993436785500782e-05, "loss": 0.65, "step": 2473 }, { "epoch": 0.07599913986422142, "grad_norm": 0.37801307439804077, "learning_rate": 1.993431256494323e-05, "loss": 0.5121, "step": 2474 }, { "epoch": 0.07602985899917059, "grad_norm": 0.32581260800361633, "learning_rate": 1.9934257251676357e-05, "loss": 0.599, "step": 2475 }, { "epoch": 0.07606057813411975, "grad_norm": 0.34863656759262085, "learning_rate": 1.993420191520733e-05, "loss": 0.6612, "step": 2476 }, { "epoch": 0.07609129726906891, "grad_norm": 0.37314242124557495, "learning_rate": 1.9934146555536284e-05, "loss": 0.6298, "step": 2477 }, { "epoch": 0.07612201640401807, "grad_norm": 0.3531608581542969, "learning_rate": 1.9934091172663345e-05, "loss": 0.6869, "step": 2478 }, { "epoch": 0.07615273553896722, "grad_norm": 0.32910585403442383, "learning_rate": 1.993403576658864e-05, "loss": 0.5595, "step": 2479 }, { "epoch": 0.07618345467391638, "grad_norm": 0.45403623580932617, "learning_rate": 1.9933980337312305e-05, "loss": 0.6081, "step": 2480 }, { "epoch": 0.07621417380886554, "grad_norm": 0.37874236702919006, "learning_rate": 1.9933924884834464e-05, "loss": 0.5567, "step": 2481 }, { "epoch": 0.0762448929438147, "grad_norm": 0.37998706102371216, "learning_rate": 1.9933869409155244e-05, "loss": 0.6869, "step": 2482 }, { "epoch": 0.07627561207876386, "grad_norm": 0.3363747298717499, "learning_rate": 1.993381391027478e-05, "loss": 0.6606, "step": 2483 }, { "epoch": 0.07630633121371302, "grad_norm": 0.35554298758506775, "learning_rate": 1.99337583881932e-05, "loss": 0.6818, "step": 2484 }, { "epoch": 0.07633705034866219, "grad_norm": 0.4194576144218445, "learning_rate": 1.9933702842910635e-05, "loss": 0.6591, "step": 2485 }, { "epoch": 0.07636776948361135, "grad_norm": 0.4392673969268799, "learning_rate": 1.993364727442721e-05, "loss": 0.6481, "step": 2486 }, { "epoch": 0.0763984886185605, "grad_norm": 0.34611082077026367, "learning_rate": 1.993359168274306e-05, "loss": 0.5975, "step": 2487 }, { "epoch": 0.07642920775350966, "grad_norm": 0.3356529772281647, "learning_rate": 1.9933536067858313e-05, "loss": 0.6244, "step": 2488 }, { "epoch": 0.07645992688845882, "grad_norm": 0.3184433579444885, "learning_rate": 1.99334804297731e-05, "loss": 0.6252, "step": 2489 }, { "epoch": 0.07649064602340798, "grad_norm": 0.3446669578552246, "learning_rate": 1.993342476848755e-05, "loss": 0.6664, "step": 2490 }, { "epoch": 0.07652136515835714, "grad_norm": 0.42689579725265503, "learning_rate": 1.993336908400179e-05, "loss": 0.7492, "step": 2491 }, { "epoch": 0.0765520842933063, "grad_norm": 0.35681504011154175, "learning_rate": 1.9933313376315952e-05, "loss": 0.5803, "step": 2492 }, { "epoch": 0.07658280342825546, "grad_norm": 0.37526610493659973, "learning_rate": 1.993325764543017e-05, "loss": 0.6491, "step": 2493 }, { "epoch": 0.07661352256320463, "grad_norm": 0.3606531023979187, "learning_rate": 1.9933201891344568e-05, "loss": 0.5643, "step": 2494 }, { "epoch": 0.07664424169815379, "grad_norm": 0.2922174334526062, "learning_rate": 1.9933146114059285e-05, "loss": 0.6075, "step": 2495 }, { "epoch": 0.07667496083310293, "grad_norm": 0.32933056354522705, "learning_rate": 1.993309031357444e-05, "loss": 0.6708, "step": 2496 }, { "epoch": 0.0767056799680521, "grad_norm": 0.3272293508052826, "learning_rate": 1.993303448989017e-05, "loss": 0.563, "step": 2497 }, { "epoch": 0.07673639910300126, "grad_norm": 0.357683002948761, "learning_rate": 1.9932978643006604e-05, "loss": 0.5923, "step": 2498 }, { "epoch": 0.07676711823795042, "grad_norm": 0.33317482471466064, "learning_rate": 1.9932922772923873e-05, "loss": 0.6636, "step": 2499 }, { "epoch": 0.07679783737289958, "grad_norm": 0.34001195430755615, "learning_rate": 1.9932866879642107e-05, "loss": 0.6026, "step": 2500 }, { "epoch": 0.07682855650784874, "grad_norm": 0.3284319043159485, "learning_rate": 1.9932810963161436e-05, "loss": 0.5248, "step": 2501 }, { "epoch": 0.0768592756427979, "grad_norm": 0.349875271320343, "learning_rate": 1.9932755023481993e-05, "loss": 0.5863, "step": 2502 }, { "epoch": 0.07688999477774706, "grad_norm": 0.3613593280315399, "learning_rate": 1.9932699060603907e-05, "loss": 0.6434, "step": 2503 }, { "epoch": 0.07692071391269621, "grad_norm": 0.3686390221118927, "learning_rate": 1.993264307452731e-05, "loss": 0.5803, "step": 2504 }, { "epoch": 0.07695143304764537, "grad_norm": 0.3269595503807068, "learning_rate": 1.9932587065252326e-05, "loss": 0.5658, "step": 2505 }, { "epoch": 0.07698215218259453, "grad_norm": 0.3564971387386322, "learning_rate": 1.9932531032779095e-05, "loss": 0.6164, "step": 2506 }, { "epoch": 0.0770128713175437, "grad_norm": 0.3069271147251129, "learning_rate": 1.9932474977107746e-05, "loss": 0.5788, "step": 2507 }, { "epoch": 0.07704359045249286, "grad_norm": 0.30893707275390625, "learning_rate": 1.9932418898238405e-05, "loss": 0.5849, "step": 2508 }, { "epoch": 0.07707430958744202, "grad_norm": 0.29526185989379883, "learning_rate": 1.9932362796171206e-05, "loss": 0.474, "step": 2509 }, { "epoch": 0.07710502872239118, "grad_norm": 0.3152792751789093, "learning_rate": 1.993230667090628e-05, "loss": 0.5433, "step": 2510 }, { "epoch": 0.07713574785734034, "grad_norm": 0.37666696310043335, "learning_rate": 1.993225052244376e-05, "loss": 0.6594, "step": 2511 }, { "epoch": 0.0771664669922895, "grad_norm": 0.336330384016037, "learning_rate": 1.9932194350783776e-05, "loss": 0.6188, "step": 2512 }, { "epoch": 0.07719718612723865, "grad_norm": 0.6543074250221252, "learning_rate": 1.9932138155926456e-05, "loss": 0.7542, "step": 2513 }, { "epoch": 0.07722790526218781, "grad_norm": 0.31721827387809753, "learning_rate": 1.9932081937871935e-05, "loss": 0.6287, "step": 2514 }, { "epoch": 0.07725862439713697, "grad_norm": 0.5091707110404968, "learning_rate": 1.993202569662034e-05, "loss": 0.6508, "step": 2515 }, { "epoch": 0.07728934353208614, "grad_norm": 0.3712039291858673, "learning_rate": 1.993196943217181e-05, "loss": 0.6244, "step": 2516 }, { "epoch": 0.0773200626670353, "grad_norm": 0.3303382992744446, "learning_rate": 1.9931913144526467e-05, "loss": 0.7121, "step": 2517 }, { "epoch": 0.07735078180198446, "grad_norm": 0.332744836807251, "learning_rate": 1.993185683368445e-05, "loss": 0.6541, "step": 2518 }, { "epoch": 0.07738150093693362, "grad_norm": 0.34520775079727173, "learning_rate": 1.993180049964589e-05, "loss": 0.6439, "step": 2519 }, { "epoch": 0.07741222007188278, "grad_norm": 0.34182849526405334, "learning_rate": 1.9931744142410912e-05, "loss": 0.6045, "step": 2520 }, { "epoch": 0.07744293920683193, "grad_norm": 0.3610750734806061, "learning_rate": 1.9931687761979654e-05, "loss": 0.6502, "step": 2521 }, { "epoch": 0.07747365834178109, "grad_norm": 0.3256135880947113, "learning_rate": 1.9931631358352242e-05, "loss": 0.6301, "step": 2522 }, { "epoch": 0.07750437747673025, "grad_norm": 0.36114227771759033, "learning_rate": 1.9931574931528817e-05, "loss": 0.6363, "step": 2523 }, { "epoch": 0.07753509661167941, "grad_norm": 0.3857135474681854, "learning_rate": 1.9931518481509502e-05, "loss": 0.5551, "step": 2524 }, { "epoch": 0.07756581574662857, "grad_norm": 0.36042138934135437, "learning_rate": 1.9931462008294434e-05, "loss": 0.565, "step": 2525 }, { "epoch": 0.07759653488157774, "grad_norm": 0.3447607159614563, "learning_rate": 1.9931405511883742e-05, "loss": 0.615, "step": 2526 }, { "epoch": 0.0776272540165269, "grad_norm": 0.37190955877304077, "learning_rate": 1.9931348992277554e-05, "loss": 0.6596, "step": 2527 }, { "epoch": 0.07765797315147606, "grad_norm": 0.3389231264591217, "learning_rate": 1.9931292449476016e-05, "loss": 0.6934, "step": 2528 }, { "epoch": 0.07768869228642522, "grad_norm": 0.3379162549972534, "learning_rate": 1.9931235883479246e-05, "loss": 0.6482, "step": 2529 }, { "epoch": 0.07771941142137437, "grad_norm": 0.3351997137069702, "learning_rate": 1.993117929428738e-05, "loss": 0.6633, "step": 2530 }, { "epoch": 0.07775013055632353, "grad_norm": 0.34079205989837646, "learning_rate": 1.9931122681900554e-05, "loss": 0.6576, "step": 2531 }, { "epoch": 0.07778084969127269, "grad_norm": 0.30306681990623474, "learning_rate": 1.9931066046318892e-05, "loss": 0.6199, "step": 2532 }, { "epoch": 0.07781156882622185, "grad_norm": 0.3111242949962616, "learning_rate": 1.9931009387542538e-05, "loss": 0.5913, "step": 2533 }, { "epoch": 0.07784228796117101, "grad_norm": 0.32320067286491394, "learning_rate": 1.9930952705571613e-05, "loss": 0.6257, "step": 2534 }, { "epoch": 0.07787300709612018, "grad_norm": 0.33074334263801575, "learning_rate": 1.9930896000406256e-05, "loss": 0.5517, "step": 2535 }, { "epoch": 0.07790372623106934, "grad_norm": 0.33213502168655396, "learning_rate": 1.99308392720466e-05, "loss": 0.5186, "step": 2536 }, { "epoch": 0.0779344453660185, "grad_norm": 0.33391860127449036, "learning_rate": 1.9930782520492774e-05, "loss": 0.6253, "step": 2537 }, { "epoch": 0.07796516450096765, "grad_norm": 0.3306761085987091, "learning_rate": 1.993072574574491e-05, "loss": 0.7053, "step": 2538 }, { "epoch": 0.07799588363591681, "grad_norm": 0.35078611969947815, "learning_rate": 1.9930668947803144e-05, "loss": 0.6863, "step": 2539 }, { "epoch": 0.07802660277086597, "grad_norm": 0.3536697328090668, "learning_rate": 1.993061212666761e-05, "loss": 0.581, "step": 2540 }, { "epoch": 0.07805732190581513, "grad_norm": 0.47167104482650757, "learning_rate": 1.993055528233843e-05, "loss": 0.6092, "step": 2541 }, { "epoch": 0.07808804104076429, "grad_norm": 0.3312068581581116, "learning_rate": 1.9930498414815754e-05, "loss": 0.6395, "step": 2542 }, { "epoch": 0.07811876017571345, "grad_norm": 0.39657771587371826, "learning_rate": 1.99304415240997e-05, "loss": 0.621, "step": 2543 }, { "epoch": 0.07814947931066261, "grad_norm": 0.3782680928707123, "learning_rate": 1.9930384610190407e-05, "loss": 0.6322, "step": 2544 }, { "epoch": 0.07818019844561178, "grad_norm": 0.3931754529476166, "learning_rate": 1.9930327673088005e-05, "loss": 0.6306, "step": 2545 }, { "epoch": 0.07821091758056094, "grad_norm": 0.32752808928489685, "learning_rate": 1.9930270712792634e-05, "loss": 0.5884, "step": 2546 }, { "epoch": 0.07824163671551009, "grad_norm": 0.3295663297176361, "learning_rate": 1.993021372930442e-05, "loss": 0.6907, "step": 2547 }, { "epoch": 0.07827235585045925, "grad_norm": 0.45207929611206055, "learning_rate": 1.9930156722623496e-05, "loss": 0.6314, "step": 2548 }, { "epoch": 0.07830307498540841, "grad_norm": 0.36948058009147644, "learning_rate": 1.9930099692750004e-05, "loss": 0.6933, "step": 2549 }, { "epoch": 0.07833379412035757, "grad_norm": 0.33911609649658203, "learning_rate": 1.9930042639684065e-05, "loss": 0.5008, "step": 2550 }, { "epoch": 0.07836451325530673, "grad_norm": 0.3380142152309418, "learning_rate": 1.992998556342582e-05, "loss": 0.5651, "step": 2551 }, { "epoch": 0.07839523239025589, "grad_norm": 0.5416232943534851, "learning_rate": 1.99299284639754e-05, "loss": 0.6272, "step": 2552 }, { "epoch": 0.07842595152520505, "grad_norm": 0.31836748123168945, "learning_rate": 1.9929871341332937e-05, "loss": 0.6123, "step": 2553 }, { "epoch": 0.07845667066015422, "grad_norm": 0.30304020643234253, "learning_rate": 1.9929814195498567e-05, "loss": 0.527, "step": 2554 }, { "epoch": 0.07848738979510338, "grad_norm": 0.36214837431907654, "learning_rate": 1.992975702647242e-05, "loss": 0.5639, "step": 2555 }, { "epoch": 0.07851810893005252, "grad_norm": 0.3240317702293396, "learning_rate": 1.9929699834254636e-05, "loss": 0.6172, "step": 2556 }, { "epoch": 0.07854882806500169, "grad_norm": 0.367611289024353, "learning_rate": 1.9929642618845346e-05, "loss": 0.6417, "step": 2557 }, { "epoch": 0.07857954719995085, "grad_norm": 0.3312862813472748, "learning_rate": 1.9929585380244676e-05, "loss": 0.5482, "step": 2558 }, { "epoch": 0.07861026633490001, "grad_norm": 0.32803112268447876, "learning_rate": 1.992952811845277e-05, "loss": 0.646, "step": 2559 }, { "epoch": 0.07864098546984917, "grad_norm": 0.36769798398017883, "learning_rate": 1.992947083346976e-05, "loss": 0.6571, "step": 2560 }, { "epoch": 0.07867170460479833, "grad_norm": 0.3001982569694519, "learning_rate": 1.992941352529577e-05, "loss": 0.6088, "step": 2561 }, { "epoch": 0.07870242373974749, "grad_norm": 0.3441161513328552, "learning_rate": 1.9929356193930947e-05, "loss": 0.6632, "step": 2562 }, { "epoch": 0.07873314287469665, "grad_norm": 0.373363196849823, "learning_rate": 1.9929298839375417e-05, "loss": 0.574, "step": 2563 }, { "epoch": 0.0787638620096458, "grad_norm": 0.3323586583137512, "learning_rate": 1.9929241461629315e-05, "loss": 0.5971, "step": 2564 }, { "epoch": 0.07879458114459496, "grad_norm": 0.3849039375782013, "learning_rate": 1.992918406069278e-05, "loss": 0.6474, "step": 2565 }, { "epoch": 0.07882530027954412, "grad_norm": 0.3034350275993347, "learning_rate": 1.9929126636565937e-05, "loss": 0.5991, "step": 2566 }, { "epoch": 0.07885601941449329, "grad_norm": 0.3615133464336395, "learning_rate": 1.992906918924893e-05, "loss": 0.6636, "step": 2567 }, { "epoch": 0.07888673854944245, "grad_norm": 0.3255395293235779, "learning_rate": 1.9929011718741888e-05, "loss": 0.5857, "step": 2568 }, { "epoch": 0.07891745768439161, "grad_norm": 0.33248642086982727, "learning_rate": 1.9928954225044944e-05, "loss": 0.6184, "step": 2569 }, { "epoch": 0.07894817681934077, "grad_norm": 0.564972996711731, "learning_rate": 1.9928896708158234e-05, "loss": 0.6282, "step": 2570 }, { "epoch": 0.07897889595428993, "grad_norm": 0.2928917109966278, "learning_rate": 1.9928839168081892e-05, "loss": 0.5441, "step": 2571 }, { "epoch": 0.0790096150892391, "grad_norm": 0.36047419905662537, "learning_rate": 1.9928781604816056e-05, "loss": 0.6951, "step": 2572 }, { "epoch": 0.07904033422418824, "grad_norm": 0.3274795413017273, "learning_rate": 1.9928724018360852e-05, "loss": 0.5983, "step": 2573 }, { "epoch": 0.0790710533591374, "grad_norm": 0.3415004014968872, "learning_rate": 1.9928666408716424e-05, "loss": 0.5939, "step": 2574 }, { "epoch": 0.07910177249408656, "grad_norm": 0.37627875804901123, "learning_rate": 1.99286087758829e-05, "loss": 0.6591, "step": 2575 }, { "epoch": 0.07913249162903573, "grad_norm": 0.33888760209083557, "learning_rate": 1.9928551119860414e-05, "loss": 0.5956, "step": 2576 }, { "epoch": 0.07916321076398489, "grad_norm": 0.35889023542404175, "learning_rate": 1.992849344064911e-05, "loss": 0.7161, "step": 2577 }, { "epoch": 0.07919392989893405, "grad_norm": 0.322185218334198, "learning_rate": 1.9928435738249112e-05, "loss": 0.5791, "step": 2578 }, { "epoch": 0.07922464903388321, "grad_norm": 0.3304510712623596, "learning_rate": 1.992837801266056e-05, "loss": 0.6535, "step": 2579 }, { "epoch": 0.07925536816883237, "grad_norm": 0.3246460258960724, "learning_rate": 1.9928320263883586e-05, "loss": 0.6991, "step": 2580 }, { "epoch": 0.07928608730378152, "grad_norm": 0.3504895269870758, "learning_rate": 1.992826249191833e-05, "loss": 0.6158, "step": 2581 }, { "epoch": 0.07931680643873068, "grad_norm": 0.35232171416282654, "learning_rate": 1.9928204696764923e-05, "loss": 0.6834, "step": 2582 }, { "epoch": 0.07934752557367984, "grad_norm": 0.344690203666687, "learning_rate": 1.99281468784235e-05, "loss": 0.6944, "step": 2583 }, { "epoch": 0.079378244708629, "grad_norm": 0.3745235502719879, "learning_rate": 1.9928089036894193e-05, "loss": 0.6045, "step": 2584 }, { "epoch": 0.07940896384357816, "grad_norm": 0.3229826092720032, "learning_rate": 1.9928031172177144e-05, "loss": 0.6211, "step": 2585 }, { "epoch": 0.07943968297852733, "grad_norm": 0.31463873386383057, "learning_rate": 1.9927973284272486e-05, "loss": 0.576, "step": 2586 }, { "epoch": 0.07947040211347649, "grad_norm": 0.37938106060028076, "learning_rate": 1.9927915373180352e-05, "loss": 0.6868, "step": 2587 }, { "epoch": 0.07950112124842565, "grad_norm": 0.3318946957588196, "learning_rate": 1.9927857438900874e-05, "loss": 0.6523, "step": 2588 }, { "epoch": 0.07953184038337481, "grad_norm": 0.37501659989356995, "learning_rate": 1.9927799481434194e-05, "loss": 0.5997, "step": 2589 }, { "epoch": 0.07956255951832396, "grad_norm": 0.33932632207870483, "learning_rate": 1.9927741500780452e-05, "loss": 0.5828, "step": 2590 }, { "epoch": 0.07959327865327312, "grad_norm": 0.4965178966522217, "learning_rate": 1.992768349693977e-05, "loss": 0.6024, "step": 2591 }, { "epoch": 0.07962399778822228, "grad_norm": 0.354361891746521, "learning_rate": 1.9927625469912287e-05, "loss": 0.6041, "step": 2592 }, { "epoch": 0.07965471692317144, "grad_norm": 0.414718359708786, "learning_rate": 1.9927567419698146e-05, "loss": 0.646, "step": 2593 }, { "epoch": 0.0796854360581206, "grad_norm": 0.326719731092453, "learning_rate": 1.9927509346297477e-05, "loss": 0.5491, "step": 2594 }, { "epoch": 0.07971615519306977, "grad_norm": 0.3372949957847595, "learning_rate": 1.9927451249710417e-05, "loss": 0.6862, "step": 2595 }, { "epoch": 0.07974687432801893, "grad_norm": 0.33408963680267334, "learning_rate": 1.99273931299371e-05, "loss": 0.5724, "step": 2596 }, { "epoch": 0.07977759346296809, "grad_norm": 0.38652995228767395, "learning_rate": 1.9927334986977665e-05, "loss": 0.6248, "step": 2597 }, { "epoch": 0.07980831259791724, "grad_norm": 0.3323294222354889, "learning_rate": 1.9927276820832242e-05, "loss": 0.519, "step": 2598 }, { "epoch": 0.0798390317328664, "grad_norm": 0.3355146646499634, "learning_rate": 1.9927218631500976e-05, "loss": 0.6455, "step": 2599 }, { "epoch": 0.07986975086781556, "grad_norm": 0.35886266827583313, "learning_rate": 1.9927160418983992e-05, "loss": 0.6277, "step": 2600 }, { "epoch": 0.07990047000276472, "grad_norm": 0.35720446705818176, "learning_rate": 1.9927102183281436e-05, "loss": 0.7014, "step": 2601 }, { "epoch": 0.07993118913771388, "grad_norm": 0.3642016351222992, "learning_rate": 1.9927043924393437e-05, "loss": 0.6634, "step": 2602 }, { "epoch": 0.07996190827266304, "grad_norm": 0.40999141335487366, "learning_rate": 1.9926985642320137e-05, "loss": 0.6259, "step": 2603 }, { "epoch": 0.0799926274076122, "grad_norm": 0.33645880222320557, "learning_rate": 1.9926927337061665e-05, "loss": 0.5454, "step": 2604 }, { "epoch": 0.08002334654256137, "grad_norm": 0.3706991374492645, "learning_rate": 1.9926869008618163e-05, "loss": 0.6293, "step": 2605 }, { "epoch": 0.08005406567751053, "grad_norm": 0.3196882903575897, "learning_rate": 1.9926810656989763e-05, "loss": 0.6429, "step": 2606 }, { "epoch": 0.08008478481245968, "grad_norm": 0.33217158913612366, "learning_rate": 1.9926752282176605e-05, "loss": 0.6181, "step": 2607 }, { "epoch": 0.08011550394740884, "grad_norm": 0.35770994424819946, "learning_rate": 1.9926693884178823e-05, "loss": 0.6686, "step": 2608 }, { "epoch": 0.080146223082358, "grad_norm": 0.31575438380241394, "learning_rate": 1.9926635462996556e-05, "loss": 0.6115, "step": 2609 }, { "epoch": 0.08017694221730716, "grad_norm": 0.37533774971961975, "learning_rate": 1.9926577018629934e-05, "loss": 0.7062, "step": 2610 }, { "epoch": 0.08020766135225632, "grad_norm": 0.37693262100219727, "learning_rate": 1.9926518551079102e-05, "loss": 0.6056, "step": 2611 }, { "epoch": 0.08023838048720548, "grad_norm": 0.3437855839729309, "learning_rate": 1.9926460060344196e-05, "loss": 0.6329, "step": 2612 }, { "epoch": 0.08026909962215464, "grad_norm": 0.3656896650791168, "learning_rate": 1.992640154642534e-05, "loss": 0.6516, "step": 2613 }, { "epoch": 0.0802998187571038, "grad_norm": 0.32682138681411743, "learning_rate": 1.992634300932269e-05, "loss": 0.5925, "step": 2614 }, { "epoch": 0.08033053789205297, "grad_norm": 0.3157438039779663, "learning_rate": 1.9926284449036366e-05, "loss": 0.6509, "step": 2615 }, { "epoch": 0.08036125702700211, "grad_norm": 0.3685324490070343, "learning_rate": 1.9926225865566513e-05, "loss": 0.7052, "step": 2616 }, { "epoch": 0.08039197616195128, "grad_norm": 0.34214314818382263, "learning_rate": 1.9926167258913268e-05, "loss": 0.5773, "step": 2617 }, { "epoch": 0.08042269529690044, "grad_norm": 0.32922083139419556, "learning_rate": 1.9926108629076764e-05, "loss": 0.5358, "step": 2618 }, { "epoch": 0.0804534144318496, "grad_norm": 0.3413679301738739, "learning_rate": 1.992604997605714e-05, "loss": 0.6098, "step": 2619 }, { "epoch": 0.08048413356679876, "grad_norm": 0.3216688930988312, "learning_rate": 1.9925991299854536e-05, "loss": 0.569, "step": 2620 }, { "epoch": 0.08051485270174792, "grad_norm": 0.3432796895503998, "learning_rate": 1.9925932600469082e-05, "loss": 0.6023, "step": 2621 }, { "epoch": 0.08054557183669708, "grad_norm": 0.3745849132537842, "learning_rate": 1.992587387790092e-05, "loss": 0.568, "step": 2622 }, { "epoch": 0.08057629097164624, "grad_norm": 0.3953138589859009, "learning_rate": 1.992581513215019e-05, "loss": 0.5162, "step": 2623 }, { "epoch": 0.08060701010659539, "grad_norm": 0.3325346112251282, "learning_rate": 1.9925756363217023e-05, "loss": 0.6357, "step": 2624 }, { "epoch": 0.08063772924154455, "grad_norm": 0.3602798879146576, "learning_rate": 1.9925697571101556e-05, "loss": 0.6243, "step": 2625 }, { "epoch": 0.08066844837649371, "grad_norm": 0.3427089750766754, "learning_rate": 1.9925638755803933e-05, "loss": 0.6865, "step": 2626 }, { "epoch": 0.08069916751144288, "grad_norm": 0.8025543093681335, "learning_rate": 1.992557991732429e-05, "loss": 0.6642, "step": 2627 }, { "epoch": 0.08072988664639204, "grad_norm": 0.3177221119403839, "learning_rate": 1.9925521055662755e-05, "loss": 0.6084, "step": 2628 }, { "epoch": 0.0807606057813412, "grad_norm": 0.3240867853164673, "learning_rate": 1.9925462170819475e-05, "loss": 0.5661, "step": 2629 }, { "epoch": 0.08079132491629036, "grad_norm": 0.3161257803440094, "learning_rate": 1.9925403262794586e-05, "loss": 0.6437, "step": 2630 }, { "epoch": 0.08082204405123952, "grad_norm": 0.31474098563194275, "learning_rate": 1.9925344331588226e-05, "loss": 0.6781, "step": 2631 }, { "epoch": 0.08085276318618868, "grad_norm": 0.3435790538787842, "learning_rate": 1.992528537720053e-05, "loss": 0.6259, "step": 2632 }, { "epoch": 0.08088348232113783, "grad_norm": 0.3514094948768616, "learning_rate": 1.9925226399631633e-05, "loss": 0.6093, "step": 2633 }, { "epoch": 0.08091420145608699, "grad_norm": 0.33251839876174927, "learning_rate": 1.992516739888168e-05, "loss": 0.6361, "step": 2634 }, { "epoch": 0.08094492059103615, "grad_norm": 0.3178584575653076, "learning_rate": 1.9925108374950808e-05, "loss": 0.5529, "step": 2635 }, { "epoch": 0.08097563972598532, "grad_norm": 0.30904000997543335, "learning_rate": 1.992504932783915e-05, "loss": 0.6106, "step": 2636 }, { "epoch": 0.08100635886093448, "grad_norm": 0.38167572021484375, "learning_rate": 1.9924990257546845e-05, "loss": 0.5826, "step": 2637 }, { "epoch": 0.08103707799588364, "grad_norm": 0.3108833432197571, "learning_rate": 1.992493116407403e-05, "loss": 0.4768, "step": 2638 }, { "epoch": 0.0810677971308328, "grad_norm": 0.3342158794403076, "learning_rate": 1.9924872047420848e-05, "loss": 0.6665, "step": 2639 }, { "epoch": 0.08109851626578196, "grad_norm": 0.36095502972602844, "learning_rate": 1.9924812907587433e-05, "loss": 0.6232, "step": 2640 }, { "epoch": 0.08112923540073111, "grad_norm": 0.32967355847358704, "learning_rate": 1.9924753744573925e-05, "loss": 0.5866, "step": 2641 }, { "epoch": 0.08115995453568027, "grad_norm": 0.41543862223625183, "learning_rate": 1.9924694558380463e-05, "loss": 0.5832, "step": 2642 }, { "epoch": 0.08119067367062943, "grad_norm": 0.31920337677001953, "learning_rate": 1.992463534900718e-05, "loss": 0.5077, "step": 2643 }, { "epoch": 0.0812213928055786, "grad_norm": 0.3503342866897583, "learning_rate": 1.9924576116454223e-05, "loss": 0.5825, "step": 2644 }, { "epoch": 0.08125211194052775, "grad_norm": 0.32780715823173523, "learning_rate": 1.992451686072172e-05, "loss": 0.5743, "step": 2645 }, { "epoch": 0.08128283107547692, "grad_norm": 0.34778738021850586, "learning_rate": 1.992445758180982e-05, "loss": 0.5775, "step": 2646 }, { "epoch": 0.08131355021042608, "grad_norm": 0.34641531109809875, "learning_rate": 1.9924398279718654e-05, "loss": 0.6549, "step": 2647 }, { "epoch": 0.08134426934537524, "grad_norm": 0.3568730056285858, "learning_rate": 1.9924338954448362e-05, "loss": 0.5707, "step": 2648 }, { "epoch": 0.0813749884803244, "grad_norm": 0.3204611837863922, "learning_rate": 1.9924279605999084e-05, "loss": 0.6259, "step": 2649 }, { "epoch": 0.08140570761527355, "grad_norm": 0.3123834431171417, "learning_rate": 1.9924220234370956e-05, "loss": 0.5535, "step": 2650 }, { "epoch": 0.08143642675022271, "grad_norm": 0.3149631917476654, "learning_rate": 1.992416083956412e-05, "loss": 0.6457, "step": 2651 }, { "epoch": 0.08146714588517187, "grad_norm": 0.30782538652420044, "learning_rate": 1.9924101421578714e-05, "loss": 0.5652, "step": 2652 }, { "epoch": 0.08149786502012103, "grad_norm": 0.32899174094200134, "learning_rate": 1.9924041980414873e-05, "loss": 0.629, "step": 2653 }, { "epoch": 0.0815285841550702, "grad_norm": 0.37759241461753845, "learning_rate": 1.9923982516072745e-05, "loss": 0.6031, "step": 2654 }, { "epoch": 0.08155930329001936, "grad_norm": 0.7862966656684875, "learning_rate": 1.9923923028552458e-05, "loss": 0.5819, "step": 2655 }, { "epoch": 0.08159002242496852, "grad_norm": 0.3619701862335205, "learning_rate": 1.9923863517854158e-05, "loss": 0.6662, "step": 2656 }, { "epoch": 0.08162074155991768, "grad_norm": 0.33081796765327454, "learning_rate": 1.9923803983977977e-05, "loss": 0.5243, "step": 2657 }, { "epoch": 0.08165146069486683, "grad_norm": 0.383810430765152, "learning_rate": 1.9923744426924064e-05, "loss": 0.6531, "step": 2658 }, { "epoch": 0.08168217982981599, "grad_norm": 0.32071760296821594, "learning_rate": 1.9923684846692552e-05, "loss": 0.5111, "step": 2659 }, { "epoch": 0.08171289896476515, "grad_norm": 0.4031817317008972, "learning_rate": 1.992362524328358e-05, "loss": 0.6755, "step": 2660 }, { "epoch": 0.08174361809971431, "grad_norm": 0.3139403164386749, "learning_rate": 1.9923565616697287e-05, "loss": 0.5017, "step": 2661 }, { "epoch": 0.08177433723466347, "grad_norm": 0.3343051075935364, "learning_rate": 1.9923505966933815e-05, "loss": 0.5577, "step": 2662 }, { "epoch": 0.08180505636961263, "grad_norm": 0.3245469927787781, "learning_rate": 1.99234462939933e-05, "loss": 0.6244, "step": 2663 }, { "epoch": 0.0818357755045618, "grad_norm": 0.338379830121994, "learning_rate": 1.9923386597875883e-05, "loss": 0.7066, "step": 2664 }, { "epoch": 0.08186649463951096, "grad_norm": 0.32091495394706726, "learning_rate": 1.9923326878581706e-05, "loss": 0.6323, "step": 2665 }, { "epoch": 0.08189721377446012, "grad_norm": 0.3154975473880768, "learning_rate": 1.9923267136110906e-05, "loss": 0.581, "step": 2666 }, { "epoch": 0.08192793290940927, "grad_norm": 0.3442096412181854, "learning_rate": 1.992320737046362e-05, "loss": 0.5482, "step": 2667 }, { "epoch": 0.08195865204435843, "grad_norm": 0.31430405378341675, "learning_rate": 1.9923147581639992e-05, "loss": 0.5676, "step": 2668 }, { "epoch": 0.08198937117930759, "grad_norm": 0.3364175856113434, "learning_rate": 1.992308776964016e-05, "loss": 0.5633, "step": 2669 }, { "epoch": 0.08202009031425675, "grad_norm": 0.32822877168655396, "learning_rate": 1.9923027934464267e-05, "loss": 0.5112, "step": 2670 }, { "epoch": 0.08205080944920591, "grad_norm": 0.3952302038669586, "learning_rate": 1.9922968076112444e-05, "loss": 0.63, "step": 2671 }, { "epoch": 0.08208152858415507, "grad_norm": 0.31103256344795227, "learning_rate": 1.9922908194584834e-05, "loss": 0.6087, "step": 2672 }, { "epoch": 0.08211224771910423, "grad_norm": 0.3132093846797943, "learning_rate": 1.9922848289881583e-05, "loss": 0.5979, "step": 2673 }, { "epoch": 0.0821429668540534, "grad_norm": 0.37856748700141907, "learning_rate": 1.9922788362002826e-05, "loss": 0.611, "step": 2674 }, { "epoch": 0.08217368598900254, "grad_norm": 0.3739640414714813, "learning_rate": 1.9922728410948705e-05, "loss": 0.6316, "step": 2675 }, { "epoch": 0.0822044051239517, "grad_norm": 0.340451180934906, "learning_rate": 1.9922668436719356e-05, "loss": 0.5511, "step": 2676 }, { "epoch": 0.08223512425890087, "grad_norm": 0.33999231457710266, "learning_rate": 1.9922608439314922e-05, "loss": 0.6395, "step": 2677 }, { "epoch": 0.08226584339385003, "grad_norm": 0.3960931897163391, "learning_rate": 1.9922548418735544e-05, "loss": 0.5778, "step": 2678 }, { "epoch": 0.08229656252879919, "grad_norm": 0.34952008724212646, "learning_rate": 1.9922488374981362e-05, "loss": 0.6456, "step": 2679 }, { "epoch": 0.08232728166374835, "grad_norm": 0.3363831341266632, "learning_rate": 1.992242830805252e-05, "loss": 0.571, "step": 2680 }, { "epoch": 0.08235800079869751, "grad_norm": 0.6826049089431763, "learning_rate": 1.9922368217949146e-05, "loss": 0.5913, "step": 2681 }, { "epoch": 0.08238871993364667, "grad_norm": 0.426200270652771, "learning_rate": 1.992230810467139e-05, "loss": 0.6152, "step": 2682 }, { "epoch": 0.08241943906859583, "grad_norm": 0.34454798698425293, "learning_rate": 1.9922247968219393e-05, "loss": 0.7544, "step": 2683 }, { "epoch": 0.08245015820354498, "grad_norm": 0.3461996614933014, "learning_rate": 1.9922187808593293e-05, "loss": 0.594, "step": 2684 }, { "epoch": 0.08248087733849414, "grad_norm": 0.3694542348384857, "learning_rate": 1.9922127625793228e-05, "loss": 0.5862, "step": 2685 }, { "epoch": 0.0825115964734433, "grad_norm": 0.34679052233695984, "learning_rate": 1.992206741981934e-05, "loss": 0.5713, "step": 2686 }, { "epoch": 0.08254231560839247, "grad_norm": 0.4831583499908447, "learning_rate": 1.9922007190671774e-05, "loss": 0.6112, "step": 2687 }, { "epoch": 0.08257303474334163, "grad_norm": 0.3507954776287079, "learning_rate": 1.9921946938350664e-05, "loss": 0.6539, "step": 2688 }, { "epoch": 0.08260375387829079, "grad_norm": 0.32169172167778015, "learning_rate": 1.9921886662856155e-05, "loss": 0.6661, "step": 2689 }, { "epoch": 0.08263447301323995, "grad_norm": 0.38097020983695984, "learning_rate": 1.992182636418839e-05, "loss": 0.6307, "step": 2690 }, { "epoch": 0.08266519214818911, "grad_norm": 0.3176480531692505, "learning_rate": 1.99217660423475e-05, "loss": 0.5834, "step": 2691 }, { "epoch": 0.08269591128313827, "grad_norm": 0.3161037564277649, "learning_rate": 1.9921705697333636e-05, "loss": 0.6188, "step": 2692 }, { "epoch": 0.08272663041808742, "grad_norm": 0.5181129574775696, "learning_rate": 1.9921645329146936e-05, "loss": 0.5889, "step": 2693 }, { "epoch": 0.08275734955303658, "grad_norm": 0.3611733317375183, "learning_rate": 1.9921584937787542e-05, "loss": 0.658, "step": 2694 }, { "epoch": 0.08278806868798574, "grad_norm": 0.3489021062850952, "learning_rate": 1.992152452325559e-05, "loss": 0.5959, "step": 2695 }, { "epoch": 0.0828187878229349, "grad_norm": 0.3422315716743469, "learning_rate": 1.9921464085551226e-05, "loss": 0.6513, "step": 2696 }, { "epoch": 0.08284950695788407, "grad_norm": 0.37429267168045044, "learning_rate": 1.992140362467459e-05, "loss": 0.6203, "step": 2697 }, { "epoch": 0.08288022609283323, "grad_norm": 0.36002856492996216, "learning_rate": 1.9921343140625826e-05, "loss": 0.6883, "step": 2698 }, { "epoch": 0.08291094522778239, "grad_norm": 0.35953766107559204, "learning_rate": 1.9921282633405067e-05, "loss": 0.6783, "step": 2699 }, { "epoch": 0.08294166436273155, "grad_norm": 0.29967978596687317, "learning_rate": 1.992122210301246e-05, "loss": 0.5739, "step": 2700 }, { "epoch": 0.0829723834976807, "grad_norm": 0.3844905495643616, "learning_rate": 1.992116154944815e-05, "loss": 0.6931, "step": 2701 }, { "epoch": 0.08300310263262986, "grad_norm": 0.336186945438385, "learning_rate": 1.9921100972712272e-05, "loss": 0.6006, "step": 2702 }, { "epoch": 0.08303382176757902, "grad_norm": 0.34563153982162476, "learning_rate": 1.992104037280497e-05, "loss": 0.5421, "step": 2703 }, { "epoch": 0.08306454090252818, "grad_norm": 0.3917716443538666, "learning_rate": 1.9920979749726382e-05, "loss": 0.6443, "step": 2704 }, { "epoch": 0.08309526003747734, "grad_norm": 0.3517506718635559, "learning_rate": 1.9920919103476658e-05, "loss": 0.5966, "step": 2705 }, { "epoch": 0.0831259791724265, "grad_norm": 0.32867568731307983, "learning_rate": 1.992085843405593e-05, "loss": 0.6665, "step": 2706 }, { "epoch": 0.08315669830737567, "grad_norm": 0.38762763142585754, "learning_rate": 1.9920797741464347e-05, "loss": 0.6138, "step": 2707 }, { "epoch": 0.08318741744232483, "grad_norm": 0.3345802426338196, "learning_rate": 1.9920737025702048e-05, "loss": 0.6551, "step": 2708 }, { "epoch": 0.08321813657727399, "grad_norm": 0.6928338408470154, "learning_rate": 1.9920676286769172e-05, "loss": 0.5185, "step": 2709 }, { "epoch": 0.08324885571222314, "grad_norm": 0.348686546087265, "learning_rate": 1.992061552466587e-05, "loss": 0.6363, "step": 2710 }, { "epoch": 0.0832795748471723, "grad_norm": 0.4703623652458191, "learning_rate": 1.9920554739392273e-05, "loss": 0.5719, "step": 2711 }, { "epoch": 0.08331029398212146, "grad_norm": 0.3457019627094269, "learning_rate": 1.992049393094853e-05, "loss": 0.6378, "step": 2712 }, { "epoch": 0.08334101311707062, "grad_norm": 0.3253428637981415, "learning_rate": 1.9920433099334776e-05, "loss": 0.63, "step": 2713 }, { "epoch": 0.08337173225201978, "grad_norm": 0.35192522406578064, "learning_rate": 1.992037224455116e-05, "loss": 0.5784, "step": 2714 }, { "epoch": 0.08340245138696895, "grad_norm": 0.3452894687652588, "learning_rate": 1.9920311366597824e-05, "loss": 0.5397, "step": 2715 }, { "epoch": 0.0834331705219181, "grad_norm": 0.35289767384529114, "learning_rate": 1.9920250465474908e-05, "loss": 0.7256, "step": 2716 }, { "epoch": 0.08346388965686727, "grad_norm": 0.37886708974838257, "learning_rate": 1.992018954118255e-05, "loss": 0.6043, "step": 2717 }, { "epoch": 0.08349460879181642, "grad_norm": 0.3592909872531891, "learning_rate": 1.99201285937209e-05, "loss": 0.6519, "step": 2718 }, { "epoch": 0.08352532792676558, "grad_norm": 0.3316820561885834, "learning_rate": 1.9920067623090096e-05, "loss": 0.6436, "step": 2719 }, { "epoch": 0.08355604706171474, "grad_norm": 0.33537569642066956, "learning_rate": 1.992000662929028e-05, "loss": 0.6579, "step": 2720 }, { "epoch": 0.0835867661966639, "grad_norm": 0.3264249265193939, "learning_rate": 1.9919945612321597e-05, "loss": 0.5952, "step": 2721 }, { "epoch": 0.08361748533161306, "grad_norm": 0.3459533452987671, "learning_rate": 1.9919884572184188e-05, "loss": 0.6165, "step": 2722 }, { "epoch": 0.08364820446656222, "grad_norm": 0.33968856930732727, "learning_rate": 1.9919823508878196e-05, "loss": 0.5776, "step": 2723 }, { "epoch": 0.08367892360151138, "grad_norm": 0.3457880914211273, "learning_rate": 1.991976242240376e-05, "loss": 0.606, "step": 2724 }, { "epoch": 0.08370964273646055, "grad_norm": 0.37574470043182373, "learning_rate": 1.9919701312761032e-05, "loss": 0.6106, "step": 2725 }, { "epoch": 0.08374036187140971, "grad_norm": 0.3702535033226013, "learning_rate": 1.9919640179950148e-05, "loss": 0.5971, "step": 2726 }, { "epoch": 0.08377108100635886, "grad_norm": 0.32573631405830383, "learning_rate": 1.991957902397125e-05, "loss": 0.6145, "step": 2727 }, { "epoch": 0.08380180014130802, "grad_norm": 0.3460420072078705, "learning_rate": 1.991951784482448e-05, "loss": 0.5441, "step": 2728 }, { "epoch": 0.08383251927625718, "grad_norm": 0.32881563901901245, "learning_rate": 1.9919456642509985e-05, "loss": 0.6357, "step": 2729 }, { "epoch": 0.08386323841120634, "grad_norm": 0.3538065254688263, "learning_rate": 1.9919395417027907e-05, "loss": 0.6155, "step": 2730 }, { "epoch": 0.0838939575461555, "grad_norm": 0.316703200340271, "learning_rate": 1.991933416837839e-05, "loss": 0.6628, "step": 2731 }, { "epoch": 0.08392467668110466, "grad_norm": 0.357222318649292, "learning_rate": 1.9919272896561573e-05, "loss": 0.6162, "step": 2732 }, { "epoch": 0.08395539581605382, "grad_norm": 0.3429494798183441, "learning_rate": 1.9919211601577603e-05, "loss": 0.627, "step": 2733 }, { "epoch": 0.08398611495100299, "grad_norm": 0.33518558740615845, "learning_rate": 1.9919150283426617e-05, "loss": 0.6616, "step": 2734 }, { "epoch": 0.08401683408595213, "grad_norm": 0.3606233298778534, "learning_rate": 1.9919088942108768e-05, "loss": 0.6587, "step": 2735 }, { "epoch": 0.0840475532209013, "grad_norm": 0.3249228298664093, "learning_rate": 1.9919027577624192e-05, "loss": 0.6269, "step": 2736 }, { "epoch": 0.08407827235585046, "grad_norm": 0.5909728407859802, "learning_rate": 1.9918966189973034e-05, "loss": 0.5614, "step": 2737 }, { "epoch": 0.08410899149079962, "grad_norm": 0.3120335340499878, "learning_rate": 1.9918904779155438e-05, "loss": 0.6058, "step": 2738 }, { "epoch": 0.08413971062574878, "grad_norm": 0.33864280581474304, "learning_rate": 1.9918843345171548e-05, "loss": 0.6584, "step": 2739 }, { "epoch": 0.08417042976069794, "grad_norm": 0.42759689688682556, "learning_rate": 1.9918781888021503e-05, "loss": 0.6482, "step": 2740 }, { "epoch": 0.0842011488956471, "grad_norm": 0.3346795439720154, "learning_rate": 1.9918720407705453e-05, "loss": 0.6814, "step": 2741 }, { "epoch": 0.08423186803059626, "grad_norm": 0.34863877296447754, "learning_rate": 1.991865890422354e-05, "loss": 0.5982, "step": 2742 }, { "epoch": 0.08426258716554542, "grad_norm": 0.3381490409374237, "learning_rate": 1.9918597377575905e-05, "loss": 0.5952, "step": 2743 }, { "epoch": 0.08429330630049457, "grad_norm": 0.6565993428230286, "learning_rate": 1.9918535827762695e-05, "loss": 0.6715, "step": 2744 }, { "epoch": 0.08432402543544373, "grad_norm": 0.35197967290878296, "learning_rate": 1.991847425478405e-05, "loss": 0.6102, "step": 2745 }, { "epoch": 0.0843547445703929, "grad_norm": 0.3346687853336334, "learning_rate": 1.9918412658640113e-05, "loss": 0.6287, "step": 2746 }, { "epoch": 0.08438546370534206, "grad_norm": 0.3382863998413086, "learning_rate": 1.9918351039331038e-05, "loss": 0.6486, "step": 2747 }, { "epoch": 0.08441618284029122, "grad_norm": 0.7111436724662781, "learning_rate": 1.9918289396856957e-05, "loss": 0.5822, "step": 2748 }, { "epoch": 0.08444690197524038, "grad_norm": 0.3520011305809021, "learning_rate": 1.9918227731218013e-05, "loss": 0.6151, "step": 2749 }, { "epoch": 0.08447762111018954, "grad_norm": 0.385083943605423, "learning_rate": 1.991816604241436e-05, "loss": 0.6438, "step": 2750 }, { "epoch": 0.0845083402451387, "grad_norm": 0.41696402430534363, "learning_rate": 1.9918104330446143e-05, "loss": 0.6688, "step": 2751 }, { "epoch": 0.08453905938008785, "grad_norm": 0.35154059529304504, "learning_rate": 1.9918042595313495e-05, "loss": 0.6262, "step": 2752 }, { "epoch": 0.08456977851503701, "grad_norm": 0.3268583118915558, "learning_rate": 1.9917980837016566e-05, "loss": 0.5999, "step": 2753 }, { "epoch": 0.08460049764998617, "grad_norm": 0.3404058516025543, "learning_rate": 1.99179190555555e-05, "loss": 0.6448, "step": 2754 }, { "epoch": 0.08463121678493533, "grad_norm": 0.519973635673523, "learning_rate": 1.9917857250930445e-05, "loss": 0.6016, "step": 2755 }, { "epoch": 0.0846619359198845, "grad_norm": 0.33483049273490906, "learning_rate": 1.9917795423141537e-05, "loss": 0.6214, "step": 2756 }, { "epoch": 0.08469265505483366, "grad_norm": 0.38468945026397705, "learning_rate": 1.9917733572188927e-05, "loss": 0.633, "step": 2757 }, { "epoch": 0.08472337418978282, "grad_norm": 0.34885361790657043, "learning_rate": 1.991767169807276e-05, "loss": 0.6027, "step": 2758 }, { "epoch": 0.08475409332473198, "grad_norm": 0.3374369144439697, "learning_rate": 1.9917609800793173e-05, "loss": 0.5528, "step": 2759 }, { "epoch": 0.08478481245968114, "grad_norm": 0.3894156813621521, "learning_rate": 1.991754788035032e-05, "loss": 0.6417, "step": 2760 }, { "epoch": 0.08481553159463029, "grad_norm": 0.33984124660491943, "learning_rate": 1.991748593674434e-05, "loss": 0.5777, "step": 2761 }, { "epoch": 0.08484625072957945, "grad_norm": 0.36440587043762207, "learning_rate": 1.9917423969975378e-05, "loss": 0.604, "step": 2762 }, { "epoch": 0.08487696986452861, "grad_norm": 0.29852429032325745, "learning_rate": 1.991736198004358e-05, "loss": 0.5286, "step": 2763 }, { "epoch": 0.08490768899947777, "grad_norm": 0.3091351389884949, "learning_rate": 1.9917299966949093e-05, "loss": 0.5483, "step": 2764 }, { "epoch": 0.08493840813442693, "grad_norm": 0.3964248299598694, "learning_rate": 1.9917237930692055e-05, "loss": 0.6482, "step": 2765 }, { "epoch": 0.0849691272693761, "grad_norm": 0.3225655257701874, "learning_rate": 1.991717587127262e-05, "loss": 0.5907, "step": 2766 }, { "epoch": 0.08499984640432526, "grad_norm": 0.3694826662540436, "learning_rate": 1.9917113788690922e-05, "loss": 0.6675, "step": 2767 }, { "epoch": 0.08503056553927442, "grad_norm": 0.3510664701461792, "learning_rate": 1.9917051682947116e-05, "loss": 0.6474, "step": 2768 }, { "epoch": 0.08506128467422358, "grad_norm": 0.3030170500278473, "learning_rate": 1.9916989554041346e-05, "loss": 0.6736, "step": 2769 }, { "epoch": 0.08509200380917273, "grad_norm": 0.41015684604644775, "learning_rate": 1.9916927401973752e-05, "loss": 0.637, "step": 2770 }, { "epoch": 0.08512272294412189, "grad_norm": 0.3464452624320984, "learning_rate": 1.991686522674448e-05, "loss": 0.6283, "step": 2771 }, { "epoch": 0.08515344207907105, "grad_norm": 0.29857194423675537, "learning_rate": 1.9916803028353676e-05, "loss": 0.5981, "step": 2772 }, { "epoch": 0.08518416121402021, "grad_norm": 0.332711398601532, "learning_rate": 1.9916740806801486e-05, "loss": 0.6546, "step": 2773 }, { "epoch": 0.08521488034896937, "grad_norm": 0.32089757919311523, "learning_rate": 1.991667856208806e-05, "loss": 0.5655, "step": 2774 }, { "epoch": 0.08524559948391854, "grad_norm": 0.48914217948913574, "learning_rate": 1.9916616294213532e-05, "loss": 0.5204, "step": 2775 }, { "epoch": 0.0852763186188677, "grad_norm": 0.491437703371048, "learning_rate": 1.991655400317806e-05, "loss": 0.6461, "step": 2776 }, { "epoch": 0.08530703775381686, "grad_norm": 0.35501065850257874, "learning_rate": 1.991649168898178e-05, "loss": 0.6365, "step": 2777 }, { "epoch": 0.085337756888766, "grad_norm": 0.33620485663414, "learning_rate": 1.991642935162484e-05, "loss": 0.6305, "step": 2778 }, { "epoch": 0.08536847602371517, "grad_norm": 0.31454193592071533, "learning_rate": 1.991636699110739e-05, "loss": 0.6338, "step": 2779 }, { "epoch": 0.08539919515866433, "grad_norm": 0.3342844843864441, "learning_rate": 1.991630460742957e-05, "loss": 0.6707, "step": 2780 }, { "epoch": 0.08542991429361349, "grad_norm": 0.32271862030029297, "learning_rate": 1.9916242200591533e-05, "loss": 0.6007, "step": 2781 }, { "epoch": 0.08546063342856265, "grad_norm": 0.4063105285167694, "learning_rate": 1.9916179770593414e-05, "loss": 0.5967, "step": 2782 }, { "epoch": 0.08549135256351181, "grad_norm": 0.32019540667533875, "learning_rate": 1.9916117317435365e-05, "loss": 0.6065, "step": 2783 }, { "epoch": 0.08552207169846097, "grad_norm": 0.29301518201828003, "learning_rate": 1.9916054841117535e-05, "loss": 0.5687, "step": 2784 }, { "epoch": 0.08555279083341014, "grad_norm": 0.35023796558380127, "learning_rate": 1.9915992341640065e-05, "loss": 0.5872, "step": 2785 }, { "epoch": 0.0855835099683593, "grad_norm": 0.33498892188072205, "learning_rate": 1.99159298190031e-05, "loss": 0.6183, "step": 2786 }, { "epoch": 0.08561422910330845, "grad_norm": 0.3699277937412262, "learning_rate": 1.991586727320679e-05, "loss": 0.6039, "step": 2787 }, { "epoch": 0.0856449482382576, "grad_norm": 0.3337472379207611, "learning_rate": 1.9915804704251277e-05, "loss": 0.6237, "step": 2788 }, { "epoch": 0.08567566737320677, "grad_norm": 0.30646103620529175, "learning_rate": 1.9915742112136715e-05, "loss": 0.6427, "step": 2789 }, { "epoch": 0.08570638650815593, "grad_norm": 0.3532959222793579, "learning_rate": 1.9915679496863244e-05, "loss": 0.5626, "step": 2790 }, { "epoch": 0.08573710564310509, "grad_norm": 0.41130560636520386, "learning_rate": 1.9915616858431005e-05, "loss": 0.6085, "step": 2791 }, { "epoch": 0.08576782477805425, "grad_norm": 0.3900735080242157, "learning_rate": 1.9915554196840157e-05, "loss": 0.6539, "step": 2792 }, { "epoch": 0.08579854391300341, "grad_norm": 0.3587592840194702, "learning_rate": 1.9915491512090836e-05, "loss": 0.6383, "step": 2793 }, { "epoch": 0.08582926304795258, "grad_norm": 0.33698222041130066, "learning_rate": 1.9915428804183193e-05, "loss": 0.59, "step": 2794 }, { "epoch": 0.08585998218290172, "grad_norm": 0.3374677002429962, "learning_rate": 1.9915366073117374e-05, "loss": 0.6414, "step": 2795 }, { "epoch": 0.08589070131785088, "grad_norm": 0.3035382032394409, "learning_rate": 1.9915303318893522e-05, "loss": 0.552, "step": 2796 }, { "epoch": 0.08592142045280005, "grad_norm": 0.33329853415489197, "learning_rate": 1.991524054151179e-05, "loss": 0.5586, "step": 2797 }, { "epoch": 0.08595213958774921, "grad_norm": 0.35550135374069214, "learning_rate": 1.9915177740972322e-05, "loss": 0.6133, "step": 2798 }, { "epoch": 0.08598285872269837, "grad_norm": 0.3372306525707245, "learning_rate": 1.9915114917275262e-05, "loss": 0.5544, "step": 2799 }, { "epoch": 0.08601357785764753, "grad_norm": 0.3994026780128479, "learning_rate": 1.991505207042076e-05, "loss": 0.6421, "step": 2800 }, { "epoch": 0.08604429699259669, "grad_norm": 0.39369380474090576, "learning_rate": 1.991498920040896e-05, "loss": 0.6721, "step": 2801 }, { "epoch": 0.08607501612754585, "grad_norm": 0.347217857837677, "learning_rate": 1.991492630724001e-05, "loss": 0.6608, "step": 2802 }, { "epoch": 0.08610573526249501, "grad_norm": 0.3108981251716614, "learning_rate": 1.9914863390914057e-05, "loss": 0.5695, "step": 2803 }, { "epoch": 0.08613645439744416, "grad_norm": 0.3242701590061188, "learning_rate": 1.9914800451431252e-05, "loss": 0.625, "step": 2804 }, { "epoch": 0.08616717353239332, "grad_norm": 0.3623962700366974, "learning_rate": 1.9914737488791735e-05, "loss": 0.5836, "step": 2805 }, { "epoch": 0.08619789266734249, "grad_norm": 0.3292767107486725, "learning_rate": 1.9914674502995655e-05, "loss": 0.6065, "step": 2806 }, { "epoch": 0.08622861180229165, "grad_norm": 0.32808005809783936, "learning_rate": 1.9914611494043165e-05, "loss": 0.633, "step": 2807 }, { "epoch": 0.08625933093724081, "grad_norm": 0.32331836223602295, "learning_rate": 1.99145484619344e-05, "loss": 0.6504, "step": 2808 }, { "epoch": 0.08629005007218997, "grad_norm": 0.3609849810600281, "learning_rate": 1.991448540666952e-05, "loss": 0.5794, "step": 2809 }, { "epoch": 0.08632076920713913, "grad_norm": 0.3300676643848419, "learning_rate": 1.9914422328248663e-05, "loss": 0.6674, "step": 2810 }, { "epoch": 0.08635148834208829, "grad_norm": 0.46421581506729126, "learning_rate": 1.9914359226671985e-05, "loss": 0.6618, "step": 2811 }, { "epoch": 0.08638220747703744, "grad_norm": 0.3513796329498291, "learning_rate": 1.9914296101939628e-05, "loss": 0.6679, "step": 2812 }, { "epoch": 0.0864129266119866, "grad_norm": 0.37734779715538025, "learning_rate": 1.9914232954051736e-05, "loss": 0.6308, "step": 2813 }, { "epoch": 0.08644364574693576, "grad_norm": 0.33486729860305786, "learning_rate": 1.9914169783008463e-05, "loss": 0.5969, "step": 2814 }, { "epoch": 0.08647436488188492, "grad_norm": 0.3413872718811035, "learning_rate": 1.9914106588809955e-05, "loss": 0.6205, "step": 2815 }, { "epoch": 0.08650508401683409, "grad_norm": 0.3983655273914337, "learning_rate": 1.991404337145636e-05, "loss": 0.6502, "step": 2816 }, { "epoch": 0.08653580315178325, "grad_norm": 0.4158492982387543, "learning_rate": 1.9913980130947822e-05, "loss": 0.6394, "step": 2817 }, { "epoch": 0.08656652228673241, "grad_norm": 0.34144526720046997, "learning_rate": 1.9913916867284488e-05, "loss": 0.5334, "step": 2818 }, { "epoch": 0.08659724142168157, "grad_norm": 0.3516261577606201, "learning_rate": 1.9913853580466512e-05, "loss": 0.6151, "step": 2819 }, { "epoch": 0.08662796055663073, "grad_norm": 0.317141592502594, "learning_rate": 1.991379027049404e-05, "loss": 0.6478, "step": 2820 }, { "epoch": 0.08665867969157988, "grad_norm": 0.32025477290153503, "learning_rate": 1.991372693736722e-05, "loss": 0.6165, "step": 2821 }, { "epoch": 0.08668939882652904, "grad_norm": 0.34148645401000977, "learning_rate": 1.9913663581086192e-05, "loss": 0.661, "step": 2822 }, { "epoch": 0.0867201179614782, "grad_norm": 0.3270363211631775, "learning_rate": 1.9913600201651113e-05, "loss": 0.6277, "step": 2823 }, { "epoch": 0.08675083709642736, "grad_norm": 0.3432461619377136, "learning_rate": 1.991353679906213e-05, "loss": 0.5838, "step": 2824 }, { "epoch": 0.08678155623137652, "grad_norm": 0.3382496237754822, "learning_rate": 1.9913473373319388e-05, "loss": 0.69, "step": 2825 }, { "epoch": 0.08681227536632569, "grad_norm": 0.34858521819114685, "learning_rate": 1.9913409924423037e-05, "loss": 0.624, "step": 2826 }, { "epoch": 0.08684299450127485, "grad_norm": 0.33826982975006104, "learning_rate": 1.9913346452373226e-05, "loss": 0.6343, "step": 2827 }, { "epoch": 0.08687371363622401, "grad_norm": 0.36341753602027893, "learning_rate": 1.99132829571701e-05, "loss": 0.623, "step": 2828 }, { "epoch": 0.08690443277117317, "grad_norm": 0.3546089828014374, "learning_rate": 1.991321943881381e-05, "loss": 0.6001, "step": 2829 }, { "epoch": 0.08693515190612232, "grad_norm": 0.3757322132587433, "learning_rate": 1.9913155897304503e-05, "loss": 0.6556, "step": 2830 }, { "epoch": 0.08696587104107148, "grad_norm": 0.2994716465473175, "learning_rate": 1.9913092332642326e-05, "loss": 0.655, "step": 2831 }, { "epoch": 0.08699659017602064, "grad_norm": 0.4100935757160187, "learning_rate": 1.9913028744827435e-05, "loss": 0.5849, "step": 2832 }, { "epoch": 0.0870273093109698, "grad_norm": 0.35454490780830383, "learning_rate": 1.9912965133859972e-05, "loss": 0.5888, "step": 2833 }, { "epoch": 0.08705802844591896, "grad_norm": 0.32594066858291626, "learning_rate": 1.9912901499740084e-05, "loss": 0.5506, "step": 2834 }, { "epoch": 0.08708874758086813, "grad_norm": 0.5282639265060425, "learning_rate": 1.9912837842467922e-05, "loss": 0.5824, "step": 2835 }, { "epoch": 0.08711946671581729, "grad_norm": 0.3428315222263336, "learning_rate": 1.9912774162043636e-05, "loss": 0.6716, "step": 2836 }, { "epoch": 0.08715018585076645, "grad_norm": 0.35618677735328674, "learning_rate": 1.9912710458467375e-05, "loss": 0.7194, "step": 2837 }, { "epoch": 0.0871809049857156, "grad_norm": 0.31580251455307007, "learning_rate": 1.9912646731739284e-05, "loss": 0.6413, "step": 2838 }, { "epoch": 0.08721162412066476, "grad_norm": 0.418367862701416, "learning_rate": 1.991258298185952e-05, "loss": 0.6989, "step": 2839 }, { "epoch": 0.08724234325561392, "grad_norm": 0.3112344443798065, "learning_rate": 1.991251920882822e-05, "loss": 0.5506, "step": 2840 }, { "epoch": 0.08727306239056308, "grad_norm": 0.3268982470035553, "learning_rate": 1.991245541264554e-05, "loss": 0.5543, "step": 2841 }, { "epoch": 0.08730378152551224, "grad_norm": 0.32092201709747314, "learning_rate": 1.991239159331163e-05, "loss": 0.5815, "step": 2842 }, { "epoch": 0.0873345006604614, "grad_norm": 0.3718336522579193, "learning_rate": 1.9912327750826633e-05, "loss": 0.6201, "step": 2843 }, { "epoch": 0.08736521979541056, "grad_norm": 0.31510481238365173, "learning_rate": 1.9912263885190705e-05, "loss": 0.5851, "step": 2844 }, { "epoch": 0.08739593893035973, "grad_norm": 0.309508353471756, "learning_rate": 1.9912199996403995e-05, "loss": 0.6237, "step": 2845 }, { "epoch": 0.08742665806530889, "grad_norm": 0.34252476692199707, "learning_rate": 1.9912136084466648e-05, "loss": 0.5881, "step": 2846 }, { "epoch": 0.08745737720025804, "grad_norm": 0.35060063004493713, "learning_rate": 1.9912072149378813e-05, "loss": 0.5722, "step": 2847 }, { "epoch": 0.0874880963352072, "grad_norm": 0.3413955867290497, "learning_rate": 1.9912008191140645e-05, "loss": 0.6421, "step": 2848 }, { "epoch": 0.08751881547015636, "grad_norm": 0.5456919074058533, "learning_rate": 1.9911944209752285e-05, "loss": 0.671, "step": 2849 }, { "epoch": 0.08754953460510552, "grad_norm": 0.29840075969696045, "learning_rate": 1.9911880205213888e-05, "loss": 0.5522, "step": 2850 }, { "epoch": 0.08758025374005468, "grad_norm": 0.33422204852104187, "learning_rate": 1.9911816177525605e-05, "loss": 0.6457, "step": 2851 }, { "epoch": 0.08761097287500384, "grad_norm": 0.4383237063884735, "learning_rate": 1.9911752126687584e-05, "loss": 0.6568, "step": 2852 }, { "epoch": 0.087641692009953, "grad_norm": 0.35408708453178406, "learning_rate": 1.9911688052699973e-05, "loss": 0.6203, "step": 2853 }, { "epoch": 0.08767241114490217, "grad_norm": 0.3402441143989563, "learning_rate": 1.9911623955562917e-05, "loss": 0.6541, "step": 2854 }, { "epoch": 0.08770313027985131, "grad_norm": 1.3859615325927734, "learning_rate": 1.9911559835276576e-05, "loss": 0.6043, "step": 2855 }, { "epoch": 0.08773384941480047, "grad_norm": 0.43770483136177063, "learning_rate": 1.9911495691841096e-05, "loss": 0.5152, "step": 2856 }, { "epoch": 0.08776456854974964, "grad_norm": 0.33351629972457886, "learning_rate": 1.9911431525256622e-05, "loss": 0.6532, "step": 2857 }, { "epoch": 0.0877952876846988, "grad_norm": 0.3767330050468445, "learning_rate": 1.991136733552331e-05, "loss": 0.6132, "step": 2858 }, { "epoch": 0.08782600681964796, "grad_norm": 0.37083521485328674, "learning_rate": 1.9911303122641303e-05, "loss": 0.6347, "step": 2859 }, { "epoch": 0.08785672595459712, "grad_norm": 0.35362112522125244, "learning_rate": 1.991123888661076e-05, "loss": 0.5693, "step": 2860 }, { "epoch": 0.08788744508954628, "grad_norm": 0.3187294900417328, "learning_rate": 1.9911174627431825e-05, "loss": 0.5534, "step": 2861 }, { "epoch": 0.08791816422449544, "grad_norm": 0.3199913203716278, "learning_rate": 1.991111034510465e-05, "loss": 0.6294, "step": 2862 }, { "epoch": 0.0879488833594446, "grad_norm": 0.35881945490837097, "learning_rate": 1.9911046039629384e-05, "loss": 0.6256, "step": 2863 }, { "epoch": 0.08797960249439375, "grad_norm": 0.33297836780548096, "learning_rate": 1.9910981711006176e-05, "loss": 0.556, "step": 2864 }, { "epoch": 0.08801032162934291, "grad_norm": 0.3296140432357788, "learning_rate": 1.9910917359235183e-05, "loss": 0.5923, "step": 2865 }, { "epoch": 0.08804104076429208, "grad_norm": 0.5501864552497864, "learning_rate": 1.9910852984316545e-05, "loss": 0.5133, "step": 2866 }, { "epoch": 0.08807175989924124, "grad_norm": 0.35824161767959595, "learning_rate": 1.9910788586250422e-05, "loss": 0.5847, "step": 2867 }, { "epoch": 0.0881024790341904, "grad_norm": 0.3572606146335602, "learning_rate": 1.9910724165036958e-05, "loss": 0.6512, "step": 2868 }, { "epoch": 0.08813319816913956, "grad_norm": 0.34366223216056824, "learning_rate": 1.9910659720676303e-05, "loss": 0.5165, "step": 2869 }, { "epoch": 0.08816391730408872, "grad_norm": 0.41351693868637085, "learning_rate": 1.9910595253168612e-05, "loss": 0.6976, "step": 2870 }, { "epoch": 0.08819463643903788, "grad_norm": 0.31898489594459534, "learning_rate": 1.9910530762514035e-05, "loss": 0.6606, "step": 2871 }, { "epoch": 0.08822535557398703, "grad_norm": 0.30670279264450073, "learning_rate": 1.991046624871272e-05, "loss": 0.6231, "step": 2872 }, { "epoch": 0.08825607470893619, "grad_norm": 0.39864426851272583, "learning_rate": 1.991040171176482e-05, "loss": 0.6758, "step": 2873 }, { "epoch": 0.08828679384388535, "grad_norm": 0.3141469657421112, "learning_rate": 1.9910337151670485e-05, "loss": 0.6179, "step": 2874 }, { "epoch": 0.08831751297883451, "grad_norm": 0.35336899757385254, "learning_rate": 1.9910272568429863e-05, "loss": 0.5648, "step": 2875 }, { "epoch": 0.08834823211378368, "grad_norm": 0.3492419719696045, "learning_rate": 1.991020796204311e-05, "loss": 0.659, "step": 2876 }, { "epoch": 0.08837895124873284, "grad_norm": 0.32687893509864807, "learning_rate": 1.9910143332510375e-05, "loss": 0.6416, "step": 2877 }, { "epoch": 0.088409670383682, "grad_norm": 0.3387722671031952, "learning_rate": 1.9910078679831802e-05, "loss": 0.6063, "step": 2878 }, { "epoch": 0.08844038951863116, "grad_norm": 0.34158802032470703, "learning_rate": 1.9910014004007553e-05, "loss": 0.6731, "step": 2879 }, { "epoch": 0.08847110865358032, "grad_norm": 0.3612844944000244, "learning_rate": 1.9909949305037775e-05, "loss": 0.6836, "step": 2880 }, { "epoch": 0.08850182778852947, "grad_norm": 0.35798513889312744, "learning_rate": 1.9909884582922616e-05, "loss": 0.6321, "step": 2881 }, { "epoch": 0.08853254692347863, "grad_norm": 0.36449822783470154, "learning_rate": 1.990981983766223e-05, "loss": 0.7124, "step": 2882 }, { "epoch": 0.08856326605842779, "grad_norm": 0.3064562678337097, "learning_rate": 1.990975506925677e-05, "loss": 0.5711, "step": 2883 }, { "epoch": 0.08859398519337695, "grad_norm": 0.3235557973384857, "learning_rate": 1.9909690277706382e-05, "loss": 0.583, "step": 2884 }, { "epoch": 0.08862470432832611, "grad_norm": 0.3273548185825348, "learning_rate": 1.990962546301122e-05, "loss": 0.6964, "step": 2885 }, { "epoch": 0.08865542346327528, "grad_norm": 0.3228119909763336, "learning_rate": 1.9909560625171437e-05, "loss": 0.6063, "step": 2886 }, { "epoch": 0.08868614259822444, "grad_norm": 0.3395960330963135, "learning_rate": 1.9909495764187185e-05, "loss": 0.6326, "step": 2887 }, { "epoch": 0.0887168617331736, "grad_norm": 0.4731431007385254, "learning_rate": 1.990943088005861e-05, "loss": 0.6298, "step": 2888 }, { "epoch": 0.08874758086812275, "grad_norm": 0.34216126799583435, "learning_rate": 1.9909365972785868e-05, "loss": 0.6779, "step": 2889 }, { "epoch": 0.08877830000307191, "grad_norm": 0.3331107497215271, "learning_rate": 1.990930104236911e-05, "loss": 0.6074, "step": 2890 }, { "epoch": 0.08880901913802107, "grad_norm": 0.34649214148521423, "learning_rate": 1.9909236088808488e-05, "loss": 0.6713, "step": 2891 }, { "epoch": 0.08883973827297023, "grad_norm": 0.3513472378253937, "learning_rate": 1.9909171112104153e-05, "loss": 0.5745, "step": 2892 }, { "epoch": 0.08887045740791939, "grad_norm": 0.3868349492549896, "learning_rate": 1.990910611225626e-05, "loss": 0.5796, "step": 2893 }, { "epoch": 0.08890117654286855, "grad_norm": 0.3720909059047699, "learning_rate": 1.990904108926495e-05, "loss": 0.5836, "step": 2894 }, { "epoch": 0.08893189567781772, "grad_norm": 0.3021162450313568, "learning_rate": 1.990897604313039e-05, "loss": 0.6363, "step": 2895 }, { "epoch": 0.08896261481276688, "grad_norm": 0.339372843503952, "learning_rate": 1.9908910973852718e-05, "loss": 0.5994, "step": 2896 }, { "epoch": 0.08899333394771604, "grad_norm": 0.3337930142879486, "learning_rate": 1.9908845881432095e-05, "loss": 0.6792, "step": 2897 }, { "epoch": 0.08902405308266519, "grad_norm": 0.3452436029911041, "learning_rate": 1.990878076586867e-05, "loss": 0.6242, "step": 2898 }, { "epoch": 0.08905477221761435, "grad_norm": 0.31822165846824646, "learning_rate": 1.9908715627162597e-05, "loss": 0.6051, "step": 2899 }, { "epoch": 0.08908549135256351, "grad_norm": 0.3396959602832794, "learning_rate": 1.9908650465314027e-05, "loss": 0.6757, "step": 2900 }, { "epoch": 0.08911621048751267, "grad_norm": 0.332762211561203, "learning_rate": 1.990858528032311e-05, "loss": 0.628, "step": 2901 }, { "epoch": 0.08914692962246183, "grad_norm": 0.3452206552028656, "learning_rate": 1.990852007219e-05, "loss": 0.5918, "step": 2902 }, { "epoch": 0.089177648757411, "grad_norm": 0.34633803367614746, "learning_rate": 1.990845484091485e-05, "loss": 0.6361, "step": 2903 }, { "epoch": 0.08920836789236015, "grad_norm": 0.3373040556907654, "learning_rate": 1.9908389586497814e-05, "loss": 0.5998, "step": 2904 }, { "epoch": 0.08923908702730932, "grad_norm": 0.32763364911079407, "learning_rate": 1.990832430893904e-05, "loss": 0.6768, "step": 2905 }, { "epoch": 0.08926980616225848, "grad_norm": 0.33780422806739807, "learning_rate": 1.990825900823868e-05, "loss": 0.5992, "step": 2906 }, { "epoch": 0.08930052529720763, "grad_norm": 0.33109140396118164, "learning_rate": 1.9908193684396893e-05, "loss": 0.5952, "step": 2907 }, { "epoch": 0.08933124443215679, "grad_norm": 0.33682510256767273, "learning_rate": 1.9908128337413826e-05, "loss": 0.6728, "step": 2908 }, { "epoch": 0.08936196356710595, "grad_norm": 0.33662328124046326, "learning_rate": 1.9908062967289633e-05, "loss": 0.5544, "step": 2909 }, { "epoch": 0.08939268270205511, "grad_norm": 0.30653098225593567, "learning_rate": 1.9907997574024472e-05, "loss": 0.6151, "step": 2910 }, { "epoch": 0.08942340183700427, "grad_norm": 0.4054519832134247, "learning_rate": 1.9907932157618487e-05, "loss": 0.6017, "step": 2911 }, { "epoch": 0.08945412097195343, "grad_norm": 0.3121623694896698, "learning_rate": 1.9907866718071832e-05, "loss": 0.6051, "step": 2912 }, { "epoch": 0.0894848401069026, "grad_norm": 0.5889284014701843, "learning_rate": 1.9907801255384664e-05, "loss": 0.5359, "step": 2913 }, { "epoch": 0.08951555924185176, "grad_norm": 0.3040056824684143, "learning_rate": 1.9907735769557136e-05, "loss": 0.6183, "step": 2914 }, { "epoch": 0.0895462783768009, "grad_norm": 0.3106307089328766, "learning_rate": 1.9907670260589395e-05, "loss": 0.5851, "step": 2915 }, { "epoch": 0.08957699751175006, "grad_norm": 0.4000803232192993, "learning_rate": 1.9907604728481603e-05, "loss": 0.5602, "step": 2916 }, { "epoch": 0.08960771664669923, "grad_norm": 0.45386168360710144, "learning_rate": 1.9907539173233907e-05, "loss": 0.5751, "step": 2917 }, { "epoch": 0.08963843578164839, "grad_norm": 0.31115126609802246, "learning_rate": 1.990747359484646e-05, "loss": 0.6519, "step": 2918 }, { "epoch": 0.08966915491659755, "grad_norm": 0.3603752851486206, "learning_rate": 1.9907407993319414e-05, "loss": 0.6236, "step": 2919 }, { "epoch": 0.08969987405154671, "grad_norm": 0.3088606894016266, "learning_rate": 1.9907342368652926e-05, "loss": 0.5799, "step": 2920 }, { "epoch": 0.08973059318649587, "grad_norm": 0.3614917993545532, "learning_rate": 1.9907276720847152e-05, "loss": 0.5758, "step": 2921 }, { "epoch": 0.08976131232144503, "grad_norm": 0.3321084678173065, "learning_rate": 1.990721104990224e-05, "loss": 0.6324, "step": 2922 }, { "epoch": 0.0897920314563942, "grad_norm": 0.348568856716156, "learning_rate": 1.9907145355818342e-05, "loss": 0.668, "step": 2923 }, { "epoch": 0.08982275059134334, "grad_norm": 0.3571713864803314, "learning_rate": 1.9907079638595615e-05, "loss": 0.6349, "step": 2924 }, { "epoch": 0.0898534697262925, "grad_norm": 0.35281437635421753, "learning_rate": 1.9907013898234212e-05, "loss": 0.6343, "step": 2925 }, { "epoch": 0.08988418886124167, "grad_norm": 0.35248422622680664, "learning_rate": 1.9906948134734287e-05, "loss": 0.5553, "step": 2926 }, { "epoch": 0.08991490799619083, "grad_norm": 0.3309333920478821, "learning_rate": 1.990688234809599e-05, "loss": 0.5623, "step": 2927 }, { "epoch": 0.08994562713113999, "grad_norm": 0.3356938362121582, "learning_rate": 1.990681653831948e-05, "loss": 0.6489, "step": 2928 }, { "epoch": 0.08997634626608915, "grad_norm": 0.31630897521972656, "learning_rate": 1.990675070540491e-05, "loss": 0.613, "step": 2929 }, { "epoch": 0.09000706540103831, "grad_norm": 0.3446984589099884, "learning_rate": 1.9906684849352428e-05, "loss": 0.6738, "step": 2930 }, { "epoch": 0.09003778453598747, "grad_norm": 0.33326825499534607, "learning_rate": 1.9906618970162196e-05, "loss": 0.5874, "step": 2931 }, { "epoch": 0.09006850367093662, "grad_norm": 0.3563658595085144, "learning_rate": 1.9906553067834357e-05, "loss": 0.587, "step": 2932 }, { "epoch": 0.09009922280588578, "grad_norm": 0.3508792519569397, "learning_rate": 1.9906487142369075e-05, "loss": 0.5906, "step": 2933 }, { "epoch": 0.09012994194083494, "grad_norm": 0.36717477440834045, "learning_rate": 1.9906421193766502e-05, "loss": 0.5918, "step": 2934 }, { "epoch": 0.0901606610757841, "grad_norm": 0.32346364855766296, "learning_rate": 1.9906355222026787e-05, "loss": 0.5731, "step": 2935 }, { "epoch": 0.09019138021073327, "grad_norm": 0.3407783508300781, "learning_rate": 1.9906289227150092e-05, "loss": 0.5307, "step": 2936 }, { "epoch": 0.09022209934568243, "grad_norm": 0.31510129570961, "learning_rate": 1.9906223209136564e-05, "loss": 0.531, "step": 2937 }, { "epoch": 0.09025281848063159, "grad_norm": 0.3410637378692627, "learning_rate": 1.9906157167986357e-05, "loss": 0.6489, "step": 2938 }, { "epoch": 0.09028353761558075, "grad_norm": 0.352766752243042, "learning_rate": 1.9906091103699633e-05, "loss": 0.6011, "step": 2939 }, { "epoch": 0.09031425675052991, "grad_norm": 0.32410386204719543, "learning_rate": 1.9906025016276538e-05, "loss": 0.5191, "step": 2940 }, { "epoch": 0.09034497588547906, "grad_norm": 0.5000545978546143, "learning_rate": 1.990595890571723e-05, "loss": 0.5787, "step": 2941 }, { "epoch": 0.09037569502042822, "grad_norm": 0.5013996958732605, "learning_rate": 1.9905892772021868e-05, "loss": 0.6701, "step": 2942 }, { "epoch": 0.09040641415537738, "grad_norm": 0.3647080659866333, "learning_rate": 1.9905826615190597e-05, "loss": 0.6515, "step": 2943 }, { "epoch": 0.09043713329032654, "grad_norm": 0.4589601755142212, "learning_rate": 1.990576043522358e-05, "loss": 0.6307, "step": 2944 }, { "epoch": 0.0904678524252757, "grad_norm": 0.3605164587497711, "learning_rate": 1.990569423212096e-05, "loss": 0.661, "step": 2945 }, { "epoch": 0.09049857156022487, "grad_norm": 0.31450337171554565, "learning_rate": 1.9905628005882906e-05, "loss": 0.5853, "step": 2946 }, { "epoch": 0.09052929069517403, "grad_norm": 0.32791391015052795, "learning_rate": 1.9905561756509565e-05, "loss": 0.549, "step": 2947 }, { "epoch": 0.09056000983012319, "grad_norm": 0.34124642610549927, "learning_rate": 1.9905495484001088e-05, "loss": 0.5848, "step": 2948 }, { "epoch": 0.09059072896507234, "grad_norm": 0.3467124104499817, "learning_rate": 1.990542918835764e-05, "loss": 0.5292, "step": 2949 }, { "epoch": 0.0906214481000215, "grad_norm": 0.35150575637817383, "learning_rate": 1.990536286957937e-05, "loss": 0.6124, "step": 2950 }, { "epoch": 0.09065216723497066, "grad_norm": 0.3905400037765503, "learning_rate": 1.990529652766643e-05, "loss": 0.6358, "step": 2951 }, { "epoch": 0.09068288636991982, "grad_norm": 0.32531070709228516, "learning_rate": 1.990523016261898e-05, "loss": 0.6339, "step": 2952 }, { "epoch": 0.09071360550486898, "grad_norm": 0.34192341566085815, "learning_rate": 1.990516377443717e-05, "loss": 0.5326, "step": 2953 }, { "epoch": 0.09074432463981814, "grad_norm": 0.35302433371543884, "learning_rate": 1.9905097363121166e-05, "loss": 0.6556, "step": 2954 }, { "epoch": 0.0907750437747673, "grad_norm": 0.3235360383987427, "learning_rate": 1.990503092867111e-05, "loss": 0.639, "step": 2955 }, { "epoch": 0.09080576290971647, "grad_norm": 0.4077363610267639, "learning_rate": 1.9904964471087163e-05, "loss": 0.639, "step": 2956 }, { "epoch": 0.09083648204466563, "grad_norm": 0.4379468858242035, "learning_rate": 1.990489799036948e-05, "loss": 0.6666, "step": 2957 }, { "epoch": 0.09086720117961478, "grad_norm": 0.33784446120262146, "learning_rate": 1.9904831486518216e-05, "loss": 0.6273, "step": 2958 }, { "epoch": 0.09089792031456394, "grad_norm": 0.370843768119812, "learning_rate": 1.9904764959533527e-05, "loss": 0.6934, "step": 2959 }, { "epoch": 0.0909286394495131, "grad_norm": 0.34431809186935425, "learning_rate": 1.9904698409415564e-05, "loss": 0.5961, "step": 2960 }, { "epoch": 0.09095935858446226, "grad_norm": 0.32182204723358154, "learning_rate": 1.9904631836164492e-05, "loss": 0.5857, "step": 2961 }, { "epoch": 0.09099007771941142, "grad_norm": 0.3572239875793457, "learning_rate": 1.990456523978046e-05, "loss": 0.622, "step": 2962 }, { "epoch": 0.09102079685436058, "grad_norm": 0.6481896042823792, "learning_rate": 1.990449862026362e-05, "loss": 0.5241, "step": 2963 }, { "epoch": 0.09105151598930974, "grad_norm": 0.35661813616752625, "learning_rate": 1.9904431977614135e-05, "loss": 0.5909, "step": 2964 }, { "epoch": 0.0910822351242589, "grad_norm": 0.3629101514816284, "learning_rate": 1.9904365311832154e-05, "loss": 0.6445, "step": 2965 }, { "epoch": 0.09111295425920805, "grad_norm": 0.3305891752243042, "learning_rate": 1.9904298622917838e-05, "loss": 0.6258, "step": 2966 }, { "epoch": 0.09114367339415722, "grad_norm": 0.48888248205184937, "learning_rate": 1.990423191087134e-05, "loss": 0.5333, "step": 2967 }, { "epoch": 0.09117439252910638, "grad_norm": 0.3347196578979492, "learning_rate": 1.990416517569282e-05, "loss": 0.6316, "step": 2968 }, { "epoch": 0.09120511166405554, "grad_norm": 0.3497677743434906, "learning_rate": 1.990409841738243e-05, "loss": 0.6389, "step": 2969 }, { "epoch": 0.0912358307990047, "grad_norm": 0.4351593852043152, "learning_rate": 1.9904031635940322e-05, "loss": 0.5856, "step": 2970 }, { "epoch": 0.09126654993395386, "grad_norm": 0.5817432403564453, "learning_rate": 1.990396483136666e-05, "loss": 0.6497, "step": 2971 }, { "epoch": 0.09129726906890302, "grad_norm": 0.3639446794986725, "learning_rate": 1.9903898003661593e-05, "loss": 0.641, "step": 2972 }, { "epoch": 0.09132798820385218, "grad_norm": 0.32966291904449463, "learning_rate": 1.9903831152825285e-05, "loss": 0.6218, "step": 2973 }, { "epoch": 0.09135870733880135, "grad_norm": 0.42890846729278564, "learning_rate": 1.9903764278857885e-05, "loss": 0.5123, "step": 2974 }, { "epoch": 0.09138942647375049, "grad_norm": 0.33704906702041626, "learning_rate": 1.9903697381759554e-05, "loss": 0.5445, "step": 2975 }, { "epoch": 0.09142014560869965, "grad_norm": 0.41058990359306335, "learning_rate": 1.9903630461530444e-05, "loss": 0.7687, "step": 2976 }, { "epoch": 0.09145086474364882, "grad_norm": 0.35460764169692993, "learning_rate": 1.9903563518170714e-05, "loss": 0.5701, "step": 2977 }, { "epoch": 0.09148158387859798, "grad_norm": 0.3408917188644409, "learning_rate": 1.990349655168052e-05, "loss": 0.5679, "step": 2978 }, { "epoch": 0.09151230301354714, "grad_norm": 0.3375881016254425, "learning_rate": 1.9903429562060018e-05, "loss": 0.6125, "step": 2979 }, { "epoch": 0.0915430221484963, "grad_norm": 0.3554691970348358, "learning_rate": 1.9903362549309366e-05, "loss": 0.6484, "step": 2980 }, { "epoch": 0.09157374128344546, "grad_norm": 0.30589696764945984, "learning_rate": 1.9903295513428715e-05, "loss": 0.6045, "step": 2981 }, { "epoch": 0.09160446041839462, "grad_norm": 0.31686222553253174, "learning_rate": 1.9903228454418227e-05, "loss": 0.5719, "step": 2982 }, { "epoch": 0.09163517955334378, "grad_norm": 0.3413853347301483, "learning_rate": 1.990316137227806e-05, "loss": 0.6269, "step": 2983 }, { "epoch": 0.09166589868829293, "grad_norm": 0.3475573658943176, "learning_rate": 1.990309426700837e-05, "loss": 0.5253, "step": 2984 }, { "epoch": 0.0916966178232421, "grad_norm": 0.4952273666858673, "learning_rate": 1.9903027138609308e-05, "loss": 0.5404, "step": 2985 }, { "epoch": 0.09172733695819126, "grad_norm": 0.3303493559360504, "learning_rate": 1.9902959987081033e-05, "loss": 0.5733, "step": 2986 }, { "epoch": 0.09175805609314042, "grad_norm": 0.3828124701976776, "learning_rate": 1.9902892812423706e-05, "loss": 0.6357, "step": 2987 }, { "epoch": 0.09178877522808958, "grad_norm": 0.3044688403606415, "learning_rate": 1.990282561463748e-05, "loss": 0.5374, "step": 2988 }, { "epoch": 0.09181949436303874, "grad_norm": 0.32026684284210205, "learning_rate": 1.9902758393722514e-05, "loss": 0.6285, "step": 2989 }, { "epoch": 0.0918502134979879, "grad_norm": 0.31739017367362976, "learning_rate": 1.9902691149678962e-05, "loss": 0.5538, "step": 2990 }, { "epoch": 0.09188093263293706, "grad_norm": 0.47062382102012634, "learning_rate": 1.990262388250699e-05, "loss": 0.5952, "step": 2991 }, { "epoch": 0.09191165176788621, "grad_norm": 0.34932741522789, "learning_rate": 1.9902556592206742e-05, "loss": 0.6454, "step": 2992 }, { "epoch": 0.09194237090283537, "grad_norm": 0.32357731461524963, "learning_rate": 1.990248927877838e-05, "loss": 0.7157, "step": 2993 }, { "epoch": 0.09197309003778453, "grad_norm": 0.5949954986572266, "learning_rate": 1.9902421942222068e-05, "loss": 0.7233, "step": 2994 }, { "epoch": 0.0920038091727337, "grad_norm": 0.31200310587882996, "learning_rate": 1.9902354582537957e-05, "loss": 0.5432, "step": 2995 }, { "epoch": 0.09203452830768286, "grad_norm": 0.3419654965400696, "learning_rate": 1.99022871997262e-05, "loss": 0.5639, "step": 2996 }, { "epoch": 0.09206524744263202, "grad_norm": 0.36676156520843506, "learning_rate": 1.9902219793786965e-05, "loss": 0.5943, "step": 2997 }, { "epoch": 0.09209596657758118, "grad_norm": 0.3639043867588043, "learning_rate": 1.9902152364720403e-05, "loss": 0.6216, "step": 2998 }, { "epoch": 0.09212668571253034, "grad_norm": 0.434451699256897, "learning_rate": 1.990208491252667e-05, "loss": 0.6669, "step": 2999 }, { "epoch": 0.0921574048474795, "grad_norm": 0.35373491048812866, "learning_rate": 1.9902017437205932e-05, "loss": 0.673, "step": 3000 }, { "epoch": 0.09218812398242865, "grad_norm": 0.28852495551109314, "learning_rate": 1.9901949938758336e-05, "loss": 0.5131, "step": 3001 }, { "epoch": 0.09221884311737781, "grad_norm": 0.43100324273109436, "learning_rate": 1.9901882417184043e-05, "loss": 0.5765, "step": 3002 }, { "epoch": 0.09224956225232697, "grad_norm": 0.3322145640850067, "learning_rate": 1.9901814872483216e-05, "loss": 0.5696, "step": 3003 }, { "epoch": 0.09228028138727613, "grad_norm": 0.3334522545337677, "learning_rate": 1.9901747304656008e-05, "loss": 0.6507, "step": 3004 }, { "epoch": 0.0923110005222253, "grad_norm": 0.3649522066116333, "learning_rate": 1.9901679713702577e-05, "loss": 0.5689, "step": 3005 }, { "epoch": 0.09234171965717446, "grad_norm": 0.32084330916404724, "learning_rate": 1.990161209962308e-05, "loss": 0.7135, "step": 3006 }, { "epoch": 0.09237243879212362, "grad_norm": 0.3886111080646515, "learning_rate": 1.9901544462417678e-05, "loss": 0.633, "step": 3007 }, { "epoch": 0.09240315792707278, "grad_norm": 0.3965243995189667, "learning_rate": 1.990147680208653e-05, "loss": 0.5873, "step": 3008 }, { "epoch": 0.09243387706202193, "grad_norm": 0.34569305181503296, "learning_rate": 1.990140911862979e-05, "loss": 0.621, "step": 3009 }, { "epoch": 0.09246459619697109, "grad_norm": 0.32436683773994446, "learning_rate": 1.990134141204761e-05, "loss": 0.6281, "step": 3010 }, { "epoch": 0.09249531533192025, "grad_norm": 0.3427463471889496, "learning_rate": 1.9901273682340165e-05, "loss": 0.6045, "step": 3011 }, { "epoch": 0.09252603446686941, "grad_norm": 0.39211928844451904, "learning_rate": 1.9901205929507602e-05, "loss": 0.585, "step": 3012 }, { "epoch": 0.09255675360181857, "grad_norm": 0.45308804512023926, "learning_rate": 1.990113815355008e-05, "loss": 0.5895, "step": 3013 }, { "epoch": 0.09258747273676773, "grad_norm": 0.3194686770439148, "learning_rate": 1.9901070354467757e-05, "loss": 0.5983, "step": 3014 }, { "epoch": 0.0926181918717169, "grad_norm": 0.3314695358276367, "learning_rate": 1.9901002532260793e-05, "loss": 0.5466, "step": 3015 }, { "epoch": 0.09264891100666606, "grad_norm": 0.322365403175354, "learning_rate": 1.990093468692935e-05, "loss": 0.6173, "step": 3016 }, { "epoch": 0.09267963014161522, "grad_norm": 0.354158490896225, "learning_rate": 1.9900866818473576e-05, "loss": 0.5488, "step": 3017 }, { "epoch": 0.09271034927656437, "grad_norm": 0.3489501476287842, "learning_rate": 1.990079892689364e-05, "loss": 0.6065, "step": 3018 }, { "epoch": 0.09274106841151353, "grad_norm": 0.3137321174144745, "learning_rate": 1.9900731012189697e-05, "loss": 0.5581, "step": 3019 }, { "epoch": 0.09277178754646269, "grad_norm": 0.37211352586746216, "learning_rate": 1.9900663074361906e-05, "loss": 0.5455, "step": 3020 }, { "epoch": 0.09280250668141185, "grad_norm": 0.36809325218200684, "learning_rate": 1.990059511341042e-05, "loss": 0.5984, "step": 3021 }, { "epoch": 0.09283322581636101, "grad_norm": 0.3194848299026489, "learning_rate": 1.9900527129335406e-05, "loss": 0.5877, "step": 3022 }, { "epoch": 0.09286394495131017, "grad_norm": 0.31725338101387024, "learning_rate": 1.9900459122137023e-05, "loss": 0.5611, "step": 3023 }, { "epoch": 0.09289466408625933, "grad_norm": 0.33301427960395813, "learning_rate": 1.990039109181542e-05, "loss": 0.6142, "step": 3024 }, { "epoch": 0.0929253832212085, "grad_norm": 0.3177962005138397, "learning_rate": 1.9900323038370768e-05, "loss": 0.6265, "step": 3025 }, { "epoch": 0.09295610235615764, "grad_norm": 0.3507539629936218, "learning_rate": 1.9900254961803215e-05, "loss": 0.6028, "step": 3026 }, { "epoch": 0.0929868214911068, "grad_norm": 0.32503560185432434, "learning_rate": 1.990018686211293e-05, "loss": 0.6341, "step": 3027 }, { "epoch": 0.09301754062605597, "grad_norm": 0.34986966848373413, "learning_rate": 1.9900118739300062e-05, "loss": 0.5831, "step": 3028 }, { "epoch": 0.09304825976100513, "grad_norm": 0.31542861461639404, "learning_rate": 1.9900050593364777e-05, "loss": 0.5814, "step": 3029 }, { "epoch": 0.09307897889595429, "grad_norm": 0.36861687898635864, "learning_rate": 1.9899982424307233e-05, "loss": 0.6381, "step": 3030 }, { "epoch": 0.09310969803090345, "grad_norm": 0.36474472284317017, "learning_rate": 1.989991423212759e-05, "loss": 0.6607, "step": 3031 }, { "epoch": 0.09314041716585261, "grad_norm": 0.364353746175766, "learning_rate": 1.9899846016826006e-05, "loss": 0.5877, "step": 3032 }, { "epoch": 0.09317113630080177, "grad_norm": 0.33549854159355164, "learning_rate": 1.9899777778402637e-05, "loss": 0.6245, "step": 3033 }, { "epoch": 0.09320185543575094, "grad_norm": 0.33939433097839355, "learning_rate": 1.9899709516857648e-05, "loss": 0.6662, "step": 3034 }, { "epoch": 0.09323257457070008, "grad_norm": 0.34682485461235046, "learning_rate": 1.9899641232191196e-05, "loss": 0.5758, "step": 3035 }, { "epoch": 0.09326329370564924, "grad_norm": 0.37717172503471375, "learning_rate": 1.9899572924403442e-05, "loss": 0.543, "step": 3036 }, { "epoch": 0.0932940128405984, "grad_norm": 0.3646458685398102, "learning_rate": 1.9899504593494542e-05, "loss": 0.5769, "step": 3037 }, { "epoch": 0.09332473197554757, "grad_norm": 0.3156493008136749, "learning_rate": 1.9899436239464657e-05, "loss": 0.6887, "step": 3038 }, { "epoch": 0.09335545111049673, "grad_norm": 0.3352205753326416, "learning_rate": 1.9899367862313944e-05, "loss": 0.6679, "step": 3039 }, { "epoch": 0.09338617024544589, "grad_norm": 0.3556821346282959, "learning_rate": 1.9899299462042573e-05, "loss": 0.6345, "step": 3040 }, { "epoch": 0.09341688938039505, "grad_norm": 0.32613930106163025, "learning_rate": 1.9899231038650692e-05, "loss": 0.6857, "step": 3041 }, { "epoch": 0.09344760851534421, "grad_norm": 0.39716362953186035, "learning_rate": 1.9899162592138467e-05, "loss": 0.6593, "step": 3042 }, { "epoch": 0.09347832765029337, "grad_norm": 0.386440247297287, "learning_rate": 1.9899094122506058e-05, "loss": 0.6452, "step": 3043 }, { "epoch": 0.09350904678524252, "grad_norm": 0.37525397539138794, "learning_rate": 1.9899025629753623e-05, "loss": 0.558, "step": 3044 }, { "epoch": 0.09353976592019168, "grad_norm": 0.3854731619358063, "learning_rate": 1.989895711388132e-05, "loss": 0.6333, "step": 3045 }, { "epoch": 0.09357048505514085, "grad_norm": 0.5604630708694458, "learning_rate": 1.9898888574889312e-05, "loss": 0.655, "step": 3046 }, { "epoch": 0.09360120419009, "grad_norm": 0.362789511680603, "learning_rate": 1.989882001277776e-05, "loss": 0.6075, "step": 3047 }, { "epoch": 0.09363192332503917, "grad_norm": 0.337336927652359, "learning_rate": 1.989875142754682e-05, "loss": 0.6302, "step": 3048 }, { "epoch": 0.09366264245998833, "grad_norm": 0.328815758228302, "learning_rate": 1.9898682819196657e-05, "loss": 0.5639, "step": 3049 }, { "epoch": 0.09369336159493749, "grad_norm": 0.3341332674026489, "learning_rate": 1.989861418772743e-05, "loss": 0.6073, "step": 3050 }, { "epoch": 0.09372408072988665, "grad_norm": 0.4035934805870056, "learning_rate": 1.9898545533139297e-05, "loss": 0.6292, "step": 3051 }, { "epoch": 0.0937547998648358, "grad_norm": 0.36576902866363525, "learning_rate": 1.9898476855432418e-05, "loss": 0.6708, "step": 3052 }, { "epoch": 0.09378551899978496, "grad_norm": 0.2967241704463959, "learning_rate": 1.9898408154606956e-05, "loss": 0.5991, "step": 3053 }, { "epoch": 0.09381623813473412, "grad_norm": 0.3600403666496277, "learning_rate": 1.989833943066307e-05, "loss": 0.61, "step": 3054 }, { "epoch": 0.09384695726968328, "grad_norm": 0.31540101766586304, "learning_rate": 1.9898270683600922e-05, "loss": 0.5982, "step": 3055 }, { "epoch": 0.09387767640463245, "grad_norm": 0.358182430267334, "learning_rate": 1.989820191342067e-05, "loss": 0.5896, "step": 3056 }, { "epoch": 0.09390839553958161, "grad_norm": 0.39075520634651184, "learning_rate": 1.9898133120122478e-05, "loss": 0.6628, "step": 3057 }, { "epoch": 0.09393911467453077, "grad_norm": 0.36949241161346436, "learning_rate": 1.9898064303706506e-05, "loss": 0.647, "step": 3058 }, { "epoch": 0.09396983380947993, "grad_norm": 0.4862317442893982, "learning_rate": 1.989799546417291e-05, "loss": 0.6744, "step": 3059 }, { "epoch": 0.09400055294442909, "grad_norm": 0.3393234312534332, "learning_rate": 1.9897926601521856e-05, "loss": 0.595, "step": 3060 }, { "epoch": 0.09403127207937824, "grad_norm": 0.3569565415382385, "learning_rate": 1.9897857715753503e-05, "loss": 0.7393, "step": 3061 }, { "epoch": 0.0940619912143274, "grad_norm": 0.33074215054512024, "learning_rate": 1.9897788806868012e-05, "loss": 0.4774, "step": 3062 }, { "epoch": 0.09409271034927656, "grad_norm": 0.45157933235168457, "learning_rate": 1.9897719874865545e-05, "loss": 0.6368, "step": 3063 }, { "epoch": 0.09412342948422572, "grad_norm": 0.33859291672706604, "learning_rate": 1.989765091974626e-05, "loss": 0.5694, "step": 3064 }, { "epoch": 0.09415414861917488, "grad_norm": 0.31172916293144226, "learning_rate": 1.9897581941510323e-05, "loss": 0.6045, "step": 3065 }, { "epoch": 0.09418486775412405, "grad_norm": 0.3353649973869324, "learning_rate": 1.9897512940157887e-05, "loss": 0.6344, "step": 3066 }, { "epoch": 0.09421558688907321, "grad_norm": 0.4823385179042816, "learning_rate": 1.9897443915689126e-05, "loss": 0.6728, "step": 3067 }, { "epoch": 0.09424630602402237, "grad_norm": 0.33116960525512695, "learning_rate": 1.9897374868104185e-05, "loss": 0.6332, "step": 3068 }, { "epoch": 0.09427702515897152, "grad_norm": 0.3635295033454895, "learning_rate": 1.989730579740324e-05, "loss": 0.5749, "step": 3069 }, { "epoch": 0.09430774429392068, "grad_norm": 0.36007440090179443, "learning_rate": 1.9897236703586445e-05, "loss": 0.6432, "step": 3070 }, { "epoch": 0.09433846342886984, "grad_norm": 0.43777427077293396, "learning_rate": 1.9897167586653962e-05, "loss": 0.5993, "step": 3071 }, { "epoch": 0.094369182563819, "grad_norm": 0.35600051283836365, "learning_rate": 1.989709844660595e-05, "loss": 0.6014, "step": 3072 }, { "epoch": 0.09439990169876816, "grad_norm": 0.386452853679657, "learning_rate": 1.9897029283442576e-05, "loss": 0.6903, "step": 3073 }, { "epoch": 0.09443062083371732, "grad_norm": 0.30540695786476135, "learning_rate": 1.9896960097163996e-05, "loss": 0.6, "step": 3074 }, { "epoch": 0.09446133996866649, "grad_norm": 0.34095388650894165, "learning_rate": 1.9896890887770382e-05, "loss": 0.6659, "step": 3075 }, { "epoch": 0.09449205910361565, "grad_norm": 0.33221685886383057, "learning_rate": 1.9896821655261882e-05, "loss": 0.6207, "step": 3076 }, { "epoch": 0.09452277823856481, "grad_norm": 0.33590376377105713, "learning_rate": 1.9896752399638663e-05, "loss": 0.5465, "step": 3077 }, { "epoch": 0.09455349737351396, "grad_norm": 0.4194725751876831, "learning_rate": 1.989668312090089e-05, "loss": 0.5983, "step": 3078 }, { "epoch": 0.09458421650846312, "grad_norm": 0.4226672649383545, "learning_rate": 1.989661381904872e-05, "loss": 0.6009, "step": 3079 }, { "epoch": 0.09461493564341228, "grad_norm": 0.3343108296394348, "learning_rate": 1.9896544494082323e-05, "loss": 0.5806, "step": 3080 }, { "epoch": 0.09464565477836144, "grad_norm": 0.3709106147289276, "learning_rate": 1.989647514600185e-05, "loss": 0.6288, "step": 3081 }, { "epoch": 0.0946763739133106, "grad_norm": 0.3619639575481415, "learning_rate": 1.989640577480747e-05, "loss": 0.5544, "step": 3082 }, { "epoch": 0.09470709304825976, "grad_norm": 0.3328418433666229, "learning_rate": 1.9896336380499343e-05, "loss": 0.6505, "step": 3083 }, { "epoch": 0.09473781218320892, "grad_norm": 0.34509384632110596, "learning_rate": 1.989626696307763e-05, "loss": 0.5889, "step": 3084 }, { "epoch": 0.09476853131815809, "grad_norm": 0.5031507611274719, "learning_rate": 1.9896197522542495e-05, "loss": 0.5791, "step": 3085 }, { "epoch": 0.09479925045310723, "grad_norm": 0.3560812473297119, "learning_rate": 1.9896128058894103e-05, "loss": 0.606, "step": 3086 }, { "epoch": 0.0948299695880564, "grad_norm": 0.3324228525161743, "learning_rate": 1.9896058572132608e-05, "loss": 0.6184, "step": 3087 }, { "epoch": 0.09486068872300556, "grad_norm": 0.35801437497138977, "learning_rate": 1.989598906225818e-05, "loss": 0.6576, "step": 3088 }, { "epoch": 0.09489140785795472, "grad_norm": 0.37792912125587463, "learning_rate": 1.989591952927098e-05, "loss": 0.6766, "step": 3089 }, { "epoch": 0.09492212699290388, "grad_norm": 0.31696853041648865, "learning_rate": 1.9895849973171163e-05, "loss": 0.5247, "step": 3090 }, { "epoch": 0.09495284612785304, "grad_norm": 0.34134209156036377, "learning_rate": 1.98957803939589e-05, "loss": 0.6969, "step": 3091 }, { "epoch": 0.0949835652628022, "grad_norm": 0.3069455623626709, "learning_rate": 1.989571079163435e-05, "loss": 0.5695, "step": 3092 }, { "epoch": 0.09501428439775136, "grad_norm": 0.3588535785675049, "learning_rate": 1.9895641166197678e-05, "loss": 0.5858, "step": 3093 }, { "epoch": 0.09504500353270053, "grad_norm": 0.3107183575630188, "learning_rate": 1.9895571517649045e-05, "loss": 0.5978, "step": 3094 }, { "epoch": 0.09507572266764967, "grad_norm": 0.33750200271606445, "learning_rate": 1.9895501845988612e-05, "loss": 0.6533, "step": 3095 }, { "epoch": 0.09510644180259883, "grad_norm": 0.3359595835208893, "learning_rate": 1.9895432151216544e-05, "loss": 0.5512, "step": 3096 }, { "epoch": 0.095137160937548, "grad_norm": 0.3142557442188263, "learning_rate": 1.9895362433333005e-05, "loss": 0.613, "step": 3097 }, { "epoch": 0.09516788007249716, "grad_norm": 0.31841665506362915, "learning_rate": 1.9895292692338152e-05, "loss": 0.6419, "step": 3098 }, { "epoch": 0.09519859920744632, "grad_norm": 0.46296221017837524, "learning_rate": 1.9895222928232157e-05, "loss": 0.5942, "step": 3099 }, { "epoch": 0.09522931834239548, "grad_norm": 0.3235425651073456, "learning_rate": 1.9895153141015174e-05, "loss": 0.6305, "step": 3100 }, { "epoch": 0.09526003747734464, "grad_norm": 0.31966596841812134, "learning_rate": 1.989508333068737e-05, "loss": 0.5959, "step": 3101 }, { "epoch": 0.0952907566122938, "grad_norm": 0.3342626094818115, "learning_rate": 1.9895013497248907e-05, "loss": 0.6852, "step": 3102 }, { "epoch": 0.09532147574724295, "grad_norm": 0.5294504761695862, "learning_rate": 1.9894943640699952e-05, "loss": 0.6147, "step": 3103 }, { "epoch": 0.09535219488219211, "grad_norm": 0.3317243158817291, "learning_rate": 1.9894873761040663e-05, "loss": 0.5752, "step": 3104 }, { "epoch": 0.09538291401714127, "grad_norm": 0.4122965335845947, "learning_rate": 1.9894803858271207e-05, "loss": 0.544, "step": 3105 }, { "epoch": 0.09541363315209044, "grad_norm": 0.3633585572242737, "learning_rate": 1.9894733932391742e-05, "loss": 0.6825, "step": 3106 }, { "epoch": 0.0954443522870396, "grad_norm": 0.3257274627685547, "learning_rate": 1.989466398340244e-05, "loss": 0.6506, "step": 3107 }, { "epoch": 0.09547507142198876, "grad_norm": 0.43088841438293457, "learning_rate": 1.9894594011303456e-05, "loss": 0.5546, "step": 3108 }, { "epoch": 0.09550579055693792, "grad_norm": 0.3653255105018616, "learning_rate": 1.9894524016094957e-05, "loss": 0.5903, "step": 3109 }, { "epoch": 0.09553650969188708, "grad_norm": 0.33667659759521484, "learning_rate": 1.9894453997777106e-05, "loss": 0.6037, "step": 3110 }, { "epoch": 0.09556722882683624, "grad_norm": 0.33307379484176636, "learning_rate": 1.9894383956350065e-05, "loss": 0.6758, "step": 3111 }, { "epoch": 0.09559794796178539, "grad_norm": 0.34529703855514526, "learning_rate": 1.9894313891814003e-05, "loss": 0.6138, "step": 3112 }, { "epoch": 0.09562866709673455, "grad_norm": 0.6320335268974304, "learning_rate": 1.9894243804169076e-05, "loss": 0.5579, "step": 3113 }, { "epoch": 0.09565938623168371, "grad_norm": 0.3275863826274872, "learning_rate": 1.9894173693415455e-05, "loss": 0.5342, "step": 3114 }, { "epoch": 0.09569010536663287, "grad_norm": 0.3275385797023773, "learning_rate": 1.98941035595533e-05, "loss": 0.6256, "step": 3115 }, { "epoch": 0.09572082450158204, "grad_norm": 0.32727739214897156, "learning_rate": 1.9894033402582772e-05, "loss": 0.6416, "step": 3116 }, { "epoch": 0.0957515436365312, "grad_norm": 0.3242000937461853, "learning_rate": 1.989396322250404e-05, "loss": 0.505, "step": 3117 }, { "epoch": 0.09578226277148036, "grad_norm": 0.49472177028656006, "learning_rate": 1.9893893019317268e-05, "loss": 0.6617, "step": 3118 }, { "epoch": 0.09581298190642952, "grad_norm": 0.36516624689102173, "learning_rate": 1.9893822793022616e-05, "loss": 0.6831, "step": 3119 }, { "epoch": 0.09584370104137868, "grad_norm": 0.31274929642677307, "learning_rate": 1.9893752543620248e-05, "loss": 0.6104, "step": 3120 }, { "epoch": 0.09587442017632783, "grad_norm": 0.3973565697669983, "learning_rate": 1.9893682271110333e-05, "loss": 0.6541, "step": 3121 }, { "epoch": 0.09590513931127699, "grad_norm": 0.31526824831962585, "learning_rate": 1.989361197549303e-05, "loss": 0.6684, "step": 3122 }, { "epoch": 0.09593585844622615, "grad_norm": 0.5545879006385803, "learning_rate": 1.9893541656768508e-05, "loss": 0.5869, "step": 3123 }, { "epoch": 0.09596657758117531, "grad_norm": 0.3378651440143585, "learning_rate": 1.9893471314936925e-05, "loss": 0.5708, "step": 3124 }, { "epoch": 0.09599729671612448, "grad_norm": 0.4120694100856781, "learning_rate": 1.989340094999845e-05, "loss": 0.707, "step": 3125 }, { "epoch": 0.09602801585107364, "grad_norm": 0.33883607387542725, "learning_rate": 1.989333056195325e-05, "loss": 0.633, "step": 3126 }, { "epoch": 0.0960587349860228, "grad_norm": 0.29885026812553406, "learning_rate": 1.9893260150801482e-05, "loss": 0.5015, "step": 3127 }, { "epoch": 0.09608945412097196, "grad_norm": 0.34195348620414734, "learning_rate": 1.989318971654331e-05, "loss": 0.6404, "step": 3128 }, { "epoch": 0.09612017325592111, "grad_norm": 0.3431924283504486, "learning_rate": 1.989311925917891e-05, "loss": 0.6043, "step": 3129 }, { "epoch": 0.09615089239087027, "grad_norm": 0.31518083810806274, "learning_rate": 1.9893048778708436e-05, "loss": 0.5943, "step": 3130 }, { "epoch": 0.09618161152581943, "grad_norm": 0.3537110388278961, "learning_rate": 1.9892978275132053e-05, "loss": 0.6236, "step": 3131 }, { "epoch": 0.09621233066076859, "grad_norm": 0.35050931572914124, "learning_rate": 1.9892907748449933e-05, "loss": 0.5989, "step": 3132 }, { "epoch": 0.09624304979571775, "grad_norm": 0.31870096921920776, "learning_rate": 1.989283719866223e-05, "loss": 0.5664, "step": 3133 }, { "epoch": 0.09627376893066691, "grad_norm": 0.33222806453704834, "learning_rate": 1.989276662576912e-05, "loss": 0.6104, "step": 3134 }, { "epoch": 0.09630448806561608, "grad_norm": 0.39251580834388733, "learning_rate": 1.989269602977076e-05, "loss": 0.7119, "step": 3135 }, { "epoch": 0.09633520720056524, "grad_norm": 0.3274654448032379, "learning_rate": 1.989262541066732e-05, "loss": 0.5689, "step": 3136 }, { "epoch": 0.0963659263355144, "grad_norm": 0.3265276253223419, "learning_rate": 1.989255476845896e-05, "loss": 0.61, "step": 3137 }, { "epoch": 0.09639664547046355, "grad_norm": 0.34639081358909607, "learning_rate": 1.989248410314585e-05, "loss": 0.6734, "step": 3138 }, { "epoch": 0.09642736460541271, "grad_norm": 0.3711862862110138, "learning_rate": 1.989241341472815e-05, "loss": 0.6433, "step": 3139 }, { "epoch": 0.09645808374036187, "grad_norm": 0.3116357922554016, "learning_rate": 1.989234270320603e-05, "loss": 0.5965, "step": 3140 }, { "epoch": 0.09648880287531103, "grad_norm": 0.3611212372779846, "learning_rate": 1.989227196857965e-05, "loss": 0.6003, "step": 3141 }, { "epoch": 0.09651952201026019, "grad_norm": 0.34351107478141785, "learning_rate": 1.989220121084918e-05, "loss": 0.5991, "step": 3142 }, { "epoch": 0.09655024114520935, "grad_norm": 0.31570589542388916, "learning_rate": 1.9892130430014783e-05, "loss": 0.6425, "step": 3143 }, { "epoch": 0.09658096028015851, "grad_norm": 0.3305307924747467, "learning_rate": 1.9892059626076624e-05, "loss": 0.5559, "step": 3144 }, { "epoch": 0.09661167941510768, "grad_norm": 0.3336349427700043, "learning_rate": 1.9891988799034866e-05, "loss": 0.6433, "step": 3145 }, { "epoch": 0.09664239855005682, "grad_norm": 0.3306734263896942, "learning_rate": 1.989191794888968e-05, "loss": 0.5666, "step": 3146 }, { "epoch": 0.09667311768500599, "grad_norm": 0.3830919563770294, "learning_rate": 1.989184707564123e-05, "loss": 0.6408, "step": 3147 }, { "epoch": 0.09670383681995515, "grad_norm": 0.34692418575286865, "learning_rate": 1.9891776179289678e-05, "loss": 0.6745, "step": 3148 }, { "epoch": 0.09673455595490431, "grad_norm": 0.34817132353782654, "learning_rate": 1.98917052598352e-05, "loss": 0.6525, "step": 3149 }, { "epoch": 0.09676527508985347, "grad_norm": 0.34807950258255005, "learning_rate": 1.9891634317277943e-05, "loss": 0.5664, "step": 3150 }, { "epoch": 0.09679599422480263, "grad_norm": 0.3407091200351715, "learning_rate": 1.9891563351618088e-05, "loss": 0.6136, "step": 3151 }, { "epoch": 0.09682671335975179, "grad_norm": 0.3422669768333435, "learning_rate": 1.9891492362855795e-05, "loss": 0.5827, "step": 3152 }, { "epoch": 0.09685743249470095, "grad_norm": 0.30555665493011475, "learning_rate": 1.989142135099123e-05, "loss": 0.5669, "step": 3153 }, { "epoch": 0.09688815162965012, "grad_norm": 0.34539994597435, "learning_rate": 1.989135031602456e-05, "loss": 0.5998, "step": 3154 }, { "epoch": 0.09691887076459926, "grad_norm": 0.38171660900115967, "learning_rate": 1.989127925795595e-05, "loss": 0.7091, "step": 3155 }, { "epoch": 0.09694958989954842, "grad_norm": 0.34479010105133057, "learning_rate": 1.9891208176785567e-05, "loss": 0.6304, "step": 3156 }, { "epoch": 0.09698030903449759, "grad_norm": 0.3492222726345062, "learning_rate": 1.9891137072513577e-05, "loss": 0.7334, "step": 3157 }, { "epoch": 0.09701102816944675, "grad_norm": 0.3133176565170288, "learning_rate": 1.9891065945140146e-05, "loss": 0.6112, "step": 3158 }, { "epoch": 0.09704174730439591, "grad_norm": 0.3359071910381317, "learning_rate": 1.989099479466544e-05, "loss": 0.5579, "step": 3159 }, { "epoch": 0.09707246643934507, "grad_norm": 0.3366149663925171, "learning_rate": 1.9890923621089625e-05, "loss": 0.5694, "step": 3160 }, { "epoch": 0.09710318557429423, "grad_norm": 0.35750600695610046, "learning_rate": 1.9890852424412867e-05, "loss": 0.6398, "step": 3161 }, { "epoch": 0.0971339047092434, "grad_norm": 0.363984614610672, "learning_rate": 1.9890781204635327e-05, "loss": 0.6014, "step": 3162 }, { "epoch": 0.09716462384419254, "grad_norm": 0.292131245136261, "learning_rate": 1.9890709961757184e-05, "loss": 0.6785, "step": 3163 }, { "epoch": 0.0971953429791417, "grad_norm": 0.35772615671157837, "learning_rate": 1.9890638695778595e-05, "loss": 0.6284, "step": 3164 }, { "epoch": 0.09722606211409086, "grad_norm": 0.3279832601547241, "learning_rate": 1.9890567406699727e-05, "loss": 0.5879, "step": 3165 }, { "epoch": 0.09725678124904003, "grad_norm": 0.36719343066215515, "learning_rate": 1.9890496094520747e-05, "loss": 0.6648, "step": 3166 }, { "epoch": 0.09728750038398919, "grad_norm": 0.4615706503391266, "learning_rate": 1.9890424759241825e-05, "loss": 0.5563, "step": 3167 }, { "epoch": 0.09731821951893835, "grad_norm": 0.35545116662979126, "learning_rate": 1.9890353400863126e-05, "loss": 0.6227, "step": 3168 }, { "epoch": 0.09734893865388751, "grad_norm": 0.40542760491371155, "learning_rate": 1.9890282019384816e-05, "loss": 0.5837, "step": 3169 }, { "epoch": 0.09737965778883667, "grad_norm": 0.32620513439178467, "learning_rate": 1.989021061480706e-05, "loss": 0.6079, "step": 3170 }, { "epoch": 0.09741037692378583, "grad_norm": 0.3204731047153473, "learning_rate": 1.9890139187130027e-05, "loss": 0.6088, "step": 3171 }, { "epoch": 0.09744109605873498, "grad_norm": 0.3572083115577698, "learning_rate": 1.989006773635388e-05, "loss": 0.6513, "step": 3172 }, { "epoch": 0.09747181519368414, "grad_norm": 0.347637414932251, "learning_rate": 1.9889996262478793e-05, "loss": 0.6231, "step": 3173 }, { "epoch": 0.0975025343286333, "grad_norm": 0.32068097591400146, "learning_rate": 1.988992476550493e-05, "loss": 0.5563, "step": 3174 }, { "epoch": 0.09753325346358246, "grad_norm": 0.3442131578922272, "learning_rate": 1.9889853245432454e-05, "loss": 0.6599, "step": 3175 }, { "epoch": 0.09756397259853163, "grad_norm": 0.31120118498802185, "learning_rate": 1.9889781702261537e-05, "loss": 0.5984, "step": 3176 }, { "epoch": 0.09759469173348079, "grad_norm": 0.3388780951499939, "learning_rate": 1.988971013599234e-05, "loss": 0.5861, "step": 3177 }, { "epoch": 0.09762541086842995, "grad_norm": 0.36215415596961975, "learning_rate": 1.988963854662504e-05, "loss": 0.652, "step": 3178 }, { "epoch": 0.09765613000337911, "grad_norm": 0.3817155957221985, "learning_rate": 1.9889566934159797e-05, "loss": 0.6593, "step": 3179 }, { "epoch": 0.09768684913832826, "grad_norm": 0.30760952830314636, "learning_rate": 1.988949529859678e-05, "loss": 0.5537, "step": 3180 }, { "epoch": 0.09771756827327742, "grad_norm": 0.35300004482269287, "learning_rate": 1.9889423639936156e-05, "loss": 0.5739, "step": 3181 }, { "epoch": 0.09774828740822658, "grad_norm": 0.3388673961162567, "learning_rate": 1.988935195817809e-05, "loss": 0.5217, "step": 3182 }, { "epoch": 0.09777900654317574, "grad_norm": 0.2992795407772064, "learning_rate": 1.988928025332275e-05, "loss": 0.5705, "step": 3183 }, { "epoch": 0.0978097256781249, "grad_norm": 0.3405119478702545, "learning_rate": 1.9889208525370313e-05, "loss": 0.5893, "step": 3184 }, { "epoch": 0.09784044481307407, "grad_norm": 0.3696826696395874, "learning_rate": 1.9889136774320933e-05, "loss": 0.6017, "step": 3185 }, { "epoch": 0.09787116394802323, "grad_norm": 0.3242189884185791, "learning_rate": 1.9889065000174784e-05, "loss": 0.627, "step": 3186 }, { "epoch": 0.09790188308297239, "grad_norm": 0.3538976311683655, "learning_rate": 1.988899320293203e-05, "loss": 0.5933, "step": 3187 }, { "epoch": 0.09793260221792155, "grad_norm": 0.32799381017684937, "learning_rate": 1.988892138259285e-05, "loss": 0.5951, "step": 3188 }, { "epoch": 0.0979633213528707, "grad_norm": 0.3303426206111908, "learning_rate": 1.98888495391574e-05, "loss": 0.6432, "step": 3189 }, { "epoch": 0.09799404048781986, "grad_norm": 0.33012378215789795, "learning_rate": 1.9888777672625847e-05, "loss": 0.6053, "step": 3190 }, { "epoch": 0.09802475962276902, "grad_norm": 0.32762783765792847, "learning_rate": 1.9888705782998366e-05, "loss": 0.603, "step": 3191 }, { "epoch": 0.09805547875771818, "grad_norm": 0.37913039326667786, "learning_rate": 1.9888633870275123e-05, "loss": 0.5543, "step": 3192 }, { "epoch": 0.09808619789266734, "grad_norm": 0.37761685252189636, "learning_rate": 1.9888561934456286e-05, "loss": 0.59, "step": 3193 }, { "epoch": 0.0981169170276165, "grad_norm": 0.39236336946487427, "learning_rate": 1.988848997554202e-05, "loss": 0.6448, "step": 3194 }, { "epoch": 0.09814763616256567, "grad_norm": 0.340372234582901, "learning_rate": 1.9888417993532494e-05, "loss": 0.6262, "step": 3195 }, { "epoch": 0.09817835529751483, "grad_norm": 0.3991527855396271, "learning_rate": 1.988834598842788e-05, "loss": 0.6509, "step": 3196 }, { "epoch": 0.09820907443246399, "grad_norm": 0.3481108248233795, "learning_rate": 1.988827396022834e-05, "loss": 0.5462, "step": 3197 }, { "epoch": 0.09823979356741314, "grad_norm": 0.3726717531681061, "learning_rate": 1.9888201908934047e-05, "loss": 0.6151, "step": 3198 }, { "epoch": 0.0982705127023623, "grad_norm": 0.3433361351490021, "learning_rate": 1.988812983454517e-05, "loss": 0.6198, "step": 3199 }, { "epoch": 0.09830123183731146, "grad_norm": 0.3662789463996887, "learning_rate": 1.9888057737061873e-05, "loss": 0.579, "step": 3200 }, { "epoch": 0.09833195097226062, "grad_norm": 0.46841180324554443, "learning_rate": 1.988798561648433e-05, "loss": 0.6624, "step": 3201 }, { "epoch": 0.09836267010720978, "grad_norm": 1.2390342950820923, "learning_rate": 1.9887913472812702e-05, "loss": 0.5985, "step": 3202 }, { "epoch": 0.09839338924215894, "grad_norm": 0.3182505667209625, "learning_rate": 1.988784130604716e-05, "loss": 0.6478, "step": 3203 }, { "epoch": 0.0984241083771081, "grad_norm": 0.3574591279029846, "learning_rate": 1.9887769116187877e-05, "loss": 0.696, "step": 3204 }, { "epoch": 0.09845482751205727, "grad_norm": 0.3146666884422302, "learning_rate": 1.988769690323502e-05, "loss": 0.57, "step": 3205 }, { "epoch": 0.09848554664700641, "grad_norm": 0.33564484119415283, "learning_rate": 1.9887624667188755e-05, "loss": 0.5238, "step": 3206 }, { "epoch": 0.09851626578195558, "grad_norm": 0.3015575408935547, "learning_rate": 1.9887552408049253e-05, "loss": 0.5853, "step": 3207 }, { "epoch": 0.09854698491690474, "grad_norm": 0.3676294982433319, "learning_rate": 1.988748012581668e-05, "loss": 0.5594, "step": 3208 }, { "epoch": 0.0985777040518539, "grad_norm": 0.33213144540786743, "learning_rate": 1.988740782049121e-05, "loss": 0.6204, "step": 3209 }, { "epoch": 0.09860842318680306, "grad_norm": 0.46569740772247314, "learning_rate": 1.9887335492073e-05, "loss": 0.6253, "step": 3210 }, { "epoch": 0.09863914232175222, "grad_norm": 0.48497289419174194, "learning_rate": 1.9887263140562235e-05, "loss": 0.6403, "step": 3211 }, { "epoch": 0.09866986145670138, "grad_norm": 1.1228139400482178, "learning_rate": 1.9887190765959078e-05, "loss": 0.5504, "step": 3212 }, { "epoch": 0.09870058059165054, "grad_norm": 0.540431797504425, "learning_rate": 1.9887118368263692e-05, "loss": 0.7101, "step": 3213 }, { "epoch": 0.0987312997265997, "grad_norm": 0.32820016145706177, "learning_rate": 1.988704594747625e-05, "loss": 0.5193, "step": 3214 }, { "epoch": 0.09876201886154885, "grad_norm": 0.36457279324531555, "learning_rate": 1.9886973503596924e-05, "loss": 0.6225, "step": 3215 }, { "epoch": 0.09879273799649801, "grad_norm": 0.3551771938800812, "learning_rate": 1.988690103662588e-05, "loss": 0.6826, "step": 3216 }, { "epoch": 0.09882345713144718, "grad_norm": 0.3317866623401642, "learning_rate": 1.9886828546563287e-05, "loss": 0.5606, "step": 3217 }, { "epoch": 0.09885417626639634, "grad_norm": 0.3296241760253906, "learning_rate": 1.9886756033409318e-05, "loss": 0.6746, "step": 3218 }, { "epoch": 0.0988848954013455, "grad_norm": 0.36483967304229736, "learning_rate": 1.988668349716414e-05, "loss": 0.5625, "step": 3219 }, { "epoch": 0.09891561453629466, "grad_norm": 0.31270354986190796, "learning_rate": 1.9886610937827918e-05, "loss": 0.6376, "step": 3220 }, { "epoch": 0.09894633367124382, "grad_norm": 0.3909209668636322, "learning_rate": 1.988653835540083e-05, "loss": 0.6278, "step": 3221 }, { "epoch": 0.09897705280619298, "grad_norm": 0.40044069290161133, "learning_rate": 1.988646574988304e-05, "loss": 0.6131, "step": 3222 }, { "epoch": 0.09900777194114213, "grad_norm": 0.30838102102279663, "learning_rate": 1.9886393121274715e-05, "loss": 0.569, "step": 3223 }, { "epoch": 0.09903849107609129, "grad_norm": 0.30814129114151, "learning_rate": 1.9886320469576036e-05, "loss": 0.5982, "step": 3224 }, { "epoch": 0.09906921021104045, "grad_norm": 0.32787784934043884, "learning_rate": 1.988624779478716e-05, "loss": 0.5327, "step": 3225 }, { "epoch": 0.09909992934598962, "grad_norm": 0.3302150368690491, "learning_rate": 1.988617509690826e-05, "loss": 0.5122, "step": 3226 }, { "epoch": 0.09913064848093878, "grad_norm": 0.37652719020843506, "learning_rate": 1.9886102375939512e-05, "loss": 0.6208, "step": 3227 }, { "epoch": 0.09916136761588794, "grad_norm": 0.37248003482818604, "learning_rate": 1.988602963188108e-05, "loss": 0.6214, "step": 3228 }, { "epoch": 0.0991920867508371, "grad_norm": 0.34426459670066833, "learning_rate": 1.9885956864733136e-05, "loss": 0.6419, "step": 3229 }, { "epoch": 0.09922280588578626, "grad_norm": 0.3469007909297943, "learning_rate": 1.9885884074495848e-05, "loss": 0.5561, "step": 3230 }, { "epoch": 0.09925352502073542, "grad_norm": 0.3551928997039795, "learning_rate": 1.9885811261169387e-05, "loss": 0.5981, "step": 3231 }, { "epoch": 0.09928424415568457, "grad_norm": 0.3593151271343231, "learning_rate": 1.9885738424753923e-05, "loss": 0.6381, "step": 3232 }, { "epoch": 0.09931496329063373, "grad_norm": 0.35658353567123413, "learning_rate": 1.988566556524963e-05, "loss": 0.6341, "step": 3233 }, { "epoch": 0.09934568242558289, "grad_norm": 0.36657246947288513, "learning_rate": 1.9885592682656673e-05, "loss": 0.6083, "step": 3234 }, { "epoch": 0.09937640156053205, "grad_norm": 0.33080846071243286, "learning_rate": 1.9885519776975225e-05, "loss": 0.5498, "step": 3235 }, { "epoch": 0.09940712069548122, "grad_norm": 0.34186798334121704, "learning_rate": 1.988544684820545e-05, "loss": 0.6732, "step": 3236 }, { "epoch": 0.09943783983043038, "grad_norm": 0.40575557947158813, "learning_rate": 1.9885373896347528e-05, "loss": 0.6761, "step": 3237 }, { "epoch": 0.09946855896537954, "grad_norm": 0.37897878885269165, "learning_rate": 1.988530092140162e-05, "loss": 0.6037, "step": 3238 }, { "epoch": 0.0994992781003287, "grad_norm": 0.37084636092185974, "learning_rate": 1.988522792336791e-05, "loss": 0.6208, "step": 3239 }, { "epoch": 0.09952999723527785, "grad_norm": 0.39701297879219055, "learning_rate": 1.9885154902246554e-05, "loss": 0.6201, "step": 3240 }, { "epoch": 0.09956071637022701, "grad_norm": 0.34214484691619873, "learning_rate": 1.988508185803773e-05, "loss": 0.6261, "step": 3241 }, { "epoch": 0.09959143550517617, "grad_norm": 0.4026789665222168, "learning_rate": 1.9885008790741603e-05, "loss": 0.6536, "step": 3242 }, { "epoch": 0.09962215464012533, "grad_norm": 0.35370704531669617, "learning_rate": 1.9884935700358353e-05, "loss": 0.5205, "step": 3243 }, { "epoch": 0.0996528737750745, "grad_norm": 0.31261146068573, "learning_rate": 1.9884862586888146e-05, "loss": 0.5779, "step": 3244 }, { "epoch": 0.09968359291002366, "grad_norm": 0.31941211223602295, "learning_rate": 1.9884789450331148e-05, "loss": 0.6413, "step": 3245 }, { "epoch": 0.09971431204497282, "grad_norm": 0.34385713934898376, "learning_rate": 1.9884716290687535e-05, "loss": 0.5711, "step": 3246 }, { "epoch": 0.09974503117992198, "grad_norm": 0.3420011103153229, "learning_rate": 1.9884643107957477e-05, "loss": 0.6275, "step": 3247 }, { "epoch": 0.09977575031487114, "grad_norm": 0.37562310695648193, "learning_rate": 1.9884569902141142e-05, "loss": 0.6829, "step": 3248 }, { "epoch": 0.09980646944982029, "grad_norm": 0.3496978282928467, "learning_rate": 1.988449667323871e-05, "loss": 0.5974, "step": 3249 }, { "epoch": 0.09983718858476945, "grad_norm": 0.36009493470191956, "learning_rate": 1.9884423421250337e-05, "loss": 0.5582, "step": 3250 }, { "epoch": 0.09986790771971861, "grad_norm": 0.31608545780181885, "learning_rate": 1.988435014617621e-05, "loss": 0.6193, "step": 3251 }, { "epoch": 0.09989862685466777, "grad_norm": 0.35845357179641724, "learning_rate": 1.988427684801649e-05, "loss": 0.594, "step": 3252 }, { "epoch": 0.09992934598961693, "grad_norm": 0.33128246665000916, "learning_rate": 1.9884203526771352e-05, "loss": 0.5934, "step": 3253 }, { "epoch": 0.0999600651245661, "grad_norm": 0.3620995879173279, "learning_rate": 1.9884130182440967e-05, "loss": 0.6672, "step": 3254 }, { "epoch": 0.09999078425951526, "grad_norm": 0.35481715202331543, "learning_rate": 1.9884056815025505e-05, "loss": 0.7009, "step": 3255 }, { "epoch": 0.10002150339446442, "grad_norm": 0.3524946868419647, "learning_rate": 1.988398342452514e-05, "loss": 0.5872, "step": 3256 }, { "epoch": 0.10005222252941358, "grad_norm": 0.359903484582901, "learning_rate": 1.988391001094004e-05, "loss": 0.6225, "step": 3257 }, { "epoch": 0.10008294166436273, "grad_norm": 0.30613529682159424, "learning_rate": 1.9883836574270374e-05, "loss": 0.6274, "step": 3258 }, { "epoch": 0.10011366079931189, "grad_norm": 0.36041247844696045, "learning_rate": 1.988376311451632e-05, "loss": 0.5894, "step": 3259 }, { "epoch": 0.10014437993426105, "grad_norm": 2.1787195205688477, "learning_rate": 1.9883689631678047e-05, "loss": 0.5658, "step": 3260 }, { "epoch": 0.10017509906921021, "grad_norm": 0.3624386191368103, "learning_rate": 1.988361612575573e-05, "loss": 0.6283, "step": 3261 }, { "epoch": 0.10020581820415937, "grad_norm": 0.3233952224254608, "learning_rate": 1.9883542596749533e-05, "loss": 0.5664, "step": 3262 }, { "epoch": 0.10023653733910853, "grad_norm": 0.39429712295532227, "learning_rate": 1.988346904465963e-05, "loss": 0.6253, "step": 3263 }, { "epoch": 0.1002672564740577, "grad_norm": 0.3670956790447235, "learning_rate": 1.98833954694862e-05, "loss": 0.6989, "step": 3264 }, { "epoch": 0.10029797560900686, "grad_norm": 0.3167235553264618, "learning_rate": 1.988332187122941e-05, "loss": 0.5127, "step": 3265 }, { "epoch": 0.100328694743956, "grad_norm": 0.32421088218688965, "learning_rate": 1.9883248249889425e-05, "loss": 0.6342, "step": 3266 }, { "epoch": 0.10035941387890517, "grad_norm": 0.3206900954246521, "learning_rate": 1.988317460546643e-05, "loss": 0.6364, "step": 3267 }, { "epoch": 0.10039013301385433, "grad_norm": 0.3310061991214752, "learning_rate": 1.988310093796059e-05, "loss": 0.6175, "step": 3268 }, { "epoch": 0.10042085214880349, "grad_norm": 0.4110753536224365, "learning_rate": 1.9883027247372076e-05, "loss": 0.5197, "step": 3269 }, { "epoch": 0.10045157128375265, "grad_norm": 0.34525465965270996, "learning_rate": 1.988295353370106e-05, "loss": 0.5682, "step": 3270 }, { "epoch": 0.10048229041870181, "grad_norm": 0.3513871431350708, "learning_rate": 1.9882879796947718e-05, "loss": 0.7262, "step": 3271 }, { "epoch": 0.10051300955365097, "grad_norm": 0.3012881577014923, "learning_rate": 1.988280603711222e-05, "loss": 0.6312, "step": 3272 }, { "epoch": 0.10054372868860013, "grad_norm": 0.35105040669441223, "learning_rate": 1.9882732254194736e-05, "loss": 0.5294, "step": 3273 }, { "epoch": 0.1005744478235493, "grad_norm": 0.7928096055984497, "learning_rate": 1.9882658448195443e-05, "loss": 0.6321, "step": 3274 }, { "epoch": 0.10060516695849844, "grad_norm": 0.33293625712394714, "learning_rate": 1.9882584619114512e-05, "loss": 0.5833, "step": 3275 }, { "epoch": 0.1006358860934476, "grad_norm": 0.3995696008205414, "learning_rate": 1.988251076695211e-05, "loss": 0.6682, "step": 3276 }, { "epoch": 0.10066660522839677, "grad_norm": 0.3311254382133484, "learning_rate": 1.9882436891708416e-05, "loss": 0.6126, "step": 3277 }, { "epoch": 0.10069732436334593, "grad_norm": 0.4280281960964203, "learning_rate": 1.9882362993383602e-05, "loss": 0.6563, "step": 3278 }, { "epoch": 0.10072804349829509, "grad_norm": 0.34369105100631714, "learning_rate": 1.988228907197784e-05, "loss": 0.6365, "step": 3279 }, { "epoch": 0.10075876263324425, "grad_norm": 0.3648221492767334, "learning_rate": 1.9882215127491298e-05, "loss": 0.5216, "step": 3280 }, { "epoch": 0.10078948176819341, "grad_norm": 0.3379635810852051, "learning_rate": 1.9882141159924153e-05, "loss": 0.5971, "step": 3281 }, { "epoch": 0.10082020090314257, "grad_norm": 0.350720077753067, "learning_rate": 1.9882067169276577e-05, "loss": 0.6579, "step": 3282 }, { "epoch": 0.10085092003809172, "grad_norm": 0.3155682682991028, "learning_rate": 1.9881993155548745e-05, "loss": 0.5674, "step": 3283 }, { "epoch": 0.10088163917304088, "grad_norm": 0.36580705642700195, "learning_rate": 1.9881919118740828e-05, "loss": 0.6115, "step": 3284 }, { "epoch": 0.10091235830799004, "grad_norm": 0.36517637968063354, "learning_rate": 1.9881845058852997e-05, "loss": 0.6742, "step": 3285 }, { "epoch": 0.1009430774429392, "grad_norm": 0.3886603116989136, "learning_rate": 1.9881770975885427e-05, "loss": 0.6219, "step": 3286 }, { "epoch": 0.10097379657788837, "grad_norm": 0.34510332345962524, "learning_rate": 1.9881696869838292e-05, "loss": 0.5408, "step": 3287 }, { "epoch": 0.10100451571283753, "grad_norm": 0.3264683485031128, "learning_rate": 1.9881622740711765e-05, "loss": 0.6292, "step": 3288 }, { "epoch": 0.10103523484778669, "grad_norm": 0.3278883397579193, "learning_rate": 1.9881548588506017e-05, "loss": 0.6179, "step": 3289 }, { "epoch": 0.10106595398273585, "grad_norm": 0.34580516815185547, "learning_rate": 1.988147441322122e-05, "loss": 0.5821, "step": 3290 }, { "epoch": 0.10109667311768501, "grad_norm": 0.3468242883682251, "learning_rate": 1.988140021485755e-05, "loss": 0.6418, "step": 3291 }, { "epoch": 0.10112739225263416, "grad_norm": 0.3375232219696045, "learning_rate": 1.9881325993415183e-05, "loss": 0.6242, "step": 3292 }, { "epoch": 0.10115811138758332, "grad_norm": 0.4066527187824249, "learning_rate": 1.9881251748894285e-05, "loss": 0.5861, "step": 3293 }, { "epoch": 0.10118883052253248, "grad_norm": 0.34245970845222473, "learning_rate": 1.9881177481295033e-05, "loss": 0.5271, "step": 3294 }, { "epoch": 0.10121954965748164, "grad_norm": 0.33548909425735474, "learning_rate": 1.9881103190617602e-05, "loss": 0.568, "step": 3295 }, { "epoch": 0.1012502687924308, "grad_norm": 0.3868129849433899, "learning_rate": 1.9881028876862166e-05, "loss": 0.6104, "step": 3296 }, { "epoch": 0.10128098792737997, "grad_norm": 0.48436084389686584, "learning_rate": 1.9880954540028897e-05, "loss": 0.7015, "step": 3297 }, { "epoch": 0.10131170706232913, "grad_norm": 0.322892963886261, "learning_rate": 1.9880880180117968e-05, "loss": 0.5548, "step": 3298 }, { "epoch": 0.10134242619727829, "grad_norm": 0.3423095643520355, "learning_rate": 1.9880805797129552e-05, "loss": 0.6441, "step": 3299 }, { "epoch": 0.10137314533222744, "grad_norm": 0.3243393301963806, "learning_rate": 1.9880731391063825e-05, "loss": 0.5787, "step": 3300 }, { "epoch": 0.1014038644671766, "grad_norm": 0.4149682819843292, "learning_rate": 1.988065696192096e-05, "loss": 0.6298, "step": 3301 }, { "epoch": 0.10143458360212576, "grad_norm": 0.31153905391693115, "learning_rate": 1.988058250970113e-05, "loss": 0.5146, "step": 3302 }, { "epoch": 0.10146530273707492, "grad_norm": 0.33997583389282227, "learning_rate": 1.9880508034404512e-05, "loss": 0.6519, "step": 3303 }, { "epoch": 0.10149602187202408, "grad_norm": 0.40162330865859985, "learning_rate": 1.9880433536031272e-05, "loss": 0.5912, "step": 3304 }, { "epoch": 0.10152674100697325, "grad_norm": 0.37630707025527954, "learning_rate": 1.9880359014581592e-05, "loss": 0.6958, "step": 3305 }, { "epoch": 0.1015574601419224, "grad_norm": 0.38757815957069397, "learning_rate": 1.9880284470055645e-05, "loss": 0.6432, "step": 3306 }, { "epoch": 0.10158817927687157, "grad_norm": 0.3097861707210541, "learning_rate": 1.9880209902453606e-05, "loss": 0.5675, "step": 3307 }, { "epoch": 0.10161889841182073, "grad_norm": 0.32999154925346375, "learning_rate": 1.9880135311775642e-05, "loss": 0.661, "step": 3308 }, { "epoch": 0.10164961754676988, "grad_norm": 0.3612196147441864, "learning_rate": 1.9880060698021936e-05, "loss": 0.6672, "step": 3309 }, { "epoch": 0.10168033668171904, "grad_norm": 0.3236655592918396, "learning_rate": 1.9879986061192653e-05, "loss": 0.5548, "step": 3310 }, { "epoch": 0.1017110558166682, "grad_norm": 0.37208595871925354, "learning_rate": 1.9879911401287977e-05, "loss": 0.5669, "step": 3311 }, { "epoch": 0.10174177495161736, "grad_norm": 0.3480599522590637, "learning_rate": 1.9879836718308074e-05, "loss": 0.6682, "step": 3312 }, { "epoch": 0.10177249408656652, "grad_norm": 0.3658190369606018, "learning_rate": 1.9879762012253126e-05, "loss": 0.6547, "step": 3313 }, { "epoch": 0.10180321322151568, "grad_norm": 0.32774636149406433, "learning_rate": 1.98796872831233e-05, "loss": 0.5839, "step": 3314 }, { "epoch": 0.10183393235646485, "grad_norm": 0.34338146448135376, "learning_rate": 1.9879612530918776e-05, "loss": 0.6209, "step": 3315 }, { "epoch": 0.10186465149141401, "grad_norm": 0.3632097542285919, "learning_rate": 1.9879537755639728e-05, "loss": 0.6339, "step": 3316 }, { "epoch": 0.10189537062636315, "grad_norm": 0.46870946884155273, "learning_rate": 1.9879462957286335e-05, "loss": 0.6167, "step": 3317 }, { "epoch": 0.10192608976131232, "grad_norm": 0.3515945374965668, "learning_rate": 1.9879388135858757e-05, "loss": 0.6847, "step": 3318 }, { "epoch": 0.10195680889626148, "grad_norm": 0.326180636882782, "learning_rate": 1.9879313291357182e-05, "loss": 0.6072, "step": 3319 }, { "epoch": 0.10198752803121064, "grad_norm": 0.33071663975715637, "learning_rate": 1.987923842378178e-05, "loss": 0.5917, "step": 3320 }, { "epoch": 0.1020182471661598, "grad_norm": 0.32090839743614197, "learning_rate": 1.9879163533132728e-05, "loss": 0.5661, "step": 3321 }, { "epoch": 0.10204896630110896, "grad_norm": 0.32336312532424927, "learning_rate": 1.9879088619410195e-05, "loss": 0.5408, "step": 3322 }, { "epoch": 0.10207968543605812, "grad_norm": 0.3294239342212677, "learning_rate": 1.9879013682614368e-05, "loss": 0.635, "step": 3323 }, { "epoch": 0.10211040457100728, "grad_norm": 0.30902040004730225, "learning_rate": 1.987893872274541e-05, "loss": 0.6282, "step": 3324 }, { "epoch": 0.10214112370595645, "grad_norm": 0.3844924569129944, "learning_rate": 1.98788637398035e-05, "loss": 0.584, "step": 3325 }, { "epoch": 0.1021718428409056, "grad_norm": 0.3408164978027344, "learning_rate": 1.9878788733788817e-05, "loss": 0.6256, "step": 3326 }, { "epoch": 0.10220256197585476, "grad_norm": 0.3339153230190277, "learning_rate": 1.987871370470153e-05, "loss": 0.5468, "step": 3327 }, { "epoch": 0.10223328111080392, "grad_norm": 0.3190523386001587, "learning_rate": 1.9878638652541823e-05, "loss": 0.5836, "step": 3328 }, { "epoch": 0.10226400024575308, "grad_norm": 0.33561238646507263, "learning_rate": 1.9878563577309856e-05, "loss": 0.6093, "step": 3329 }, { "epoch": 0.10229471938070224, "grad_norm": 0.3750517964363098, "learning_rate": 1.987848847900582e-05, "loss": 0.5633, "step": 3330 }, { "epoch": 0.1023254385156514, "grad_norm": 0.4086240530014038, "learning_rate": 1.9878413357629885e-05, "loss": 0.5474, "step": 3331 }, { "epoch": 0.10235615765060056, "grad_norm": 0.41579195857048035, "learning_rate": 1.987833821318222e-05, "loss": 0.5946, "step": 3332 }, { "epoch": 0.10238687678554972, "grad_norm": 0.3820441663265228, "learning_rate": 1.9878263045663012e-05, "loss": 0.5846, "step": 3333 }, { "epoch": 0.10241759592049889, "grad_norm": 0.3447595238685608, "learning_rate": 1.987818785507243e-05, "loss": 0.6691, "step": 3334 }, { "epoch": 0.10244831505544803, "grad_norm": 0.36575114727020264, "learning_rate": 1.987811264141065e-05, "loss": 0.6293, "step": 3335 }, { "epoch": 0.1024790341903972, "grad_norm": 0.3342439532279968, "learning_rate": 1.9878037404677847e-05, "loss": 0.5907, "step": 3336 }, { "epoch": 0.10250975332534636, "grad_norm": 0.41989102959632874, "learning_rate": 1.98779621448742e-05, "loss": 0.6735, "step": 3337 }, { "epoch": 0.10254047246029552, "grad_norm": 0.366451621055603, "learning_rate": 1.9877886861999877e-05, "loss": 0.5885, "step": 3338 }, { "epoch": 0.10257119159524468, "grad_norm": 0.35216495394706726, "learning_rate": 1.9877811556055064e-05, "loss": 0.6258, "step": 3339 }, { "epoch": 0.10260191073019384, "grad_norm": 0.3150026500225067, "learning_rate": 1.987773622703993e-05, "loss": 0.5848, "step": 3340 }, { "epoch": 0.102632629865143, "grad_norm": 0.32018280029296875, "learning_rate": 1.9877660874954655e-05, "loss": 0.5679, "step": 3341 }, { "epoch": 0.10266334900009216, "grad_norm": 0.3118934631347656, "learning_rate": 1.9877585499799414e-05, "loss": 0.5957, "step": 3342 }, { "epoch": 0.10269406813504131, "grad_norm": 0.6014252305030823, "learning_rate": 1.987751010157438e-05, "loss": 0.6404, "step": 3343 }, { "epoch": 0.10272478726999047, "grad_norm": 0.3465321958065033, "learning_rate": 1.9877434680279736e-05, "loss": 0.6414, "step": 3344 }, { "epoch": 0.10275550640493963, "grad_norm": 0.33455878496170044, "learning_rate": 1.987735923591565e-05, "loss": 0.5236, "step": 3345 }, { "epoch": 0.1027862255398888, "grad_norm": 0.4493517279624939, "learning_rate": 1.9877283768482297e-05, "loss": 0.6782, "step": 3346 }, { "epoch": 0.10281694467483796, "grad_norm": 0.33761411905288696, "learning_rate": 1.9877208277979864e-05, "loss": 0.6519, "step": 3347 }, { "epoch": 0.10284766380978712, "grad_norm": 0.3426732122898102, "learning_rate": 1.987713276440852e-05, "loss": 0.5677, "step": 3348 }, { "epoch": 0.10287838294473628, "grad_norm": 0.3466872274875641, "learning_rate": 1.9877057227768442e-05, "loss": 0.6554, "step": 3349 }, { "epoch": 0.10290910207968544, "grad_norm": 0.36354297399520874, "learning_rate": 1.9876981668059806e-05, "loss": 0.6212, "step": 3350 }, { "epoch": 0.1029398212146346, "grad_norm": 0.33885109424591064, "learning_rate": 1.9876906085282793e-05, "loss": 0.5397, "step": 3351 }, { "epoch": 0.10297054034958375, "grad_norm": 0.32476162910461426, "learning_rate": 1.9876830479437573e-05, "loss": 0.723, "step": 3352 }, { "epoch": 0.10300125948453291, "grad_norm": 0.33648350834846497, "learning_rate": 1.9876754850524326e-05, "loss": 0.6803, "step": 3353 }, { "epoch": 0.10303197861948207, "grad_norm": 0.3188552260398865, "learning_rate": 1.9876679198543227e-05, "loss": 0.5922, "step": 3354 }, { "epoch": 0.10306269775443123, "grad_norm": 0.581777036190033, "learning_rate": 1.9876603523494456e-05, "loss": 0.6327, "step": 3355 }, { "epoch": 0.1030934168893804, "grad_norm": 0.3482327461242676, "learning_rate": 1.987652782537819e-05, "loss": 0.5821, "step": 3356 }, { "epoch": 0.10312413602432956, "grad_norm": 0.4128420054912567, "learning_rate": 1.9876452104194597e-05, "loss": 0.7026, "step": 3357 }, { "epoch": 0.10315485515927872, "grad_norm": 0.35158807039260864, "learning_rate": 1.9876376359943865e-05, "loss": 0.6654, "step": 3358 }, { "epoch": 0.10318557429422788, "grad_norm": 0.34143081307411194, "learning_rate": 1.9876300592626166e-05, "loss": 0.5736, "step": 3359 }, { "epoch": 0.10321629342917703, "grad_norm": 0.33089399337768555, "learning_rate": 1.9876224802241676e-05, "loss": 0.5224, "step": 3360 }, { "epoch": 0.10324701256412619, "grad_norm": 0.32293233275413513, "learning_rate": 1.9876148988790574e-05, "loss": 0.6484, "step": 3361 }, { "epoch": 0.10327773169907535, "grad_norm": 0.34166866540908813, "learning_rate": 1.9876073152273038e-05, "loss": 0.6567, "step": 3362 }, { "epoch": 0.10330845083402451, "grad_norm": 0.353118896484375, "learning_rate": 1.987599729268924e-05, "loss": 0.6478, "step": 3363 }, { "epoch": 0.10333916996897367, "grad_norm": 0.3449006676673889, "learning_rate": 1.9875921410039364e-05, "loss": 0.5923, "step": 3364 }, { "epoch": 0.10336988910392284, "grad_norm": 0.3197091817855835, "learning_rate": 1.987584550432358e-05, "loss": 0.6435, "step": 3365 }, { "epoch": 0.103400608238872, "grad_norm": 0.3524058163166046, "learning_rate": 1.987576957554207e-05, "loss": 0.581, "step": 3366 }, { "epoch": 0.10343132737382116, "grad_norm": 0.35161375999450684, "learning_rate": 1.9875693623695012e-05, "loss": 0.5951, "step": 3367 }, { "epoch": 0.10346204650877032, "grad_norm": 0.3134300112724304, "learning_rate": 1.9875617648782582e-05, "loss": 0.6118, "step": 3368 }, { "epoch": 0.10349276564371947, "grad_norm": 0.3463611900806427, "learning_rate": 1.9875541650804956e-05, "loss": 0.681, "step": 3369 }, { "epoch": 0.10352348477866863, "grad_norm": 0.3985813558101654, "learning_rate": 1.9875465629762316e-05, "loss": 0.7398, "step": 3370 }, { "epoch": 0.10355420391361779, "grad_norm": 0.32477596402168274, "learning_rate": 1.9875389585654832e-05, "loss": 0.7319, "step": 3371 }, { "epoch": 0.10358492304856695, "grad_norm": 0.3299289643764496, "learning_rate": 1.9875313518482685e-05, "loss": 0.6276, "step": 3372 }, { "epoch": 0.10361564218351611, "grad_norm": 0.3683394491672516, "learning_rate": 1.987523742824606e-05, "loss": 0.6059, "step": 3373 }, { "epoch": 0.10364636131846527, "grad_norm": 0.32284900546073914, "learning_rate": 1.987516131494512e-05, "loss": 0.5808, "step": 3374 }, { "epoch": 0.10367708045341444, "grad_norm": 0.32530537247657776, "learning_rate": 1.987508517858006e-05, "loss": 0.5979, "step": 3375 }, { "epoch": 0.1037077995883636, "grad_norm": 0.32396018505096436, "learning_rate": 1.9875009019151042e-05, "loss": 0.6183, "step": 3376 }, { "epoch": 0.10373851872331274, "grad_norm": 0.3267766535282135, "learning_rate": 1.9874932836658253e-05, "loss": 0.6234, "step": 3377 }, { "epoch": 0.1037692378582619, "grad_norm": 0.3189561069011688, "learning_rate": 1.987485663110187e-05, "loss": 0.6315, "step": 3378 }, { "epoch": 0.10379995699321107, "grad_norm": 0.3408612906932831, "learning_rate": 1.987478040248207e-05, "loss": 0.6152, "step": 3379 }, { "epoch": 0.10383067612816023, "grad_norm": 0.41029730439186096, "learning_rate": 1.9874704150799026e-05, "loss": 0.6367, "step": 3380 }, { "epoch": 0.10386139526310939, "grad_norm": 0.3840169906616211, "learning_rate": 1.9874627876052924e-05, "loss": 0.6051, "step": 3381 }, { "epoch": 0.10389211439805855, "grad_norm": 0.361579030752182, "learning_rate": 1.9874551578243942e-05, "loss": 0.6481, "step": 3382 }, { "epoch": 0.10392283353300771, "grad_norm": 0.383648544549942, "learning_rate": 1.987447525737225e-05, "loss": 0.6965, "step": 3383 }, { "epoch": 0.10395355266795687, "grad_norm": 0.37728843092918396, "learning_rate": 1.9874398913438035e-05, "loss": 0.6193, "step": 3384 }, { "epoch": 0.10398427180290604, "grad_norm": 0.3863455057144165, "learning_rate": 1.987432254644147e-05, "loss": 0.6267, "step": 3385 }, { "epoch": 0.10401499093785518, "grad_norm": 0.3271351456642151, "learning_rate": 1.9874246156382736e-05, "loss": 0.6049, "step": 3386 }, { "epoch": 0.10404571007280435, "grad_norm": 0.34353017807006836, "learning_rate": 1.987416974326201e-05, "loss": 0.6014, "step": 3387 }, { "epoch": 0.1040764292077535, "grad_norm": 0.35475918650627136, "learning_rate": 1.9874093307079473e-05, "loss": 0.529, "step": 3388 }, { "epoch": 0.10410714834270267, "grad_norm": 0.38387593626976013, "learning_rate": 1.9874016847835298e-05, "loss": 0.696, "step": 3389 }, { "epoch": 0.10413786747765183, "grad_norm": 0.3422696888446808, "learning_rate": 1.9873940365529668e-05, "loss": 0.5813, "step": 3390 }, { "epoch": 0.10416858661260099, "grad_norm": 0.35487478971481323, "learning_rate": 1.9873863860162763e-05, "loss": 0.6172, "step": 3391 }, { "epoch": 0.10419930574755015, "grad_norm": 0.3138427734375, "learning_rate": 1.9873787331734755e-05, "loss": 0.655, "step": 3392 }, { "epoch": 0.10423002488249931, "grad_norm": 0.4507734775543213, "learning_rate": 1.987371078024583e-05, "loss": 0.6574, "step": 3393 }, { "epoch": 0.10426074401744846, "grad_norm": 0.4047156274318695, "learning_rate": 1.9873634205696163e-05, "loss": 0.5948, "step": 3394 }, { "epoch": 0.10429146315239762, "grad_norm": 0.3993381857872009, "learning_rate": 1.9873557608085937e-05, "loss": 0.6764, "step": 3395 }, { "epoch": 0.10432218228734678, "grad_norm": 0.385838121175766, "learning_rate": 1.9873480987415323e-05, "loss": 0.7104, "step": 3396 }, { "epoch": 0.10435290142229595, "grad_norm": 0.3451135456562042, "learning_rate": 1.987340434368451e-05, "loss": 0.6599, "step": 3397 }, { "epoch": 0.10438362055724511, "grad_norm": 0.3311775326728821, "learning_rate": 1.9873327676893666e-05, "loss": 0.6212, "step": 3398 }, { "epoch": 0.10441433969219427, "grad_norm": 0.36715981364250183, "learning_rate": 1.987325098704298e-05, "loss": 0.7158, "step": 3399 }, { "epoch": 0.10444505882714343, "grad_norm": 0.3411772549152374, "learning_rate": 1.9873174274132622e-05, "loss": 0.6217, "step": 3400 }, { "epoch": 0.10447577796209259, "grad_norm": 0.32418715953826904, "learning_rate": 1.987309753816278e-05, "loss": 0.6127, "step": 3401 }, { "epoch": 0.10450649709704175, "grad_norm": 0.32916319370269775, "learning_rate": 1.9873020779133626e-05, "loss": 0.7133, "step": 3402 }, { "epoch": 0.1045372162319909, "grad_norm": 0.3439186215400696, "learning_rate": 1.9872943997045344e-05, "loss": 0.5625, "step": 3403 }, { "epoch": 0.10456793536694006, "grad_norm": 0.3174929916858673, "learning_rate": 1.9872867191898113e-05, "loss": 0.5709, "step": 3404 }, { "epoch": 0.10459865450188922, "grad_norm": 0.3424277901649475, "learning_rate": 1.987279036369211e-05, "loss": 0.6184, "step": 3405 }, { "epoch": 0.10462937363683839, "grad_norm": 0.5531808137893677, "learning_rate": 1.9872713512427516e-05, "loss": 0.5988, "step": 3406 }, { "epoch": 0.10466009277178755, "grad_norm": 0.3487642705440521, "learning_rate": 1.9872636638104507e-05, "loss": 0.6398, "step": 3407 }, { "epoch": 0.10469081190673671, "grad_norm": 0.3647867739200592, "learning_rate": 1.9872559740723273e-05, "loss": 0.6844, "step": 3408 }, { "epoch": 0.10472153104168587, "grad_norm": 0.32465678453445435, "learning_rate": 1.987248282028398e-05, "loss": 0.6036, "step": 3409 }, { "epoch": 0.10475225017663503, "grad_norm": 0.33469387888908386, "learning_rate": 1.9872405876786813e-05, "loss": 0.6385, "step": 3410 }, { "epoch": 0.10478296931158419, "grad_norm": 0.2913793921470642, "learning_rate": 1.987232891023196e-05, "loss": 0.6037, "step": 3411 }, { "epoch": 0.10481368844653334, "grad_norm": 0.33242788910865784, "learning_rate": 1.9872251920619584e-05, "loss": 0.5973, "step": 3412 }, { "epoch": 0.1048444075814825, "grad_norm": 0.3290729820728302, "learning_rate": 1.987217490794988e-05, "loss": 0.5715, "step": 3413 }, { "epoch": 0.10487512671643166, "grad_norm": 0.3005902171134949, "learning_rate": 1.987209787222302e-05, "loss": 0.5634, "step": 3414 }, { "epoch": 0.10490584585138082, "grad_norm": 0.3476839065551758, "learning_rate": 1.9872020813439187e-05, "loss": 0.5947, "step": 3415 }, { "epoch": 0.10493656498632999, "grad_norm": 0.3725834786891937, "learning_rate": 1.987194373159856e-05, "loss": 0.5858, "step": 3416 }, { "epoch": 0.10496728412127915, "grad_norm": 0.36059942841529846, "learning_rate": 1.987186662670132e-05, "loss": 0.5783, "step": 3417 }, { "epoch": 0.10499800325622831, "grad_norm": 0.40658167004585266, "learning_rate": 1.9871789498747645e-05, "loss": 0.6011, "step": 3418 }, { "epoch": 0.10502872239117747, "grad_norm": 0.3487873673439026, "learning_rate": 1.9871712347737716e-05, "loss": 0.6177, "step": 3419 }, { "epoch": 0.10505944152612662, "grad_norm": 0.4027774930000305, "learning_rate": 1.9871635173671712e-05, "loss": 0.7066, "step": 3420 }, { "epoch": 0.10509016066107578, "grad_norm": 0.3590320646762848, "learning_rate": 1.9871557976549815e-05, "loss": 0.6498, "step": 3421 }, { "epoch": 0.10512087979602494, "grad_norm": 0.3469177484512329, "learning_rate": 1.9871480756372205e-05, "loss": 0.5918, "step": 3422 }, { "epoch": 0.1051515989309741, "grad_norm": 0.3012349605560303, "learning_rate": 1.9871403513139064e-05, "loss": 0.6022, "step": 3423 }, { "epoch": 0.10518231806592326, "grad_norm": 0.3203411400318146, "learning_rate": 1.987132624685057e-05, "loss": 0.6126, "step": 3424 }, { "epoch": 0.10521303720087243, "grad_norm": 0.45684704184532166, "learning_rate": 1.9871248957506903e-05, "loss": 0.5836, "step": 3425 }, { "epoch": 0.10524375633582159, "grad_norm": 0.41580966114997864, "learning_rate": 1.9871171645108246e-05, "loss": 0.719, "step": 3426 }, { "epoch": 0.10527447547077075, "grad_norm": 0.3926449716091156, "learning_rate": 1.9871094309654778e-05, "loss": 0.6042, "step": 3427 }, { "epoch": 0.10530519460571991, "grad_norm": 0.3490860164165497, "learning_rate": 1.987101695114668e-05, "loss": 0.6156, "step": 3428 }, { "epoch": 0.10533591374066906, "grad_norm": 0.330984503030777, "learning_rate": 1.9870939569584134e-05, "loss": 0.6573, "step": 3429 }, { "epoch": 0.10536663287561822, "grad_norm": 0.32824939489364624, "learning_rate": 1.9870862164967316e-05, "loss": 0.6282, "step": 3430 }, { "epoch": 0.10539735201056738, "grad_norm": 0.33163386583328247, "learning_rate": 1.987078473729641e-05, "loss": 0.6184, "step": 3431 }, { "epoch": 0.10542807114551654, "grad_norm": 0.34851354360580444, "learning_rate": 1.9870707286571597e-05, "loss": 0.6521, "step": 3432 }, { "epoch": 0.1054587902804657, "grad_norm": 0.3479584753513336, "learning_rate": 1.9870629812793062e-05, "loss": 0.6575, "step": 3433 }, { "epoch": 0.10548950941541486, "grad_norm": 0.315220445394516, "learning_rate": 1.9870552315960977e-05, "loss": 0.5467, "step": 3434 }, { "epoch": 0.10552022855036403, "grad_norm": 0.3071170151233673, "learning_rate": 1.987047479607553e-05, "loss": 0.5982, "step": 3435 }, { "epoch": 0.10555094768531319, "grad_norm": 0.32807835936546326, "learning_rate": 1.9870397253136898e-05, "loss": 0.6025, "step": 3436 }, { "epoch": 0.10558166682026233, "grad_norm": 0.5509961843490601, "learning_rate": 1.9870319687145264e-05, "loss": 0.5788, "step": 3437 }, { "epoch": 0.1056123859552115, "grad_norm": 0.31846436858177185, "learning_rate": 1.9870242098100815e-05, "loss": 0.5586, "step": 3438 }, { "epoch": 0.10564310509016066, "grad_norm": 0.3222493827342987, "learning_rate": 1.9870164486003717e-05, "loss": 0.5762, "step": 3439 }, { "epoch": 0.10567382422510982, "grad_norm": 0.41203203797340393, "learning_rate": 1.9870086850854167e-05, "loss": 0.5826, "step": 3440 }, { "epoch": 0.10570454336005898, "grad_norm": 0.3611575663089752, "learning_rate": 1.9870009192652337e-05, "loss": 0.6272, "step": 3441 }, { "epoch": 0.10573526249500814, "grad_norm": 0.33497780561447144, "learning_rate": 1.986993151139841e-05, "loss": 0.6324, "step": 3442 }, { "epoch": 0.1057659816299573, "grad_norm": 0.32794028520584106, "learning_rate": 1.9869853807092575e-05, "loss": 0.6106, "step": 3443 }, { "epoch": 0.10579670076490647, "grad_norm": 0.3575765788555145, "learning_rate": 1.9869776079735e-05, "loss": 0.6867, "step": 3444 }, { "epoch": 0.10582741989985563, "grad_norm": 0.3414032459259033, "learning_rate": 1.9869698329325876e-05, "loss": 0.5478, "step": 3445 }, { "epoch": 0.10585813903480477, "grad_norm": 0.36993423104286194, "learning_rate": 1.9869620555865384e-05, "loss": 0.6368, "step": 3446 }, { "epoch": 0.10588885816975394, "grad_norm": 0.3795616924762726, "learning_rate": 1.9869542759353702e-05, "loss": 0.6654, "step": 3447 }, { "epoch": 0.1059195773047031, "grad_norm": 0.40292730927467346, "learning_rate": 1.9869464939791015e-05, "loss": 0.6155, "step": 3448 }, { "epoch": 0.10595029643965226, "grad_norm": 0.3794935345649719, "learning_rate": 1.9869387097177503e-05, "loss": 0.604, "step": 3449 }, { "epoch": 0.10598101557460142, "grad_norm": 0.3322625160217285, "learning_rate": 1.9869309231513347e-05, "loss": 0.6315, "step": 3450 }, { "epoch": 0.10601173470955058, "grad_norm": 0.3488585948944092, "learning_rate": 1.986923134279873e-05, "loss": 0.569, "step": 3451 }, { "epoch": 0.10604245384449974, "grad_norm": 0.31652525067329407, "learning_rate": 1.9869153431033838e-05, "loss": 0.5263, "step": 3452 }, { "epoch": 0.1060731729794489, "grad_norm": 0.40059995651245117, "learning_rate": 1.9869075496218844e-05, "loss": 0.693, "step": 3453 }, { "epoch": 0.10610389211439805, "grad_norm": 0.360721230506897, "learning_rate": 1.9868997538353935e-05, "loss": 0.6356, "step": 3454 }, { "epoch": 0.10613461124934721, "grad_norm": 0.362016886472702, "learning_rate": 1.9868919557439295e-05, "loss": 0.5974, "step": 3455 }, { "epoch": 0.10616533038429637, "grad_norm": 0.3159463107585907, "learning_rate": 1.9868841553475103e-05, "loss": 0.5762, "step": 3456 }, { "epoch": 0.10619604951924554, "grad_norm": 0.3351120352745056, "learning_rate": 1.9868763526461542e-05, "loss": 0.6173, "step": 3457 }, { "epoch": 0.1062267686541947, "grad_norm": 0.37833738327026367, "learning_rate": 1.9868685476398798e-05, "loss": 0.6136, "step": 3458 }, { "epoch": 0.10625748778914386, "grad_norm": 0.35164880752563477, "learning_rate": 1.9868607403287047e-05, "loss": 0.5283, "step": 3459 }, { "epoch": 0.10628820692409302, "grad_norm": 0.34129154682159424, "learning_rate": 1.9868529307126473e-05, "loss": 0.6767, "step": 3460 }, { "epoch": 0.10631892605904218, "grad_norm": 0.38312384486198425, "learning_rate": 1.9868451187917265e-05, "loss": 0.6678, "step": 3461 }, { "epoch": 0.10634964519399134, "grad_norm": 0.339773029088974, "learning_rate": 1.9868373045659596e-05, "loss": 0.5806, "step": 3462 }, { "epoch": 0.10638036432894049, "grad_norm": 0.36767128109931946, "learning_rate": 1.9868294880353652e-05, "loss": 0.5957, "step": 3463 }, { "epoch": 0.10641108346388965, "grad_norm": 0.3161228597164154, "learning_rate": 1.9868216691999617e-05, "loss": 0.6111, "step": 3464 }, { "epoch": 0.10644180259883881, "grad_norm": 0.3610501289367676, "learning_rate": 1.9868138480597673e-05, "loss": 0.5523, "step": 3465 }, { "epoch": 0.10647252173378798, "grad_norm": 0.36486586928367615, "learning_rate": 1.9868060246148002e-05, "loss": 0.6227, "step": 3466 }, { "epoch": 0.10650324086873714, "grad_norm": 0.34363508224487305, "learning_rate": 1.9867981988650786e-05, "loss": 0.5495, "step": 3467 }, { "epoch": 0.1065339600036863, "grad_norm": 0.3732171952724457, "learning_rate": 1.986790370810621e-05, "loss": 0.567, "step": 3468 }, { "epoch": 0.10656467913863546, "grad_norm": 0.3281377851963043, "learning_rate": 1.9867825404514456e-05, "loss": 0.6404, "step": 3469 }, { "epoch": 0.10659539827358462, "grad_norm": 0.3282632529735565, "learning_rate": 1.9867747077875706e-05, "loss": 0.5971, "step": 3470 }, { "epoch": 0.10662611740853378, "grad_norm": 0.3331635892391205, "learning_rate": 1.9867668728190144e-05, "loss": 0.5716, "step": 3471 }, { "epoch": 0.10665683654348293, "grad_norm": 0.36009323596954346, "learning_rate": 1.9867590355457954e-05, "loss": 0.6278, "step": 3472 }, { "epoch": 0.10668755567843209, "grad_norm": 0.3530612885951996, "learning_rate": 1.9867511959679313e-05, "loss": 0.6058, "step": 3473 }, { "epoch": 0.10671827481338125, "grad_norm": 0.3470054268836975, "learning_rate": 1.9867433540854412e-05, "loss": 0.6318, "step": 3474 }, { "epoch": 0.10674899394833041, "grad_norm": 0.34973645210266113, "learning_rate": 1.9867355098983432e-05, "loss": 0.5766, "step": 3475 }, { "epoch": 0.10677971308327958, "grad_norm": 0.3279939293861389, "learning_rate": 1.9867276634066553e-05, "loss": 0.7215, "step": 3476 }, { "epoch": 0.10681043221822874, "grad_norm": 0.36407825350761414, "learning_rate": 1.986719814610396e-05, "loss": 0.558, "step": 3477 }, { "epoch": 0.1068411513531779, "grad_norm": 0.3348643183708191, "learning_rate": 1.986711963509584e-05, "loss": 0.5209, "step": 3478 }, { "epoch": 0.10687187048812706, "grad_norm": 0.818617582321167, "learning_rate": 1.986704110104237e-05, "loss": 0.5632, "step": 3479 }, { "epoch": 0.10690258962307621, "grad_norm": 0.514984130859375, "learning_rate": 1.9866962543943736e-05, "loss": 0.578, "step": 3480 }, { "epoch": 0.10693330875802537, "grad_norm": 0.31598973274230957, "learning_rate": 1.9866883963800123e-05, "loss": 0.6558, "step": 3481 }, { "epoch": 0.10696402789297453, "grad_norm": 0.33607158064842224, "learning_rate": 1.9866805360611715e-05, "loss": 0.5762, "step": 3482 }, { "epoch": 0.10699474702792369, "grad_norm": 0.31652718782424927, "learning_rate": 1.9866726734378692e-05, "loss": 0.5867, "step": 3483 }, { "epoch": 0.10702546616287285, "grad_norm": 0.3445282578468323, "learning_rate": 1.9866648085101236e-05, "loss": 0.6758, "step": 3484 }, { "epoch": 0.10705618529782202, "grad_norm": 0.3151656687259674, "learning_rate": 1.986656941277954e-05, "loss": 0.6869, "step": 3485 }, { "epoch": 0.10708690443277118, "grad_norm": 0.33601903915405273, "learning_rate": 1.986649071741378e-05, "loss": 0.5309, "step": 3486 }, { "epoch": 0.10711762356772034, "grad_norm": 0.4403543472290039, "learning_rate": 1.9866411999004143e-05, "loss": 0.634, "step": 3487 }, { "epoch": 0.1071483427026695, "grad_norm": 0.31385430693626404, "learning_rate": 1.9866333257550813e-05, "loss": 0.5622, "step": 3488 }, { "epoch": 0.10717906183761865, "grad_norm": 0.33242735266685486, "learning_rate": 1.9866254493053966e-05, "loss": 0.6139, "step": 3489 }, { "epoch": 0.10720978097256781, "grad_norm": 0.3707055449485779, "learning_rate": 1.98661757055138e-05, "loss": 0.5957, "step": 3490 }, { "epoch": 0.10724050010751697, "grad_norm": 0.3350510895252228, "learning_rate": 1.9866096894930487e-05, "loss": 0.552, "step": 3491 }, { "epoch": 0.10727121924246613, "grad_norm": 0.542445957660675, "learning_rate": 1.9866018061304218e-05, "loss": 0.6299, "step": 3492 }, { "epoch": 0.10730193837741529, "grad_norm": 0.37483662366867065, "learning_rate": 1.9865939204635173e-05, "loss": 0.6641, "step": 3493 }, { "epoch": 0.10733265751236445, "grad_norm": 0.32842907309532166, "learning_rate": 1.986586032492354e-05, "loss": 0.6102, "step": 3494 }, { "epoch": 0.10736337664731362, "grad_norm": 0.35350799560546875, "learning_rate": 1.9865781422169498e-05, "loss": 0.6602, "step": 3495 }, { "epoch": 0.10739409578226278, "grad_norm": 0.3558604121208191, "learning_rate": 1.9865702496373237e-05, "loss": 0.6078, "step": 3496 }, { "epoch": 0.10742481491721192, "grad_norm": 0.32265138626098633, "learning_rate": 1.986562354753494e-05, "loss": 0.6542, "step": 3497 }, { "epoch": 0.10745553405216109, "grad_norm": 0.3723835051059723, "learning_rate": 1.9865544575654787e-05, "loss": 0.6901, "step": 3498 }, { "epoch": 0.10748625318711025, "grad_norm": 0.364217072725296, "learning_rate": 1.9865465580732967e-05, "loss": 0.6273, "step": 3499 }, { "epoch": 0.10751697232205941, "grad_norm": 0.38424980640411377, "learning_rate": 1.9865386562769666e-05, "loss": 0.6146, "step": 3500 }, { "epoch": 0.10754769145700857, "grad_norm": 0.3182368874549866, "learning_rate": 1.9865307521765062e-05, "loss": 0.6384, "step": 3501 }, { "epoch": 0.10757841059195773, "grad_norm": 0.3424476981163025, "learning_rate": 1.9865228457719343e-05, "loss": 0.6097, "step": 3502 }, { "epoch": 0.1076091297269069, "grad_norm": 0.3679790198802948, "learning_rate": 1.9865149370632695e-05, "loss": 0.5779, "step": 3503 }, { "epoch": 0.10763984886185606, "grad_norm": 0.3697637617588043, "learning_rate": 1.9865070260505303e-05, "loss": 0.6273, "step": 3504 }, { "epoch": 0.10767056799680522, "grad_norm": 0.3749689757823944, "learning_rate": 1.986499112733735e-05, "loss": 0.5149, "step": 3505 }, { "epoch": 0.10770128713175436, "grad_norm": 0.35091137886047363, "learning_rate": 1.986491197112902e-05, "loss": 0.6481, "step": 3506 }, { "epoch": 0.10773200626670353, "grad_norm": 0.32427695393562317, "learning_rate": 1.98648327918805e-05, "loss": 0.6334, "step": 3507 }, { "epoch": 0.10776272540165269, "grad_norm": 0.3559042513370514, "learning_rate": 1.9864753589591975e-05, "loss": 0.5967, "step": 3508 }, { "epoch": 0.10779344453660185, "grad_norm": 0.3369046449661255, "learning_rate": 1.9864674364263624e-05, "loss": 0.5827, "step": 3509 }, { "epoch": 0.10782416367155101, "grad_norm": 0.3360154926776886, "learning_rate": 1.9864595115895645e-05, "loss": 0.6644, "step": 3510 }, { "epoch": 0.10785488280650017, "grad_norm": 0.3208048641681671, "learning_rate": 1.986451584448821e-05, "loss": 0.652, "step": 3511 }, { "epoch": 0.10788560194144933, "grad_norm": 0.3626754879951477, "learning_rate": 1.986443655004151e-05, "loss": 0.6007, "step": 3512 }, { "epoch": 0.1079163210763985, "grad_norm": 0.3851911723613739, "learning_rate": 1.986435723255573e-05, "loss": 0.5919, "step": 3513 }, { "epoch": 0.10794704021134764, "grad_norm": 0.391366571187973, "learning_rate": 1.9864277892031057e-05, "loss": 0.6019, "step": 3514 }, { "epoch": 0.1079777593462968, "grad_norm": 0.38969114422798157, "learning_rate": 1.9864198528467667e-05, "loss": 0.5993, "step": 3515 }, { "epoch": 0.10800847848124596, "grad_norm": 0.3346009850502014, "learning_rate": 1.986411914186576e-05, "loss": 0.6163, "step": 3516 }, { "epoch": 0.10803919761619513, "grad_norm": 0.36977264285087585, "learning_rate": 1.9864039732225514e-05, "loss": 0.5769, "step": 3517 }, { "epoch": 0.10806991675114429, "grad_norm": 0.3331477642059326, "learning_rate": 1.986396029954711e-05, "loss": 0.6374, "step": 3518 }, { "epoch": 0.10810063588609345, "grad_norm": 0.3944768011569977, "learning_rate": 1.986388084383074e-05, "loss": 0.6852, "step": 3519 }, { "epoch": 0.10813135502104261, "grad_norm": 0.31893298029899597, "learning_rate": 1.9863801365076585e-05, "loss": 0.623, "step": 3520 }, { "epoch": 0.10816207415599177, "grad_norm": 0.32927021384239197, "learning_rate": 1.9863721863284835e-05, "loss": 0.5849, "step": 3521 }, { "epoch": 0.10819279329094093, "grad_norm": 0.36919930577278137, "learning_rate": 1.9863642338455672e-05, "loss": 0.6387, "step": 3522 }, { "epoch": 0.10822351242589008, "grad_norm": 0.3350495398044586, "learning_rate": 1.9863562790589287e-05, "loss": 0.5441, "step": 3523 }, { "epoch": 0.10825423156083924, "grad_norm": 0.318958044052124, "learning_rate": 1.9863483219685856e-05, "loss": 0.5289, "step": 3524 }, { "epoch": 0.1082849506957884, "grad_norm": 0.3514644503593445, "learning_rate": 1.9863403625745578e-05, "loss": 0.623, "step": 3525 }, { "epoch": 0.10831566983073757, "grad_norm": 0.4432719945907593, "learning_rate": 1.986332400876863e-05, "loss": 0.7456, "step": 3526 }, { "epoch": 0.10834638896568673, "grad_norm": 0.35523155331611633, "learning_rate": 1.9863244368755197e-05, "loss": 0.6499, "step": 3527 }, { "epoch": 0.10837710810063589, "grad_norm": 0.38579124212265015, "learning_rate": 1.986316470570547e-05, "loss": 0.5339, "step": 3528 }, { "epoch": 0.10840782723558505, "grad_norm": 0.3619006872177124, "learning_rate": 1.9863085019619633e-05, "loss": 0.6466, "step": 3529 }, { "epoch": 0.10843854637053421, "grad_norm": 0.3567807972431183, "learning_rate": 1.9863005310497873e-05, "loss": 0.5561, "step": 3530 }, { "epoch": 0.10846926550548336, "grad_norm": 0.3405373990535736, "learning_rate": 1.9862925578340375e-05, "loss": 0.6747, "step": 3531 }, { "epoch": 0.10849998464043252, "grad_norm": 0.33904680609703064, "learning_rate": 1.9862845823147325e-05, "loss": 0.5627, "step": 3532 }, { "epoch": 0.10853070377538168, "grad_norm": 0.36760711669921875, "learning_rate": 1.9862766044918912e-05, "loss": 0.5824, "step": 3533 }, { "epoch": 0.10856142291033084, "grad_norm": 0.29830652475357056, "learning_rate": 1.9862686243655313e-05, "loss": 0.6683, "step": 3534 }, { "epoch": 0.10859214204528, "grad_norm": 0.4152737557888031, "learning_rate": 1.986260641935673e-05, "loss": 0.5788, "step": 3535 }, { "epoch": 0.10862286118022917, "grad_norm": 0.3278442919254303, "learning_rate": 1.9862526572023334e-05, "loss": 0.6371, "step": 3536 }, { "epoch": 0.10865358031517833, "grad_norm": 0.30844974517822266, "learning_rate": 1.986244670165532e-05, "loss": 0.5578, "step": 3537 }, { "epoch": 0.10868429945012749, "grad_norm": 0.32755953073501587, "learning_rate": 1.9862366808252877e-05, "loss": 0.5979, "step": 3538 }, { "epoch": 0.10871501858507665, "grad_norm": 0.36990633606910706, "learning_rate": 1.986228689181619e-05, "loss": 0.5197, "step": 3539 }, { "epoch": 0.1087457377200258, "grad_norm": 0.38616272807121277, "learning_rate": 1.9862206952345432e-05, "loss": 0.5922, "step": 3540 }, { "epoch": 0.10877645685497496, "grad_norm": 0.4480770230293274, "learning_rate": 1.986212698984081e-05, "loss": 0.5577, "step": 3541 }, { "epoch": 0.10880717598992412, "grad_norm": 0.3624168038368225, "learning_rate": 1.98620470043025e-05, "loss": 0.6729, "step": 3542 }, { "epoch": 0.10883789512487328, "grad_norm": 0.3767770826816559, "learning_rate": 1.9861966995730687e-05, "loss": 0.616, "step": 3543 }, { "epoch": 0.10886861425982244, "grad_norm": 0.38926181197166443, "learning_rate": 1.9861886964125564e-05, "loss": 0.6632, "step": 3544 }, { "epoch": 0.1088993333947716, "grad_norm": 0.3576256334781647, "learning_rate": 1.9861806909487312e-05, "loss": 0.6028, "step": 3545 }, { "epoch": 0.10893005252972077, "grad_norm": 0.3318727910518646, "learning_rate": 1.9861726831816127e-05, "loss": 0.6288, "step": 3546 }, { "epoch": 0.10896077166466993, "grad_norm": 0.36502209305763245, "learning_rate": 1.9861646731112184e-05, "loss": 0.68, "step": 3547 }, { "epoch": 0.10899149079961909, "grad_norm": 0.3323136866092682, "learning_rate": 1.986156660737568e-05, "loss": 0.6085, "step": 3548 }, { "epoch": 0.10902220993456824, "grad_norm": 0.3636404573917389, "learning_rate": 1.98614864606068e-05, "loss": 0.6791, "step": 3549 }, { "epoch": 0.1090529290695174, "grad_norm": 0.3158240020275116, "learning_rate": 1.986140629080573e-05, "loss": 0.5692, "step": 3550 }, { "epoch": 0.10908364820446656, "grad_norm": 0.32174256443977356, "learning_rate": 1.9861326097972652e-05, "loss": 0.5762, "step": 3551 }, { "epoch": 0.10911436733941572, "grad_norm": 0.3713470995426178, "learning_rate": 1.986124588210776e-05, "loss": 0.5883, "step": 3552 }, { "epoch": 0.10914508647436488, "grad_norm": 0.35260289907455444, "learning_rate": 1.9861165643211243e-05, "loss": 0.6054, "step": 3553 }, { "epoch": 0.10917580560931404, "grad_norm": 0.32799071073532104, "learning_rate": 1.986108538128328e-05, "loss": 0.6258, "step": 3554 }, { "epoch": 0.1092065247442632, "grad_norm": 0.3113437592983246, "learning_rate": 1.9861005096324067e-05, "loss": 0.541, "step": 3555 }, { "epoch": 0.10923724387921237, "grad_norm": 0.34527596831321716, "learning_rate": 1.9860924788333785e-05, "loss": 0.6811, "step": 3556 }, { "epoch": 0.10926796301416151, "grad_norm": 0.33286911249160767, "learning_rate": 1.9860844457312626e-05, "loss": 0.6088, "step": 3557 }, { "epoch": 0.10929868214911068, "grad_norm": 0.3324921131134033, "learning_rate": 1.986076410326078e-05, "loss": 0.6608, "step": 3558 }, { "epoch": 0.10932940128405984, "grad_norm": 0.3607599139213562, "learning_rate": 1.9860683726178423e-05, "loss": 0.5402, "step": 3559 }, { "epoch": 0.109360120419009, "grad_norm": 0.31252723932266235, "learning_rate": 1.9860603326065756e-05, "loss": 0.6396, "step": 3560 }, { "epoch": 0.10939083955395816, "grad_norm": 0.34485357999801636, "learning_rate": 1.986052290292296e-05, "loss": 0.5765, "step": 3561 }, { "epoch": 0.10942155868890732, "grad_norm": 0.45759209990501404, "learning_rate": 1.9860442456750223e-05, "loss": 0.5491, "step": 3562 }, { "epoch": 0.10945227782385648, "grad_norm": 0.3685763478279114, "learning_rate": 1.9860361987547735e-05, "loss": 0.5905, "step": 3563 }, { "epoch": 0.10948299695880565, "grad_norm": 0.37201449275016785, "learning_rate": 1.9860281495315684e-05, "loss": 0.5921, "step": 3564 }, { "epoch": 0.1095137160937548, "grad_norm": 0.34410780668258667, "learning_rate": 1.9860200980054257e-05, "loss": 0.5732, "step": 3565 }, { "epoch": 0.10954443522870395, "grad_norm": 0.34331223368644714, "learning_rate": 1.9860120441763638e-05, "loss": 0.6213, "step": 3566 }, { "epoch": 0.10957515436365312, "grad_norm": 0.5508716702461243, "learning_rate": 1.9860039880444023e-05, "loss": 0.571, "step": 3567 }, { "epoch": 0.10960587349860228, "grad_norm": 0.3191051781177521, "learning_rate": 1.9859959296095594e-05, "loss": 0.5455, "step": 3568 }, { "epoch": 0.10963659263355144, "grad_norm": 0.3307940363883972, "learning_rate": 1.9859878688718545e-05, "loss": 0.6314, "step": 3569 }, { "epoch": 0.1096673117685006, "grad_norm": 0.3467053174972534, "learning_rate": 1.9859798058313055e-05, "loss": 0.5978, "step": 3570 }, { "epoch": 0.10969803090344976, "grad_norm": 0.36862242221832275, "learning_rate": 1.985971740487932e-05, "loss": 0.5999, "step": 3571 }, { "epoch": 0.10972875003839892, "grad_norm": 0.600034773349762, "learning_rate": 1.985963672841753e-05, "loss": 0.6125, "step": 3572 }, { "epoch": 0.10975946917334808, "grad_norm": 0.3595225214958191, "learning_rate": 1.9859556028927868e-05, "loss": 0.6143, "step": 3573 }, { "epoch": 0.10979018830829723, "grad_norm": 0.3493101894855499, "learning_rate": 1.9859475306410523e-05, "loss": 0.5708, "step": 3574 }, { "epoch": 0.1098209074432464, "grad_norm": 0.3422781825065613, "learning_rate": 1.9859394560865687e-05, "loss": 0.6512, "step": 3575 }, { "epoch": 0.10985162657819555, "grad_norm": 0.3491266965866089, "learning_rate": 1.9859313792293545e-05, "loss": 0.677, "step": 3576 }, { "epoch": 0.10988234571314472, "grad_norm": 0.3646570146083832, "learning_rate": 1.9859233000694287e-05, "loss": 0.6677, "step": 3577 }, { "epoch": 0.10991306484809388, "grad_norm": 0.3285301625728607, "learning_rate": 1.98591521860681e-05, "loss": 0.5922, "step": 3578 }, { "epoch": 0.10994378398304304, "grad_norm": 0.32042428851127625, "learning_rate": 1.9859071348415177e-05, "loss": 0.6228, "step": 3579 }, { "epoch": 0.1099745031179922, "grad_norm": 0.4295864999294281, "learning_rate": 1.9858990487735703e-05, "loss": 0.6637, "step": 3580 }, { "epoch": 0.11000522225294136, "grad_norm": 0.31378769874572754, "learning_rate": 1.985890960402987e-05, "loss": 0.6695, "step": 3581 }, { "epoch": 0.11003594138789052, "grad_norm": 0.3562178313732147, "learning_rate": 1.985882869729786e-05, "loss": 0.6239, "step": 3582 }, { "epoch": 0.11006666052283967, "grad_norm": 0.3252597153186798, "learning_rate": 1.9858747767539873e-05, "loss": 0.6176, "step": 3583 }, { "epoch": 0.11009737965778883, "grad_norm": 0.40002956986427307, "learning_rate": 1.985866681475609e-05, "loss": 0.5504, "step": 3584 }, { "epoch": 0.110128098792738, "grad_norm": 0.45329511165618896, "learning_rate": 1.9858585838946705e-05, "loss": 0.5736, "step": 3585 }, { "epoch": 0.11015881792768716, "grad_norm": 0.8407210111618042, "learning_rate": 1.98585048401119e-05, "loss": 0.5832, "step": 3586 }, { "epoch": 0.11018953706263632, "grad_norm": 0.3354388475418091, "learning_rate": 1.985842381825187e-05, "loss": 0.6149, "step": 3587 }, { "epoch": 0.11022025619758548, "grad_norm": 0.32311496138572693, "learning_rate": 1.98583427733668e-05, "loss": 0.571, "step": 3588 }, { "epoch": 0.11025097533253464, "grad_norm": 0.35623839497566223, "learning_rate": 1.9858261705456888e-05, "loss": 0.5491, "step": 3589 }, { "epoch": 0.1102816944674838, "grad_norm": 0.3393486440181732, "learning_rate": 1.9858180614522313e-05, "loss": 0.6303, "step": 3590 }, { "epoch": 0.11031241360243295, "grad_norm": 0.3281818628311157, "learning_rate": 1.9858099500563268e-05, "loss": 0.5975, "step": 3591 }, { "epoch": 0.11034313273738211, "grad_norm": 0.3343569338321686, "learning_rate": 1.9858018363579946e-05, "loss": 0.5665, "step": 3592 }, { "epoch": 0.11037385187233127, "grad_norm": 0.37314268946647644, "learning_rate": 1.9857937203572534e-05, "loss": 0.5653, "step": 3593 }, { "epoch": 0.11040457100728043, "grad_norm": 0.33678436279296875, "learning_rate": 1.9857856020541218e-05, "loss": 0.575, "step": 3594 }, { "epoch": 0.1104352901422296, "grad_norm": 0.3310183584690094, "learning_rate": 1.985777481448619e-05, "loss": 0.6562, "step": 3595 }, { "epoch": 0.11046600927717876, "grad_norm": 0.35815104842185974, "learning_rate": 1.9857693585407643e-05, "loss": 0.6543, "step": 3596 }, { "epoch": 0.11049672841212792, "grad_norm": 0.3451935946941376, "learning_rate": 1.9857612333305764e-05, "loss": 0.6392, "step": 3597 }, { "epoch": 0.11052744754707708, "grad_norm": 0.3795121908187866, "learning_rate": 1.9857531058180744e-05, "loss": 0.6157, "step": 3598 }, { "epoch": 0.11055816668202624, "grad_norm": 0.44748643040657043, "learning_rate": 1.9857449760032768e-05, "loss": 0.6165, "step": 3599 }, { "epoch": 0.11058888581697539, "grad_norm": 0.3774811029434204, "learning_rate": 1.9857368438862034e-05, "loss": 0.592, "step": 3600 }, { "epoch": 0.11061960495192455, "grad_norm": 0.7576661109924316, "learning_rate": 1.9857287094668727e-05, "loss": 0.6334, "step": 3601 }, { "epoch": 0.11065032408687371, "grad_norm": 0.4565783739089966, "learning_rate": 1.9857205727453034e-05, "loss": 0.6089, "step": 3602 }, { "epoch": 0.11068104322182287, "grad_norm": 0.33544954657554626, "learning_rate": 1.9857124337215152e-05, "loss": 0.5833, "step": 3603 }, { "epoch": 0.11071176235677203, "grad_norm": 0.34579914808273315, "learning_rate": 1.9857042923955267e-05, "loss": 0.5423, "step": 3604 }, { "epoch": 0.1107424814917212, "grad_norm": 0.33484408259391785, "learning_rate": 1.985696148767357e-05, "loss": 0.6156, "step": 3605 }, { "epoch": 0.11077320062667036, "grad_norm": 0.35987773537635803, "learning_rate": 1.9856880028370252e-05, "loss": 0.5401, "step": 3606 }, { "epoch": 0.11080391976161952, "grad_norm": 0.45957496762275696, "learning_rate": 1.9856798546045502e-05, "loss": 0.633, "step": 3607 }, { "epoch": 0.11083463889656867, "grad_norm": 0.32677319645881653, "learning_rate": 1.985671704069951e-05, "loss": 0.5728, "step": 3608 }, { "epoch": 0.11086535803151783, "grad_norm": 0.3927885890007019, "learning_rate": 1.9856635512332464e-05, "loss": 0.5766, "step": 3609 }, { "epoch": 0.11089607716646699, "grad_norm": 0.3126145005226135, "learning_rate": 1.985655396094456e-05, "loss": 0.6341, "step": 3610 }, { "epoch": 0.11092679630141615, "grad_norm": 0.4004031717777252, "learning_rate": 1.9856472386535986e-05, "loss": 0.5509, "step": 3611 }, { "epoch": 0.11095751543636531, "grad_norm": 0.3809654414653778, "learning_rate": 1.9856390789106933e-05, "loss": 0.6373, "step": 3612 }, { "epoch": 0.11098823457131447, "grad_norm": 0.3488551080226898, "learning_rate": 1.985630916865759e-05, "loss": 0.5984, "step": 3613 }, { "epoch": 0.11101895370626363, "grad_norm": 0.32215163111686707, "learning_rate": 1.9856227525188148e-05, "loss": 0.6228, "step": 3614 }, { "epoch": 0.1110496728412128, "grad_norm": 0.3190827965736389, "learning_rate": 1.9856145858698798e-05, "loss": 0.5187, "step": 3615 }, { "epoch": 0.11108039197616196, "grad_norm": 0.34365713596343994, "learning_rate": 1.985606416918973e-05, "loss": 0.4838, "step": 3616 }, { "epoch": 0.1111111111111111, "grad_norm": 0.3539183735847473, "learning_rate": 1.985598245666114e-05, "loss": 0.6466, "step": 3617 }, { "epoch": 0.11114183024606027, "grad_norm": 0.35482561588287354, "learning_rate": 1.9855900721113213e-05, "loss": 0.5995, "step": 3618 }, { "epoch": 0.11117254938100943, "grad_norm": 0.3133988678455353, "learning_rate": 1.985581896254614e-05, "loss": 0.5521, "step": 3619 }, { "epoch": 0.11120326851595859, "grad_norm": 0.40497711300849915, "learning_rate": 1.9855737180960114e-05, "loss": 0.5745, "step": 3620 }, { "epoch": 0.11123398765090775, "grad_norm": 0.4514729082584381, "learning_rate": 1.9855655376355326e-05, "loss": 0.5743, "step": 3621 }, { "epoch": 0.11126470678585691, "grad_norm": 0.3270755410194397, "learning_rate": 1.9855573548731968e-05, "loss": 0.6327, "step": 3622 }, { "epoch": 0.11129542592080607, "grad_norm": 0.29781270027160645, "learning_rate": 1.985549169809023e-05, "loss": 0.6158, "step": 3623 }, { "epoch": 0.11132614505575524, "grad_norm": 0.387952983379364, "learning_rate": 1.9855409824430298e-05, "loss": 0.5433, "step": 3624 }, { "epoch": 0.1113568641907044, "grad_norm": 0.37318992614746094, "learning_rate": 1.985532792775237e-05, "loss": 0.6663, "step": 3625 }, { "epoch": 0.11138758332565354, "grad_norm": 0.29879212379455566, "learning_rate": 1.9855246008056637e-05, "loss": 0.4902, "step": 3626 }, { "epoch": 0.1114183024606027, "grad_norm": 0.5849641561508179, "learning_rate": 1.9855164065343287e-05, "loss": 0.5597, "step": 3627 }, { "epoch": 0.11144902159555187, "grad_norm": 0.3768247961997986, "learning_rate": 1.9855082099612514e-05, "loss": 0.6423, "step": 3628 }, { "epoch": 0.11147974073050103, "grad_norm": 0.3498307168483734, "learning_rate": 1.985500011086451e-05, "loss": 0.6133, "step": 3629 }, { "epoch": 0.11151045986545019, "grad_norm": 0.29821786284446716, "learning_rate": 1.985491809909946e-05, "loss": 0.5848, "step": 3630 }, { "epoch": 0.11154117900039935, "grad_norm": 0.34664052724838257, "learning_rate": 1.9854836064317567e-05, "loss": 0.6322, "step": 3631 }, { "epoch": 0.11157189813534851, "grad_norm": 0.4375780522823334, "learning_rate": 1.9854754006519012e-05, "loss": 0.6018, "step": 3632 }, { "epoch": 0.11160261727029767, "grad_norm": 0.31445521116256714, "learning_rate": 1.985467192570399e-05, "loss": 0.6104, "step": 3633 }, { "epoch": 0.11163333640524682, "grad_norm": 0.3879116177558899, "learning_rate": 1.98545898218727e-05, "loss": 0.5503, "step": 3634 }, { "epoch": 0.11166405554019598, "grad_norm": 0.36149361729621887, "learning_rate": 1.985450769502532e-05, "loss": 0.7001, "step": 3635 }, { "epoch": 0.11169477467514514, "grad_norm": 0.3327271640300751, "learning_rate": 1.985442554516205e-05, "loss": 0.6133, "step": 3636 }, { "epoch": 0.1117254938100943, "grad_norm": 0.304849773645401, "learning_rate": 1.9854343372283084e-05, "loss": 0.5727, "step": 3637 }, { "epoch": 0.11175621294504347, "grad_norm": 0.3618071675300598, "learning_rate": 1.9854261176388608e-05, "loss": 0.6015, "step": 3638 }, { "epoch": 0.11178693207999263, "grad_norm": 0.3424777090549469, "learning_rate": 1.985417895747882e-05, "loss": 0.601, "step": 3639 }, { "epoch": 0.11181765121494179, "grad_norm": 0.3185243308544159, "learning_rate": 1.9854096715553907e-05, "loss": 0.5732, "step": 3640 }, { "epoch": 0.11184837034989095, "grad_norm": 0.3296416103839874, "learning_rate": 1.9854014450614062e-05, "loss": 0.6338, "step": 3641 }, { "epoch": 0.11187908948484011, "grad_norm": 0.3447137176990509, "learning_rate": 1.985393216265948e-05, "loss": 0.6074, "step": 3642 }, { "epoch": 0.11190980861978926, "grad_norm": 0.876660943031311, "learning_rate": 1.985384985169035e-05, "loss": 0.7593, "step": 3643 }, { "epoch": 0.11194052775473842, "grad_norm": 0.3131575286388397, "learning_rate": 1.9853767517706865e-05, "loss": 0.5763, "step": 3644 }, { "epoch": 0.11197124688968758, "grad_norm": 0.4026492238044739, "learning_rate": 1.985368516070922e-05, "loss": 0.6161, "step": 3645 }, { "epoch": 0.11200196602463675, "grad_norm": 0.4172850549221039, "learning_rate": 1.9853602780697604e-05, "loss": 0.5094, "step": 3646 }, { "epoch": 0.1120326851595859, "grad_norm": 0.48110243678092957, "learning_rate": 1.9853520377672208e-05, "loss": 0.6445, "step": 3647 }, { "epoch": 0.11206340429453507, "grad_norm": 0.3275093734264374, "learning_rate": 1.9853437951633232e-05, "loss": 0.5757, "step": 3648 }, { "epoch": 0.11209412342948423, "grad_norm": 0.3749914765357971, "learning_rate": 1.985335550258086e-05, "loss": 0.5821, "step": 3649 }, { "epoch": 0.11212484256443339, "grad_norm": 0.30625709891319275, "learning_rate": 1.9853273030515288e-05, "loss": 0.5455, "step": 3650 }, { "epoch": 0.11215556169938254, "grad_norm": 0.3580871820449829, "learning_rate": 1.985319053543671e-05, "loss": 0.6422, "step": 3651 }, { "epoch": 0.1121862808343317, "grad_norm": 0.31980985403060913, "learning_rate": 1.9853108017345314e-05, "loss": 0.5981, "step": 3652 }, { "epoch": 0.11221699996928086, "grad_norm": 0.3333090543746948, "learning_rate": 1.9853025476241303e-05, "loss": 0.6169, "step": 3653 }, { "epoch": 0.11224771910423002, "grad_norm": 0.43548858165740967, "learning_rate": 1.9852942912124857e-05, "loss": 0.5449, "step": 3654 }, { "epoch": 0.11227843823917918, "grad_norm": 0.32213085889816284, "learning_rate": 1.9852860324996176e-05, "loss": 0.5257, "step": 3655 }, { "epoch": 0.11230915737412835, "grad_norm": 0.35707730054855347, "learning_rate": 1.9852777714855453e-05, "loss": 0.5594, "step": 3656 }, { "epoch": 0.11233987650907751, "grad_norm": 0.36646443605422974, "learning_rate": 1.985269508170288e-05, "loss": 0.6604, "step": 3657 }, { "epoch": 0.11237059564402667, "grad_norm": 0.3372706472873688, "learning_rate": 1.9852612425538646e-05, "loss": 0.6402, "step": 3658 }, { "epoch": 0.11240131477897583, "grad_norm": 0.3468382656574249, "learning_rate": 1.985252974636295e-05, "loss": 0.6137, "step": 3659 }, { "epoch": 0.11243203391392498, "grad_norm": 0.34987303614616394, "learning_rate": 1.9852447044175982e-05, "loss": 0.6234, "step": 3660 }, { "epoch": 0.11246275304887414, "grad_norm": 0.3773833215236664, "learning_rate": 1.9852364318977935e-05, "loss": 0.6917, "step": 3661 }, { "epoch": 0.1124934721838233, "grad_norm": 0.3723367750644684, "learning_rate": 1.985228157076901e-05, "loss": 0.6675, "step": 3662 }, { "epoch": 0.11252419131877246, "grad_norm": 0.3827250599861145, "learning_rate": 1.9852198799549382e-05, "loss": 0.5986, "step": 3663 }, { "epoch": 0.11255491045372162, "grad_norm": 0.3177677392959595, "learning_rate": 1.985211600531926e-05, "loss": 0.4962, "step": 3664 }, { "epoch": 0.11258562958867079, "grad_norm": 0.34122058749198914, "learning_rate": 1.9852033188078837e-05, "loss": 0.7325, "step": 3665 }, { "epoch": 0.11261634872361995, "grad_norm": 0.45283445715904236, "learning_rate": 1.98519503478283e-05, "loss": 0.6029, "step": 3666 }, { "epoch": 0.11264706785856911, "grad_norm": 0.32625705003738403, "learning_rate": 1.9851867484567842e-05, "loss": 0.5449, "step": 3667 }, { "epoch": 0.11267778699351826, "grad_norm": 0.4151667654514313, "learning_rate": 1.9851784598297665e-05, "loss": 0.61, "step": 3668 }, { "epoch": 0.11270850612846742, "grad_norm": 0.351315975189209, "learning_rate": 1.9851701689017956e-05, "loss": 0.5806, "step": 3669 }, { "epoch": 0.11273922526341658, "grad_norm": 0.3853437304496765, "learning_rate": 1.9851618756728905e-05, "loss": 0.5871, "step": 3670 }, { "epoch": 0.11276994439836574, "grad_norm": 0.3557128608226776, "learning_rate": 1.9851535801430713e-05, "loss": 0.6086, "step": 3671 }, { "epoch": 0.1128006635333149, "grad_norm": 0.3255499005317688, "learning_rate": 1.9851452823123572e-05, "loss": 0.5972, "step": 3672 }, { "epoch": 0.11283138266826406, "grad_norm": 0.3665372133255005, "learning_rate": 1.9851369821807672e-05, "loss": 0.6552, "step": 3673 }, { "epoch": 0.11286210180321322, "grad_norm": 0.3607790768146515, "learning_rate": 1.9851286797483213e-05, "loss": 0.7137, "step": 3674 }, { "epoch": 0.11289282093816239, "grad_norm": 0.3338436484336853, "learning_rate": 1.9851203750150385e-05, "loss": 0.5541, "step": 3675 }, { "epoch": 0.11292354007311155, "grad_norm": 0.3600040376186371, "learning_rate": 1.9851120679809384e-05, "loss": 0.5497, "step": 3676 }, { "epoch": 0.1129542592080607, "grad_norm": 0.3450561761856079, "learning_rate": 1.98510375864604e-05, "loss": 0.5412, "step": 3677 }, { "epoch": 0.11298497834300986, "grad_norm": 0.34481704235076904, "learning_rate": 1.985095447010363e-05, "loss": 0.572, "step": 3678 }, { "epoch": 0.11301569747795902, "grad_norm": 0.332034170627594, "learning_rate": 1.9850871330739268e-05, "loss": 0.5842, "step": 3679 }, { "epoch": 0.11304641661290818, "grad_norm": 0.3462934195995331, "learning_rate": 1.985078816836751e-05, "loss": 0.624, "step": 3680 }, { "epoch": 0.11307713574785734, "grad_norm": 0.31092169880867004, "learning_rate": 1.9850704982988546e-05, "loss": 0.6484, "step": 3681 }, { "epoch": 0.1131078548828065, "grad_norm": 0.2953418791294098, "learning_rate": 1.9850621774602574e-05, "loss": 0.517, "step": 3682 }, { "epoch": 0.11313857401775566, "grad_norm": 0.3259090185165405, "learning_rate": 1.9850538543209784e-05, "loss": 0.6249, "step": 3683 }, { "epoch": 0.11316929315270483, "grad_norm": 0.3289620876312256, "learning_rate": 1.9850455288810377e-05, "loss": 0.5804, "step": 3684 }, { "epoch": 0.11320001228765399, "grad_norm": 0.3384961187839508, "learning_rate": 1.9850372011404542e-05, "loss": 0.6028, "step": 3685 }, { "epoch": 0.11323073142260313, "grad_norm": 0.3455812335014343, "learning_rate": 1.9850288710992476e-05, "loss": 0.642, "step": 3686 }, { "epoch": 0.1132614505575523, "grad_norm": 0.3522243797779083, "learning_rate": 1.985020538757437e-05, "loss": 0.6369, "step": 3687 }, { "epoch": 0.11329216969250146, "grad_norm": 0.36047083139419556, "learning_rate": 1.9850122041150424e-05, "loss": 0.6434, "step": 3688 }, { "epoch": 0.11332288882745062, "grad_norm": 0.32674795389175415, "learning_rate": 1.985003867172083e-05, "loss": 0.5916, "step": 3689 }, { "epoch": 0.11335360796239978, "grad_norm": 0.3712977170944214, "learning_rate": 1.9849955279285785e-05, "loss": 0.6613, "step": 3690 }, { "epoch": 0.11338432709734894, "grad_norm": 0.3866407573223114, "learning_rate": 1.984987186384548e-05, "loss": 0.6041, "step": 3691 }, { "epoch": 0.1134150462322981, "grad_norm": 0.3390164077281952, "learning_rate": 1.9849788425400107e-05, "loss": 0.6058, "step": 3692 }, { "epoch": 0.11344576536724726, "grad_norm": 0.35366421937942505, "learning_rate": 1.984970496394987e-05, "loss": 0.6616, "step": 3693 }, { "epoch": 0.11347648450219641, "grad_norm": 0.3490495979785919, "learning_rate": 1.9849621479494955e-05, "loss": 0.6785, "step": 3694 }, { "epoch": 0.11350720363714557, "grad_norm": 0.5634581446647644, "learning_rate": 1.9849537972035567e-05, "loss": 0.5639, "step": 3695 }, { "epoch": 0.11353792277209473, "grad_norm": 0.41927042603492737, "learning_rate": 1.984945444157189e-05, "loss": 0.6334, "step": 3696 }, { "epoch": 0.1135686419070439, "grad_norm": 0.41765859723091125, "learning_rate": 1.9849370888104126e-05, "loss": 0.5963, "step": 3697 }, { "epoch": 0.11359936104199306, "grad_norm": 0.4777277112007141, "learning_rate": 1.984928731163247e-05, "loss": 0.5959, "step": 3698 }, { "epoch": 0.11363008017694222, "grad_norm": 0.3572618365287781, "learning_rate": 1.9849203712157114e-05, "loss": 0.5823, "step": 3699 }, { "epoch": 0.11366079931189138, "grad_norm": 0.32865867018699646, "learning_rate": 1.9849120089678252e-05, "loss": 0.5877, "step": 3700 }, { "epoch": 0.11369151844684054, "grad_norm": 0.31242507696151733, "learning_rate": 1.984903644419609e-05, "loss": 0.5413, "step": 3701 }, { "epoch": 0.1137222375817897, "grad_norm": 0.32249462604522705, "learning_rate": 1.984895277571081e-05, "loss": 0.6699, "step": 3702 }, { "epoch": 0.11375295671673885, "grad_norm": 0.3626345694065094, "learning_rate": 1.984886908422261e-05, "loss": 0.6249, "step": 3703 }, { "epoch": 0.11378367585168801, "grad_norm": 0.38088729977607727, "learning_rate": 1.9848785369731695e-05, "loss": 0.6256, "step": 3704 }, { "epoch": 0.11381439498663717, "grad_norm": 0.3482777774333954, "learning_rate": 1.984870163223825e-05, "loss": 0.5718, "step": 3705 }, { "epoch": 0.11384511412158634, "grad_norm": 0.3128650188446045, "learning_rate": 1.984861787174248e-05, "loss": 0.5532, "step": 3706 }, { "epoch": 0.1138758332565355, "grad_norm": 0.32504501938819885, "learning_rate": 1.9848534088244568e-05, "loss": 0.6289, "step": 3707 }, { "epoch": 0.11390655239148466, "grad_norm": 0.3439810276031494, "learning_rate": 1.984845028174472e-05, "loss": 0.6143, "step": 3708 }, { "epoch": 0.11393727152643382, "grad_norm": 0.31374475359916687, "learning_rate": 1.984836645224313e-05, "loss": 0.5006, "step": 3709 }, { "epoch": 0.11396799066138298, "grad_norm": 0.3303595781326294, "learning_rate": 1.9848282599739986e-05, "loss": 0.6244, "step": 3710 }, { "epoch": 0.11399870979633213, "grad_norm": 0.3594200313091278, "learning_rate": 1.9848198724235496e-05, "loss": 0.6323, "step": 3711 }, { "epoch": 0.11402942893128129, "grad_norm": 0.3224477767944336, "learning_rate": 1.984811482572985e-05, "loss": 0.6207, "step": 3712 }, { "epoch": 0.11406014806623045, "grad_norm": 0.3226579427719116, "learning_rate": 1.9848030904223242e-05, "loss": 0.5579, "step": 3713 }, { "epoch": 0.11409086720117961, "grad_norm": 0.3188364803791046, "learning_rate": 1.984794695971587e-05, "loss": 0.5743, "step": 3714 }, { "epoch": 0.11412158633612877, "grad_norm": 0.3319861888885498, "learning_rate": 1.9847862992207933e-05, "loss": 0.5972, "step": 3715 }, { "epoch": 0.11415230547107794, "grad_norm": 0.32829591631889343, "learning_rate": 1.984777900169962e-05, "loss": 0.6356, "step": 3716 }, { "epoch": 0.1141830246060271, "grad_norm": 0.34315094351768494, "learning_rate": 1.9847694988191137e-05, "loss": 0.584, "step": 3717 }, { "epoch": 0.11421374374097626, "grad_norm": 0.33385223150253296, "learning_rate": 1.984761095168267e-05, "loss": 0.6258, "step": 3718 }, { "epoch": 0.11424446287592542, "grad_norm": 0.34356430172920227, "learning_rate": 1.9847526892174423e-05, "loss": 0.7014, "step": 3719 }, { "epoch": 0.11427518201087457, "grad_norm": 0.32874423265457153, "learning_rate": 1.9847442809666587e-05, "loss": 0.5858, "step": 3720 }, { "epoch": 0.11430590114582373, "grad_norm": 0.33098146319389343, "learning_rate": 1.984735870415936e-05, "loss": 0.5838, "step": 3721 }, { "epoch": 0.11433662028077289, "grad_norm": 0.43197765946388245, "learning_rate": 1.984727457565294e-05, "loss": 0.6773, "step": 3722 }, { "epoch": 0.11436733941572205, "grad_norm": 0.3538738787174225, "learning_rate": 1.984719042414752e-05, "loss": 0.5701, "step": 3723 }, { "epoch": 0.11439805855067121, "grad_norm": 0.3129217028617859, "learning_rate": 1.9847106249643305e-05, "loss": 0.5988, "step": 3724 }, { "epoch": 0.11442877768562038, "grad_norm": 0.37899449467658997, "learning_rate": 1.984702205214048e-05, "loss": 0.6804, "step": 3725 }, { "epoch": 0.11445949682056954, "grad_norm": 0.3788180947303772, "learning_rate": 1.984693783163925e-05, "loss": 0.5832, "step": 3726 }, { "epoch": 0.1144902159555187, "grad_norm": 0.3483436107635498, "learning_rate": 1.9846853588139806e-05, "loss": 0.6779, "step": 3727 }, { "epoch": 0.11452093509046785, "grad_norm": 0.5696409344673157, "learning_rate": 1.984676932164235e-05, "loss": 0.5686, "step": 3728 }, { "epoch": 0.11455165422541701, "grad_norm": 0.3305493891239166, "learning_rate": 1.984668503214708e-05, "loss": 0.5898, "step": 3729 }, { "epoch": 0.11458237336036617, "grad_norm": 0.3107776343822479, "learning_rate": 1.9846600719654185e-05, "loss": 0.6256, "step": 3730 }, { "epoch": 0.11461309249531533, "grad_norm": 0.3834291696548462, "learning_rate": 1.9846516384163866e-05, "loss": 0.7225, "step": 3731 }, { "epoch": 0.11464381163026449, "grad_norm": 0.3137303292751312, "learning_rate": 1.9846432025676324e-05, "loss": 0.5782, "step": 3732 }, { "epoch": 0.11467453076521365, "grad_norm": 0.29800671339035034, "learning_rate": 1.984634764419175e-05, "loss": 0.5595, "step": 3733 }, { "epoch": 0.11470524990016281, "grad_norm": 0.36078667640686035, "learning_rate": 1.9846263239710343e-05, "loss": 0.59, "step": 3734 }, { "epoch": 0.11473596903511198, "grad_norm": 0.34919342398643494, "learning_rate": 1.9846178812232303e-05, "loss": 0.6349, "step": 3735 }, { "epoch": 0.11476668817006114, "grad_norm": 0.4742375910282135, "learning_rate": 1.984609436175782e-05, "loss": 0.5852, "step": 3736 }, { "epoch": 0.11479740730501029, "grad_norm": 0.4074249267578125, "learning_rate": 1.98460098882871e-05, "loss": 0.6288, "step": 3737 }, { "epoch": 0.11482812643995945, "grad_norm": 0.36118096113204956, "learning_rate": 1.9845925391820336e-05, "loss": 0.576, "step": 3738 }, { "epoch": 0.11485884557490861, "grad_norm": 0.37920767068862915, "learning_rate": 1.9845840872357723e-05, "loss": 0.5994, "step": 3739 }, { "epoch": 0.11488956470985777, "grad_norm": 0.3272668421268463, "learning_rate": 1.9845756329899464e-05, "loss": 0.6378, "step": 3740 }, { "epoch": 0.11492028384480693, "grad_norm": 0.30889472365379333, "learning_rate": 1.984567176444575e-05, "loss": 0.5942, "step": 3741 }, { "epoch": 0.11495100297975609, "grad_norm": 0.3245025873184204, "learning_rate": 1.9845587175996787e-05, "loss": 0.6037, "step": 3742 }, { "epoch": 0.11498172211470525, "grad_norm": 0.3550287187099457, "learning_rate": 1.9845502564552762e-05, "loss": 0.5402, "step": 3743 }, { "epoch": 0.11501244124965442, "grad_norm": 0.32287874817848206, "learning_rate": 1.984541793011388e-05, "loss": 0.5974, "step": 3744 }, { "epoch": 0.11504316038460356, "grad_norm": 0.3560314476490021, "learning_rate": 1.984533327268034e-05, "loss": 0.6511, "step": 3745 }, { "epoch": 0.11507387951955272, "grad_norm": 2.647883892059326, "learning_rate": 1.9845248592252334e-05, "loss": 0.6932, "step": 3746 }, { "epoch": 0.11510459865450189, "grad_norm": 0.4019797444343567, "learning_rate": 1.9845163888830062e-05, "loss": 0.4805, "step": 3747 }, { "epoch": 0.11513531778945105, "grad_norm": 0.3559793531894684, "learning_rate": 1.9845079162413723e-05, "loss": 0.6695, "step": 3748 }, { "epoch": 0.11516603692440021, "grad_norm": 0.3049938678741455, "learning_rate": 1.9844994413003515e-05, "loss": 0.6378, "step": 3749 }, { "epoch": 0.11519675605934937, "grad_norm": 0.3690749406814575, "learning_rate": 1.9844909640599632e-05, "loss": 0.6406, "step": 3750 }, { "epoch": 0.11522747519429853, "grad_norm": 0.3373993933200836, "learning_rate": 1.9844824845202278e-05, "loss": 0.6461, "step": 3751 }, { "epoch": 0.11525819432924769, "grad_norm": 0.3101414442062378, "learning_rate": 1.9844740026811645e-05, "loss": 0.6391, "step": 3752 }, { "epoch": 0.11528891346419685, "grad_norm": 0.31894198060035706, "learning_rate": 1.9844655185427937e-05, "loss": 0.5255, "step": 3753 }, { "epoch": 0.115319632599146, "grad_norm": 0.3327147364616394, "learning_rate": 1.9844570321051346e-05, "loss": 0.5781, "step": 3754 }, { "epoch": 0.11535035173409516, "grad_norm": 0.3393171429634094, "learning_rate": 1.9844485433682077e-05, "loss": 0.6234, "step": 3755 }, { "epoch": 0.11538107086904432, "grad_norm": 0.4035336971282959, "learning_rate": 1.9844400523320325e-05, "loss": 0.7211, "step": 3756 }, { "epoch": 0.11541179000399349, "grad_norm": 0.3127652108669281, "learning_rate": 1.9844315589966285e-05, "loss": 0.6024, "step": 3757 }, { "epoch": 0.11544250913894265, "grad_norm": 0.3839256167411804, "learning_rate": 1.9844230633620158e-05, "loss": 0.6948, "step": 3758 }, { "epoch": 0.11547322827389181, "grad_norm": 0.3628309667110443, "learning_rate": 1.984414565428215e-05, "loss": 0.7031, "step": 3759 }, { "epoch": 0.11550394740884097, "grad_norm": 0.3668003976345062, "learning_rate": 1.9844060651952443e-05, "loss": 0.5777, "step": 3760 }, { "epoch": 0.11553466654379013, "grad_norm": 0.4155294895172119, "learning_rate": 1.9843975626631247e-05, "loss": 0.5939, "step": 3761 }, { "epoch": 0.1155653856787393, "grad_norm": 0.33983907103538513, "learning_rate": 1.9843890578318762e-05, "loss": 0.6047, "step": 3762 }, { "epoch": 0.11559610481368844, "grad_norm": 0.37674611806869507, "learning_rate": 1.984380550701518e-05, "loss": 0.5999, "step": 3763 }, { "epoch": 0.1156268239486376, "grad_norm": 0.30768948793411255, "learning_rate": 1.9843720412720706e-05, "loss": 0.5912, "step": 3764 }, { "epoch": 0.11565754308358676, "grad_norm": 0.3394695818424225, "learning_rate": 1.9843635295435534e-05, "loss": 0.5756, "step": 3765 }, { "epoch": 0.11568826221853593, "grad_norm": 0.35003724694252014, "learning_rate": 1.9843550155159862e-05, "loss": 0.6306, "step": 3766 }, { "epoch": 0.11571898135348509, "grad_norm": 0.32819175720214844, "learning_rate": 1.9843464991893896e-05, "loss": 0.5785, "step": 3767 }, { "epoch": 0.11574970048843425, "grad_norm": 0.3929077982902527, "learning_rate": 1.9843379805637827e-05, "loss": 0.5988, "step": 3768 }, { "epoch": 0.11578041962338341, "grad_norm": 0.3248562216758728, "learning_rate": 1.9843294596391857e-05, "loss": 0.4749, "step": 3769 }, { "epoch": 0.11581113875833257, "grad_norm": 0.4304574131965637, "learning_rate": 1.9843209364156182e-05, "loss": 0.5714, "step": 3770 }, { "epoch": 0.11584185789328172, "grad_norm": 0.34659627079963684, "learning_rate": 1.9843124108931007e-05, "loss": 0.4858, "step": 3771 }, { "epoch": 0.11587257702823088, "grad_norm": 0.333219975233078, "learning_rate": 1.984303883071653e-05, "loss": 0.5371, "step": 3772 }, { "epoch": 0.11590329616318004, "grad_norm": 0.34022560715675354, "learning_rate": 1.9842953529512948e-05, "loss": 0.6022, "step": 3773 }, { "epoch": 0.1159340152981292, "grad_norm": 0.3372803032398224, "learning_rate": 1.984286820532046e-05, "loss": 0.5485, "step": 3774 }, { "epoch": 0.11596473443307836, "grad_norm": 0.42921972274780273, "learning_rate": 1.9842782858139264e-05, "loss": 0.6161, "step": 3775 }, { "epoch": 0.11599545356802753, "grad_norm": 0.353121280670166, "learning_rate": 1.9842697487969563e-05, "loss": 0.6383, "step": 3776 }, { "epoch": 0.11602617270297669, "grad_norm": 0.3229205906391144, "learning_rate": 1.9842612094811554e-05, "loss": 0.6034, "step": 3777 }, { "epoch": 0.11605689183792585, "grad_norm": 0.3326924443244934, "learning_rate": 1.9842526678665436e-05, "loss": 0.6238, "step": 3778 }, { "epoch": 0.11608761097287501, "grad_norm": 0.3515435755252838, "learning_rate": 1.984244123953141e-05, "loss": 0.6342, "step": 3779 }, { "epoch": 0.11611833010782416, "grad_norm": 0.322115421295166, "learning_rate": 1.9842355777409677e-05, "loss": 0.5998, "step": 3780 }, { "epoch": 0.11614904924277332, "grad_norm": 0.31537431478500366, "learning_rate": 1.984227029230043e-05, "loss": 0.5922, "step": 3781 }, { "epoch": 0.11617976837772248, "grad_norm": 0.4828987121582031, "learning_rate": 1.984218478420388e-05, "loss": 0.6102, "step": 3782 }, { "epoch": 0.11621048751267164, "grad_norm": 0.35756558179855347, "learning_rate": 1.9842099253120216e-05, "loss": 0.6345, "step": 3783 }, { "epoch": 0.1162412066476208, "grad_norm": 0.33306509256362915, "learning_rate": 1.9842013699049644e-05, "loss": 0.5995, "step": 3784 }, { "epoch": 0.11627192578256997, "grad_norm": 0.37958404421806335, "learning_rate": 1.984192812199236e-05, "loss": 0.5855, "step": 3785 }, { "epoch": 0.11630264491751913, "grad_norm": 0.34764793515205383, "learning_rate": 1.9841842521948565e-05, "loss": 0.634, "step": 3786 }, { "epoch": 0.11633336405246829, "grad_norm": 0.34162887930870056, "learning_rate": 1.9841756898918462e-05, "loss": 0.5449, "step": 3787 }, { "epoch": 0.11636408318741744, "grad_norm": 0.6509524583816528, "learning_rate": 1.9841671252902246e-05, "loss": 0.5426, "step": 3788 }, { "epoch": 0.1163948023223666, "grad_norm": 0.4084666073322296, "learning_rate": 1.984158558390012e-05, "loss": 0.6141, "step": 3789 }, { "epoch": 0.11642552145731576, "grad_norm": 0.33353424072265625, "learning_rate": 1.9841499891912284e-05, "loss": 0.5514, "step": 3790 }, { "epoch": 0.11645624059226492, "grad_norm": 0.37195223569869995, "learning_rate": 1.9841414176938936e-05, "loss": 0.6418, "step": 3791 }, { "epoch": 0.11648695972721408, "grad_norm": 0.34068968892097473, "learning_rate": 1.9841328438980282e-05, "loss": 0.5621, "step": 3792 }, { "epoch": 0.11651767886216324, "grad_norm": 0.3459191620349884, "learning_rate": 1.9841242678036515e-05, "loss": 0.6149, "step": 3793 }, { "epoch": 0.1165483979971124, "grad_norm": 0.3548637628555298, "learning_rate": 1.984115689410784e-05, "loss": 0.6391, "step": 3794 }, { "epoch": 0.11657911713206157, "grad_norm": 0.33104750514030457, "learning_rate": 1.9841071087194452e-05, "loss": 0.6297, "step": 3795 }, { "epoch": 0.11660983626701073, "grad_norm": 0.3076324462890625, "learning_rate": 1.984098525729656e-05, "loss": 0.5669, "step": 3796 }, { "epoch": 0.11664055540195988, "grad_norm": 0.4566512107849121, "learning_rate": 1.9840899404414355e-05, "loss": 0.6418, "step": 3797 }, { "epoch": 0.11667127453690904, "grad_norm": 0.33666276931762695, "learning_rate": 1.9840813528548045e-05, "loss": 0.7073, "step": 3798 }, { "epoch": 0.1167019936718582, "grad_norm": 0.3549008369445801, "learning_rate": 1.984072762969783e-05, "loss": 0.5787, "step": 3799 }, { "epoch": 0.11673271280680736, "grad_norm": 0.3410724699497223, "learning_rate": 1.9840641707863902e-05, "loss": 0.5126, "step": 3800 }, { "epoch": 0.11676343194175652, "grad_norm": 0.33260494470596313, "learning_rate": 1.9840555763046474e-05, "loss": 0.6209, "step": 3801 }, { "epoch": 0.11679415107670568, "grad_norm": 0.40066155791282654, "learning_rate": 1.984046979524574e-05, "loss": 0.6069, "step": 3802 }, { "epoch": 0.11682487021165484, "grad_norm": 0.3189440071582794, "learning_rate": 1.98403838044619e-05, "loss": 0.5718, "step": 3803 }, { "epoch": 0.116855589346604, "grad_norm": 0.3291054368019104, "learning_rate": 1.9840297790695152e-05, "loss": 0.5983, "step": 3804 }, { "epoch": 0.11688630848155315, "grad_norm": 0.3631213307380676, "learning_rate": 1.9840211753945707e-05, "loss": 0.555, "step": 3805 }, { "epoch": 0.11691702761650231, "grad_norm": 0.31198278069496155, "learning_rate": 1.984012569421376e-05, "loss": 0.5892, "step": 3806 }, { "epoch": 0.11694774675145148, "grad_norm": 0.3877051770687103, "learning_rate": 1.9840039611499512e-05, "loss": 0.7014, "step": 3807 }, { "epoch": 0.11697846588640064, "grad_norm": 0.3448989987373352, "learning_rate": 1.983995350580316e-05, "loss": 0.6528, "step": 3808 }, { "epoch": 0.1170091850213498, "grad_norm": 0.40667808055877686, "learning_rate": 1.9839867377124915e-05, "loss": 0.5476, "step": 3809 }, { "epoch": 0.11703990415629896, "grad_norm": 0.35547158122062683, "learning_rate": 1.9839781225464972e-05, "loss": 0.6293, "step": 3810 }, { "epoch": 0.11707062329124812, "grad_norm": 0.3515092134475708, "learning_rate": 1.983969505082353e-05, "loss": 0.6259, "step": 3811 }, { "epoch": 0.11710134242619728, "grad_norm": 0.3396672010421753, "learning_rate": 1.9839608853200795e-05, "loss": 0.6297, "step": 3812 }, { "epoch": 0.11713206156114644, "grad_norm": 0.3585609793663025, "learning_rate": 1.9839522632596964e-05, "loss": 0.6007, "step": 3813 }, { "epoch": 0.11716278069609559, "grad_norm": 0.33318907022476196, "learning_rate": 1.983943638901224e-05, "loss": 0.6413, "step": 3814 }, { "epoch": 0.11719349983104475, "grad_norm": 0.3204422891139984, "learning_rate": 1.983935012244683e-05, "loss": 0.6021, "step": 3815 }, { "epoch": 0.11722421896599391, "grad_norm": 0.33354270458221436, "learning_rate": 1.9839263832900925e-05, "loss": 0.6168, "step": 3816 }, { "epoch": 0.11725493810094308, "grad_norm": 0.3516763746738434, "learning_rate": 1.9839177520374737e-05, "loss": 0.5841, "step": 3817 }, { "epoch": 0.11728565723589224, "grad_norm": 0.39533543586730957, "learning_rate": 1.983909118486846e-05, "loss": 0.5651, "step": 3818 }, { "epoch": 0.1173163763708414, "grad_norm": 0.34915685653686523, "learning_rate": 1.98390048263823e-05, "loss": 0.5749, "step": 3819 }, { "epoch": 0.11734709550579056, "grad_norm": 0.3903295695781708, "learning_rate": 1.9838918444916456e-05, "loss": 0.6288, "step": 3820 }, { "epoch": 0.11737781464073972, "grad_norm": 0.36053407192230225, "learning_rate": 1.983883204047113e-05, "loss": 0.5076, "step": 3821 }, { "epoch": 0.11740853377568887, "grad_norm": 0.35079336166381836, "learning_rate": 1.9838745613046526e-05, "loss": 0.6153, "step": 3822 }, { "epoch": 0.11743925291063803, "grad_norm": 0.3538878858089447, "learning_rate": 1.9838659162642843e-05, "loss": 0.6409, "step": 3823 }, { "epoch": 0.11746997204558719, "grad_norm": 0.33198291063308716, "learning_rate": 1.9838572689260288e-05, "loss": 0.6063, "step": 3824 }, { "epoch": 0.11750069118053635, "grad_norm": 0.4453510046005249, "learning_rate": 1.9838486192899057e-05, "loss": 0.6036, "step": 3825 }, { "epoch": 0.11753141031548552, "grad_norm": 0.3433864116668701, "learning_rate": 1.9838399673559353e-05, "loss": 0.6457, "step": 3826 }, { "epoch": 0.11756212945043468, "grad_norm": 0.3363216817378998, "learning_rate": 1.9838313131241384e-05, "loss": 0.6801, "step": 3827 }, { "epoch": 0.11759284858538384, "grad_norm": 0.32750171422958374, "learning_rate": 1.983822656594534e-05, "loss": 0.5869, "step": 3828 }, { "epoch": 0.117623567720333, "grad_norm": 0.31059730052948, "learning_rate": 1.9838139977671437e-05, "loss": 0.5051, "step": 3829 }, { "epoch": 0.11765428685528216, "grad_norm": 0.33741238713264465, "learning_rate": 1.9838053366419867e-05, "loss": 0.5908, "step": 3830 }, { "epoch": 0.11768500599023131, "grad_norm": 0.3483133614063263, "learning_rate": 1.983796673219084e-05, "loss": 0.5663, "step": 3831 }, { "epoch": 0.11771572512518047, "grad_norm": 0.346077024936676, "learning_rate": 1.9837880074984554e-05, "loss": 0.6599, "step": 3832 }, { "epoch": 0.11774644426012963, "grad_norm": 0.3484307527542114, "learning_rate": 1.983779339480121e-05, "loss": 0.6264, "step": 3833 }, { "epoch": 0.1177771633950788, "grad_norm": 0.3494051694869995, "learning_rate": 1.9837706691641014e-05, "loss": 0.6573, "step": 3834 }, { "epoch": 0.11780788253002795, "grad_norm": 0.3423261344432831, "learning_rate": 1.9837619965504166e-05, "loss": 0.5985, "step": 3835 }, { "epoch": 0.11783860166497712, "grad_norm": 0.4452187418937683, "learning_rate": 1.9837533216390868e-05, "loss": 0.6038, "step": 3836 }, { "epoch": 0.11786932079992628, "grad_norm": 0.35844603180885315, "learning_rate": 1.9837446444301327e-05, "loss": 0.5782, "step": 3837 }, { "epoch": 0.11790003993487544, "grad_norm": 0.3511697053909302, "learning_rate": 1.9837359649235743e-05, "loss": 0.5472, "step": 3838 }, { "epoch": 0.1179307590698246, "grad_norm": 0.3367350995540619, "learning_rate": 1.9837272831194316e-05, "loss": 0.6328, "step": 3839 }, { "epoch": 0.11796147820477375, "grad_norm": 0.32530418038368225, "learning_rate": 1.9837185990177256e-05, "loss": 0.5681, "step": 3840 }, { "epoch": 0.11799219733972291, "grad_norm": 0.35378775000572205, "learning_rate": 1.9837099126184755e-05, "loss": 0.5468, "step": 3841 }, { "epoch": 0.11802291647467207, "grad_norm": 0.3151406943798065, "learning_rate": 1.9837012239217025e-05, "loss": 0.6262, "step": 3842 }, { "epoch": 0.11805363560962123, "grad_norm": 0.3474234640598297, "learning_rate": 1.9836925329274265e-05, "loss": 0.6024, "step": 3843 }, { "epoch": 0.1180843547445704, "grad_norm": 0.35663512349128723, "learning_rate": 1.9836838396356677e-05, "loss": 0.6183, "step": 3844 }, { "epoch": 0.11811507387951956, "grad_norm": 0.3165830373764038, "learning_rate": 1.9836751440464473e-05, "loss": 0.5777, "step": 3845 }, { "epoch": 0.11814579301446872, "grad_norm": 0.34394243359565735, "learning_rate": 1.9836664461597843e-05, "loss": 0.6572, "step": 3846 }, { "epoch": 0.11817651214941788, "grad_norm": 0.3493430018424988, "learning_rate": 1.9836577459756998e-05, "loss": 0.547, "step": 3847 }, { "epoch": 0.11820723128436703, "grad_norm": 0.35046690702438354, "learning_rate": 1.983649043494214e-05, "loss": 0.5032, "step": 3848 }, { "epoch": 0.11823795041931619, "grad_norm": 0.3600539267063141, "learning_rate": 1.9836403387153468e-05, "loss": 0.6485, "step": 3849 }, { "epoch": 0.11826866955426535, "grad_norm": 0.35563549399375916, "learning_rate": 1.9836316316391192e-05, "loss": 0.597, "step": 3850 }, { "epoch": 0.11829938868921451, "grad_norm": 0.34732431173324585, "learning_rate": 1.9836229222655513e-05, "loss": 0.5515, "step": 3851 }, { "epoch": 0.11833010782416367, "grad_norm": 0.39190781116485596, "learning_rate": 1.9836142105946633e-05, "loss": 0.6264, "step": 3852 }, { "epoch": 0.11836082695911283, "grad_norm": 0.33471590280532837, "learning_rate": 1.9836054966264756e-05, "loss": 0.7082, "step": 3853 }, { "epoch": 0.118391546094062, "grad_norm": 0.2903788685798645, "learning_rate": 1.9835967803610082e-05, "loss": 0.5498, "step": 3854 }, { "epoch": 0.11842226522901116, "grad_norm": 0.33770647644996643, "learning_rate": 1.9835880617982824e-05, "loss": 0.5813, "step": 3855 }, { "epoch": 0.11845298436396032, "grad_norm": 0.3273617923259735, "learning_rate": 1.9835793409383178e-05, "loss": 0.6033, "step": 3856 }, { "epoch": 0.11848370349890947, "grad_norm": 0.4108556807041168, "learning_rate": 1.983570617781135e-05, "loss": 0.6154, "step": 3857 }, { "epoch": 0.11851442263385863, "grad_norm": 0.32634884119033813, "learning_rate": 1.9835618923267542e-05, "loss": 0.6398, "step": 3858 }, { "epoch": 0.11854514176880779, "grad_norm": 0.35963988304138184, "learning_rate": 1.983553164575196e-05, "loss": 0.5995, "step": 3859 }, { "epoch": 0.11857586090375695, "grad_norm": 0.3171011805534363, "learning_rate": 1.9835444345264805e-05, "loss": 0.6097, "step": 3860 }, { "epoch": 0.11860658003870611, "grad_norm": 0.34885114431381226, "learning_rate": 1.9835357021806283e-05, "loss": 0.6725, "step": 3861 }, { "epoch": 0.11863729917365527, "grad_norm": 0.33659636974334717, "learning_rate": 1.98352696753766e-05, "loss": 0.6263, "step": 3862 }, { "epoch": 0.11866801830860443, "grad_norm": 0.345864862203598, "learning_rate": 1.9835182305975958e-05, "loss": 0.6878, "step": 3863 }, { "epoch": 0.1186987374435536, "grad_norm": 0.3344501256942749, "learning_rate": 1.983509491360456e-05, "loss": 0.6976, "step": 3864 }, { "epoch": 0.11872945657850274, "grad_norm": 0.3600524663925171, "learning_rate": 1.983500749826261e-05, "loss": 0.5744, "step": 3865 }, { "epoch": 0.1187601757134519, "grad_norm": 0.34627002477645874, "learning_rate": 1.9834920059950312e-05, "loss": 0.6329, "step": 3866 }, { "epoch": 0.11879089484840107, "grad_norm": 0.3632673919200897, "learning_rate": 1.9834832598667874e-05, "loss": 0.6597, "step": 3867 }, { "epoch": 0.11882161398335023, "grad_norm": 0.3345303535461426, "learning_rate": 1.9834745114415498e-05, "loss": 0.6314, "step": 3868 }, { "epoch": 0.11885233311829939, "grad_norm": 0.34338265657424927, "learning_rate": 1.9834657607193387e-05, "loss": 0.6468, "step": 3869 }, { "epoch": 0.11888305225324855, "grad_norm": 0.3748895227909088, "learning_rate": 1.9834570077001745e-05, "loss": 0.5923, "step": 3870 }, { "epoch": 0.11891377138819771, "grad_norm": 0.32660892605781555, "learning_rate": 1.983448252384078e-05, "loss": 0.5221, "step": 3871 }, { "epoch": 0.11894449052314687, "grad_norm": 0.3502084016799927, "learning_rate": 1.9834394947710692e-05, "loss": 0.6627, "step": 3872 }, { "epoch": 0.11897520965809603, "grad_norm": 0.30891314148902893, "learning_rate": 1.9834307348611686e-05, "loss": 0.5511, "step": 3873 }, { "epoch": 0.11900592879304518, "grad_norm": 0.3325452506542206, "learning_rate": 1.9834219726543973e-05, "loss": 0.5449, "step": 3874 }, { "epoch": 0.11903664792799434, "grad_norm": 0.5842499136924744, "learning_rate": 1.9834132081507752e-05, "loss": 0.6583, "step": 3875 }, { "epoch": 0.1190673670629435, "grad_norm": 0.3352312445640564, "learning_rate": 1.9834044413503227e-05, "loss": 0.631, "step": 3876 }, { "epoch": 0.11909808619789267, "grad_norm": 0.3834969699382782, "learning_rate": 1.9833956722530607e-05, "loss": 0.6591, "step": 3877 }, { "epoch": 0.11912880533284183, "grad_norm": 0.3675040900707245, "learning_rate": 1.983386900859009e-05, "loss": 0.5557, "step": 3878 }, { "epoch": 0.11915952446779099, "grad_norm": 0.3189936578273773, "learning_rate": 1.9833781271681885e-05, "loss": 0.618, "step": 3879 }, { "epoch": 0.11919024360274015, "grad_norm": 0.34971049427986145, "learning_rate": 1.9833693511806204e-05, "loss": 0.5779, "step": 3880 }, { "epoch": 0.11922096273768931, "grad_norm": 0.33449026942253113, "learning_rate": 1.9833605728963242e-05, "loss": 0.5387, "step": 3881 }, { "epoch": 0.11925168187263846, "grad_norm": 0.3554786741733551, "learning_rate": 1.9833517923153205e-05, "loss": 0.6796, "step": 3882 }, { "epoch": 0.11928240100758762, "grad_norm": 0.33844923973083496, "learning_rate": 1.98334300943763e-05, "loss": 0.5773, "step": 3883 }, { "epoch": 0.11931312014253678, "grad_norm": 0.3381737470626831, "learning_rate": 1.983334224263273e-05, "loss": 0.6867, "step": 3884 }, { "epoch": 0.11934383927748594, "grad_norm": 0.34445738792419434, "learning_rate": 1.983325436792271e-05, "loss": 0.6461, "step": 3885 }, { "epoch": 0.1193745584124351, "grad_norm": 0.31351014971733093, "learning_rate": 1.9833166470246433e-05, "loss": 0.5947, "step": 3886 }, { "epoch": 0.11940527754738427, "grad_norm": 0.37371188402175903, "learning_rate": 1.983307854960411e-05, "loss": 0.5805, "step": 3887 }, { "epoch": 0.11943599668233343, "grad_norm": 0.33106374740600586, "learning_rate": 1.983299060599594e-05, "loss": 0.6741, "step": 3888 }, { "epoch": 0.11946671581728259, "grad_norm": 0.32118386030197144, "learning_rate": 1.983290263942214e-05, "loss": 0.6814, "step": 3889 }, { "epoch": 0.11949743495223175, "grad_norm": 0.317035973072052, "learning_rate": 1.9832814649882908e-05, "loss": 0.5218, "step": 3890 }, { "epoch": 0.1195281540871809, "grad_norm": 0.35566845536231995, "learning_rate": 1.983272663737845e-05, "loss": 0.6273, "step": 3891 }, { "epoch": 0.11955887322213006, "grad_norm": 0.3858028054237366, "learning_rate": 1.9832638601908972e-05, "loss": 0.5585, "step": 3892 }, { "epoch": 0.11958959235707922, "grad_norm": 0.3593459725379944, "learning_rate": 1.9832550543474683e-05, "loss": 0.6332, "step": 3893 }, { "epoch": 0.11962031149202838, "grad_norm": 0.33717164397239685, "learning_rate": 1.983246246207578e-05, "loss": 0.6388, "step": 3894 }, { "epoch": 0.11965103062697754, "grad_norm": 0.35333141684532166, "learning_rate": 1.9832374357712475e-05, "loss": 0.6424, "step": 3895 }, { "epoch": 0.1196817497619267, "grad_norm": 0.4183734357357025, "learning_rate": 1.9832286230384977e-05, "loss": 0.5967, "step": 3896 }, { "epoch": 0.11971246889687587, "grad_norm": 0.32164302468299866, "learning_rate": 1.9832198080093485e-05, "loss": 0.5661, "step": 3897 }, { "epoch": 0.11974318803182503, "grad_norm": 0.3270011842250824, "learning_rate": 1.983210990683821e-05, "loss": 0.5604, "step": 3898 }, { "epoch": 0.11977390716677418, "grad_norm": 0.3338240385055542, "learning_rate": 1.9832021710619355e-05, "loss": 0.6621, "step": 3899 }, { "epoch": 0.11980462630172334, "grad_norm": 0.31285688281059265, "learning_rate": 1.9831933491437124e-05, "loss": 0.638, "step": 3900 }, { "epoch": 0.1198353454366725, "grad_norm": 0.3494952917098999, "learning_rate": 1.983184524929173e-05, "loss": 0.5914, "step": 3901 }, { "epoch": 0.11986606457162166, "grad_norm": 0.46165359020233154, "learning_rate": 1.983175698418337e-05, "loss": 0.5944, "step": 3902 }, { "epoch": 0.11989678370657082, "grad_norm": 0.3261806070804596, "learning_rate": 1.9831668696112257e-05, "loss": 0.6334, "step": 3903 }, { "epoch": 0.11992750284151998, "grad_norm": 0.35612115263938904, "learning_rate": 1.9831580385078596e-05, "loss": 0.6868, "step": 3904 }, { "epoch": 0.11995822197646915, "grad_norm": 0.4452011287212372, "learning_rate": 1.983149205108259e-05, "loss": 0.5628, "step": 3905 }, { "epoch": 0.1199889411114183, "grad_norm": 0.33042868971824646, "learning_rate": 1.9831403694124448e-05, "loss": 0.6068, "step": 3906 }, { "epoch": 0.12001966024636747, "grad_norm": 0.3113292157649994, "learning_rate": 1.983131531420438e-05, "loss": 0.5913, "step": 3907 }, { "epoch": 0.12005037938131662, "grad_norm": 0.3179704546928406, "learning_rate": 1.9831226911322584e-05, "loss": 0.5751, "step": 3908 }, { "epoch": 0.12008109851626578, "grad_norm": 0.8271068930625916, "learning_rate": 1.9831138485479276e-05, "loss": 0.7179, "step": 3909 }, { "epoch": 0.12011181765121494, "grad_norm": 0.33125758171081543, "learning_rate": 1.9831050036674653e-05, "loss": 0.691, "step": 3910 }, { "epoch": 0.1201425367861641, "grad_norm": 0.3675740957260132, "learning_rate": 1.9830961564908926e-05, "loss": 0.6536, "step": 3911 }, { "epoch": 0.12017325592111326, "grad_norm": 0.42607542872428894, "learning_rate": 1.9830873070182304e-05, "loss": 0.5145, "step": 3912 }, { "epoch": 0.12020397505606242, "grad_norm": 0.35502108931541443, "learning_rate": 1.983078455249499e-05, "loss": 0.5938, "step": 3913 }, { "epoch": 0.12023469419101158, "grad_norm": 0.34407833218574524, "learning_rate": 1.983069601184719e-05, "loss": 0.6589, "step": 3914 }, { "epoch": 0.12026541332596075, "grad_norm": 0.5482780933380127, "learning_rate": 1.9830607448239118e-05, "loss": 0.6676, "step": 3915 }, { "epoch": 0.12029613246090991, "grad_norm": 0.37145930528640747, "learning_rate": 1.9830518861670973e-05, "loss": 0.5876, "step": 3916 }, { "epoch": 0.12032685159585906, "grad_norm": 0.33146893978118896, "learning_rate": 1.9830430252142966e-05, "loss": 0.6269, "step": 3917 }, { "epoch": 0.12035757073080822, "grad_norm": 0.3547433316707611, "learning_rate": 1.98303416196553e-05, "loss": 0.5836, "step": 3918 }, { "epoch": 0.12038828986575738, "grad_norm": 0.35710620880126953, "learning_rate": 1.9830252964208186e-05, "loss": 0.6977, "step": 3919 }, { "epoch": 0.12041900900070654, "grad_norm": 0.3274984061717987, "learning_rate": 1.983016428580183e-05, "loss": 0.6498, "step": 3920 }, { "epoch": 0.1204497281356557, "grad_norm": 0.3938191533088684, "learning_rate": 1.9830075584436437e-05, "loss": 0.5975, "step": 3921 }, { "epoch": 0.12048044727060486, "grad_norm": 0.3467743694782257, "learning_rate": 1.9829986860112217e-05, "loss": 0.6599, "step": 3922 }, { "epoch": 0.12051116640555402, "grad_norm": 0.346174955368042, "learning_rate": 1.982989811282938e-05, "loss": 0.6363, "step": 3923 }, { "epoch": 0.12054188554050319, "grad_norm": 0.33710408210754395, "learning_rate": 1.9829809342588126e-05, "loss": 0.5381, "step": 3924 }, { "epoch": 0.12057260467545233, "grad_norm": 0.3725569546222687, "learning_rate": 1.9829720549388662e-05, "loss": 0.6222, "step": 3925 }, { "epoch": 0.1206033238104015, "grad_norm": 0.3377701938152313, "learning_rate": 1.9829631733231207e-05, "loss": 0.6047, "step": 3926 }, { "epoch": 0.12063404294535066, "grad_norm": 0.3403909504413605, "learning_rate": 1.9829542894115953e-05, "loss": 0.673, "step": 3927 }, { "epoch": 0.12066476208029982, "grad_norm": 0.38384753465652466, "learning_rate": 1.982945403204312e-05, "loss": 0.5977, "step": 3928 }, { "epoch": 0.12069548121524898, "grad_norm": 0.3164873719215393, "learning_rate": 1.982936514701291e-05, "loss": 0.5639, "step": 3929 }, { "epoch": 0.12072620035019814, "grad_norm": 0.41482552886009216, "learning_rate": 1.982927623902553e-05, "loss": 0.5967, "step": 3930 }, { "epoch": 0.1207569194851473, "grad_norm": 0.35810133814811707, "learning_rate": 1.9829187308081188e-05, "loss": 0.5431, "step": 3931 }, { "epoch": 0.12078763862009646, "grad_norm": 0.3880980610847473, "learning_rate": 1.9829098354180095e-05, "loss": 0.6813, "step": 3932 }, { "epoch": 0.12081835775504562, "grad_norm": 0.38677525520324707, "learning_rate": 1.9829009377322455e-05, "loss": 0.5897, "step": 3933 }, { "epoch": 0.12084907688999477, "grad_norm": 0.37059515714645386, "learning_rate": 1.9828920377508477e-05, "loss": 0.5981, "step": 3934 }, { "epoch": 0.12087979602494393, "grad_norm": 0.37518709897994995, "learning_rate": 1.9828831354738368e-05, "loss": 0.7018, "step": 3935 }, { "epoch": 0.1209105151598931, "grad_norm": 0.35395392775535583, "learning_rate": 1.982874230901234e-05, "loss": 0.5805, "step": 3936 }, { "epoch": 0.12094123429484226, "grad_norm": 0.3598932921886444, "learning_rate": 1.9828653240330596e-05, "loss": 0.6751, "step": 3937 }, { "epoch": 0.12097195342979142, "grad_norm": 0.33778977394104004, "learning_rate": 1.9828564148693344e-05, "loss": 0.5732, "step": 3938 }, { "epoch": 0.12100267256474058, "grad_norm": 0.40862467885017395, "learning_rate": 1.9828475034100798e-05, "loss": 0.5727, "step": 3939 }, { "epoch": 0.12103339169968974, "grad_norm": 0.4066392183303833, "learning_rate": 1.982838589655316e-05, "loss": 0.5524, "step": 3940 }, { "epoch": 0.1210641108346389, "grad_norm": 0.3237483501434326, "learning_rate": 1.9828296736050642e-05, "loss": 0.5487, "step": 3941 }, { "epoch": 0.12109482996958805, "grad_norm": 0.39570796489715576, "learning_rate": 1.982820755259345e-05, "loss": 0.5519, "step": 3942 }, { "epoch": 0.12112554910453721, "grad_norm": 0.3457581400871277, "learning_rate": 1.982811834618179e-05, "loss": 0.5305, "step": 3943 }, { "epoch": 0.12115626823948637, "grad_norm": 0.41069817543029785, "learning_rate": 1.9828029116815876e-05, "loss": 0.6147, "step": 3944 }, { "epoch": 0.12118698737443553, "grad_norm": 0.36869239807128906, "learning_rate": 1.982793986449591e-05, "loss": 0.6376, "step": 3945 }, { "epoch": 0.1212177065093847, "grad_norm": 0.3301643133163452, "learning_rate": 1.9827850589222112e-05, "loss": 0.6026, "step": 3946 }, { "epoch": 0.12124842564433386, "grad_norm": 0.3382440209388733, "learning_rate": 1.9827761290994676e-05, "loss": 0.604, "step": 3947 }, { "epoch": 0.12127914477928302, "grad_norm": 0.34945160150527954, "learning_rate": 1.9827671969813816e-05, "loss": 0.6132, "step": 3948 }, { "epoch": 0.12130986391423218, "grad_norm": 0.348328560590744, "learning_rate": 1.9827582625679745e-05, "loss": 0.6777, "step": 3949 }, { "epoch": 0.12134058304918134, "grad_norm": 0.34425222873687744, "learning_rate": 1.982749325859267e-05, "loss": 0.4752, "step": 3950 }, { "epoch": 0.12137130218413049, "grad_norm": 0.3280732035636902, "learning_rate": 1.9827403868552796e-05, "loss": 0.6825, "step": 3951 }, { "epoch": 0.12140202131907965, "grad_norm": 0.3628527522087097, "learning_rate": 1.9827314455560334e-05, "loss": 0.6261, "step": 3952 }, { "epoch": 0.12143274045402881, "grad_norm": 0.5230919122695923, "learning_rate": 1.982722501961549e-05, "loss": 0.5608, "step": 3953 }, { "epoch": 0.12146345958897797, "grad_norm": 0.3235064446926117, "learning_rate": 1.982713556071848e-05, "loss": 0.5473, "step": 3954 }, { "epoch": 0.12149417872392713, "grad_norm": 0.4075738489627838, "learning_rate": 1.982704607886951e-05, "loss": 0.6289, "step": 3955 }, { "epoch": 0.1215248978588763, "grad_norm": 0.39766696095466614, "learning_rate": 1.9826956574068782e-05, "loss": 0.6519, "step": 3956 }, { "epoch": 0.12155561699382546, "grad_norm": 0.32231828570365906, "learning_rate": 1.982686704631651e-05, "loss": 0.6576, "step": 3957 }, { "epoch": 0.12158633612877462, "grad_norm": 0.33975353837013245, "learning_rate": 1.9826777495612908e-05, "loss": 0.5573, "step": 3958 }, { "epoch": 0.12161705526372377, "grad_norm": 0.36280354857444763, "learning_rate": 1.982668792195818e-05, "loss": 0.6411, "step": 3959 }, { "epoch": 0.12164777439867293, "grad_norm": 0.36505335569381714, "learning_rate": 1.9826598325352537e-05, "loss": 0.6328, "step": 3960 }, { "epoch": 0.12167849353362209, "grad_norm": 0.32857245206832886, "learning_rate": 1.9826508705796186e-05, "loss": 0.6363, "step": 3961 }, { "epoch": 0.12170921266857125, "grad_norm": 0.3662493824958801, "learning_rate": 1.982641906328934e-05, "loss": 0.6378, "step": 3962 }, { "epoch": 0.12173993180352041, "grad_norm": 0.35013043880462646, "learning_rate": 1.98263293978322e-05, "loss": 0.6158, "step": 3963 }, { "epoch": 0.12177065093846957, "grad_norm": 0.4397788345813751, "learning_rate": 1.9826239709424984e-05, "loss": 0.6515, "step": 3964 }, { "epoch": 0.12180137007341874, "grad_norm": 0.3326026201248169, "learning_rate": 1.9826149998067904e-05, "loss": 0.6147, "step": 3965 }, { "epoch": 0.1218320892083679, "grad_norm": 0.37156540155410767, "learning_rate": 1.9826060263761158e-05, "loss": 0.5788, "step": 3966 }, { "epoch": 0.12186280834331706, "grad_norm": 0.40321114659309387, "learning_rate": 1.9825970506504965e-05, "loss": 0.7567, "step": 3967 }, { "epoch": 0.1218935274782662, "grad_norm": 0.34397241473197937, "learning_rate": 1.9825880726299532e-05, "loss": 0.547, "step": 3968 }, { "epoch": 0.12192424661321537, "grad_norm": 0.30680570006370544, "learning_rate": 1.9825790923145066e-05, "loss": 0.6319, "step": 3969 }, { "epoch": 0.12195496574816453, "grad_norm": 0.32828232645988464, "learning_rate": 1.982570109704178e-05, "loss": 0.6188, "step": 3970 }, { "epoch": 0.12198568488311369, "grad_norm": 0.40697893500328064, "learning_rate": 1.9825611247989887e-05, "loss": 0.5798, "step": 3971 }, { "epoch": 0.12201640401806285, "grad_norm": 0.32895222306251526, "learning_rate": 1.9825521375989586e-05, "loss": 0.6937, "step": 3972 }, { "epoch": 0.12204712315301201, "grad_norm": 0.3389405608177185, "learning_rate": 1.9825431481041096e-05, "loss": 0.7321, "step": 3973 }, { "epoch": 0.12207784228796117, "grad_norm": 0.34431394934654236, "learning_rate": 1.9825341563144627e-05, "loss": 0.596, "step": 3974 }, { "epoch": 0.12210856142291034, "grad_norm": 0.3791449964046478, "learning_rate": 1.9825251622300386e-05, "loss": 0.5922, "step": 3975 }, { "epoch": 0.1221392805578595, "grad_norm": 0.37983354926109314, "learning_rate": 1.982516165850858e-05, "loss": 0.6301, "step": 3976 }, { "epoch": 0.12216999969280865, "grad_norm": 0.4144596755504608, "learning_rate": 1.9825071671769428e-05, "loss": 0.5662, "step": 3977 }, { "epoch": 0.1222007188277578, "grad_norm": 0.34635189175605774, "learning_rate": 1.9824981662083133e-05, "loss": 0.6157, "step": 3978 }, { "epoch": 0.12223143796270697, "grad_norm": 0.4916316568851471, "learning_rate": 1.9824891629449905e-05, "loss": 0.6309, "step": 3979 }, { "epoch": 0.12226215709765613, "grad_norm": 0.3522813618183136, "learning_rate": 1.9824801573869958e-05, "loss": 0.5777, "step": 3980 }, { "epoch": 0.12229287623260529, "grad_norm": 0.33436158299446106, "learning_rate": 1.98247114953435e-05, "loss": 0.6122, "step": 3981 }, { "epoch": 0.12232359536755445, "grad_norm": 0.3479422926902771, "learning_rate": 1.9824621393870745e-05, "loss": 0.5839, "step": 3982 }, { "epoch": 0.12235431450250361, "grad_norm": 0.3738364279270172, "learning_rate": 1.9824531269451895e-05, "loss": 0.6317, "step": 3983 }, { "epoch": 0.12238503363745278, "grad_norm": 0.3325660526752472, "learning_rate": 1.982444112208717e-05, "loss": 0.5761, "step": 3984 }, { "epoch": 0.12241575277240192, "grad_norm": 0.4070984423160553, "learning_rate": 1.9824350951776777e-05, "loss": 0.6037, "step": 3985 }, { "epoch": 0.12244647190735108, "grad_norm": 0.3364180028438568, "learning_rate": 1.9824260758520925e-05, "loss": 0.6233, "step": 3986 }, { "epoch": 0.12247719104230025, "grad_norm": 0.34717434644699097, "learning_rate": 1.9824170542319824e-05, "loss": 0.6043, "step": 3987 }, { "epoch": 0.12250791017724941, "grad_norm": 0.3137715458869934, "learning_rate": 1.9824080303173692e-05, "loss": 0.5445, "step": 3988 }, { "epoch": 0.12253862931219857, "grad_norm": 0.34967148303985596, "learning_rate": 1.982399004108273e-05, "loss": 0.6045, "step": 3989 }, { "epoch": 0.12256934844714773, "grad_norm": 0.34322500228881836, "learning_rate": 1.9823899756047154e-05, "loss": 0.6196, "step": 3990 }, { "epoch": 0.12260006758209689, "grad_norm": 0.3416459858417511, "learning_rate": 1.9823809448067174e-05, "loss": 0.6035, "step": 3991 }, { "epoch": 0.12263078671704605, "grad_norm": 0.33335167169570923, "learning_rate": 1.9823719117143003e-05, "loss": 0.5563, "step": 3992 }, { "epoch": 0.12266150585199521, "grad_norm": 0.34754320979118347, "learning_rate": 1.9823628763274847e-05, "loss": 0.6162, "step": 3993 }, { "epoch": 0.12269222498694436, "grad_norm": 0.29994997382164, "learning_rate": 1.982353838646292e-05, "loss": 0.5561, "step": 3994 }, { "epoch": 0.12272294412189352, "grad_norm": 0.3437344431877136, "learning_rate": 1.9823447986707436e-05, "loss": 0.6195, "step": 3995 }, { "epoch": 0.12275366325684268, "grad_norm": 0.33683550357818604, "learning_rate": 1.98233575640086e-05, "loss": 0.5331, "step": 3996 }, { "epoch": 0.12278438239179185, "grad_norm": 0.3869773745536804, "learning_rate": 1.9823267118366627e-05, "loss": 0.6351, "step": 3997 }, { "epoch": 0.12281510152674101, "grad_norm": 0.33615756034851074, "learning_rate": 1.982317664978173e-05, "loss": 0.5921, "step": 3998 }, { "epoch": 0.12284582066169017, "grad_norm": 0.434918075799942, "learning_rate": 1.9823086158254115e-05, "loss": 0.6165, "step": 3999 }, { "epoch": 0.12287653979663933, "grad_norm": 0.3312654495239258, "learning_rate": 1.9822995643784e-05, "loss": 0.6075, "step": 4000 }, { "epoch": 0.12290725893158849, "grad_norm": 0.341169148683548, "learning_rate": 1.9822905106371587e-05, "loss": 0.5655, "step": 4001 }, { "epoch": 0.12293797806653764, "grad_norm": 0.335526704788208, "learning_rate": 1.9822814546017097e-05, "loss": 0.54, "step": 4002 }, { "epoch": 0.1229686972014868, "grad_norm": 0.34037959575653076, "learning_rate": 1.9822723962720736e-05, "loss": 0.512, "step": 4003 }, { "epoch": 0.12299941633643596, "grad_norm": 0.35534003376960754, "learning_rate": 1.982263335648272e-05, "loss": 0.6263, "step": 4004 }, { "epoch": 0.12303013547138512, "grad_norm": 0.37607112526893616, "learning_rate": 1.9822542727303255e-05, "loss": 0.673, "step": 4005 }, { "epoch": 0.12306085460633429, "grad_norm": 0.3173495829105377, "learning_rate": 1.9822452075182557e-05, "loss": 0.5673, "step": 4006 }, { "epoch": 0.12309157374128345, "grad_norm": 0.35260820388793945, "learning_rate": 1.9822361400120833e-05, "loss": 0.5247, "step": 4007 }, { "epoch": 0.12312229287623261, "grad_norm": 1.023559331893921, "learning_rate": 1.98222707021183e-05, "loss": 0.5274, "step": 4008 }, { "epoch": 0.12315301201118177, "grad_norm": 0.34445062279701233, "learning_rate": 1.982217998117517e-05, "loss": 0.6498, "step": 4009 }, { "epoch": 0.12318373114613093, "grad_norm": 0.3192446231842041, "learning_rate": 1.982208923729165e-05, "loss": 0.598, "step": 4010 }, { "epoch": 0.12321445028108008, "grad_norm": 0.3213036358356476, "learning_rate": 1.9821998470467956e-05, "loss": 0.6102, "step": 4011 }, { "epoch": 0.12324516941602924, "grad_norm": 0.3353501260280609, "learning_rate": 1.98219076807043e-05, "loss": 0.5829, "step": 4012 }, { "epoch": 0.1232758885509784, "grad_norm": 0.3570382297039032, "learning_rate": 1.982181686800089e-05, "loss": 0.5752, "step": 4013 }, { "epoch": 0.12330660768592756, "grad_norm": 0.4134616553783417, "learning_rate": 1.982172603235794e-05, "loss": 0.573, "step": 4014 }, { "epoch": 0.12333732682087672, "grad_norm": 0.4141969084739685, "learning_rate": 1.9821635173775666e-05, "loss": 0.5871, "step": 4015 }, { "epoch": 0.12336804595582589, "grad_norm": 0.34857290983200073, "learning_rate": 1.9821544292254272e-05, "loss": 0.5816, "step": 4016 }, { "epoch": 0.12339876509077505, "grad_norm": 0.32643958926200867, "learning_rate": 1.9821453387793982e-05, "loss": 0.5542, "step": 4017 }, { "epoch": 0.12342948422572421, "grad_norm": 0.36153870820999146, "learning_rate": 1.9821362460394996e-05, "loss": 0.5405, "step": 4018 }, { "epoch": 0.12346020336067336, "grad_norm": 0.35618409514427185, "learning_rate": 1.9821271510057535e-05, "loss": 0.6565, "step": 4019 }, { "epoch": 0.12349092249562252, "grad_norm": 0.3445967137813568, "learning_rate": 1.982118053678181e-05, "loss": 0.6054, "step": 4020 }, { "epoch": 0.12352164163057168, "grad_norm": 0.32705429196357727, "learning_rate": 1.9821089540568027e-05, "loss": 0.5797, "step": 4021 }, { "epoch": 0.12355236076552084, "grad_norm": 0.36389535665512085, "learning_rate": 1.982099852141641e-05, "loss": 0.6561, "step": 4022 }, { "epoch": 0.12358307990047, "grad_norm": 0.41328972578048706, "learning_rate": 1.982090747932716e-05, "loss": 0.5125, "step": 4023 }, { "epoch": 0.12361379903541916, "grad_norm": 0.3937087059020996, "learning_rate": 1.9820816414300496e-05, "loss": 0.6931, "step": 4024 }, { "epoch": 0.12364451817036833, "grad_norm": 0.329829603433609, "learning_rate": 1.982072532633663e-05, "loss": 0.6015, "step": 4025 }, { "epoch": 0.12367523730531749, "grad_norm": 0.3478061258792877, "learning_rate": 1.9820634215435774e-05, "loss": 0.6173, "step": 4026 }, { "epoch": 0.12370595644026665, "grad_norm": 0.3430708646774292, "learning_rate": 1.9820543081598142e-05, "loss": 0.5596, "step": 4027 }, { "epoch": 0.1237366755752158, "grad_norm": 0.3253670334815979, "learning_rate": 1.9820451924823945e-05, "loss": 0.5575, "step": 4028 }, { "epoch": 0.12376739471016496, "grad_norm": 0.2984340786933899, "learning_rate": 1.9820360745113394e-05, "loss": 0.5504, "step": 4029 }, { "epoch": 0.12379811384511412, "grad_norm": 0.32694923877716064, "learning_rate": 1.982026954246671e-05, "loss": 0.6261, "step": 4030 }, { "epoch": 0.12382883298006328, "grad_norm": 0.3490961790084839, "learning_rate": 1.9820178316884096e-05, "loss": 0.6279, "step": 4031 }, { "epoch": 0.12385955211501244, "grad_norm": 0.3355979323387146, "learning_rate": 1.982008706836577e-05, "loss": 0.6135, "step": 4032 }, { "epoch": 0.1238902712499616, "grad_norm": 0.3568604290485382, "learning_rate": 1.9819995796911947e-05, "loss": 0.6029, "step": 4033 }, { "epoch": 0.12392099038491076, "grad_norm": 0.3725469410419464, "learning_rate": 1.981990450252284e-05, "loss": 0.6214, "step": 4034 }, { "epoch": 0.12395170951985993, "grad_norm": 0.33064812421798706, "learning_rate": 1.9819813185198653e-05, "loss": 0.6069, "step": 4035 }, { "epoch": 0.12398242865480907, "grad_norm": 0.3642444610595703, "learning_rate": 1.981972184493961e-05, "loss": 0.5816, "step": 4036 }, { "epoch": 0.12401314778975824, "grad_norm": 0.33744901418685913, "learning_rate": 1.9819630481745924e-05, "loss": 0.6184, "step": 4037 }, { "epoch": 0.1240438669247074, "grad_norm": 0.341227650642395, "learning_rate": 1.98195390956178e-05, "loss": 0.7287, "step": 4038 }, { "epoch": 0.12407458605965656, "grad_norm": 0.3672991096973419, "learning_rate": 1.981944768655546e-05, "loss": 0.6314, "step": 4039 }, { "epoch": 0.12410530519460572, "grad_norm": 0.32346588373184204, "learning_rate": 1.9819356254559115e-05, "loss": 0.5335, "step": 4040 }, { "epoch": 0.12413602432955488, "grad_norm": 0.3156619071960449, "learning_rate": 1.9819264799628975e-05, "loss": 0.534, "step": 4041 }, { "epoch": 0.12416674346450404, "grad_norm": 0.3547787666320801, "learning_rate": 1.9819173321765256e-05, "loss": 0.6041, "step": 4042 }, { "epoch": 0.1241974625994532, "grad_norm": 0.36787083745002747, "learning_rate": 1.9819081820968174e-05, "loss": 0.6687, "step": 4043 }, { "epoch": 0.12422818173440237, "grad_norm": 0.3263132572174072, "learning_rate": 1.9818990297237943e-05, "loss": 0.6019, "step": 4044 }, { "epoch": 0.12425890086935151, "grad_norm": 0.4869674742221832, "learning_rate": 1.9818898750574772e-05, "loss": 0.6187, "step": 4045 }, { "epoch": 0.12428962000430067, "grad_norm": 0.5172162055969238, "learning_rate": 1.9818807180978875e-05, "loss": 0.6106, "step": 4046 }, { "epoch": 0.12432033913924984, "grad_norm": 0.3472331166267395, "learning_rate": 1.981871558845047e-05, "loss": 0.5912, "step": 4047 }, { "epoch": 0.124351058274199, "grad_norm": 0.3214357793331146, "learning_rate": 1.981862397298977e-05, "loss": 0.6031, "step": 4048 }, { "epoch": 0.12438177740914816, "grad_norm": 0.3514290153980255, "learning_rate": 1.9818532334596985e-05, "loss": 0.6251, "step": 4049 }, { "epoch": 0.12441249654409732, "grad_norm": 0.36372897028923035, "learning_rate": 1.9818440673272333e-05, "loss": 0.555, "step": 4050 }, { "epoch": 0.12444321567904648, "grad_norm": 0.32103201746940613, "learning_rate": 1.981834898901603e-05, "loss": 0.5883, "step": 4051 }, { "epoch": 0.12447393481399564, "grad_norm": 0.33416178822517395, "learning_rate": 1.9818257281828287e-05, "loss": 0.6361, "step": 4052 }, { "epoch": 0.1245046539489448, "grad_norm": 0.3045358657836914, "learning_rate": 1.9818165551709313e-05, "loss": 0.6378, "step": 4053 }, { "epoch": 0.12453537308389395, "grad_norm": 0.32837557792663574, "learning_rate": 1.981807379865933e-05, "loss": 0.5781, "step": 4054 }, { "epoch": 0.12456609221884311, "grad_norm": 0.3130966126918793, "learning_rate": 1.9817982022678555e-05, "loss": 0.6105, "step": 4055 }, { "epoch": 0.12459681135379228, "grad_norm": 0.338255375623703, "learning_rate": 1.9817890223767194e-05, "loss": 0.6446, "step": 4056 }, { "epoch": 0.12462753048874144, "grad_norm": 0.3252028226852417, "learning_rate": 1.9817798401925465e-05, "loss": 0.5365, "step": 4057 }, { "epoch": 0.1246582496236906, "grad_norm": 0.3865405023097992, "learning_rate": 1.9817706557153578e-05, "loss": 0.6176, "step": 4058 }, { "epoch": 0.12468896875863976, "grad_norm": 0.3588906526565552, "learning_rate": 1.9817614689451757e-05, "loss": 0.6364, "step": 4059 }, { "epoch": 0.12471968789358892, "grad_norm": 0.3471241891384125, "learning_rate": 1.981752279882021e-05, "loss": 0.6205, "step": 4060 }, { "epoch": 0.12475040702853808, "grad_norm": 0.394599974155426, "learning_rate": 1.981743088525915e-05, "loss": 0.5721, "step": 4061 }, { "epoch": 0.12478112616348723, "grad_norm": 0.3496752679347992, "learning_rate": 1.9817338948768795e-05, "loss": 0.6135, "step": 4062 }, { "epoch": 0.12481184529843639, "grad_norm": 0.34640687704086304, "learning_rate": 1.9817246989349363e-05, "loss": 0.6161, "step": 4063 }, { "epoch": 0.12484256443338555, "grad_norm": 0.33708661794662476, "learning_rate": 1.9817155007001064e-05, "loss": 0.5113, "step": 4064 }, { "epoch": 0.12487328356833471, "grad_norm": 0.3403373956680298, "learning_rate": 1.981706300172411e-05, "loss": 0.5763, "step": 4065 }, { "epoch": 0.12490400270328388, "grad_norm": 0.3514322340488434, "learning_rate": 1.9816970973518725e-05, "loss": 0.6399, "step": 4066 }, { "epoch": 0.12493472183823304, "grad_norm": 0.3370497524738312, "learning_rate": 1.9816878922385117e-05, "loss": 0.5519, "step": 4067 }, { "epoch": 0.1249654409731822, "grad_norm": 0.32534220814704895, "learning_rate": 1.98167868483235e-05, "loss": 0.5223, "step": 4068 }, { "epoch": 0.12499616010813136, "grad_norm": 0.35554641485214233, "learning_rate": 1.9816694751334094e-05, "loss": 0.7176, "step": 4069 }, { "epoch": 0.1250268792430805, "grad_norm": 0.41027936339378357, "learning_rate": 1.981660263141711e-05, "loss": 0.6406, "step": 4070 }, { "epoch": 0.12505759837802968, "grad_norm": 0.35321253538131714, "learning_rate": 1.9816510488572768e-05, "loss": 0.6702, "step": 4071 }, { "epoch": 0.12508831751297883, "grad_norm": 0.4247465431690216, "learning_rate": 1.981641832280128e-05, "loss": 0.5829, "step": 4072 }, { "epoch": 0.125119036647928, "grad_norm": 0.3844532072544098, "learning_rate": 1.981632613410286e-05, "loss": 0.6498, "step": 4073 }, { "epoch": 0.12514975578287715, "grad_norm": 0.3773910701274872, "learning_rate": 1.9816233922477723e-05, "loss": 0.5949, "step": 4074 }, { "epoch": 0.1251804749178263, "grad_norm": 0.30154159665107727, "learning_rate": 1.981614168792609e-05, "loss": 0.529, "step": 4075 }, { "epoch": 0.12521119405277548, "grad_norm": 0.3629368245601654, "learning_rate": 1.981604943044817e-05, "loss": 0.6, "step": 4076 }, { "epoch": 0.12524191318772462, "grad_norm": 0.3227355480194092, "learning_rate": 1.981595715004418e-05, "loss": 0.6354, "step": 4077 }, { "epoch": 0.1252726323226738, "grad_norm": 0.3186333477497101, "learning_rate": 1.981586484671434e-05, "loss": 0.6366, "step": 4078 }, { "epoch": 0.12530335145762295, "grad_norm": 0.3339761197566986, "learning_rate": 1.9815772520458857e-05, "loss": 0.6654, "step": 4079 }, { "epoch": 0.12533407059257212, "grad_norm": 0.40091216564178467, "learning_rate": 1.9815680171277956e-05, "loss": 0.6013, "step": 4080 }, { "epoch": 0.12536478972752127, "grad_norm": 0.3458215296268463, "learning_rate": 1.981558779917185e-05, "loss": 0.579, "step": 4081 }, { "epoch": 0.12539550886247045, "grad_norm": 0.41100654006004333, "learning_rate": 1.9815495404140747e-05, "loss": 0.5997, "step": 4082 }, { "epoch": 0.1254262279974196, "grad_norm": 0.3338417410850525, "learning_rate": 1.9815402986184874e-05, "loss": 0.5685, "step": 4083 }, { "epoch": 0.12545694713236874, "grad_norm": 0.3260349631309509, "learning_rate": 1.981531054530444e-05, "loss": 0.6094, "step": 4084 }, { "epoch": 0.12548766626731792, "grad_norm": 0.34699514508247375, "learning_rate": 1.9815218081499662e-05, "loss": 0.632, "step": 4085 }, { "epoch": 0.12551838540226706, "grad_norm": 0.425396591424942, "learning_rate": 1.9815125594770758e-05, "loss": 0.6026, "step": 4086 }, { "epoch": 0.12554910453721624, "grad_norm": 0.3404061198234558, "learning_rate": 1.9815033085117944e-05, "loss": 0.5417, "step": 4087 }, { "epoch": 0.12557982367216539, "grad_norm": 0.3974739611148834, "learning_rate": 1.9814940552541433e-05, "loss": 0.6238, "step": 4088 }, { "epoch": 0.12561054280711456, "grad_norm": 0.3673137426376343, "learning_rate": 1.9814847997041443e-05, "loss": 0.6242, "step": 4089 }, { "epoch": 0.1256412619420637, "grad_norm": 0.3146202862262726, "learning_rate": 1.981475541861819e-05, "loss": 0.5558, "step": 4090 }, { "epoch": 0.12567198107701288, "grad_norm": 0.3716023862361908, "learning_rate": 1.981466281727189e-05, "loss": 0.5798, "step": 4091 }, { "epoch": 0.12570270021196203, "grad_norm": 0.3551847040653229, "learning_rate": 1.981457019300276e-05, "loss": 0.611, "step": 4092 }, { "epoch": 0.12573341934691118, "grad_norm": 0.3965971767902374, "learning_rate": 1.9814477545811015e-05, "loss": 0.6503, "step": 4093 }, { "epoch": 0.12576413848186035, "grad_norm": 0.3657153844833374, "learning_rate": 1.981438487569687e-05, "loss": 0.6818, "step": 4094 }, { "epoch": 0.1257948576168095, "grad_norm": 0.368564248085022, "learning_rate": 1.981429218266055e-05, "loss": 0.5671, "step": 4095 }, { "epoch": 0.12582557675175868, "grad_norm": 0.37346938252449036, "learning_rate": 1.9814199466702264e-05, "loss": 0.6251, "step": 4096 }, { "epoch": 0.12585629588670783, "grad_norm": 0.46099886298179626, "learning_rate": 1.9814106727822225e-05, "loss": 0.5951, "step": 4097 }, { "epoch": 0.125887015021657, "grad_norm": 0.9393325448036194, "learning_rate": 1.981401396602066e-05, "loss": 0.5993, "step": 4098 }, { "epoch": 0.12591773415660615, "grad_norm": 0.32662755250930786, "learning_rate": 1.9813921181297774e-05, "loss": 0.5492, "step": 4099 }, { "epoch": 0.12594845329155532, "grad_norm": 0.29848650097846985, "learning_rate": 1.9813828373653795e-05, "loss": 0.5467, "step": 4100 }, { "epoch": 0.12597917242650447, "grad_norm": 0.3566511571407318, "learning_rate": 1.981373554308893e-05, "loss": 0.4794, "step": 4101 }, { "epoch": 0.12600989156145362, "grad_norm": 0.33830347657203674, "learning_rate": 1.98136426896034e-05, "loss": 0.6224, "step": 4102 }, { "epoch": 0.1260406106964028, "grad_norm": 0.3949575424194336, "learning_rate": 1.9813549813197427e-05, "loss": 0.6093, "step": 4103 }, { "epoch": 0.12607132983135194, "grad_norm": 0.3097324073314667, "learning_rate": 1.981345691387122e-05, "loss": 0.5877, "step": 4104 }, { "epoch": 0.12610204896630112, "grad_norm": 0.30452415347099304, "learning_rate": 1.9813363991625e-05, "loss": 0.6941, "step": 4105 }, { "epoch": 0.12613276810125026, "grad_norm": 0.3659488260746002, "learning_rate": 1.981327104645898e-05, "loss": 0.6005, "step": 4106 }, { "epoch": 0.12616348723619944, "grad_norm": 0.3741089701652527, "learning_rate": 1.9813178078373385e-05, "loss": 0.649, "step": 4107 }, { "epoch": 0.1261942063711486, "grad_norm": 0.39291682839393616, "learning_rate": 1.9813085087368424e-05, "loss": 0.5739, "step": 4108 }, { "epoch": 0.12622492550609773, "grad_norm": 0.33800652623176575, "learning_rate": 1.981299207344432e-05, "loss": 0.5692, "step": 4109 }, { "epoch": 0.1262556446410469, "grad_norm": 0.298652708530426, "learning_rate": 1.9812899036601285e-05, "loss": 0.5772, "step": 4110 }, { "epoch": 0.12628636377599606, "grad_norm": 0.36987534165382385, "learning_rate": 1.981280597683954e-05, "loss": 0.6267, "step": 4111 }, { "epoch": 0.12631708291094523, "grad_norm": 0.33516284823417664, "learning_rate": 1.98127128941593e-05, "loss": 0.5912, "step": 4112 }, { "epoch": 0.12634780204589438, "grad_norm": 0.66704922914505, "learning_rate": 1.9812619788560785e-05, "loss": 0.6405, "step": 4113 }, { "epoch": 0.12637852118084356, "grad_norm": 0.34584134817123413, "learning_rate": 1.9812526660044212e-05, "loss": 0.6525, "step": 4114 }, { "epoch": 0.1264092403157927, "grad_norm": 0.33145758509635925, "learning_rate": 1.9812433508609797e-05, "loss": 0.5952, "step": 4115 }, { "epoch": 0.12643995945074188, "grad_norm": 0.2905735969543457, "learning_rate": 1.9812340334257757e-05, "loss": 0.5177, "step": 4116 }, { "epoch": 0.12647067858569103, "grad_norm": 0.3490558862686157, "learning_rate": 1.9812247136988313e-05, "loss": 0.614, "step": 4117 }, { "epoch": 0.12650139772064017, "grad_norm": 0.32255879044532776, "learning_rate": 1.981215391680168e-05, "loss": 0.5445, "step": 4118 }, { "epoch": 0.12653211685558935, "grad_norm": 0.40051403641700745, "learning_rate": 1.9812060673698073e-05, "loss": 0.6469, "step": 4119 }, { "epoch": 0.1265628359905385, "grad_norm": 0.35837772488594055, "learning_rate": 1.981196740767772e-05, "loss": 0.6473, "step": 4120 }, { "epoch": 0.12659355512548767, "grad_norm": 0.35840678215026855, "learning_rate": 1.9811874118740827e-05, "loss": 0.6246, "step": 4121 }, { "epoch": 0.12662427426043682, "grad_norm": 0.36751827597618103, "learning_rate": 1.981178080688762e-05, "loss": 0.6084, "step": 4122 }, { "epoch": 0.126654993395386, "grad_norm": 0.34752824902534485, "learning_rate": 1.981168747211831e-05, "loss": 0.6762, "step": 4123 }, { "epoch": 0.12668571253033514, "grad_norm": 0.3345435857772827, "learning_rate": 1.981159411443312e-05, "loss": 0.5452, "step": 4124 }, { "epoch": 0.12671643166528432, "grad_norm": 0.3513478934764862, "learning_rate": 1.9811500733832264e-05, "loss": 0.6362, "step": 4125 }, { "epoch": 0.12674715080023347, "grad_norm": 0.3629491329193115, "learning_rate": 1.9811407330315965e-05, "loss": 0.5663, "step": 4126 }, { "epoch": 0.1267778699351826, "grad_norm": 0.44739046692848206, "learning_rate": 1.981131390388444e-05, "loss": 0.4968, "step": 4127 }, { "epoch": 0.1268085890701318, "grad_norm": 0.3355485498905182, "learning_rate": 1.9811220454537906e-05, "loss": 0.531, "step": 4128 }, { "epoch": 0.12683930820508094, "grad_norm": 0.3767832815647125, "learning_rate": 1.9811126982276582e-05, "loss": 0.6285, "step": 4129 }, { "epoch": 0.1268700273400301, "grad_norm": 0.3364431858062744, "learning_rate": 1.9811033487100683e-05, "loss": 0.5892, "step": 4130 }, { "epoch": 0.12690074647497926, "grad_norm": 0.32281869649887085, "learning_rate": 1.9810939969010432e-05, "loss": 0.6149, "step": 4131 }, { "epoch": 0.12693146560992843, "grad_norm": 0.35355496406555176, "learning_rate": 1.9810846428006046e-05, "loss": 0.6069, "step": 4132 }, { "epoch": 0.12696218474487758, "grad_norm": 0.37478742003440857, "learning_rate": 1.981075286408774e-05, "loss": 0.6631, "step": 4133 }, { "epoch": 0.12699290387982676, "grad_norm": 0.37912115454673767, "learning_rate": 1.9810659277255738e-05, "loss": 0.5834, "step": 4134 }, { "epoch": 0.1270236230147759, "grad_norm": 0.37822598218917847, "learning_rate": 1.9810565667510254e-05, "loss": 0.5613, "step": 4135 }, { "epoch": 0.12705434214972505, "grad_norm": 0.3545427620410919, "learning_rate": 1.9810472034851512e-05, "loss": 0.6691, "step": 4136 }, { "epoch": 0.12708506128467423, "grad_norm": 0.3341190218925476, "learning_rate": 1.9810378379279726e-05, "loss": 0.6055, "step": 4137 }, { "epoch": 0.12711578041962338, "grad_norm": 0.3469977378845215, "learning_rate": 1.9810284700795116e-05, "loss": 0.6054, "step": 4138 }, { "epoch": 0.12714649955457255, "grad_norm": 0.3458942770957947, "learning_rate": 1.9810190999397903e-05, "loss": 0.607, "step": 4139 }, { "epoch": 0.1271772186895217, "grad_norm": 0.31423330307006836, "learning_rate": 1.98100972750883e-05, "loss": 0.6812, "step": 4140 }, { "epoch": 0.12720793782447087, "grad_norm": 0.3494483530521393, "learning_rate": 1.981000352786653e-05, "loss": 0.5985, "step": 4141 }, { "epoch": 0.12723865695942002, "grad_norm": 0.32029852271080017, "learning_rate": 1.9809909757732814e-05, "loss": 0.5398, "step": 4142 }, { "epoch": 0.12726937609436917, "grad_norm": 0.3512006402015686, "learning_rate": 1.9809815964687368e-05, "loss": 0.6145, "step": 4143 }, { "epoch": 0.12730009522931834, "grad_norm": 0.44339439272880554, "learning_rate": 1.980972214873041e-05, "loss": 0.4751, "step": 4144 }, { "epoch": 0.1273308143642675, "grad_norm": 0.3727782666683197, "learning_rate": 1.9809628309862163e-05, "loss": 0.5949, "step": 4145 }, { "epoch": 0.12736153349921667, "grad_norm": 0.3588993549346924, "learning_rate": 1.9809534448082845e-05, "loss": 0.5955, "step": 4146 }, { "epoch": 0.12739225263416581, "grad_norm": 0.3903571367263794, "learning_rate": 1.9809440563392672e-05, "loss": 0.5023, "step": 4147 }, { "epoch": 0.127422971769115, "grad_norm": 0.33360254764556885, "learning_rate": 1.9809346655791867e-05, "loss": 0.6339, "step": 4148 }, { "epoch": 0.12745369090406414, "grad_norm": 0.35657694935798645, "learning_rate": 1.9809252725280647e-05, "loss": 0.7013, "step": 4149 }, { "epoch": 0.1274844100390133, "grad_norm": 0.3323673605918884, "learning_rate": 1.9809158771859232e-05, "loss": 0.571, "step": 4150 }, { "epoch": 0.12751512917396246, "grad_norm": 0.42442458868026733, "learning_rate": 1.980906479552784e-05, "loss": 0.6201, "step": 4151 }, { "epoch": 0.1275458483089116, "grad_norm": 0.8313340544700623, "learning_rate": 1.9808970796286693e-05, "loss": 0.6505, "step": 4152 }, { "epoch": 0.12757656744386078, "grad_norm": 0.31842145323753357, "learning_rate": 1.980887677413601e-05, "loss": 0.5961, "step": 4153 }, { "epoch": 0.12760728657880993, "grad_norm": 0.3233353793621063, "learning_rate": 1.9808782729076013e-05, "loss": 0.6518, "step": 4154 }, { "epoch": 0.1276380057137591, "grad_norm": 0.366288959980011, "learning_rate": 1.9808688661106914e-05, "loss": 0.6116, "step": 4155 }, { "epoch": 0.12766872484870825, "grad_norm": 0.33674097061157227, "learning_rate": 1.9808594570228942e-05, "loss": 0.5, "step": 4156 }, { "epoch": 0.12769944398365743, "grad_norm": 0.30180782079696655, "learning_rate": 1.980850045644231e-05, "loss": 0.6146, "step": 4157 }, { "epoch": 0.12773016311860658, "grad_norm": 0.35168373584747314, "learning_rate": 1.980840631974724e-05, "loss": 0.5956, "step": 4158 }, { "epoch": 0.12776088225355575, "grad_norm": 0.3350053131580353, "learning_rate": 1.9808312160143953e-05, "loss": 0.6214, "step": 4159 }, { "epoch": 0.1277916013885049, "grad_norm": 0.32799032330513, "learning_rate": 1.980821797763267e-05, "loss": 0.623, "step": 4160 }, { "epoch": 0.12782232052345405, "grad_norm": 0.33669546246528625, "learning_rate": 1.9808123772213603e-05, "loss": 0.5743, "step": 4161 }, { "epoch": 0.12785303965840322, "grad_norm": 0.35535046458244324, "learning_rate": 1.980802954388698e-05, "loss": 0.6624, "step": 4162 }, { "epoch": 0.12788375879335237, "grad_norm": 0.35869717597961426, "learning_rate": 1.9807935292653024e-05, "loss": 0.6037, "step": 4163 }, { "epoch": 0.12791447792830155, "grad_norm": 0.35203149914741516, "learning_rate": 1.9807841018511945e-05, "loss": 0.5727, "step": 4164 }, { "epoch": 0.1279451970632507, "grad_norm": 0.31383320689201355, "learning_rate": 1.980774672146397e-05, "loss": 0.5718, "step": 4165 }, { "epoch": 0.12797591619819987, "grad_norm": 0.36762869358062744, "learning_rate": 1.980765240150932e-05, "loss": 0.5872, "step": 4166 }, { "epoch": 0.12800663533314902, "grad_norm": 0.5352159142494202, "learning_rate": 1.9807558058648207e-05, "loss": 0.5424, "step": 4167 }, { "epoch": 0.1280373544680982, "grad_norm": 0.3300909101963043, "learning_rate": 1.9807463692880864e-05, "loss": 0.6546, "step": 4168 }, { "epoch": 0.12806807360304734, "grad_norm": 0.3230202794075012, "learning_rate": 1.98073693042075e-05, "loss": 0.6002, "step": 4169 }, { "epoch": 0.1280987927379965, "grad_norm": 0.3218047618865967, "learning_rate": 1.980727489262834e-05, "loss": 0.5515, "step": 4170 }, { "epoch": 0.12812951187294566, "grad_norm": 0.3347569406032562, "learning_rate": 1.9807180458143605e-05, "loss": 0.5845, "step": 4171 }, { "epoch": 0.1281602310078948, "grad_norm": 0.3820458948612213, "learning_rate": 1.9807086000753518e-05, "loss": 0.6707, "step": 4172 }, { "epoch": 0.12819095014284398, "grad_norm": 0.3322044014930725, "learning_rate": 1.9806991520458294e-05, "loss": 0.5272, "step": 4173 }, { "epoch": 0.12822166927779313, "grad_norm": 0.3289972245693207, "learning_rate": 1.9806897017258158e-05, "loss": 0.5804, "step": 4174 }, { "epoch": 0.1282523884127423, "grad_norm": 0.323691725730896, "learning_rate": 1.980680249115333e-05, "loss": 0.5593, "step": 4175 }, { "epoch": 0.12828310754769146, "grad_norm": 0.31711336970329285, "learning_rate": 1.9806707942144024e-05, "loss": 0.6161, "step": 4176 }, { "epoch": 0.12831382668264063, "grad_norm": 0.35475096106529236, "learning_rate": 1.980661337023047e-05, "loss": 0.6122, "step": 4177 }, { "epoch": 0.12834454581758978, "grad_norm": 0.32817718386650085, "learning_rate": 1.9806518775412886e-05, "loss": 0.6436, "step": 4178 }, { "epoch": 0.12837526495253893, "grad_norm": 0.464938223361969, "learning_rate": 1.9806424157691497e-05, "loss": 0.6106, "step": 4179 }, { "epoch": 0.1284059840874881, "grad_norm": 0.3981035053730011, "learning_rate": 1.9806329517066512e-05, "loss": 0.6089, "step": 4180 }, { "epoch": 0.12843670322243725, "grad_norm": 0.3213074803352356, "learning_rate": 1.9806234853538164e-05, "loss": 0.556, "step": 4181 }, { "epoch": 0.12846742235738642, "grad_norm": 0.2999817430973053, "learning_rate": 1.9806140167106667e-05, "loss": 0.5873, "step": 4182 }, { "epoch": 0.12849814149233557, "grad_norm": 0.3213011920452118, "learning_rate": 1.9806045457772247e-05, "loss": 0.5746, "step": 4183 }, { "epoch": 0.12852886062728475, "grad_norm": 0.3249133229255676, "learning_rate": 1.9805950725535124e-05, "loss": 0.6231, "step": 4184 }, { "epoch": 0.1285595797622339, "grad_norm": 0.4175771474838257, "learning_rate": 1.9805855970395517e-05, "loss": 0.6454, "step": 4185 }, { "epoch": 0.12859029889718304, "grad_norm": 0.43649062514305115, "learning_rate": 1.9805761192353646e-05, "loss": 0.6067, "step": 4186 }, { "epoch": 0.12862101803213222, "grad_norm": 0.4572054147720337, "learning_rate": 1.9805666391409737e-05, "loss": 0.6238, "step": 4187 }, { "epoch": 0.12865173716708136, "grad_norm": 0.32612159848213196, "learning_rate": 1.980557156756401e-05, "loss": 0.65, "step": 4188 }, { "epoch": 0.12868245630203054, "grad_norm": 0.3082496225833893, "learning_rate": 1.9805476720816683e-05, "loss": 0.5985, "step": 4189 }, { "epoch": 0.1287131754369797, "grad_norm": 0.3602977991104126, "learning_rate": 1.9805381851167986e-05, "loss": 0.5571, "step": 4190 }, { "epoch": 0.12874389457192886, "grad_norm": 0.3504650890827179, "learning_rate": 1.9805286958618127e-05, "loss": 0.6559, "step": 4191 }, { "epoch": 0.128774613706878, "grad_norm": 0.3600429892539978, "learning_rate": 1.980519204316734e-05, "loss": 0.6604, "step": 4192 }, { "epoch": 0.12880533284182719, "grad_norm": 0.3629418611526489, "learning_rate": 1.9805097104815844e-05, "loss": 0.5883, "step": 4193 }, { "epoch": 0.12883605197677633, "grad_norm": 0.35946372151374817, "learning_rate": 1.9805002143563858e-05, "loss": 0.5541, "step": 4194 }, { "epoch": 0.12886677111172548, "grad_norm": 0.5136677622795105, "learning_rate": 1.9804907159411604e-05, "loss": 0.6262, "step": 4195 }, { "epoch": 0.12889749024667466, "grad_norm": 0.32676243782043457, "learning_rate": 1.9804812152359303e-05, "loss": 0.6257, "step": 4196 }, { "epoch": 0.1289282093816238, "grad_norm": 0.37862542271614075, "learning_rate": 1.980471712240718e-05, "loss": 0.592, "step": 4197 }, { "epoch": 0.12895892851657298, "grad_norm": 0.3604101836681366, "learning_rate": 1.9804622069555456e-05, "loss": 0.566, "step": 4198 }, { "epoch": 0.12898964765152213, "grad_norm": 0.34152135252952576, "learning_rate": 1.980452699380435e-05, "loss": 0.5531, "step": 4199 }, { "epoch": 0.1290203667864713, "grad_norm": 0.42523515224456787, "learning_rate": 1.980443189515409e-05, "loss": 0.6015, "step": 4200 }, { "epoch": 0.12905108592142045, "grad_norm": 0.30686524510383606, "learning_rate": 1.9804336773604892e-05, "loss": 0.5382, "step": 4201 }, { "epoch": 0.12908180505636963, "grad_norm": 0.3675273060798645, "learning_rate": 1.980424162915698e-05, "loss": 0.6195, "step": 4202 }, { "epoch": 0.12911252419131877, "grad_norm": 0.735525906085968, "learning_rate": 1.9804146461810577e-05, "loss": 0.658, "step": 4203 }, { "epoch": 0.12914324332626792, "grad_norm": 0.38372528553009033, "learning_rate": 1.9804051271565907e-05, "loss": 0.5329, "step": 4204 }, { "epoch": 0.1291739624612171, "grad_norm": 0.33951514959335327, "learning_rate": 1.980395605842319e-05, "loss": 0.6332, "step": 4205 }, { "epoch": 0.12920468159616624, "grad_norm": 0.3604397177696228, "learning_rate": 1.980386082238265e-05, "loss": 0.6786, "step": 4206 }, { "epoch": 0.12923540073111542, "grad_norm": 1.0651787519454956, "learning_rate": 1.9803765563444507e-05, "loss": 0.5598, "step": 4207 }, { "epoch": 0.12926611986606457, "grad_norm": 0.336865097284317, "learning_rate": 1.9803670281608984e-05, "loss": 0.6172, "step": 4208 }, { "epoch": 0.12929683900101374, "grad_norm": 0.36271917819976807, "learning_rate": 1.9803574976876305e-05, "loss": 0.6619, "step": 4209 }, { "epoch": 0.1293275581359629, "grad_norm": 0.3532068729400635, "learning_rate": 1.9803479649246692e-05, "loss": 0.6085, "step": 4210 }, { "epoch": 0.12935827727091206, "grad_norm": 0.349139928817749, "learning_rate": 1.980338429872037e-05, "loss": 0.5731, "step": 4211 }, { "epoch": 0.1293889964058612, "grad_norm": 0.3591964840888977, "learning_rate": 1.9803288925297556e-05, "loss": 0.6943, "step": 4212 }, { "epoch": 0.12941971554081036, "grad_norm": 0.4120277166366577, "learning_rate": 1.9803193528978477e-05, "loss": 0.6488, "step": 4213 }, { "epoch": 0.12945043467575953, "grad_norm": 0.3952070474624634, "learning_rate": 1.9803098109763355e-05, "loss": 0.5829, "step": 4214 }, { "epoch": 0.12948115381070868, "grad_norm": 0.499146968126297, "learning_rate": 1.9803002667652415e-05, "loss": 0.6279, "step": 4215 }, { "epoch": 0.12951187294565786, "grad_norm": 0.3574534058570862, "learning_rate": 1.9802907202645876e-05, "loss": 0.6222, "step": 4216 }, { "epoch": 0.129542592080607, "grad_norm": 0.4165816307067871, "learning_rate": 1.980281171474396e-05, "loss": 0.5968, "step": 4217 }, { "epoch": 0.12957331121555618, "grad_norm": 0.34909817576408386, "learning_rate": 1.9802716203946895e-05, "loss": 0.6153, "step": 4218 }, { "epoch": 0.12960403035050533, "grad_norm": 0.33065325021743774, "learning_rate": 1.9802620670254905e-05, "loss": 0.5666, "step": 4219 }, { "epoch": 0.12963474948545448, "grad_norm": 0.34706881642341614, "learning_rate": 1.9802525113668204e-05, "loss": 0.6344, "step": 4220 }, { "epoch": 0.12966546862040365, "grad_norm": 0.3462289273738861, "learning_rate": 1.9802429534187025e-05, "loss": 0.5895, "step": 4221 }, { "epoch": 0.1296961877553528, "grad_norm": 0.311054527759552, "learning_rate": 1.9802333931811586e-05, "loss": 0.5266, "step": 4222 }, { "epoch": 0.12972690689030197, "grad_norm": 0.34167975187301636, "learning_rate": 1.980223830654211e-05, "loss": 0.6194, "step": 4223 }, { "epoch": 0.12975762602525112, "grad_norm": 0.3233288526535034, "learning_rate": 1.980214265837882e-05, "loss": 0.5591, "step": 4224 }, { "epoch": 0.1297883451602003, "grad_norm": 0.3204280734062195, "learning_rate": 1.9802046987321946e-05, "loss": 0.6095, "step": 4225 }, { "epoch": 0.12981906429514944, "grad_norm": 0.3584391474723816, "learning_rate": 1.9801951293371705e-05, "loss": 0.6604, "step": 4226 }, { "epoch": 0.12984978343009862, "grad_norm": 0.3422292470932007, "learning_rate": 1.980185557652832e-05, "loss": 0.5558, "step": 4227 }, { "epoch": 0.12988050256504777, "grad_norm": 0.3297055959701538, "learning_rate": 1.9801759836792023e-05, "loss": 0.6061, "step": 4228 }, { "epoch": 0.12991122169999691, "grad_norm": 0.34796321392059326, "learning_rate": 1.980166407416303e-05, "loss": 0.6992, "step": 4229 }, { "epoch": 0.1299419408349461, "grad_norm": 0.70027756690979, "learning_rate": 1.980156828864156e-05, "loss": 0.5911, "step": 4230 }, { "epoch": 0.12997265996989524, "grad_norm": 0.3644903302192688, "learning_rate": 1.9801472480227844e-05, "loss": 0.655, "step": 4231 }, { "epoch": 0.1300033791048444, "grad_norm": 0.3245377838611603, "learning_rate": 1.980137664892211e-05, "loss": 0.5683, "step": 4232 }, { "epoch": 0.13003409823979356, "grad_norm": 0.3277278542518616, "learning_rate": 1.9801280794724568e-05, "loss": 0.553, "step": 4233 }, { "epoch": 0.13006481737474274, "grad_norm": 0.3350740969181061, "learning_rate": 1.9801184917635456e-05, "loss": 0.5624, "step": 4234 }, { "epoch": 0.13009553650969188, "grad_norm": 0.3985815644264221, "learning_rate": 1.980108901765499e-05, "loss": 0.5929, "step": 4235 }, { "epoch": 0.13012625564464106, "grad_norm": 0.33505308628082275, "learning_rate": 1.98009930947834e-05, "loss": 0.609, "step": 4236 }, { "epoch": 0.1301569747795902, "grad_norm": 0.324090838432312, "learning_rate": 1.9800897149020902e-05, "loss": 0.573, "step": 4237 }, { "epoch": 0.13018769391453935, "grad_norm": 0.3388899266719818, "learning_rate": 1.9800801180367726e-05, "loss": 0.6409, "step": 4238 }, { "epoch": 0.13021841304948853, "grad_norm": 0.33197978138923645, "learning_rate": 1.9800705188824093e-05, "loss": 0.6093, "step": 4239 }, { "epoch": 0.13024913218443768, "grad_norm": 0.3329910635948181, "learning_rate": 1.980060917439023e-05, "loss": 0.5556, "step": 4240 }, { "epoch": 0.13027985131938685, "grad_norm": 0.32539790868759155, "learning_rate": 1.980051313706636e-05, "loss": 0.6191, "step": 4241 }, { "epoch": 0.130310570454336, "grad_norm": 0.33585062623023987, "learning_rate": 1.9800417076852706e-05, "loss": 0.6274, "step": 4242 }, { "epoch": 0.13034128958928518, "grad_norm": 0.3267397880554199, "learning_rate": 1.9800320993749495e-05, "loss": 0.6119, "step": 4243 }, { "epoch": 0.13037200872423432, "grad_norm": 0.3327142596244812, "learning_rate": 1.9800224887756946e-05, "loss": 0.6097, "step": 4244 }, { "epoch": 0.1304027278591835, "grad_norm": 0.29694288969039917, "learning_rate": 1.9800128758875293e-05, "loss": 0.5281, "step": 4245 }, { "epoch": 0.13043344699413265, "grad_norm": 0.31498339772224426, "learning_rate": 1.980003260710475e-05, "loss": 0.5453, "step": 4246 }, { "epoch": 0.1304641661290818, "grad_norm": 0.3311323821544647, "learning_rate": 1.979993643244555e-05, "loss": 0.6721, "step": 4247 }, { "epoch": 0.13049488526403097, "grad_norm": 0.3245963454246521, "learning_rate": 1.9799840234897912e-05, "loss": 0.6109, "step": 4248 }, { "epoch": 0.13052560439898012, "grad_norm": 0.3155243992805481, "learning_rate": 1.979974401446206e-05, "loss": 0.641, "step": 4249 }, { "epoch": 0.1305563235339293, "grad_norm": 0.31478723883628845, "learning_rate": 1.9799647771138227e-05, "loss": 0.5683, "step": 4250 }, { "epoch": 0.13058704266887844, "grad_norm": 1.5693448781967163, "learning_rate": 1.9799551504926628e-05, "loss": 0.5521, "step": 4251 }, { "epoch": 0.13061776180382761, "grad_norm": 0.3539428114891052, "learning_rate": 1.9799455215827494e-05, "loss": 0.6763, "step": 4252 }, { "epoch": 0.13064848093877676, "grad_norm": 0.40604153275489807, "learning_rate": 1.979935890384105e-05, "loss": 0.655, "step": 4253 }, { "epoch": 0.13067920007372594, "grad_norm": 0.37342336773872375, "learning_rate": 1.9799262568967516e-05, "loss": 0.574, "step": 4254 }, { "epoch": 0.13070991920867508, "grad_norm": 0.3450070023536682, "learning_rate": 1.9799166211207122e-05, "loss": 0.7382, "step": 4255 }, { "epoch": 0.13074063834362423, "grad_norm": 0.3235737681388855, "learning_rate": 1.9799069830560088e-05, "loss": 0.5912, "step": 4256 }, { "epoch": 0.1307713574785734, "grad_norm": 0.3245849609375, "learning_rate": 1.9798973427026643e-05, "loss": 0.5616, "step": 4257 }, { "epoch": 0.13080207661352256, "grad_norm": 0.3204537630081177, "learning_rate": 1.9798877000607012e-05, "loss": 0.5962, "step": 4258 }, { "epoch": 0.13083279574847173, "grad_norm": 0.3556336462497711, "learning_rate": 1.979878055130142e-05, "loss": 0.5549, "step": 4259 }, { "epoch": 0.13086351488342088, "grad_norm": 0.3368959128856659, "learning_rate": 1.9798684079110092e-05, "loss": 0.5767, "step": 4260 }, { "epoch": 0.13089423401837005, "grad_norm": 0.3417651057243347, "learning_rate": 1.979858758403325e-05, "loss": 0.5933, "step": 4261 }, { "epoch": 0.1309249531533192, "grad_norm": 0.36948224902153015, "learning_rate": 1.9798491066071123e-05, "loss": 0.6437, "step": 4262 }, { "epoch": 0.13095567228826835, "grad_norm": 0.33175426721572876, "learning_rate": 1.9798394525223938e-05, "loss": 0.5737, "step": 4263 }, { "epoch": 0.13098639142321752, "grad_norm": 0.36188217997550964, "learning_rate": 1.9798297961491918e-05, "loss": 0.6362, "step": 4264 }, { "epoch": 0.13101711055816667, "grad_norm": 0.3442288935184479, "learning_rate": 1.9798201374875287e-05, "loss": 0.605, "step": 4265 }, { "epoch": 0.13104782969311585, "grad_norm": 0.3236364722251892, "learning_rate": 1.9798104765374276e-05, "loss": 0.6085, "step": 4266 }, { "epoch": 0.131078548828065, "grad_norm": 0.37995025515556335, "learning_rate": 1.97980081329891e-05, "loss": 0.5845, "step": 4267 }, { "epoch": 0.13110926796301417, "grad_norm": 0.31641361117362976, "learning_rate": 1.979791147772e-05, "loss": 0.513, "step": 4268 }, { "epoch": 0.13113998709796332, "grad_norm": 0.3252931535243988, "learning_rate": 1.979781479956719e-05, "loss": 0.6443, "step": 4269 }, { "epoch": 0.1311707062329125, "grad_norm": 0.3615519106388092, "learning_rate": 1.97977180985309e-05, "loss": 0.5138, "step": 4270 }, { "epoch": 0.13120142536786164, "grad_norm": 0.3259580135345459, "learning_rate": 1.9797621374611352e-05, "loss": 0.6058, "step": 4271 }, { "epoch": 0.1312321445028108, "grad_norm": 0.33849358558654785, "learning_rate": 1.9797524627808776e-05, "loss": 0.6891, "step": 4272 }, { "epoch": 0.13126286363775996, "grad_norm": 0.34179776906967163, "learning_rate": 1.97974278581234e-05, "loss": 0.668, "step": 4273 }, { "epoch": 0.1312935827727091, "grad_norm": 0.3398226201534271, "learning_rate": 1.9797331065555447e-05, "loss": 0.5683, "step": 4274 }, { "epoch": 0.1313243019076583, "grad_norm": 0.36450478434562683, "learning_rate": 1.979723425010514e-05, "loss": 0.6672, "step": 4275 }, { "epoch": 0.13135502104260743, "grad_norm": 0.3161754310131073, "learning_rate": 1.9797137411772706e-05, "loss": 0.5813, "step": 4276 }, { "epoch": 0.1313857401775566, "grad_norm": 0.3997567594051361, "learning_rate": 1.9797040550558378e-05, "loss": 0.5843, "step": 4277 }, { "epoch": 0.13141645931250576, "grad_norm": 0.3380495309829712, "learning_rate": 1.9796943666462377e-05, "loss": 0.6336, "step": 4278 }, { "epoch": 0.13144717844745493, "grad_norm": 0.37068691849708557, "learning_rate": 1.979684675948493e-05, "loss": 0.585, "step": 4279 }, { "epoch": 0.13147789758240408, "grad_norm": 0.3373292088508606, "learning_rate": 1.979674982962626e-05, "loss": 0.6921, "step": 4280 }, { "epoch": 0.13150861671735323, "grad_norm": 0.35809484124183655, "learning_rate": 1.9796652876886598e-05, "loss": 0.6877, "step": 4281 }, { "epoch": 0.1315393358523024, "grad_norm": 0.3724095821380615, "learning_rate": 1.9796555901266173e-05, "loss": 0.6082, "step": 4282 }, { "epoch": 0.13157005498725155, "grad_norm": 0.44795355200767517, "learning_rate": 1.9796458902765202e-05, "loss": 0.6928, "step": 4283 }, { "epoch": 0.13160077412220073, "grad_norm": 0.3761241137981415, "learning_rate": 1.979636188138392e-05, "loss": 0.6092, "step": 4284 }, { "epoch": 0.13163149325714987, "grad_norm": 0.34612640738487244, "learning_rate": 1.979626483712255e-05, "loss": 0.6317, "step": 4285 }, { "epoch": 0.13166221239209905, "grad_norm": 0.3491370677947998, "learning_rate": 1.9796167769981323e-05, "loss": 0.5922, "step": 4286 }, { "epoch": 0.1316929315270482, "grad_norm": 0.35416269302368164, "learning_rate": 1.9796070679960455e-05, "loss": 0.616, "step": 4287 }, { "epoch": 0.13172365066199737, "grad_norm": 0.3798975348472595, "learning_rate": 1.9795973567060184e-05, "loss": 0.6498, "step": 4288 }, { "epoch": 0.13175436979694652, "grad_norm": 0.32274964451789856, "learning_rate": 1.9795876431280732e-05, "loss": 0.5367, "step": 4289 }, { "epoch": 0.13178508893189567, "grad_norm": 0.36896470189094543, "learning_rate": 1.9795779272622325e-05, "loss": 0.621, "step": 4290 }, { "epoch": 0.13181580806684484, "grad_norm": 0.33969131112098694, "learning_rate": 1.9795682091085195e-05, "loss": 0.6385, "step": 4291 }, { "epoch": 0.131846527201794, "grad_norm": 0.33554041385650635, "learning_rate": 1.9795584886669563e-05, "loss": 0.7011, "step": 4292 }, { "epoch": 0.13187724633674316, "grad_norm": 0.34219199419021606, "learning_rate": 1.979548765937566e-05, "loss": 0.5393, "step": 4293 }, { "epoch": 0.1319079654716923, "grad_norm": 0.34001001715660095, "learning_rate": 1.9795390409203707e-05, "loss": 0.598, "step": 4294 }, { "epoch": 0.1319386846066415, "grad_norm": 0.36672675609588623, "learning_rate": 1.979529313615394e-05, "loss": 0.6373, "step": 4295 }, { "epoch": 0.13196940374159064, "grad_norm": 0.3439866006374359, "learning_rate": 1.979519584022658e-05, "loss": 0.6316, "step": 4296 }, { "epoch": 0.1320001228765398, "grad_norm": 0.3510702848434448, "learning_rate": 1.9795098521421855e-05, "loss": 0.5251, "step": 4297 }, { "epoch": 0.13203084201148896, "grad_norm": 0.3555569052696228, "learning_rate": 1.9795001179739994e-05, "loss": 0.5707, "step": 4298 }, { "epoch": 0.1320615611464381, "grad_norm": 0.3365461826324463, "learning_rate": 1.9794903815181226e-05, "loss": 0.6253, "step": 4299 }, { "epoch": 0.13209228028138728, "grad_norm": 0.32344210147857666, "learning_rate": 1.9794806427745776e-05, "loss": 0.5724, "step": 4300 }, { "epoch": 0.13212299941633643, "grad_norm": 0.6185758113861084, "learning_rate": 1.979470901743387e-05, "loss": 0.5468, "step": 4301 }, { "epoch": 0.1321537185512856, "grad_norm": 0.32118409872055054, "learning_rate": 1.9794611584245734e-05, "loss": 0.5242, "step": 4302 }, { "epoch": 0.13218443768623475, "grad_norm": 0.3544711768627167, "learning_rate": 1.9794514128181604e-05, "loss": 0.6119, "step": 4303 }, { "epoch": 0.13221515682118393, "grad_norm": 0.34140098094940186, "learning_rate": 1.9794416649241696e-05, "loss": 0.6576, "step": 4304 }, { "epoch": 0.13224587595613307, "grad_norm": 0.48005542159080505, "learning_rate": 1.9794319147426247e-05, "loss": 0.635, "step": 4305 }, { "epoch": 0.13227659509108222, "grad_norm": 0.33215007185935974, "learning_rate": 1.9794221622735487e-05, "loss": 0.5298, "step": 4306 }, { "epoch": 0.1323073142260314, "grad_norm": 0.3357847034931183, "learning_rate": 1.979412407516963e-05, "loss": 0.5329, "step": 4307 }, { "epoch": 0.13233803336098054, "grad_norm": 0.31066787242889404, "learning_rate": 1.9794026504728917e-05, "loss": 0.5649, "step": 4308 }, { "epoch": 0.13236875249592972, "grad_norm": 0.3548673093318939, "learning_rate": 1.979392891141357e-05, "loss": 0.6288, "step": 4309 }, { "epoch": 0.13239947163087887, "grad_norm": 0.4137327969074249, "learning_rate": 1.9793831295223818e-05, "loss": 0.5691, "step": 4310 }, { "epoch": 0.13243019076582804, "grad_norm": 0.338111937046051, "learning_rate": 1.979373365615989e-05, "loss": 0.5899, "step": 4311 }, { "epoch": 0.1324609099007772, "grad_norm": 0.36080965399742126, "learning_rate": 1.979363599422201e-05, "loss": 0.6012, "step": 4312 }, { "epoch": 0.13249162903572637, "grad_norm": 0.3582145869731903, "learning_rate": 1.9793538309410412e-05, "loss": 0.585, "step": 4313 }, { "epoch": 0.1325223481706755, "grad_norm": 0.37289300560951233, "learning_rate": 1.979344060172532e-05, "loss": 0.5524, "step": 4314 }, { "epoch": 0.13255306730562466, "grad_norm": 0.3706514537334442, "learning_rate": 1.9793342871166966e-05, "loss": 0.6612, "step": 4315 }, { "epoch": 0.13258378644057384, "grad_norm": 0.30807971954345703, "learning_rate": 1.9793245117735572e-05, "loss": 0.545, "step": 4316 }, { "epoch": 0.13261450557552298, "grad_norm": 0.7305949926376343, "learning_rate": 1.9793147341431372e-05, "loss": 0.627, "step": 4317 }, { "epoch": 0.13264522471047216, "grad_norm": 0.3361455202102661, "learning_rate": 1.979304954225459e-05, "loss": 0.5985, "step": 4318 }, { "epoch": 0.1326759438454213, "grad_norm": 0.35588130354881287, "learning_rate": 1.979295172020546e-05, "loss": 0.6707, "step": 4319 }, { "epoch": 0.13270666298037048, "grad_norm": 0.4502997398376465, "learning_rate": 1.9792853875284206e-05, "loss": 0.5664, "step": 4320 }, { "epoch": 0.13273738211531963, "grad_norm": 0.33355608582496643, "learning_rate": 1.9792756007491062e-05, "loss": 0.5473, "step": 4321 }, { "epoch": 0.1327681012502688, "grad_norm": 0.3379453420639038, "learning_rate": 1.9792658116826246e-05, "loss": 0.6497, "step": 4322 }, { "epoch": 0.13279882038521795, "grad_norm": 0.34069690108299255, "learning_rate": 1.9792560203289996e-05, "loss": 0.6374, "step": 4323 }, { "epoch": 0.1328295395201671, "grad_norm": 0.34663498401641846, "learning_rate": 1.9792462266882538e-05, "loss": 0.6109, "step": 4324 }, { "epoch": 0.13286025865511628, "grad_norm": 0.3214302361011505, "learning_rate": 1.97923643076041e-05, "loss": 0.6454, "step": 4325 }, { "epoch": 0.13289097779006542, "grad_norm": 0.3539555072784424, "learning_rate": 1.979226632545491e-05, "loss": 0.6387, "step": 4326 }, { "epoch": 0.1329216969250146, "grad_norm": 0.33972111344337463, "learning_rate": 1.9792168320435202e-05, "loss": 0.5789, "step": 4327 }, { "epoch": 0.13295241605996375, "grad_norm": 0.33387529850006104, "learning_rate": 1.9792070292545194e-05, "loss": 0.607, "step": 4328 }, { "epoch": 0.13298313519491292, "grad_norm": 0.3223400115966797, "learning_rate": 1.9791972241785128e-05, "loss": 0.5137, "step": 4329 }, { "epoch": 0.13301385432986207, "grad_norm": 0.3193095028400421, "learning_rate": 1.979187416815523e-05, "loss": 0.5703, "step": 4330 }, { "epoch": 0.13304457346481124, "grad_norm": 0.3480003774166107, "learning_rate": 1.9791776071655718e-05, "loss": 0.5364, "step": 4331 }, { "epoch": 0.1330752925997604, "grad_norm": 0.3290407061576843, "learning_rate": 1.9791677952286833e-05, "loss": 0.5318, "step": 4332 }, { "epoch": 0.13310601173470954, "grad_norm": 0.35076090693473816, "learning_rate": 1.97915798100488e-05, "loss": 0.6265, "step": 4333 }, { "epoch": 0.13313673086965871, "grad_norm": 0.3215453028678894, "learning_rate": 1.9791481644941846e-05, "loss": 0.6457, "step": 4334 }, { "epoch": 0.13316745000460786, "grad_norm": 0.32855623960494995, "learning_rate": 1.9791383456966206e-05, "loss": 0.6014, "step": 4335 }, { "epoch": 0.13319816913955704, "grad_norm": 0.2952263057231903, "learning_rate": 1.9791285246122106e-05, "loss": 0.6235, "step": 4336 }, { "epoch": 0.13322888827450619, "grad_norm": 0.319308876991272, "learning_rate": 1.9791187012409774e-05, "loss": 0.5512, "step": 4337 }, { "epoch": 0.13325960740945536, "grad_norm": 0.3745132088661194, "learning_rate": 1.9791088755829442e-05, "loss": 0.6279, "step": 4338 }, { "epoch": 0.1332903265444045, "grad_norm": 0.36250582337379456, "learning_rate": 1.9790990476381338e-05, "loss": 0.6062, "step": 4339 }, { "epoch": 0.13332104567935366, "grad_norm": 0.3208746016025543, "learning_rate": 1.979089217406569e-05, "loss": 0.5858, "step": 4340 }, { "epoch": 0.13335176481430283, "grad_norm": 0.43410617113113403, "learning_rate": 1.9790793848882733e-05, "loss": 0.5598, "step": 4341 }, { "epoch": 0.13338248394925198, "grad_norm": 0.3328767716884613, "learning_rate": 1.9790695500832692e-05, "loss": 0.5297, "step": 4342 }, { "epoch": 0.13341320308420115, "grad_norm": 0.3389059901237488, "learning_rate": 1.9790597129915798e-05, "loss": 0.6435, "step": 4343 }, { "epoch": 0.1334439222191503, "grad_norm": 0.3368482291698456, "learning_rate": 1.979049873613228e-05, "loss": 0.6036, "step": 4344 }, { "epoch": 0.13347464135409948, "grad_norm": 0.32054680585861206, "learning_rate": 1.9790400319482365e-05, "loss": 0.6164, "step": 4345 }, { "epoch": 0.13350536048904862, "grad_norm": 0.35870561003685, "learning_rate": 1.9790301879966295e-05, "loss": 0.5644, "step": 4346 }, { "epoch": 0.1335360796239978, "grad_norm": 0.32240307331085205, "learning_rate": 1.9790203417584282e-05, "loss": 0.5292, "step": 4347 }, { "epoch": 0.13356679875894695, "grad_norm": 0.31549590826034546, "learning_rate": 1.9790104932336568e-05, "loss": 0.6133, "step": 4348 }, { "epoch": 0.1335975178938961, "grad_norm": 0.3353824019432068, "learning_rate": 1.9790006424223385e-05, "loss": 0.6261, "step": 4349 }, { "epoch": 0.13362823702884527, "grad_norm": 0.37589672207832336, "learning_rate": 1.978990789324495e-05, "loss": 0.6025, "step": 4350 }, { "epoch": 0.13365895616379442, "grad_norm": 0.3105200529098511, "learning_rate": 1.978980933940151e-05, "loss": 0.5072, "step": 4351 }, { "epoch": 0.1336896752987436, "grad_norm": 0.3886089622974396, "learning_rate": 1.9789710762693282e-05, "loss": 0.5371, "step": 4352 }, { "epoch": 0.13372039443369274, "grad_norm": 0.33407777547836304, "learning_rate": 1.97896121631205e-05, "loss": 0.6313, "step": 4353 }, { "epoch": 0.13375111356864192, "grad_norm": 0.30718809366226196, "learning_rate": 1.97895135406834e-05, "loss": 0.5947, "step": 4354 }, { "epoch": 0.13378183270359106, "grad_norm": 0.3320011496543884, "learning_rate": 1.9789414895382203e-05, "loss": 0.6711, "step": 4355 }, { "epoch": 0.13381255183854024, "grad_norm": 0.337852418422699, "learning_rate": 1.9789316227217145e-05, "loss": 0.6173, "step": 4356 }, { "epoch": 0.1338432709734894, "grad_norm": 0.3337247371673584, "learning_rate": 1.9789217536188454e-05, "loss": 0.6986, "step": 4357 }, { "epoch": 0.13387399010843853, "grad_norm": 0.37444108724594116, "learning_rate": 1.9789118822296362e-05, "loss": 0.618, "step": 4358 }, { "epoch": 0.1339047092433877, "grad_norm": 0.3417365252971649, "learning_rate": 1.9789020085541103e-05, "loss": 0.6108, "step": 4359 }, { "epoch": 0.13393542837833686, "grad_norm": 0.3381365239620209, "learning_rate": 1.97889213259229e-05, "loss": 0.5741, "step": 4360 }, { "epoch": 0.13396614751328603, "grad_norm": 0.4111553728580475, "learning_rate": 1.9788822543441986e-05, "loss": 0.654, "step": 4361 }, { "epoch": 0.13399686664823518, "grad_norm": 0.3512863218784332, "learning_rate": 1.97887237380986e-05, "loss": 0.5569, "step": 4362 }, { "epoch": 0.13402758578318436, "grad_norm": 0.3039816617965698, "learning_rate": 1.978862490989296e-05, "loss": 0.6099, "step": 4363 }, { "epoch": 0.1340583049181335, "grad_norm": 0.40568414330482483, "learning_rate": 1.978852605882531e-05, "loss": 0.6174, "step": 4364 }, { "epoch": 0.13408902405308268, "grad_norm": 0.33759164810180664, "learning_rate": 1.9788427184895863e-05, "loss": 0.6121, "step": 4365 }, { "epoch": 0.13411974318803183, "grad_norm": 0.35140833258628845, "learning_rate": 1.978832828810487e-05, "loss": 0.6022, "step": 4366 }, { "epoch": 0.13415046232298097, "grad_norm": 0.32849910855293274, "learning_rate": 1.978822936845255e-05, "loss": 0.6171, "step": 4367 }, { "epoch": 0.13418118145793015, "grad_norm": 0.3553868532180786, "learning_rate": 1.9788130425939133e-05, "loss": 0.614, "step": 4368 }, { "epoch": 0.1342119005928793, "grad_norm": 0.37060636281967163, "learning_rate": 1.9788031460564857e-05, "loss": 0.6201, "step": 4369 }, { "epoch": 0.13424261972782847, "grad_norm": 0.3612472116947174, "learning_rate": 1.978793247232995e-05, "loss": 0.6294, "step": 4370 }, { "epoch": 0.13427333886277762, "grad_norm": 0.3392025828361511, "learning_rate": 1.978783346123464e-05, "loss": 0.5875, "step": 4371 }, { "epoch": 0.1343040579977268, "grad_norm": 0.32892054319381714, "learning_rate": 1.9787734427279167e-05, "loss": 0.6101, "step": 4372 }, { "epoch": 0.13433477713267594, "grad_norm": 0.3510526418685913, "learning_rate": 1.9787635370463753e-05, "loss": 0.5671, "step": 4373 }, { "epoch": 0.13436549626762512, "grad_norm": 0.3518812358379364, "learning_rate": 1.9787536290788635e-05, "loss": 0.6524, "step": 4374 }, { "epoch": 0.13439621540257427, "grad_norm": 0.31839925050735474, "learning_rate": 1.9787437188254042e-05, "loss": 0.5399, "step": 4375 }, { "epoch": 0.1344269345375234, "grad_norm": 0.3263569176197052, "learning_rate": 1.9787338062860206e-05, "loss": 0.6333, "step": 4376 }, { "epoch": 0.1344576536724726, "grad_norm": 0.42989686131477356, "learning_rate": 1.9787238914607355e-05, "loss": 0.6924, "step": 4377 }, { "epoch": 0.13448837280742174, "grad_norm": 0.384536474943161, "learning_rate": 1.9787139743495728e-05, "loss": 0.6706, "step": 4378 }, { "epoch": 0.1345190919423709, "grad_norm": 0.3507455587387085, "learning_rate": 1.9787040549525552e-05, "loss": 0.6537, "step": 4379 }, { "epoch": 0.13454981107732006, "grad_norm": 0.3457149267196655, "learning_rate": 1.9786941332697056e-05, "loss": 0.6269, "step": 4380 }, { "epoch": 0.13458053021226923, "grad_norm": 0.36148393154144287, "learning_rate": 1.978684209301048e-05, "loss": 0.5836, "step": 4381 }, { "epoch": 0.13461124934721838, "grad_norm": 0.3404276669025421, "learning_rate": 1.9786742830466046e-05, "loss": 0.6, "step": 4382 }, { "epoch": 0.13464196848216753, "grad_norm": 1.3200973272323608, "learning_rate": 1.9786643545063993e-05, "loss": 0.5598, "step": 4383 }, { "epoch": 0.1346726876171167, "grad_norm": 0.3306865692138672, "learning_rate": 1.978654423680455e-05, "loss": 0.6678, "step": 4384 }, { "epoch": 0.13470340675206585, "grad_norm": 0.3671681880950928, "learning_rate": 1.978644490568795e-05, "loss": 0.646, "step": 4385 }, { "epoch": 0.13473412588701503, "grad_norm": 0.39160609245300293, "learning_rate": 1.9786345551714426e-05, "loss": 0.6118, "step": 4386 }, { "epoch": 0.13476484502196417, "grad_norm": 0.33711469173431396, "learning_rate": 1.978624617488421e-05, "loss": 0.6542, "step": 4387 }, { "epoch": 0.13479556415691335, "grad_norm": 0.37090814113616943, "learning_rate": 1.9786146775197526e-05, "loss": 0.6544, "step": 4388 }, { "epoch": 0.1348262832918625, "grad_norm": 0.32710468769073486, "learning_rate": 1.9786047352654617e-05, "loss": 0.6253, "step": 4389 }, { "epoch": 0.13485700242681167, "grad_norm": 0.3323386609554291, "learning_rate": 1.978594790725571e-05, "loss": 0.6112, "step": 4390 }, { "epoch": 0.13488772156176082, "grad_norm": 0.39377593994140625, "learning_rate": 1.978584843900104e-05, "loss": 0.5541, "step": 4391 }, { "epoch": 0.13491844069670997, "grad_norm": 0.34699395298957825, "learning_rate": 1.9785748947890835e-05, "loss": 0.6419, "step": 4392 }, { "epoch": 0.13494915983165914, "grad_norm": 0.37308019399642944, "learning_rate": 1.978564943392533e-05, "loss": 0.5382, "step": 4393 }, { "epoch": 0.1349798789666083, "grad_norm": 0.3299615979194641, "learning_rate": 1.9785549897104757e-05, "loss": 0.5395, "step": 4394 }, { "epoch": 0.13501059810155747, "grad_norm": 0.2974483072757721, "learning_rate": 1.978545033742935e-05, "loss": 0.5797, "step": 4395 }, { "epoch": 0.13504131723650661, "grad_norm": 0.4714646339416504, "learning_rate": 1.978535075489934e-05, "loss": 0.6083, "step": 4396 }, { "epoch": 0.1350720363714558, "grad_norm": 0.3137882649898529, "learning_rate": 1.978525114951496e-05, "loss": 0.5225, "step": 4397 }, { "epoch": 0.13510275550640494, "grad_norm": 0.3381197452545166, "learning_rate": 1.9785151521276443e-05, "loss": 0.6981, "step": 4398 }, { "epoch": 0.1351334746413541, "grad_norm": 0.3154951333999634, "learning_rate": 1.978505187018402e-05, "loss": 0.5508, "step": 4399 }, { "epoch": 0.13516419377630326, "grad_norm": 0.4397277235984802, "learning_rate": 1.9784952196237927e-05, "loss": 0.6968, "step": 4400 }, { "epoch": 0.1351949129112524, "grad_norm": 0.3178330361843109, "learning_rate": 1.9784852499438394e-05, "loss": 0.5215, "step": 4401 }, { "epoch": 0.13522563204620158, "grad_norm": 0.3823036849498749, "learning_rate": 1.9784752779785656e-05, "loss": 0.5532, "step": 4402 }, { "epoch": 0.13525635118115073, "grad_norm": 0.30318835377693176, "learning_rate": 1.9784653037279938e-05, "loss": 0.5542, "step": 4403 }, { "epoch": 0.1352870703160999, "grad_norm": 0.33618056774139404, "learning_rate": 1.9784553271921488e-05, "loss": 0.6276, "step": 4404 }, { "epoch": 0.13531778945104905, "grad_norm": 0.3566704988479614, "learning_rate": 1.9784453483710523e-05, "loss": 0.5843, "step": 4405 }, { "epoch": 0.13534850858599823, "grad_norm": 0.3529864251613617, "learning_rate": 1.978435367264729e-05, "loss": 0.6952, "step": 4406 }, { "epoch": 0.13537922772094738, "grad_norm": 0.31415602564811707, "learning_rate": 1.9784253838732012e-05, "loss": 0.5595, "step": 4407 }, { "epoch": 0.13540994685589655, "grad_norm": 0.350422203540802, "learning_rate": 1.9784153981964927e-05, "loss": 0.5693, "step": 4408 }, { "epoch": 0.1354406659908457, "grad_norm": 13.392049789428711, "learning_rate": 1.978405410234627e-05, "loss": 0.5735, "step": 4409 }, { "epoch": 0.13547138512579485, "grad_norm": 0.34208405017852783, "learning_rate": 1.9783954199876265e-05, "loss": 0.6302, "step": 4410 }, { "epoch": 0.13550210426074402, "grad_norm": 0.355143278837204, "learning_rate": 1.9783854274555155e-05, "loss": 0.6641, "step": 4411 }, { "epoch": 0.13553282339569317, "grad_norm": 0.31702661514282227, "learning_rate": 1.978375432638317e-05, "loss": 0.5458, "step": 4412 }, { "epoch": 0.13556354253064234, "grad_norm": 0.35622265934944153, "learning_rate": 1.9783654355360543e-05, "loss": 0.5393, "step": 4413 }, { "epoch": 0.1355942616655915, "grad_norm": 0.34844425320625305, "learning_rate": 1.9783554361487508e-05, "loss": 0.6131, "step": 4414 }, { "epoch": 0.13562498080054067, "grad_norm": 0.328110009431839, "learning_rate": 1.9783454344764302e-05, "loss": 0.5693, "step": 4415 }, { "epoch": 0.13565569993548982, "grad_norm": 0.31540820002555847, "learning_rate": 1.978335430519115e-05, "loss": 0.6007, "step": 4416 }, { "epoch": 0.13568641907043896, "grad_norm": 0.3329354226589203, "learning_rate": 1.9783254242768294e-05, "loss": 0.6848, "step": 4417 }, { "epoch": 0.13571713820538814, "grad_norm": 0.3557731807231903, "learning_rate": 1.978315415749596e-05, "loss": 0.5817, "step": 4418 }, { "epoch": 0.13574785734033729, "grad_norm": 0.35920459032058716, "learning_rate": 1.9783054049374392e-05, "loss": 0.5782, "step": 4419 }, { "epoch": 0.13577857647528646, "grad_norm": 0.6937598586082458, "learning_rate": 1.9782953918403813e-05, "loss": 0.6473, "step": 4420 }, { "epoch": 0.1358092956102356, "grad_norm": 0.36984673142433167, "learning_rate": 1.9782853764584466e-05, "loss": 0.5697, "step": 4421 }, { "epoch": 0.13584001474518478, "grad_norm": 0.32663390040397644, "learning_rate": 1.9782753587916578e-05, "loss": 0.6273, "step": 4422 }, { "epoch": 0.13587073388013393, "grad_norm": 0.3354629576206207, "learning_rate": 1.978265338840039e-05, "loss": 0.5586, "step": 4423 }, { "epoch": 0.1359014530150831, "grad_norm": 0.338153600692749, "learning_rate": 1.9782553166036128e-05, "loss": 0.6676, "step": 4424 }, { "epoch": 0.13593217215003225, "grad_norm": 0.39694347977638245, "learning_rate": 1.9782452920824033e-05, "loss": 0.5915, "step": 4425 }, { "epoch": 0.1359628912849814, "grad_norm": 0.3430796265602112, "learning_rate": 1.9782352652764332e-05, "loss": 0.6111, "step": 4426 }, { "epoch": 0.13599361041993058, "grad_norm": 0.3363812267780304, "learning_rate": 1.9782252361857263e-05, "loss": 0.6259, "step": 4427 }, { "epoch": 0.13602432955487972, "grad_norm": 0.32690146565437317, "learning_rate": 1.9782152048103063e-05, "loss": 0.6073, "step": 4428 }, { "epoch": 0.1360550486898289, "grad_norm": 0.33469367027282715, "learning_rate": 1.978205171150196e-05, "loss": 0.5679, "step": 4429 }, { "epoch": 0.13608576782477805, "grad_norm": 0.33856797218322754, "learning_rate": 1.97819513520542e-05, "loss": 0.6809, "step": 4430 }, { "epoch": 0.13611648695972722, "grad_norm": 0.35788214206695557, "learning_rate": 1.9781850969760003e-05, "loss": 0.5386, "step": 4431 }, { "epoch": 0.13614720609467637, "grad_norm": 0.3780348002910614, "learning_rate": 1.9781750564619607e-05, "loss": 0.6244, "step": 4432 }, { "epoch": 0.13617792522962555, "grad_norm": 0.44539856910705566, "learning_rate": 1.9781650136633256e-05, "loss": 0.4841, "step": 4433 }, { "epoch": 0.1362086443645747, "grad_norm": 0.3708028793334961, "learning_rate": 1.9781549685801175e-05, "loss": 0.5427, "step": 4434 }, { "epoch": 0.13623936349952384, "grad_norm": 0.3530467450618744, "learning_rate": 1.9781449212123604e-05, "loss": 0.5521, "step": 4435 }, { "epoch": 0.13627008263447302, "grad_norm": 0.3464943766593933, "learning_rate": 1.9781348715600774e-05, "loss": 0.6049, "step": 4436 }, { "epoch": 0.13630080176942216, "grad_norm": 0.3399164378643036, "learning_rate": 1.978124819623292e-05, "loss": 0.5881, "step": 4437 }, { "epoch": 0.13633152090437134, "grad_norm": 0.37794429063796997, "learning_rate": 1.978114765402028e-05, "loss": 0.6751, "step": 4438 }, { "epoch": 0.1363622400393205, "grad_norm": 0.34909164905548096, "learning_rate": 1.9781047088963082e-05, "loss": 0.5876, "step": 4439 }, { "epoch": 0.13639295917426966, "grad_norm": 0.31627652049064636, "learning_rate": 1.978094650106157e-05, "loss": 0.5638, "step": 4440 }, { "epoch": 0.1364236783092188, "grad_norm": 0.3764617443084717, "learning_rate": 1.978084589031597e-05, "loss": 0.5043, "step": 4441 }, { "epoch": 0.13645439744416799, "grad_norm": 0.3675718903541565, "learning_rate": 1.9780745256726526e-05, "loss": 0.6804, "step": 4442 }, { "epoch": 0.13648511657911713, "grad_norm": 0.32106924057006836, "learning_rate": 1.9780644600293465e-05, "loss": 0.6138, "step": 4443 }, { "epoch": 0.13651583571406628, "grad_norm": 0.34370118379592896, "learning_rate": 1.9780543921017026e-05, "loss": 0.5981, "step": 4444 }, { "epoch": 0.13654655484901546, "grad_norm": 0.5437188744544983, "learning_rate": 1.9780443218897446e-05, "loss": 0.671, "step": 4445 }, { "epoch": 0.1365772739839646, "grad_norm": 0.36525827646255493, "learning_rate": 1.9780342493934953e-05, "loss": 0.591, "step": 4446 }, { "epoch": 0.13660799311891378, "grad_norm": 0.30241113901138306, "learning_rate": 1.9780241746129795e-05, "loss": 0.594, "step": 4447 }, { "epoch": 0.13663871225386293, "grad_norm": 0.30361974239349365, "learning_rate": 1.9780140975482194e-05, "loss": 0.6036, "step": 4448 }, { "epoch": 0.1366694313888121, "grad_norm": 0.34674033522605896, "learning_rate": 1.978004018199239e-05, "loss": 0.641, "step": 4449 }, { "epoch": 0.13670015052376125, "grad_norm": 0.5231220126152039, "learning_rate": 1.977993936566062e-05, "loss": 0.5573, "step": 4450 }, { "epoch": 0.13673086965871042, "grad_norm": 0.3711162507534027, "learning_rate": 1.977983852648712e-05, "loss": 0.5209, "step": 4451 }, { "epoch": 0.13676158879365957, "grad_norm": 0.3749333620071411, "learning_rate": 1.977973766447212e-05, "loss": 0.5656, "step": 4452 }, { "epoch": 0.13679230792860872, "grad_norm": 0.31625911593437195, "learning_rate": 1.9779636779615865e-05, "loss": 0.5494, "step": 4453 }, { "epoch": 0.1368230270635579, "grad_norm": 0.34151190519332886, "learning_rate": 1.9779535871918584e-05, "loss": 0.6531, "step": 4454 }, { "epoch": 0.13685374619850704, "grad_norm": 0.34074828028678894, "learning_rate": 1.9779434941380514e-05, "loss": 0.6009, "step": 4455 }, { "epoch": 0.13688446533345622, "grad_norm": 0.3442020118236542, "learning_rate": 1.977933398800189e-05, "loss": 0.5974, "step": 4456 }, { "epoch": 0.13691518446840537, "grad_norm": 0.4164305031299591, "learning_rate": 1.9779233011782945e-05, "loss": 0.6199, "step": 4457 }, { "epoch": 0.13694590360335454, "grad_norm": 0.32570451498031616, "learning_rate": 1.977913201272392e-05, "loss": 0.5474, "step": 4458 }, { "epoch": 0.1369766227383037, "grad_norm": 0.36461371183395386, "learning_rate": 1.977903099082505e-05, "loss": 0.5586, "step": 4459 }, { "epoch": 0.13700734187325284, "grad_norm": 0.32858142256736755, "learning_rate": 1.9778929946086573e-05, "loss": 0.5747, "step": 4460 }, { "epoch": 0.137038061008202, "grad_norm": 0.7754678726196289, "learning_rate": 1.977882887850872e-05, "loss": 0.6536, "step": 4461 }, { "epoch": 0.13706878014315116, "grad_norm": 0.387199729681015, "learning_rate": 1.977872778809173e-05, "loss": 0.6112, "step": 4462 }, { "epoch": 0.13709949927810033, "grad_norm": 0.45527201890945435, "learning_rate": 1.9778626674835837e-05, "loss": 0.5727, "step": 4463 }, { "epoch": 0.13713021841304948, "grad_norm": 0.346097469329834, "learning_rate": 1.977852553874128e-05, "loss": 0.5973, "step": 4464 }, { "epoch": 0.13716093754799866, "grad_norm": 0.32482415437698364, "learning_rate": 1.9778424379808292e-05, "loss": 0.5918, "step": 4465 }, { "epoch": 0.1371916566829478, "grad_norm": 0.32827022671699524, "learning_rate": 1.977832319803711e-05, "loss": 0.6945, "step": 4466 }, { "epoch": 0.13722237581789698, "grad_norm": 0.31616514921188354, "learning_rate": 1.9778221993427976e-05, "loss": 0.4367, "step": 4467 }, { "epoch": 0.13725309495284613, "grad_norm": 0.345186710357666, "learning_rate": 1.9778120765981118e-05, "loss": 0.542, "step": 4468 }, { "epoch": 0.13728381408779528, "grad_norm": 0.3524705767631531, "learning_rate": 1.9778019515696777e-05, "loss": 0.6466, "step": 4469 }, { "epoch": 0.13731453322274445, "grad_norm": 0.37942609190940857, "learning_rate": 1.9777918242575188e-05, "loss": 0.6425, "step": 4470 }, { "epoch": 0.1373452523576936, "grad_norm": 0.33622172474861145, "learning_rate": 1.9777816946616588e-05, "loss": 0.5825, "step": 4471 }, { "epoch": 0.13737597149264277, "grad_norm": 0.3774675130844116, "learning_rate": 1.9777715627821213e-05, "loss": 0.6542, "step": 4472 }, { "epoch": 0.13740669062759192, "grad_norm": 0.32253867387771606, "learning_rate": 1.9777614286189302e-05, "loss": 0.6017, "step": 4473 }, { "epoch": 0.1374374097625411, "grad_norm": 0.3692786693572998, "learning_rate": 1.9777512921721088e-05, "loss": 0.6364, "step": 4474 }, { "epoch": 0.13746812889749024, "grad_norm": 0.3408094048500061, "learning_rate": 1.977741153441681e-05, "loss": 0.6171, "step": 4475 }, { "epoch": 0.13749884803243942, "grad_norm": 0.34938549995422363, "learning_rate": 1.9777310124276706e-05, "loss": 0.5816, "step": 4476 }, { "epoch": 0.13752956716738857, "grad_norm": 0.35605061054229736, "learning_rate": 1.977720869130101e-05, "loss": 0.6307, "step": 4477 }, { "epoch": 0.13756028630233771, "grad_norm": 0.31992679834365845, "learning_rate": 1.9777107235489962e-05, "loss": 0.6542, "step": 4478 }, { "epoch": 0.1375910054372869, "grad_norm": 0.40645700693130493, "learning_rate": 1.9777005756843797e-05, "loss": 0.6119, "step": 4479 }, { "epoch": 0.13762172457223604, "grad_norm": 0.35264357924461365, "learning_rate": 1.9776904255362752e-05, "loss": 0.5535, "step": 4480 }, { "epoch": 0.1376524437071852, "grad_norm": 0.33254024386405945, "learning_rate": 1.9776802731047065e-05, "loss": 0.6284, "step": 4481 }, { "epoch": 0.13768316284213436, "grad_norm": 0.36652883887290955, "learning_rate": 1.9776701183896968e-05, "loss": 0.6613, "step": 4482 }, { "epoch": 0.13771388197708354, "grad_norm": 0.3307726979255676, "learning_rate": 1.977659961391271e-05, "loss": 0.582, "step": 4483 }, { "epoch": 0.13774460111203268, "grad_norm": 0.2965764105319977, "learning_rate": 1.9776498021094516e-05, "loss": 0.5862, "step": 4484 }, { "epoch": 0.13777532024698186, "grad_norm": 0.35749056935310364, "learning_rate": 1.9776396405442625e-05, "loss": 0.5841, "step": 4485 }, { "epoch": 0.137806039381931, "grad_norm": 0.35105475783348083, "learning_rate": 1.977629476695728e-05, "loss": 0.5771, "step": 4486 }, { "epoch": 0.13783675851688015, "grad_norm": 0.37409940361976624, "learning_rate": 1.977619310563872e-05, "loss": 0.6656, "step": 4487 }, { "epoch": 0.13786747765182933, "grad_norm": 0.37122705578804016, "learning_rate": 1.9776091421487173e-05, "loss": 0.6135, "step": 4488 }, { "epoch": 0.13789819678677848, "grad_norm": 0.4183451235294342, "learning_rate": 1.9775989714502884e-05, "loss": 0.5791, "step": 4489 }, { "epoch": 0.13792891592172765, "grad_norm": 0.31003618240356445, "learning_rate": 1.9775887984686087e-05, "loss": 0.582, "step": 4490 }, { "epoch": 0.1379596350566768, "grad_norm": 0.313114732503891, "learning_rate": 1.9775786232037025e-05, "loss": 0.5952, "step": 4491 }, { "epoch": 0.13799035419162597, "grad_norm": 0.34014058113098145, "learning_rate": 1.977568445655593e-05, "loss": 0.6888, "step": 4492 }, { "epoch": 0.13802107332657512, "grad_norm": 0.4131802022457123, "learning_rate": 1.9775582658243037e-05, "loss": 0.5485, "step": 4493 }, { "epoch": 0.13805179246152427, "grad_norm": 0.33067095279693604, "learning_rate": 1.9775480837098592e-05, "loss": 0.5968, "step": 4494 }, { "epoch": 0.13808251159647345, "grad_norm": 0.3271838128566742, "learning_rate": 1.9775378993122827e-05, "loss": 0.5798, "step": 4495 }, { "epoch": 0.1381132307314226, "grad_norm": 0.3524806797504425, "learning_rate": 1.977527712631598e-05, "loss": 0.6271, "step": 4496 }, { "epoch": 0.13814394986637177, "grad_norm": 0.33001023530960083, "learning_rate": 1.9775175236678293e-05, "loss": 0.6359, "step": 4497 }, { "epoch": 0.13817466900132092, "grad_norm": 0.3772483170032501, "learning_rate": 1.9775073324210005e-05, "loss": 0.5992, "step": 4498 }, { "epoch": 0.1382053881362701, "grad_norm": 0.348190039396286, "learning_rate": 1.9774971388911343e-05, "loss": 0.7023, "step": 4499 }, { "epoch": 0.13823610727121924, "grad_norm": 0.33349624276161194, "learning_rate": 1.977486943078256e-05, "loss": 0.6305, "step": 4500 }, { "epoch": 0.1382668264061684, "grad_norm": 0.3829382359981537, "learning_rate": 1.9774767449823883e-05, "loss": 0.6555, "step": 4501 }, { "epoch": 0.13829754554111756, "grad_norm": 0.4552218019962311, "learning_rate": 1.9774665446035552e-05, "loss": 0.6286, "step": 4502 }, { "epoch": 0.1383282646760667, "grad_norm": 0.3777869939804077, "learning_rate": 1.977456341941781e-05, "loss": 0.6435, "step": 4503 }, { "epoch": 0.13835898381101588, "grad_norm": 0.34562554955482483, "learning_rate": 1.977446136997089e-05, "loss": 0.583, "step": 4504 }, { "epoch": 0.13838970294596503, "grad_norm": 0.3227694630622864, "learning_rate": 1.9774359297695035e-05, "loss": 0.6725, "step": 4505 }, { "epoch": 0.1384204220809142, "grad_norm": 0.347793310880661, "learning_rate": 1.9774257202590485e-05, "loss": 0.6139, "step": 4506 }, { "epoch": 0.13845114121586335, "grad_norm": 0.39027073979377747, "learning_rate": 1.9774155084657468e-05, "loss": 0.6077, "step": 4507 }, { "epoch": 0.13848186035081253, "grad_norm": 0.3194609582424164, "learning_rate": 1.9774052943896233e-05, "loss": 0.6012, "step": 4508 }, { "epoch": 0.13851257948576168, "grad_norm": 0.3366239368915558, "learning_rate": 1.9773950780307013e-05, "loss": 0.635, "step": 4509 }, { "epoch": 0.13854329862071085, "grad_norm": 0.34745165705680847, "learning_rate": 1.9773848593890048e-05, "loss": 0.57, "step": 4510 }, { "epoch": 0.13857401775566, "grad_norm": 0.33734166622161865, "learning_rate": 1.977374638464558e-05, "loss": 0.5467, "step": 4511 }, { "epoch": 0.13860473689060915, "grad_norm": 0.3994484841823578, "learning_rate": 1.977364415257384e-05, "loss": 0.5919, "step": 4512 }, { "epoch": 0.13863545602555832, "grad_norm": 0.3188476860523224, "learning_rate": 1.9773541897675077e-05, "loss": 0.5981, "step": 4513 }, { "epoch": 0.13866617516050747, "grad_norm": 0.3315235376358032, "learning_rate": 1.9773439619949522e-05, "loss": 0.6307, "step": 4514 }, { "epoch": 0.13869689429545665, "grad_norm": 0.4052271842956543, "learning_rate": 1.9773337319397412e-05, "loss": 0.6871, "step": 4515 }, { "epoch": 0.1387276134304058, "grad_norm": 0.3325587511062622, "learning_rate": 1.9773234996018994e-05, "loss": 0.5672, "step": 4516 }, { "epoch": 0.13875833256535497, "grad_norm": 0.48509910702705383, "learning_rate": 1.9773132649814505e-05, "loss": 0.669, "step": 4517 }, { "epoch": 0.13878905170030412, "grad_norm": 0.33822670578956604, "learning_rate": 1.977303028078418e-05, "loss": 0.6276, "step": 4518 }, { "epoch": 0.1388197708352533, "grad_norm": 0.358920156955719, "learning_rate": 1.977292788892826e-05, "loss": 0.5958, "step": 4519 }, { "epoch": 0.13885048997020244, "grad_norm": 0.5548242330551147, "learning_rate": 1.9772825474246984e-05, "loss": 0.5679, "step": 4520 }, { "epoch": 0.1388812091051516, "grad_norm": 0.3179902732372284, "learning_rate": 1.9772723036740594e-05, "loss": 0.5314, "step": 4521 }, { "epoch": 0.13891192824010076, "grad_norm": 0.35208913683891296, "learning_rate": 1.977262057640932e-05, "loss": 0.644, "step": 4522 }, { "epoch": 0.1389426473750499, "grad_norm": 0.34875544905662537, "learning_rate": 1.9772518093253418e-05, "loss": 0.6574, "step": 4523 }, { "epoch": 0.13897336650999909, "grad_norm": 0.4637799859046936, "learning_rate": 1.977241558727311e-05, "loss": 0.6507, "step": 4524 }, { "epoch": 0.13900408564494823, "grad_norm": 0.3328540027141571, "learning_rate": 1.9772313058468647e-05, "loss": 0.5286, "step": 4525 }, { "epoch": 0.1390348047798974, "grad_norm": 0.46615636348724365, "learning_rate": 1.9772210506840264e-05, "loss": 0.6131, "step": 4526 }, { "epoch": 0.13906552391484656, "grad_norm": 0.44181686639785767, "learning_rate": 1.97721079323882e-05, "loss": 0.5832, "step": 4527 }, { "epoch": 0.13909624304979573, "grad_norm": 0.35354846715927124, "learning_rate": 1.9772005335112695e-05, "loss": 0.6113, "step": 4528 }, { "epoch": 0.13912696218474488, "grad_norm": 0.34187406301498413, "learning_rate": 1.9771902715013987e-05, "loss": 0.5809, "step": 4529 }, { "epoch": 0.13915768131969403, "grad_norm": 0.366234689950943, "learning_rate": 1.9771800072092322e-05, "loss": 0.5248, "step": 4530 }, { "epoch": 0.1391884004546432, "grad_norm": 0.3485596179962158, "learning_rate": 1.9771697406347933e-05, "loss": 0.5189, "step": 4531 }, { "epoch": 0.13921911958959235, "grad_norm": 0.5691792964935303, "learning_rate": 1.9771594717781065e-05, "loss": 0.5844, "step": 4532 }, { "epoch": 0.13924983872454152, "grad_norm": 0.3414352238178253, "learning_rate": 1.977149200639195e-05, "loss": 0.6125, "step": 4533 }, { "epoch": 0.13928055785949067, "grad_norm": 0.36736172437667847, "learning_rate": 1.977138927218084e-05, "loss": 0.649, "step": 4534 }, { "epoch": 0.13931127699443985, "grad_norm": 0.338529109954834, "learning_rate": 1.977128651514796e-05, "loss": 0.585, "step": 4535 }, { "epoch": 0.139341996129389, "grad_norm": 0.33002421259880066, "learning_rate": 1.9771183735293565e-05, "loss": 0.6044, "step": 4536 }, { "epoch": 0.13937271526433814, "grad_norm": 0.34177497029304504, "learning_rate": 1.9771080932617884e-05, "loss": 0.606, "step": 4537 }, { "epoch": 0.13940343439928732, "grad_norm": 0.3579985499382019, "learning_rate": 1.977097810712116e-05, "loss": 0.5689, "step": 4538 }, { "epoch": 0.13943415353423647, "grad_norm": 0.4389031231403351, "learning_rate": 1.977087525880364e-05, "loss": 0.5173, "step": 4539 }, { "epoch": 0.13946487266918564, "grad_norm": 0.30808335542678833, "learning_rate": 1.977077238766555e-05, "loss": 0.6109, "step": 4540 }, { "epoch": 0.1394955918041348, "grad_norm": 0.3606942594051361, "learning_rate": 1.9770669493707145e-05, "loss": 0.5744, "step": 4541 }, { "epoch": 0.13952631093908396, "grad_norm": 0.34838902950286865, "learning_rate": 1.977056657692866e-05, "loss": 0.6014, "step": 4542 }, { "epoch": 0.1395570300740331, "grad_norm": 0.33253204822540283, "learning_rate": 1.977046363733033e-05, "loss": 0.6009, "step": 4543 }, { "epoch": 0.1395877492089823, "grad_norm": 0.34475043416023254, "learning_rate": 1.97703606749124e-05, "loss": 0.5847, "step": 4544 }, { "epoch": 0.13961846834393143, "grad_norm": 0.32102882862091064, "learning_rate": 1.9770257689675112e-05, "loss": 0.6143, "step": 4545 }, { "epoch": 0.13964918747888058, "grad_norm": 0.5088224411010742, "learning_rate": 1.9770154681618705e-05, "loss": 0.6689, "step": 4546 }, { "epoch": 0.13967990661382976, "grad_norm": 0.32154762744903564, "learning_rate": 1.9770051650743418e-05, "loss": 0.6034, "step": 4547 }, { "epoch": 0.1397106257487789, "grad_norm": 0.32160094380378723, "learning_rate": 1.9769948597049496e-05, "loss": 0.6133, "step": 4548 }, { "epoch": 0.13974134488372808, "grad_norm": 0.34840020537376404, "learning_rate": 1.9769845520537174e-05, "loss": 0.657, "step": 4549 }, { "epoch": 0.13977206401867723, "grad_norm": 0.36230310797691345, "learning_rate": 1.9769742421206696e-05, "loss": 0.5963, "step": 4550 }, { "epoch": 0.1398027831536264, "grad_norm": 0.33288830518722534, "learning_rate": 1.9769639299058302e-05, "loss": 0.5489, "step": 4551 }, { "epoch": 0.13983350228857555, "grad_norm": 0.3317974805831909, "learning_rate": 1.9769536154092233e-05, "loss": 0.6185, "step": 4552 }, { "epoch": 0.13986422142352473, "grad_norm": 0.3555822968482971, "learning_rate": 1.976943298630873e-05, "loss": 0.6684, "step": 4553 }, { "epoch": 0.13989494055847387, "grad_norm": 0.33563390374183655, "learning_rate": 1.9769329795708033e-05, "loss": 0.6243, "step": 4554 }, { "epoch": 0.13992565969342302, "grad_norm": 0.32244664430618286, "learning_rate": 1.9769226582290385e-05, "loss": 0.5904, "step": 4555 }, { "epoch": 0.1399563788283722, "grad_norm": 0.2958434224128723, "learning_rate": 1.976912334605603e-05, "loss": 0.4783, "step": 4556 }, { "epoch": 0.13998709796332134, "grad_norm": 0.3816477060317993, "learning_rate": 1.97690200870052e-05, "loss": 0.5685, "step": 4557 }, { "epoch": 0.14001781709827052, "grad_norm": 0.34382790327072144, "learning_rate": 1.9768916805138146e-05, "loss": 0.6079, "step": 4558 }, { "epoch": 0.14004853623321967, "grad_norm": 0.33329030871391296, "learning_rate": 1.97688135004551e-05, "loss": 0.5812, "step": 4559 }, { "epoch": 0.14007925536816884, "grad_norm": 0.3279561996459961, "learning_rate": 1.9768710172956308e-05, "loss": 0.6198, "step": 4560 }, { "epoch": 0.140109974503118, "grad_norm": 0.31055861711502075, "learning_rate": 1.9768606822642012e-05, "loss": 0.6109, "step": 4561 }, { "epoch": 0.14014069363806717, "grad_norm": 0.36717167496681213, "learning_rate": 1.9768503449512454e-05, "loss": 0.6079, "step": 4562 }, { "epoch": 0.1401714127730163, "grad_norm": 0.3305298984050751, "learning_rate": 1.9768400053567873e-05, "loss": 0.5804, "step": 4563 }, { "epoch": 0.14020213190796546, "grad_norm": 0.36205098032951355, "learning_rate": 1.976829663480851e-05, "loss": 0.639, "step": 4564 }, { "epoch": 0.14023285104291464, "grad_norm": 0.2915742099285126, "learning_rate": 1.976819319323461e-05, "loss": 0.5797, "step": 4565 }, { "epoch": 0.14026357017786378, "grad_norm": 0.36095234751701355, "learning_rate": 1.9768089728846414e-05, "loss": 0.6209, "step": 4566 }, { "epoch": 0.14029428931281296, "grad_norm": 0.35170885920524597, "learning_rate": 1.976798624164416e-05, "loss": 0.5636, "step": 4567 }, { "epoch": 0.1403250084477621, "grad_norm": 0.31879758834838867, "learning_rate": 1.9767882731628095e-05, "loss": 0.5702, "step": 4568 }, { "epoch": 0.14035572758271128, "grad_norm": 0.357481986284256, "learning_rate": 1.9767779198798453e-05, "loss": 0.6592, "step": 4569 }, { "epoch": 0.14038644671766043, "grad_norm": 0.3008691966533661, "learning_rate": 1.9767675643155483e-05, "loss": 0.5779, "step": 4570 }, { "epoch": 0.14041716585260958, "grad_norm": 0.37102577090263367, "learning_rate": 1.9767572064699423e-05, "loss": 0.5809, "step": 4571 }, { "epoch": 0.14044788498755875, "grad_norm": 0.3227834701538086, "learning_rate": 1.976746846343052e-05, "loss": 0.5806, "step": 4572 }, { "epoch": 0.1404786041225079, "grad_norm": 0.415253609418869, "learning_rate": 1.9767364839349005e-05, "loss": 0.5917, "step": 4573 }, { "epoch": 0.14050932325745707, "grad_norm": 0.4297698438167572, "learning_rate": 1.9767261192455135e-05, "loss": 0.6662, "step": 4574 }, { "epoch": 0.14054004239240622, "grad_norm": 0.3296424448490143, "learning_rate": 1.976715752274914e-05, "loss": 0.6346, "step": 4575 }, { "epoch": 0.1405707615273554, "grad_norm": 0.6424480080604553, "learning_rate": 1.9767053830231267e-05, "loss": 0.6679, "step": 4576 }, { "epoch": 0.14060148066230455, "grad_norm": 0.29943251609802246, "learning_rate": 1.976695011490176e-05, "loss": 0.5085, "step": 4577 }, { "epoch": 0.14063219979725372, "grad_norm": 0.366623193025589, "learning_rate": 1.9766846376760856e-05, "loss": 0.4722, "step": 4578 }, { "epoch": 0.14066291893220287, "grad_norm": 0.43983981013298035, "learning_rate": 1.97667426158088e-05, "loss": 0.6263, "step": 4579 }, { "epoch": 0.14069363806715202, "grad_norm": 0.3393038511276245, "learning_rate": 1.9766638832045836e-05, "loss": 0.6323, "step": 4580 }, { "epoch": 0.1407243572021012, "grad_norm": 0.33187586069107056, "learning_rate": 1.9766535025472205e-05, "loss": 0.6052, "step": 4581 }, { "epoch": 0.14075507633705034, "grad_norm": 0.36592382192611694, "learning_rate": 1.976643119608815e-05, "loss": 0.6225, "step": 4582 }, { "epoch": 0.14078579547199951, "grad_norm": 0.33447325229644775, "learning_rate": 1.976632734389391e-05, "loss": 0.6835, "step": 4583 }, { "epoch": 0.14081651460694866, "grad_norm": 0.3892289102077484, "learning_rate": 1.9766223468889735e-05, "loss": 0.5897, "step": 4584 }, { "epoch": 0.14084723374189784, "grad_norm": 0.347449392080307, "learning_rate": 1.976611957107586e-05, "loss": 0.5681, "step": 4585 }, { "epoch": 0.14087795287684698, "grad_norm": 0.365726113319397, "learning_rate": 1.976601565045253e-05, "loss": 0.7155, "step": 4586 }, { "epoch": 0.14090867201179616, "grad_norm": 0.33018046617507935, "learning_rate": 1.976591170701999e-05, "loss": 0.583, "step": 4587 }, { "epoch": 0.1409393911467453, "grad_norm": 0.35541409254074097, "learning_rate": 1.976580774077848e-05, "loss": 0.6382, "step": 4588 }, { "epoch": 0.14097011028169446, "grad_norm": 0.35583561658859253, "learning_rate": 1.9765703751728243e-05, "loss": 0.6549, "step": 4589 }, { "epoch": 0.14100082941664363, "grad_norm": 0.3433876037597656, "learning_rate": 1.9765599739869527e-05, "loss": 0.5577, "step": 4590 }, { "epoch": 0.14103154855159278, "grad_norm": 0.3556480407714844, "learning_rate": 1.9765495705202568e-05, "loss": 0.5743, "step": 4591 }, { "epoch": 0.14106226768654195, "grad_norm": 0.3611827492713928, "learning_rate": 1.9765391647727614e-05, "loss": 0.5469, "step": 4592 }, { "epoch": 0.1410929868214911, "grad_norm": 0.3313784897327423, "learning_rate": 1.97652875674449e-05, "loss": 0.6347, "step": 4593 }, { "epoch": 0.14112370595644028, "grad_norm": 0.31829962134361267, "learning_rate": 1.9765183464354682e-05, "loss": 0.5622, "step": 4594 }, { "epoch": 0.14115442509138942, "grad_norm": 0.4398695230484009, "learning_rate": 1.976507933845719e-05, "loss": 0.714, "step": 4595 }, { "epoch": 0.1411851442263386, "grad_norm": 0.36936497688293457, "learning_rate": 1.9764975189752676e-05, "loss": 0.6616, "step": 4596 }, { "epoch": 0.14121586336128775, "grad_norm": 0.33705705404281616, "learning_rate": 1.976487101824138e-05, "loss": 0.6558, "step": 4597 }, { "epoch": 0.1412465824962369, "grad_norm": 0.33451902866363525, "learning_rate": 1.976476682392355e-05, "loss": 0.5721, "step": 4598 }, { "epoch": 0.14127730163118607, "grad_norm": 0.34084415435791016, "learning_rate": 1.9764662606799418e-05, "loss": 0.6292, "step": 4599 }, { "epoch": 0.14130802076613522, "grad_norm": 0.3403288424015045, "learning_rate": 1.976455836686924e-05, "loss": 0.4941, "step": 4600 }, { "epoch": 0.1413387399010844, "grad_norm": 0.3132542669773102, "learning_rate": 1.976445410413325e-05, "loss": 0.6366, "step": 4601 }, { "epoch": 0.14136945903603354, "grad_norm": 0.30922624468803406, "learning_rate": 1.9764349818591696e-05, "loss": 0.6164, "step": 4602 }, { "epoch": 0.14140017817098272, "grad_norm": 0.3108746409416199, "learning_rate": 1.9764245510244824e-05, "loss": 0.561, "step": 4603 }, { "epoch": 0.14143089730593186, "grad_norm": 0.36575958132743835, "learning_rate": 1.9764141179092874e-05, "loss": 0.6454, "step": 4604 }, { "epoch": 0.14146161644088104, "grad_norm": 0.3063628673553467, "learning_rate": 1.976403682513609e-05, "loss": 0.5578, "step": 4605 }, { "epoch": 0.14149233557583019, "grad_norm": 0.35037150979042053, "learning_rate": 1.9763932448374715e-05, "loss": 0.6107, "step": 4606 }, { "epoch": 0.14152305471077933, "grad_norm": 0.3389579951763153, "learning_rate": 1.976382804880899e-05, "loss": 0.5963, "step": 4607 }, { "epoch": 0.1415537738457285, "grad_norm": 0.31780222058296204, "learning_rate": 1.976372362643917e-05, "loss": 0.6054, "step": 4608 }, { "epoch": 0.14158449298067766, "grad_norm": 0.39146894216537476, "learning_rate": 1.976361918126549e-05, "loss": 0.5478, "step": 4609 }, { "epoch": 0.14161521211562683, "grad_norm": 0.3472498059272766, "learning_rate": 1.9763514713288193e-05, "loss": 0.5813, "step": 4610 }, { "epoch": 0.14164593125057598, "grad_norm": 0.34898099303245544, "learning_rate": 1.9763410222507527e-05, "loss": 0.6486, "step": 4611 }, { "epoch": 0.14167665038552515, "grad_norm": 0.3310079276561737, "learning_rate": 1.9763305708923735e-05, "loss": 0.5921, "step": 4612 }, { "epoch": 0.1417073695204743, "grad_norm": 0.32489585876464844, "learning_rate": 1.9763201172537057e-05, "loss": 0.5924, "step": 4613 }, { "epoch": 0.14173808865542345, "grad_norm": 0.6517223715782166, "learning_rate": 1.9763096613347744e-05, "loss": 0.6533, "step": 4614 }, { "epoch": 0.14176880779037263, "grad_norm": 0.34020447731018066, "learning_rate": 1.9762992031356037e-05, "loss": 0.6598, "step": 4615 }, { "epoch": 0.14179952692532177, "grad_norm": 0.4258897006511688, "learning_rate": 1.9762887426562182e-05, "loss": 0.4998, "step": 4616 }, { "epoch": 0.14183024606027095, "grad_norm": 0.32480961084365845, "learning_rate": 1.9762782798966417e-05, "loss": 0.6019, "step": 4617 }, { "epoch": 0.1418609651952201, "grad_norm": 0.3252837359905243, "learning_rate": 1.9762678148568996e-05, "loss": 0.6327, "step": 4618 }, { "epoch": 0.14189168433016927, "grad_norm": 0.3650161325931549, "learning_rate": 1.9762573475370156e-05, "loss": 0.6141, "step": 4619 }, { "epoch": 0.14192240346511842, "grad_norm": 0.29841071367263794, "learning_rate": 1.976246877937014e-05, "loss": 0.527, "step": 4620 }, { "epoch": 0.1419531226000676, "grad_norm": 0.31855303049087524, "learning_rate": 1.9762364060569202e-05, "loss": 0.5493, "step": 4621 }, { "epoch": 0.14198384173501674, "grad_norm": 0.3496769964694977, "learning_rate": 1.976225931896758e-05, "loss": 0.6515, "step": 4622 }, { "epoch": 0.1420145608699659, "grad_norm": 0.35953453183174133, "learning_rate": 1.9762154554565518e-05, "loss": 0.6489, "step": 4623 }, { "epoch": 0.14204528000491506, "grad_norm": 0.3501867353916168, "learning_rate": 1.9762049767363263e-05, "loss": 0.6187, "step": 4624 }, { "epoch": 0.1420759991398642, "grad_norm": 0.4211646020412445, "learning_rate": 1.976194495736106e-05, "loss": 0.6369, "step": 4625 }, { "epoch": 0.1421067182748134, "grad_norm": 0.3481685519218445, "learning_rate": 1.976184012455915e-05, "loss": 0.5147, "step": 4626 }, { "epoch": 0.14213743740976253, "grad_norm": 0.3003503382205963, "learning_rate": 1.9761735268957782e-05, "loss": 0.5133, "step": 4627 }, { "epoch": 0.1421681565447117, "grad_norm": 0.3711954951286316, "learning_rate": 1.97616303905572e-05, "loss": 0.7272, "step": 4628 }, { "epoch": 0.14219887567966086, "grad_norm": 0.35885030031204224, "learning_rate": 1.9761525489357648e-05, "loss": 0.6079, "step": 4629 }, { "epoch": 0.14222959481461003, "grad_norm": 0.3178861737251282, "learning_rate": 1.9761420565359373e-05, "loss": 0.7056, "step": 4630 }, { "epoch": 0.14226031394955918, "grad_norm": 0.3245752453804016, "learning_rate": 1.9761315618562616e-05, "loss": 0.5439, "step": 4631 }, { "epoch": 0.14229103308450833, "grad_norm": 0.3200641870498657, "learning_rate": 1.9761210648967625e-05, "loss": 0.6018, "step": 4632 }, { "epoch": 0.1423217522194575, "grad_norm": 0.322952538728714, "learning_rate": 1.9761105656574648e-05, "loss": 0.6473, "step": 4633 }, { "epoch": 0.14235247135440665, "grad_norm": 0.39527618885040283, "learning_rate": 1.976100064138392e-05, "loss": 0.5723, "step": 4634 }, { "epoch": 0.14238319048935583, "grad_norm": 0.337991863489151, "learning_rate": 1.97608956033957e-05, "loss": 0.5522, "step": 4635 }, { "epoch": 0.14241390962430497, "grad_norm": 0.331394761800766, "learning_rate": 1.9760790542610226e-05, "loss": 0.6291, "step": 4636 }, { "epoch": 0.14244462875925415, "grad_norm": 0.31188562512397766, "learning_rate": 1.9760685459027745e-05, "loss": 0.6263, "step": 4637 }, { "epoch": 0.1424753478942033, "grad_norm": 0.4178604483604431, "learning_rate": 1.9760580352648497e-05, "loss": 0.6213, "step": 4638 }, { "epoch": 0.14250606702915247, "grad_norm": 0.3328765630722046, "learning_rate": 1.9760475223472736e-05, "loss": 0.6535, "step": 4639 }, { "epoch": 0.14253678616410162, "grad_norm": 0.6867932081222534, "learning_rate": 1.97603700715007e-05, "loss": 0.6237, "step": 4640 }, { "epoch": 0.14256750529905077, "grad_norm": 0.33283907175064087, "learning_rate": 1.976026489673264e-05, "loss": 0.5981, "step": 4641 }, { "epoch": 0.14259822443399994, "grad_norm": 0.3323231041431427, "learning_rate": 1.9760159699168796e-05, "loss": 0.6419, "step": 4642 }, { "epoch": 0.1426289435689491, "grad_norm": 0.30864855647087097, "learning_rate": 1.9760054478809424e-05, "loss": 0.5704, "step": 4643 }, { "epoch": 0.14265966270389827, "grad_norm": 0.3363948464393616, "learning_rate": 1.975994923565476e-05, "loss": 0.6301, "step": 4644 }, { "epoch": 0.1426903818388474, "grad_norm": 0.5032851099967957, "learning_rate": 1.975984396970505e-05, "loss": 0.5094, "step": 4645 }, { "epoch": 0.1427211009737966, "grad_norm": 0.43959033489227295, "learning_rate": 1.9759738680960544e-05, "loss": 0.6279, "step": 4646 }, { "epoch": 0.14275182010874574, "grad_norm": 0.35765501856803894, "learning_rate": 1.975963336942149e-05, "loss": 0.5976, "step": 4647 }, { "epoch": 0.14278253924369488, "grad_norm": 0.3533529043197632, "learning_rate": 1.975952803508813e-05, "loss": 0.6244, "step": 4648 }, { "epoch": 0.14281325837864406, "grad_norm": 0.35788679122924805, "learning_rate": 1.9759422677960708e-05, "loss": 0.561, "step": 4649 }, { "epoch": 0.1428439775135932, "grad_norm": 0.3286505341529846, "learning_rate": 1.9759317298039475e-05, "loss": 0.585, "step": 4650 }, { "epoch": 0.14287469664854238, "grad_norm": 0.32490217685699463, "learning_rate": 1.9759211895324676e-05, "loss": 0.504, "step": 4651 }, { "epoch": 0.14290541578349153, "grad_norm": 0.3487645387649536, "learning_rate": 1.9759106469816553e-05, "loss": 0.5937, "step": 4652 }, { "epoch": 0.1429361349184407, "grad_norm": 0.37234926223754883, "learning_rate": 1.9759001021515356e-05, "loss": 0.6518, "step": 4653 }, { "epoch": 0.14296685405338985, "grad_norm": 0.3207632303237915, "learning_rate": 1.975889555042133e-05, "loss": 0.5604, "step": 4654 }, { "epoch": 0.14299757318833903, "grad_norm": 0.41666457056999207, "learning_rate": 1.9758790056534723e-05, "loss": 0.5472, "step": 4655 }, { "epoch": 0.14302829232328818, "grad_norm": 0.34735989570617676, "learning_rate": 1.9758684539855782e-05, "loss": 0.6056, "step": 4656 }, { "epoch": 0.14305901145823732, "grad_norm": 0.31800127029418945, "learning_rate": 1.975857900038475e-05, "loss": 0.536, "step": 4657 }, { "epoch": 0.1430897305931865, "grad_norm": 0.33600714802742004, "learning_rate": 1.9758473438121873e-05, "loss": 0.642, "step": 4658 }, { "epoch": 0.14312044972813565, "grad_norm": 0.3376256227493286, "learning_rate": 1.9758367853067403e-05, "loss": 0.5418, "step": 4659 }, { "epoch": 0.14315116886308482, "grad_norm": 0.3366561532020569, "learning_rate": 1.975826224522158e-05, "loss": 0.6113, "step": 4660 }, { "epoch": 0.14318188799803397, "grad_norm": 0.4281092584133148, "learning_rate": 1.975815661458466e-05, "loss": 0.769, "step": 4661 }, { "epoch": 0.14321260713298314, "grad_norm": 0.34296301007270813, "learning_rate": 1.9758050961156877e-05, "loss": 0.5739, "step": 4662 }, { "epoch": 0.1432433262679323, "grad_norm": 0.3517555594444275, "learning_rate": 1.9757945284938488e-05, "loss": 0.6235, "step": 4663 }, { "epoch": 0.14327404540288147, "grad_norm": 0.49167194962501526, "learning_rate": 1.9757839585929735e-05, "loss": 0.5019, "step": 4664 }, { "epoch": 0.14330476453783061, "grad_norm": 0.3296694755554199, "learning_rate": 1.975773386413087e-05, "loss": 0.5945, "step": 4665 }, { "epoch": 0.14333548367277976, "grad_norm": 0.34413063526153564, "learning_rate": 1.9757628119542128e-05, "loss": 0.6177, "step": 4666 }, { "epoch": 0.14336620280772894, "grad_norm": 0.365961492061615, "learning_rate": 1.9757522352163772e-05, "loss": 0.5811, "step": 4667 }, { "epoch": 0.14339692194267809, "grad_norm": 0.3144555687904358, "learning_rate": 1.975741656199604e-05, "loss": 0.5593, "step": 4668 }, { "epoch": 0.14342764107762726, "grad_norm": 0.3517237901687622, "learning_rate": 1.9757310749039176e-05, "loss": 0.6119, "step": 4669 }, { "epoch": 0.1434583602125764, "grad_norm": 0.3399091958999634, "learning_rate": 1.9757204913293437e-05, "loss": 0.6032, "step": 4670 }, { "epoch": 0.14348907934752558, "grad_norm": 0.3615288734436035, "learning_rate": 1.9757099054759055e-05, "loss": 0.6092, "step": 4671 }, { "epoch": 0.14351979848247473, "grad_norm": 0.35799551010131836, "learning_rate": 1.9756993173436293e-05, "loss": 0.5749, "step": 4672 }, { "epoch": 0.1435505176174239, "grad_norm": 0.3326537013053894, "learning_rate": 1.9756887269325396e-05, "loss": 0.6497, "step": 4673 }, { "epoch": 0.14358123675237305, "grad_norm": 0.3433857262134552, "learning_rate": 1.97567813424266e-05, "loss": 0.6893, "step": 4674 }, { "epoch": 0.1436119558873222, "grad_norm": 0.3642665147781372, "learning_rate": 1.9756675392740163e-05, "loss": 0.5315, "step": 4675 }, { "epoch": 0.14364267502227138, "grad_norm": 0.3321240246295929, "learning_rate": 1.9756569420266332e-05, "loss": 0.5385, "step": 4676 }, { "epoch": 0.14367339415722052, "grad_norm": 0.3875254690647125, "learning_rate": 1.975646342500535e-05, "loss": 0.5856, "step": 4677 }, { "epoch": 0.1437041132921697, "grad_norm": 0.4049750566482544, "learning_rate": 1.9756357406957462e-05, "loss": 0.5636, "step": 4678 }, { "epoch": 0.14373483242711885, "grad_norm": 0.3466750383377075, "learning_rate": 1.9756251366122922e-05, "loss": 0.6623, "step": 4679 }, { "epoch": 0.14376555156206802, "grad_norm": 0.3469845652580261, "learning_rate": 1.975614530250198e-05, "loss": 0.6359, "step": 4680 }, { "epoch": 0.14379627069701717, "grad_norm": 0.36168989539146423, "learning_rate": 1.9756039216094875e-05, "loss": 0.559, "step": 4681 }, { "epoch": 0.14382698983196635, "grad_norm": 0.3183254897594452, "learning_rate": 1.9755933106901862e-05, "loss": 0.5714, "step": 4682 }, { "epoch": 0.1438577089669155, "grad_norm": 0.3528538644313812, "learning_rate": 1.975582697492318e-05, "loss": 0.6396, "step": 4683 }, { "epoch": 0.14388842810186464, "grad_norm": 0.40475693345069885, "learning_rate": 1.9755720820159088e-05, "loss": 0.5738, "step": 4684 }, { "epoch": 0.14391914723681382, "grad_norm": 0.3381171226501465, "learning_rate": 1.9755614642609825e-05, "loss": 0.4941, "step": 4685 }, { "epoch": 0.14394986637176296, "grad_norm": 0.33059805631637573, "learning_rate": 1.9755508442275647e-05, "loss": 0.604, "step": 4686 }, { "epoch": 0.14398058550671214, "grad_norm": 0.35356342792510986, "learning_rate": 1.9755402219156795e-05, "loss": 0.6365, "step": 4687 }, { "epoch": 0.1440113046416613, "grad_norm": 0.3379509150981903, "learning_rate": 1.9755295973253525e-05, "loss": 0.6089, "step": 4688 }, { "epoch": 0.14404202377661046, "grad_norm": 0.3202090561389923, "learning_rate": 1.9755189704566075e-05, "loss": 0.6272, "step": 4689 }, { "epoch": 0.1440727429115596, "grad_norm": 0.34496811032295227, "learning_rate": 1.9755083413094696e-05, "loss": 0.6063, "step": 4690 }, { "epoch": 0.14410346204650876, "grad_norm": 0.31163814663887024, "learning_rate": 1.975497709883964e-05, "loss": 0.5785, "step": 4691 }, { "epoch": 0.14413418118145793, "grad_norm": 0.4002324938774109, "learning_rate": 1.9754870761801156e-05, "loss": 0.6551, "step": 4692 }, { "epoch": 0.14416490031640708, "grad_norm": 0.29813864827156067, "learning_rate": 1.975476440197949e-05, "loss": 0.4966, "step": 4693 }, { "epoch": 0.14419561945135626, "grad_norm": 0.5003029704093933, "learning_rate": 1.9754658019374887e-05, "loss": 0.5978, "step": 4694 }, { "epoch": 0.1442263385863054, "grad_norm": 0.3628714382648468, "learning_rate": 1.9754551613987602e-05, "loss": 0.6787, "step": 4695 }, { "epoch": 0.14425705772125458, "grad_norm": 0.352323055267334, "learning_rate": 1.9754445185817878e-05, "loss": 0.6049, "step": 4696 }, { "epoch": 0.14428777685620373, "grad_norm": 0.366977334022522, "learning_rate": 1.975433873486597e-05, "loss": 0.628, "step": 4697 }, { "epoch": 0.1443184959911529, "grad_norm": 0.4657941162586212, "learning_rate": 1.975423226113212e-05, "loss": 0.5991, "step": 4698 }, { "epoch": 0.14434921512610205, "grad_norm": 0.35605621337890625, "learning_rate": 1.975412576461658e-05, "loss": 0.6437, "step": 4699 }, { "epoch": 0.1443799342610512, "grad_norm": 0.3364267647266388, "learning_rate": 1.97540192453196e-05, "loss": 0.5956, "step": 4700 }, { "epoch": 0.14441065339600037, "grad_norm": 0.3643955886363983, "learning_rate": 1.9753912703241424e-05, "loss": 0.6613, "step": 4701 }, { "epoch": 0.14444137253094952, "grad_norm": 0.36045223474502563, "learning_rate": 1.97538061383823e-05, "loss": 0.5413, "step": 4702 }, { "epoch": 0.1444720916658987, "grad_norm": 0.376110702753067, "learning_rate": 1.9753699550742487e-05, "loss": 0.6092, "step": 4703 }, { "epoch": 0.14450281080084784, "grad_norm": 0.3623325228691101, "learning_rate": 1.9753592940322227e-05, "loss": 0.5848, "step": 4704 }, { "epoch": 0.14453352993579702, "grad_norm": 0.35809409618377686, "learning_rate": 1.9753486307121768e-05, "loss": 0.6458, "step": 4705 }, { "epoch": 0.14456424907074616, "grad_norm": 0.34744828939437866, "learning_rate": 1.975337965114136e-05, "loss": 0.6586, "step": 4706 }, { "epoch": 0.14459496820569534, "grad_norm": 0.38858652114868164, "learning_rate": 1.9753272972381253e-05, "loss": 0.586, "step": 4707 }, { "epoch": 0.1446256873406445, "grad_norm": 0.3550422489643097, "learning_rate": 1.9753166270841695e-05, "loss": 0.5197, "step": 4708 }, { "epoch": 0.14465640647559364, "grad_norm": 0.42899903655052185, "learning_rate": 1.9753059546522937e-05, "loss": 0.5903, "step": 4709 }, { "epoch": 0.1446871256105428, "grad_norm": 0.30753591656684875, "learning_rate": 1.9752952799425227e-05, "loss": 0.5174, "step": 4710 }, { "epoch": 0.14471784474549196, "grad_norm": 0.3572216331958771, "learning_rate": 1.9752846029548816e-05, "loss": 0.6057, "step": 4711 }, { "epoch": 0.14474856388044113, "grad_norm": 0.32215389609336853, "learning_rate": 1.975273923689395e-05, "loss": 0.5788, "step": 4712 }, { "epoch": 0.14477928301539028, "grad_norm": 0.31658607721328735, "learning_rate": 1.975263242146088e-05, "loss": 0.6641, "step": 4713 }, { "epoch": 0.14481000215033946, "grad_norm": 0.33361199498176575, "learning_rate": 1.9752525583249858e-05, "loss": 0.6225, "step": 4714 }, { "epoch": 0.1448407212852886, "grad_norm": 0.329679012298584, "learning_rate": 1.975241872226113e-05, "loss": 0.6208, "step": 4715 }, { "epoch": 0.14487144042023778, "grad_norm": 0.3380034267902374, "learning_rate": 1.9752311838494947e-05, "loss": 0.6643, "step": 4716 }, { "epoch": 0.14490215955518693, "grad_norm": 0.3397764265537262, "learning_rate": 1.975220493195156e-05, "loss": 0.6701, "step": 4717 }, { "epoch": 0.14493287869013607, "grad_norm": 0.36223381757736206, "learning_rate": 1.9752098002631214e-05, "loss": 0.6128, "step": 4718 }, { "epoch": 0.14496359782508525, "grad_norm": 0.37036824226379395, "learning_rate": 1.9751991050534164e-05, "loss": 0.5787, "step": 4719 }, { "epoch": 0.1449943169600344, "grad_norm": 0.3545697331428528, "learning_rate": 1.975188407566066e-05, "loss": 0.5512, "step": 4720 }, { "epoch": 0.14502503609498357, "grad_norm": 0.3577895760536194, "learning_rate": 1.975177707801095e-05, "loss": 0.5072, "step": 4721 }, { "epoch": 0.14505575522993272, "grad_norm": 0.3444124758243561, "learning_rate": 1.9751670057585278e-05, "loss": 0.5157, "step": 4722 }, { "epoch": 0.1450864743648819, "grad_norm": 0.3379819989204407, "learning_rate": 1.9751563014383905e-05, "loss": 0.6145, "step": 4723 }, { "epoch": 0.14511719349983104, "grad_norm": 0.32723814249038696, "learning_rate": 1.9751455948407073e-05, "loss": 0.5407, "step": 4724 }, { "epoch": 0.14514791263478022, "grad_norm": 0.3471226096153259, "learning_rate": 1.9751348859655036e-05, "loss": 0.5213, "step": 4725 }, { "epoch": 0.14517863176972937, "grad_norm": 0.3105807304382324, "learning_rate": 1.975124174812804e-05, "loss": 0.5027, "step": 4726 }, { "epoch": 0.1452093509046785, "grad_norm": 0.34118297696113586, "learning_rate": 1.975113461382634e-05, "loss": 0.6623, "step": 4727 }, { "epoch": 0.1452400700396277, "grad_norm": 0.32293620705604553, "learning_rate": 1.9751027456750186e-05, "loss": 0.5236, "step": 4728 }, { "epoch": 0.14527078917457684, "grad_norm": 0.33103328943252563, "learning_rate": 1.975092027689982e-05, "loss": 0.6646, "step": 4729 }, { "epoch": 0.145301508309526, "grad_norm": 0.3426695466041565, "learning_rate": 1.9750813074275506e-05, "loss": 0.5524, "step": 4730 }, { "epoch": 0.14533222744447516, "grad_norm": 0.3151072561740875, "learning_rate": 1.9750705848877484e-05, "loss": 0.5938, "step": 4731 }, { "epoch": 0.14536294657942433, "grad_norm": 0.4840388000011444, "learning_rate": 1.9750598600706006e-05, "loss": 0.5666, "step": 4732 }, { "epoch": 0.14539366571437348, "grad_norm": 0.36896973848342896, "learning_rate": 1.9750491329761323e-05, "loss": 0.4947, "step": 4733 }, { "epoch": 0.14542438484932263, "grad_norm": 0.3303647041320801, "learning_rate": 1.975038403604369e-05, "loss": 0.6568, "step": 4734 }, { "epoch": 0.1454551039842718, "grad_norm": 0.31108859181404114, "learning_rate": 1.9750276719553354e-05, "loss": 0.5143, "step": 4735 }, { "epoch": 0.14548582311922095, "grad_norm": 0.30921560525894165, "learning_rate": 1.9750169380290564e-05, "loss": 0.5197, "step": 4736 }, { "epoch": 0.14551654225417013, "grad_norm": 0.30200105905532837, "learning_rate": 1.9750062018255574e-05, "loss": 0.5763, "step": 4737 }, { "epoch": 0.14554726138911928, "grad_norm": 0.3989561200141907, "learning_rate": 1.9749954633448633e-05, "loss": 0.6312, "step": 4738 }, { "epoch": 0.14557798052406845, "grad_norm": 0.3365642726421356, "learning_rate": 1.974984722586999e-05, "loss": 0.6452, "step": 4739 }, { "epoch": 0.1456086996590176, "grad_norm": 0.4408011734485626, "learning_rate": 1.9749739795519898e-05, "loss": 0.6171, "step": 4740 }, { "epoch": 0.14563941879396677, "grad_norm": 0.38660791516304016, "learning_rate": 1.9749632342398614e-05, "loss": 0.698, "step": 4741 }, { "epoch": 0.14567013792891592, "grad_norm": 0.33004090189933777, "learning_rate": 1.9749524866506374e-05, "loss": 0.5913, "step": 4742 }, { "epoch": 0.14570085706386507, "grad_norm": 0.4109882414340973, "learning_rate": 1.9749417367843442e-05, "loss": 0.6664, "step": 4743 }, { "epoch": 0.14573157619881424, "grad_norm": 0.3226583003997803, "learning_rate": 1.9749309846410065e-05, "loss": 0.5147, "step": 4744 }, { "epoch": 0.1457622953337634, "grad_norm": 0.301925390958786, "learning_rate": 1.9749202302206495e-05, "loss": 0.6203, "step": 4745 }, { "epoch": 0.14579301446871257, "grad_norm": 0.35408687591552734, "learning_rate": 1.9749094735232978e-05, "loss": 0.628, "step": 4746 }, { "epoch": 0.14582373360366171, "grad_norm": 0.3453104794025421, "learning_rate": 1.9748987145489775e-05, "loss": 0.6129, "step": 4747 }, { "epoch": 0.1458544527386109, "grad_norm": 0.3457273542881012, "learning_rate": 1.9748879532977128e-05, "loss": 0.5701, "step": 4748 }, { "epoch": 0.14588517187356004, "grad_norm": 0.3396053612232208, "learning_rate": 1.9748771897695293e-05, "loss": 0.6165, "step": 4749 }, { "epoch": 0.1459158910085092, "grad_norm": 0.34996071457862854, "learning_rate": 1.974866423964452e-05, "loss": 0.5614, "step": 4750 }, { "epoch": 0.14594661014345836, "grad_norm": 0.37351852655410767, "learning_rate": 1.974855655882506e-05, "loss": 0.5961, "step": 4751 }, { "epoch": 0.1459773292784075, "grad_norm": 0.380794495344162, "learning_rate": 1.9748448855237166e-05, "loss": 0.6045, "step": 4752 }, { "epoch": 0.14600804841335668, "grad_norm": 0.33946162462234497, "learning_rate": 1.974834112888109e-05, "loss": 0.5526, "step": 4753 }, { "epoch": 0.14603876754830583, "grad_norm": 0.33703482151031494, "learning_rate": 1.9748233379757083e-05, "loss": 0.6197, "step": 4754 }, { "epoch": 0.146069486683255, "grad_norm": 0.38042330741882324, "learning_rate": 1.974812560786539e-05, "loss": 0.6081, "step": 4755 }, { "epoch": 0.14610020581820415, "grad_norm": 0.34174180030822754, "learning_rate": 1.9748017813206278e-05, "loss": 0.6014, "step": 4756 }, { "epoch": 0.14613092495315333, "grad_norm": 0.34008848667144775, "learning_rate": 1.9747909995779982e-05, "loss": 0.5767, "step": 4757 }, { "epoch": 0.14616164408810248, "grad_norm": 0.3304292857646942, "learning_rate": 1.9747802155586765e-05, "loss": 0.607, "step": 4758 }, { "epoch": 0.14619236322305165, "grad_norm": 0.34929358959198, "learning_rate": 1.9747694292626875e-05, "loss": 0.5612, "step": 4759 }, { "epoch": 0.1462230823580008, "grad_norm": 0.3347609043121338, "learning_rate": 1.9747586406900566e-05, "loss": 0.5953, "step": 4760 }, { "epoch": 0.14625380149294995, "grad_norm": 0.34422579407691956, "learning_rate": 1.9747478498408084e-05, "loss": 0.7034, "step": 4761 }, { "epoch": 0.14628452062789912, "grad_norm": 0.3285083770751953, "learning_rate": 1.974737056714969e-05, "loss": 0.5626, "step": 4762 }, { "epoch": 0.14631523976284827, "grad_norm": 0.3443434238433838, "learning_rate": 1.9747262613125628e-05, "loss": 0.5659, "step": 4763 }, { "epoch": 0.14634595889779745, "grad_norm": 0.32857251167297363, "learning_rate": 1.9747154636336155e-05, "loss": 0.6223, "step": 4764 }, { "epoch": 0.1463766780327466, "grad_norm": 0.36288875341415405, "learning_rate": 1.9747046636781523e-05, "loss": 0.5472, "step": 4765 }, { "epoch": 0.14640739716769577, "grad_norm": 0.38822874426841736, "learning_rate": 1.974693861446198e-05, "loss": 0.6164, "step": 4766 }, { "epoch": 0.14643811630264492, "grad_norm": 0.34167730808258057, "learning_rate": 1.974683056937778e-05, "loss": 0.6123, "step": 4767 }, { "epoch": 0.14646883543759406, "grad_norm": 0.33870047330856323, "learning_rate": 1.974672250152918e-05, "loss": 0.5584, "step": 4768 }, { "epoch": 0.14649955457254324, "grad_norm": 0.34765103459358215, "learning_rate": 1.9746614410916425e-05, "loss": 0.5691, "step": 4769 }, { "epoch": 0.1465302737074924, "grad_norm": 0.28892984986305237, "learning_rate": 1.9746506297539775e-05, "loss": 0.5833, "step": 4770 }, { "epoch": 0.14656099284244156, "grad_norm": 0.32617899775505066, "learning_rate": 1.9746398161399478e-05, "loss": 0.6057, "step": 4771 }, { "epoch": 0.1465917119773907, "grad_norm": 0.4247855544090271, "learning_rate": 1.9746290002495786e-05, "loss": 0.6221, "step": 4772 }, { "epoch": 0.14662243111233988, "grad_norm": 0.35198086500167847, "learning_rate": 1.9746181820828955e-05, "loss": 0.5673, "step": 4773 }, { "epoch": 0.14665315024728903, "grad_norm": 0.3400936424732208, "learning_rate": 1.9746073616399232e-05, "loss": 0.5847, "step": 4774 }, { "epoch": 0.1466838693822382, "grad_norm": 0.30825960636138916, "learning_rate": 1.9745965389206877e-05, "loss": 0.6256, "step": 4775 }, { "epoch": 0.14671458851718736, "grad_norm": 0.331634521484375, "learning_rate": 1.9745857139252138e-05, "loss": 0.6003, "step": 4776 }, { "epoch": 0.1467453076521365, "grad_norm": 0.30760860443115234, "learning_rate": 1.974574886653527e-05, "loss": 0.6155, "step": 4777 }, { "epoch": 0.14677602678708568, "grad_norm": 0.35115793347358704, "learning_rate": 1.9745640571056524e-05, "loss": 0.5617, "step": 4778 }, { "epoch": 0.14680674592203483, "grad_norm": 0.3482967019081116, "learning_rate": 1.9745532252816153e-05, "loss": 0.6114, "step": 4779 }, { "epoch": 0.146837465056984, "grad_norm": 0.34234291315078735, "learning_rate": 1.9745423911814412e-05, "loss": 0.5958, "step": 4780 }, { "epoch": 0.14686818419193315, "grad_norm": 0.32291436195373535, "learning_rate": 1.974531554805155e-05, "loss": 0.6199, "step": 4781 }, { "epoch": 0.14689890332688232, "grad_norm": 0.34957388043403625, "learning_rate": 1.9745207161527826e-05, "loss": 0.645, "step": 4782 }, { "epoch": 0.14692962246183147, "grad_norm": 0.34600961208343506, "learning_rate": 1.974509875224349e-05, "loss": 0.6215, "step": 4783 }, { "epoch": 0.14696034159678065, "grad_norm": 0.4302138388156891, "learning_rate": 1.9744990320198797e-05, "loss": 0.7371, "step": 4784 }, { "epoch": 0.1469910607317298, "grad_norm": 0.3358970880508423, "learning_rate": 1.9744881865393995e-05, "loss": 0.623, "step": 4785 }, { "epoch": 0.14702177986667894, "grad_norm": 0.31257015466690063, "learning_rate": 1.974477338782934e-05, "loss": 0.584, "step": 4786 }, { "epoch": 0.14705249900162812, "grad_norm": 0.3119466006755829, "learning_rate": 1.9744664887505092e-05, "loss": 0.6548, "step": 4787 }, { "epoch": 0.14708321813657727, "grad_norm": 0.3779031038284302, "learning_rate": 1.9744556364421492e-05, "loss": 0.6092, "step": 4788 }, { "epoch": 0.14711393727152644, "grad_norm": 0.37200111150741577, "learning_rate": 1.9744447818578805e-05, "loss": 0.5909, "step": 4789 }, { "epoch": 0.1471446564064756, "grad_norm": 0.3327055871486664, "learning_rate": 1.9744339249977278e-05, "loss": 0.5838, "step": 4790 }, { "epoch": 0.14717537554142476, "grad_norm": 0.3468448221683502, "learning_rate": 1.9744230658617165e-05, "loss": 0.6124, "step": 4791 }, { "epoch": 0.1472060946763739, "grad_norm": 0.3502403497695923, "learning_rate": 1.974412204449872e-05, "loss": 0.6174, "step": 4792 }, { "epoch": 0.1472368138113231, "grad_norm": 0.3176158368587494, "learning_rate": 1.97440134076222e-05, "loss": 0.6229, "step": 4793 }, { "epoch": 0.14726753294627223, "grad_norm": 0.3774160146713257, "learning_rate": 1.9743904747987853e-05, "loss": 0.7141, "step": 4794 }, { "epoch": 0.14729825208122138, "grad_norm": 0.34015393257141113, "learning_rate": 1.974379606559594e-05, "loss": 0.6389, "step": 4795 }, { "epoch": 0.14732897121617056, "grad_norm": 0.4488866627216339, "learning_rate": 1.974368736044671e-05, "loss": 0.5891, "step": 4796 }, { "epoch": 0.1473596903511197, "grad_norm": 0.336093932390213, "learning_rate": 1.9743578632540415e-05, "loss": 0.6328, "step": 4797 }, { "epoch": 0.14739040948606888, "grad_norm": 0.4007457494735718, "learning_rate": 1.9743469881877315e-05, "loss": 0.661, "step": 4798 }, { "epoch": 0.14742112862101803, "grad_norm": 0.35673320293426514, "learning_rate": 1.9743361108457658e-05, "loss": 0.5848, "step": 4799 }, { "epoch": 0.1474518477559672, "grad_norm": 0.5031554102897644, "learning_rate": 1.97432523122817e-05, "loss": 0.585, "step": 4800 }, { "epoch": 0.14748256689091635, "grad_norm": 0.36293116211891174, "learning_rate": 1.9743143493349697e-05, "loss": 0.5921, "step": 4801 }, { "epoch": 0.14751328602586553, "grad_norm": 0.30360284447669983, "learning_rate": 1.9743034651661905e-05, "loss": 0.558, "step": 4802 }, { "epoch": 0.14754400516081467, "grad_norm": 0.38399043679237366, "learning_rate": 1.974292578721857e-05, "loss": 0.5604, "step": 4803 }, { "epoch": 0.14757472429576382, "grad_norm": 0.3365553319454193, "learning_rate": 1.9742816900019954e-05, "loss": 0.5896, "step": 4804 }, { "epoch": 0.147605443430713, "grad_norm": 0.35880932211875916, "learning_rate": 1.974270799006631e-05, "loss": 0.5915, "step": 4805 }, { "epoch": 0.14763616256566214, "grad_norm": 0.3471909761428833, "learning_rate": 1.9742599057357887e-05, "loss": 0.5888, "step": 4806 }, { "epoch": 0.14766688170061132, "grad_norm": 0.3521879315376282, "learning_rate": 1.9742490101894945e-05, "loss": 0.6289, "step": 4807 }, { "epoch": 0.14769760083556047, "grad_norm": 2.2807838916778564, "learning_rate": 1.974238112367774e-05, "loss": 0.6294, "step": 4808 }, { "epoch": 0.14772831997050964, "grad_norm": 0.33911046385765076, "learning_rate": 1.974227212270652e-05, "loss": 0.5627, "step": 4809 }, { "epoch": 0.1477590391054588, "grad_norm": 0.3293204605579376, "learning_rate": 1.9742163098981547e-05, "loss": 0.6173, "step": 4810 }, { "epoch": 0.14778975824040794, "grad_norm": 0.3551715910434723, "learning_rate": 1.974205405250307e-05, "loss": 0.6545, "step": 4811 }, { "epoch": 0.1478204773753571, "grad_norm": 0.34474244713783264, "learning_rate": 1.974194498327134e-05, "loss": 0.6563, "step": 4812 }, { "epoch": 0.14785119651030626, "grad_norm": 0.3799642324447632, "learning_rate": 1.974183589128662e-05, "loss": 0.5095, "step": 4813 }, { "epoch": 0.14788191564525544, "grad_norm": 0.3548165261745453, "learning_rate": 1.9741726776549166e-05, "loss": 0.6373, "step": 4814 }, { "epoch": 0.14791263478020458, "grad_norm": 0.30076438188552856, "learning_rate": 1.9741617639059227e-05, "loss": 0.546, "step": 4815 }, { "epoch": 0.14794335391515376, "grad_norm": 0.3394749164581299, "learning_rate": 1.974150847881706e-05, "loss": 0.5952, "step": 4816 }, { "epoch": 0.1479740730501029, "grad_norm": 0.4098278880119324, "learning_rate": 1.9741399295822917e-05, "loss": 0.6344, "step": 4817 }, { "epoch": 0.14800479218505208, "grad_norm": 0.34193432331085205, "learning_rate": 1.9741290090077058e-05, "loss": 0.5785, "step": 4818 }, { "epoch": 0.14803551132000123, "grad_norm": 0.36692535877227783, "learning_rate": 1.9741180861579734e-05, "loss": 0.5973, "step": 4819 }, { "epoch": 0.14806623045495038, "grad_norm": 0.3485074043273926, "learning_rate": 1.9741071610331203e-05, "loss": 0.5421, "step": 4820 }, { "epoch": 0.14809694958989955, "grad_norm": 0.37052252888679504, "learning_rate": 1.974096233633172e-05, "loss": 0.651, "step": 4821 }, { "epoch": 0.1481276687248487, "grad_norm": 0.3533651828765869, "learning_rate": 1.9740853039581535e-05, "loss": 0.6351, "step": 4822 }, { "epoch": 0.14815838785979787, "grad_norm": 0.3772926330566406, "learning_rate": 1.974074372008091e-05, "loss": 0.6659, "step": 4823 }, { "epoch": 0.14818910699474702, "grad_norm": 0.6057236194610596, "learning_rate": 1.97406343778301e-05, "loss": 0.5891, "step": 4824 }, { "epoch": 0.1482198261296962, "grad_norm": 0.3263765573501587, "learning_rate": 1.9740525012829354e-05, "loss": 0.6219, "step": 4825 }, { "epoch": 0.14825054526464534, "grad_norm": 0.3561840057373047, "learning_rate": 1.9740415625078933e-05, "loss": 0.5654, "step": 4826 }, { "epoch": 0.14828126439959452, "grad_norm": 0.3668535351753235, "learning_rate": 1.974030621457909e-05, "loss": 0.616, "step": 4827 }, { "epoch": 0.14831198353454367, "grad_norm": 0.3572273254394531, "learning_rate": 1.974019678133009e-05, "loss": 0.6474, "step": 4828 }, { "epoch": 0.14834270266949282, "grad_norm": 0.5557000041007996, "learning_rate": 1.974008732533217e-05, "loss": 0.5906, "step": 4829 }, { "epoch": 0.148373421804442, "grad_norm": 0.3393394649028778, "learning_rate": 1.97399778465856e-05, "loss": 0.5538, "step": 4830 }, { "epoch": 0.14840414093939114, "grad_norm": 0.30344414710998535, "learning_rate": 1.9739868345090628e-05, "loss": 0.5774, "step": 4831 }, { "epoch": 0.1484348600743403, "grad_norm": 0.3095031678676605, "learning_rate": 1.973975882084752e-05, "loss": 0.5802, "step": 4832 }, { "epoch": 0.14846557920928946, "grad_norm": 0.3181275427341461, "learning_rate": 1.9739649273856517e-05, "loss": 0.5321, "step": 4833 }, { "epoch": 0.14849629834423864, "grad_norm": 0.37298038601875305, "learning_rate": 1.9739539704117885e-05, "loss": 0.5557, "step": 4834 }, { "epoch": 0.14852701747918778, "grad_norm": 0.3339169919490814, "learning_rate": 1.9739430111631883e-05, "loss": 0.6488, "step": 4835 }, { "epoch": 0.14855773661413696, "grad_norm": 0.32940781116485596, "learning_rate": 1.973932049639876e-05, "loss": 0.6137, "step": 4836 }, { "epoch": 0.1485884557490861, "grad_norm": 0.3499702513217926, "learning_rate": 1.973921085841877e-05, "loss": 0.6311, "step": 4837 }, { "epoch": 0.14861917488403525, "grad_norm": 0.44025367498397827, "learning_rate": 1.9739101197692175e-05, "loss": 0.6075, "step": 4838 }, { "epoch": 0.14864989401898443, "grad_norm": 0.36970919370651245, "learning_rate": 1.973899151421923e-05, "loss": 0.6755, "step": 4839 }, { "epoch": 0.14868061315393358, "grad_norm": 0.3728230893611908, "learning_rate": 1.9738881808000188e-05, "loss": 0.568, "step": 4840 }, { "epoch": 0.14871133228888275, "grad_norm": 0.3705703616142273, "learning_rate": 1.9738772079035308e-05, "loss": 0.5974, "step": 4841 }, { "epoch": 0.1487420514238319, "grad_norm": 0.3555648624897003, "learning_rate": 1.9738662327324848e-05, "loss": 0.6622, "step": 4842 }, { "epoch": 0.14877277055878108, "grad_norm": 0.34021955728530884, "learning_rate": 1.973855255286906e-05, "loss": 0.6098, "step": 4843 }, { "epoch": 0.14880348969373022, "grad_norm": 0.4137188792228699, "learning_rate": 1.9738442755668205e-05, "loss": 0.5729, "step": 4844 }, { "epoch": 0.14883420882867937, "grad_norm": 0.3055739998817444, "learning_rate": 1.9738332935722533e-05, "loss": 0.5811, "step": 4845 }, { "epoch": 0.14886492796362855, "grad_norm": 0.46475502848625183, "learning_rate": 1.9738223093032305e-05, "loss": 0.4683, "step": 4846 }, { "epoch": 0.1488956470985777, "grad_norm": 0.33170413970947266, "learning_rate": 1.9738113227597775e-05, "loss": 0.6634, "step": 4847 }, { "epoch": 0.14892636623352687, "grad_norm": 0.8211101293563843, "learning_rate": 1.973800333941921e-05, "loss": 0.6607, "step": 4848 }, { "epoch": 0.14895708536847602, "grad_norm": 0.36944326758384705, "learning_rate": 1.9737893428496848e-05, "loss": 0.7089, "step": 4849 }, { "epoch": 0.1489878045034252, "grad_norm": 0.3557504415512085, "learning_rate": 1.9737783494830962e-05, "loss": 0.6058, "step": 4850 }, { "epoch": 0.14901852363837434, "grad_norm": 0.36084431409835815, "learning_rate": 1.97376735384218e-05, "loss": 0.6009, "step": 4851 }, { "epoch": 0.14904924277332351, "grad_norm": 0.32979193329811096, "learning_rate": 1.9737563559269622e-05, "loss": 0.5818, "step": 4852 }, { "epoch": 0.14907996190827266, "grad_norm": 0.32719457149505615, "learning_rate": 1.9737453557374686e-05, "loss": 0.5469, "step": 4853 }, { "epoch": 0.1491106810432218, "grad_norm": 0.35036420822143555, "learning_rate": 1.9737343532737245e-05, "loss": 0.5393, "step": 4854 }, { "epoch": 0.14914140017817099, "grad_norm": 0.3432389795780182, "learning_rate": 1.9737233485357558e-05, "loss": 0.5812, "step": 4855 }, { "epoch": 0.14917211931312013, "grad_norm": 0.3721047043800354, "learning_rate": 1.973712341523588e-05, "loss": 0.6876, "step": 4856 }, { "epoch": 0.1492028384480693, "grad_norm": 0.35495609045028687, "learning_rate": 1.9737013322372476e-05, "loss": 0.5553, "step": 4857 }, { "epoch": 0.14923355758301846, "grad_norm": 0.3209853172302246, "learning_rate": 1.9736903206767596e-05, "loss": 0.544, "step": 4858 }, { "epoch": 0.14926427671796763, "grad_norm": 0.404523640871048, "learning_rate": 1.9736793068421493e-05, "loss": 0.6521, "step": 4859 }, { "epoch": 0.14929499585291678, "grad_norm": 0.36585500836372375, "learning_rate": 1.9736682907334437e-05, "loss": 0.6226, "step": 4860 }, { "epoch": 0.14932571498786595, "grad_norm": 0.3311029076576233, "learning_rate": 1.9736572723506673e-05, "loss": 0.5848, "step": 4861 }, { "epoch": 0.1493564341228151, "grad_norm": 0.9910761713981628, "learning_rate": 1.9736462516938464e-05, "loss": 0.5675, "step": 4862 }, { "epoch": 0.14938715325776425, "grad_norm": 0.3465051054954529, "learning_rate": 1.9736352287630066e-05, "loss": 0.639, "step": 4863 }, { "epoch": 0.14941787239271342, "grad_norm": 0.3524898886680603, "learning_rate": 1.973624203558174e-05, "loss": 0.5916, "step": 4864 }, { "epoch": 0.14944859152766257, "grad_norm": 0.2954869866371155, "learning_rate": 1.973613176079374e-05, "loss": 0.6079, "step": 4865 }, { "epoch": 0.14947931066261175, "grad_norm": 0.310528963804245, "learning_rate": 1.9736021463266327e-05, "loss": 0.5408, "step": 4866 }, { "epoch": 0.1495100297975609, "grad_norm": 0.39237895607948303, "learning_rate": 1.9735911142999753e-05, "loss": 0.6318, "step": 4867 }, { "epoch": 0.14954074893251007, "grad_norm": 0.35592129826545715, "learning_rate": 1.973580079999428e-05, "loss": 0.5945, "step": 4868 }, { "epoch": 0.14957146806745922, "grad_norm": 0.3211767375469208, "learning_rate": 1.973569043425016e-05, "loss": 0.6383, "step": 4869 }, { "epoch": 0.1496021872024084, "grad_norm": 0.34301164746284485, "learning_rate": 1.973558004576766e-05, "loss": 0.5979, "step": 4870 }, { "epoch": 0.14963290633735754, "grad_norm": 0.4573571979999542, "learning_rate": 1.973546963454703e-05, "loss": 0.6157, "step": 4871 }, { "epoch": 0.1496636254723067, "grad_norm": 0.3335179388523102, "learning_rate": 1.9735359200588534e-05, "loss": 0.6014, "step": 4872 }, { "epoch": 0.14969434460725586, "grad_norm": 0.32947319746017456, "learning_rate": 1.9735248743892424e-05, "loss": 0.617, "step": 4873 }, { "epoch": 0.149725063742205, "grad_norm": 0.3283652663230896, "learning_rate": 1.973513826445896e-05, "loss": 0.6119, "step": 4874 }, { "epoch": 0.1497557828771542, "grad_norm": 0.3368741571903229, "learning_rate": 1.9735027762288402e-05, "loss": 0.5733, "step": 4875 }, { "epoch": 0.14978650201210333, "grad_norm": 0.34320947527885437, "learning_rate": 1.9734917237381003e-05, "loss": 0.5962, "step": 4876 }, { "epoch": 0.1498172211470525, "grad_norm": 0.3396119773387909, "learning_rate": 1.973480668973703e-05, "loss": 0.6678, "step": 4877 }, { "epoch": 0.14984794028200166, "grad_norm": 0.40041738748550415, "learning_rate": 1.9734696119356736e-05, "loss": 0.5517, "step": 4878 }, { "epoch": 0.14987865941695083, "grad_norm": 0.3256848156452179, "learning_rate": 1.973458552624038e-05, "loss": 0.6205, "step": 4879 }, { "epoch": 0.14990937855189998, "grad_norm": 0.34792962670326233, "learning_rate": 1.9734474910388214e-05, "loss": 0.6562, "step": 4880 }, { "epoch": 0.14994009768684913, "grad_norm": 0.4179995656013489, "learning_rate": 1.9734364271800506e-05, "loss": 0.6121, "step": 4881 }, { "epoch": 0.1499708168217983, "grad_norm": 0.3125300407409668, "learning_rate": 1.973425361047751e-05, "loss": 0.6282, "step": 4882 }, { "epoch": 0.15000153595674745, "grad_norm": 0.36008089780807495, "learning_rate": 1.9734142926419487e-05, "loss": 0.5374, "step": 4883 }, { "epoch": 0.15003225509169663, "grad_norm": 0.3300665318965912, "learning_rate": 1.9734032219626687e-05, "loss": 0.632, "step": 4884 }, { "epoch": 0.15006297422664577, "grad_norm": 0.32623791694641113, "learning_rate": 1.973392149009938e-05, "loss": 0.6238, "step": 4885 }, { "epoch": 0.15009369336159495, "grad_norm": 0.33197078108787537, "learning_rate": 1.9733810737837818e-05, "loss": 0.4938, "step": 4886 }, { "epoch": 0.1501244124965441, "grad_norm": 0.32357266545295715, "learning_rate": 1.9733699962842264e-05, "loss": 0.6822, "step": 4887 }, { "epoch": 0.15015513163149324, "grad_norm": 0.35154107213020325, "learning_rate": 1.973358916511297e-05, "loss": 0.5751, "step": 4888 }, { "epoch": 0.15018585076644242, "grad_norm": 0.35157617926597595, "learning_rate": 1.9733478344650205e-05, "loss": 0.5589, "step": 4889 }, { "epoch": 0.15021656990139157, "grad_norm": 0.3679426312446594, "learning_rate": 1.9733367501454214e-05, "loss": 0.6564, "step": 4890 }, { "epoch": 0.15024728903634074, "grad_norm": 0.3270335793495178, "learning_rate": 1.9733256635525265e-05, "loss": 0.6073, "step": 4891 }, { "epoch": 0.1502780081712899, "grad_norm": 0.3486824631690979, "learning_rate": 1.973314574686362e-05, "loss": 0.6348, "step": 4892 }, { "epoch": 0.15030872730623906, "grad_norm": 0.4083581566810608, "learning_rate": 1.973303483546953e-05, "loss": 0.6308, "step": 4893 }, { "epoch": 0.1503394464411882, "grad_norm": 0.3388324975967407, "learning_rate": 1.9732923901343258e-05, "loss": 0.6337, "step": 4894 }, { "epoch": 0.1503701655761374, "grad_norm": 0.35632890462875366, "learning_rate": 1.973281294448506e-05, "loss": 0.6184, "step": 4895 }, { "epoch": 0.15040088471108654, "grad_norm": 0.3201945424079895, "learning_rate": 1.97327019648952e-05, "loss": 0.5966, "step": 4896 }, { "epoch": 0.15043160384603568, "grad_norm": 0.32549065351486206, "learning_rate": 1.9732590962573934e-05, "loss": 0.5933, "step": 4897 }, { "epoch": 0.15046232298098486, "grad_norm": 0.3797222971916199, "learning_rate": 1.973247993752152e-05, "loss": 0.6695, "step": 4898 }, { "epoch": 0.150493042115934, "grad_norm": 0.33088791370391846, "learning_rate": 1.9732368889738224e-05, "loss": 0.5841, "step": 4899 }, { "epoch": 0.15052376125088318, "grad_norm": 0.3576861023902893, "learning_rate": 1.97322578192243e-05, "loss": 0.6714, "step": 4900 }, { "epoch": 0.15055448038583233, "grad_norm": 0.39559316635131836, "learning_rate": 1.9732146725980006e-05, "loss": 0.6624, "step": 4901 }, { "epoch": 0.1505851995207815, "grad_norm": 0.3517100512981415, "learning_rate": 1.9732035610005606e-05, "loss": 0.5378, "step": 4902 }, { "epoch": 0.15061591865573065, "grad_norm": 0.33203622698783875, "learning_rate": 1.9731924471301355e-05, "loss": 0.6021, "step": 4903 }, { "epoch": 0.15064663779067983, "grad_norm": 0.33199042081832886, "learning_rate": 1.9731813309867517e-05, "loss": 0.6023, "step": 4904 }, { "epoch": 0.15067735692562897, "grad_norm": 0.3226483464241028, "learning_rate": 1.9731702125704346e-05, "loss": 0.518, "step": 4905 }, { "epoch": 0.15070807606057812, "grad_norm": 0.3974003195762634, "learning_rate": 1.9731590918812107e-05, "loss": 0.6347, "step": 4906 }, { "epoch": 0.1507387951955273, "grad_norm": 0.3119557499885559, "learning_rate": 1.9731479689191056e-05, "loss": 0.5276, "step": 4907 }, { "epoch": 0.15076951433047645, "grad_norm": 0.5529022216796875, "learning_rate": 1.973136843684146e-05, "loss": 0.5759, "step": 4908 }, { "epoch": 0.15080023346542562, "grad_norm": 0.32860803604125977, "learning_rate": 1.9731257161763565e-05, "loss": 0.5692, "step": 4909 }, { "epoch": 0.15083095260037477, "grad_norm": 0.3538283109664917, "learning_rate": 1.973114586395765e-05, "loss": 0.6305, "step": 4910 }, { "epoch": 0.15086167173532394, "grad_norm": 0.2902280390262604, "learning_rate": 1.9731034543423956e-05, "loss": 0.6115, "step": 4911 }, { "epoch": 0.1508923908702731, "grad_norm": 0.36382487416267395, "learning_rate": 1.9730923200162753e-05, "loss": 0.5919, "step": 4912 }, { "epoch": 0.15092311000522227, "grad_norm": 0.3328670859336853, "learning_rate": 1.97308118341743e-05, "loss": 0.6552, "step": 4913 }, { "epoch": 0.1509538291401714, "grad_norm": 0.3893597722053528, "learning_rate": 1.9730700445458856e-05, "loss": 0.5811, "step": 4914 }, { "epoch": 0.15098454827512056, "grad_norm": 0.3250792324542999, "learning_rate": 1.9730589034016678e-05, "loss": 0.6666, "step": 4915 }, { "epoch": 0.15101526741006974, "grad_norm": 0.3149586617946625, "learning_rate": 1.9730477599848036e-05, "loss": 0.5287, "step": 4916 }, { "epoch": 0.15104598654501888, "grad_norm": 0.34516385197639465, "learning_rate": 1.9730366142953178e-05, "loss": 0.6476, "step": 4917 }, { "epoch": 0.15107670567996806, "grad_norm": 0.3535425662994385, "learning_rate": 1.9730254663332376e-05, "loss": 0.6283, "step": 4918 }, { "epoch": 0.1511074248149172, "grad_norm": 0.3435814678668976, "learning_rate": 1.9730143160985883e-05, "loss": 0.6527, "step": 4919 }, { "epoch": 0.15113814394986638, "grad_norm": 0.4311700165271759, "learning_rate": 1.973003163591396e-05, "loss": 0.6276, "step": 4920 }, { "epoch": 0.15116886308481553, "grad_norm": 0.4034916162490845, "learning_rate": 1.972992008811687e-05, "loss": 0.5686, "step": 4921 }, { "epoch": 0.15119958221976468, "grad_norm": 0.3205583393573761, "learning_rate": 1.972980851759487e-05, "loss": 0.5913, "step": 4922 }, { "epoch": 0.15123030135471385, "grad_norm": 0.35011646151542664, "learning_rate": 1.972969692434822e-05, "loss": 0.6284, "step": 4923 }, { "epoch": 0.151261020489663, "grad_norm": 0.3304441273212433, "learning_rate": 1.9729585308377192e-05, "loss": 0.5177, "step": 4924 }, { "epoch": 0.15129173962461218, "grad_norm": 0.33244651556015015, "learning_rate": 1.972947366968203e-05, "loss": 0.5814, "step": 4925 }, { "epoch": 0.15132245875956132, "grad_norm": 0.35305142402648926, "learning_rate": 1.9729362008263003e-05, "loss": 0.5601, "step": 4926 }, { "epoch": 0.1513531778945105, "grad_norm": 0.33509311079978943, "learning_rate": 1.9729250324120377e-05, "loss": 0.602, "step": 4927 }, { "epoch": 0.15138389702945965, "grad_norm": 0.433647096157074, "learning_rate": 1.9729138617254408e-05, "loss": 0.6239, "step": 4928 }, { "epoch": 0.15141461616440882, "grad_norm": 0.3567454218864441, "learning_rate": 1.972902688766535e-05, "loss": 0.5556, "step": 4929 }, { "epoch": 0.15144533529935797, "grad_norm": 0.33331063389778137, "learning_rate": 1.9728915135353474e-05, "loss": 0.6492, "step": 4930 }, { "epoch": 0.15147605443430712, "grad_norm": 0.35652223229408264, "learning_rate": 1.9728803360319035e-05, "loss": 0.6878, "step": 4931 }, { "epoch": 0.1515067735692563, "grad_norm": 0.3296626806259155, "learning_rate": 1.9728691562562297e-05, "loss": 0.5978, "step": 4932 }, { "epoch": 0.15153749270420544, "grad_norm": 0.32020148634910583, "learning_rate": 1.972857974208352e-05, "loss": 0.6552, "step": 4933 }, { "epoch": 0.15156821183915462, "grad_norm": 0.3121306300163269, "learning_rate": 1.9728467898882967e-05, "loss": 0.5679, "step": 4934 }, { "epoch": 0.15159893097410376, "grad_norm": 0.32141393423080444, "learning_rate": 1.97283560329609e-05, "loss": 0.6285, "step": 4935 }, { "epoch": 0.15162965010905294, "grad_norm": 0.35459214448928833, "learning_rate": 1.9728244144317578e-05, "loss": 0.6157, "step": 4936 }, { "epoch": 0.15166036924400209, "grad_norm": 0.35573428869247437, "learning_rate": 1.9728132232953257e-05, "loss": 0.7715, "step": 4937 }, { "epoch": 0.15169108837895126, "grad_norm": 0.3394521176815033, "learning_rate": 1.9728020298868206e-05, "loss": 0.6352, "step": 4938 }, { "epoch": 0.1517218075139004, "grad_norm": 0.3097068965435028, "learning_rate": 1.9727908342062685e-05, "loss": 0.588, "step": 4939 }, { "epoch": 0.15175252664884956, "grad_norm": 0.3012785017490387, "learning_rate": 1.9727796362536956e-05, "loss": 0.6145, "step": 4940 }, { "epoch": 0.15178324578379873, "grad_norm": 0.32570454478263855, "learning_rate": 1.972768436029128e-05, "loss": 0.6511, "step": 4941 }, { "epoch": 0.15181396491874788, "grad_norm": 0.39569348096847534, "learning_rate": 1.9727572335325914e-05, "loss": 0.6145, "step": 4942 }, { "epoch": 0.15184468405369705, "grad_norm": 0.800865113735199, "learning_rate": 1.9727460287641124e-05, "loss": 0.5693, "step": 4943 }, { "epoch": 0.1518754031886462, "grad_norm": 0.32617682218551636, "learning_rate": 1.9727348217237172e-05, "loss": 0.5608, "step": 4944 }, { "epoch": 0.15190612232359538, "grad_norm": 0.34555384516716003, "learning_rate": 1.9727236124114316e-05, "loss": 0.6074, "step": 4945 }, { "epoch": 0.15193684145854452, "grad_norm": 0.4082109332084656, "learning_rate": 1.9727124008272827e-05, "loss": 0.5635, "step": 4946 }, { "epoch": 0.1519675605934937, "grad_norm": 0.34150490164756775, "learning_rate": 1.972701186971296e-05, "loss": 0.5238, "step": 4947 }, { "epoch": 0.15199827972844285, "grad_norm": 0.4103369116783142, "learning_rate": 1.9726899708434972e-05, "loss": 0.6349, "step": 4948 }, { "epoch": 0.152028998863392, "grad_norm": 0.6631600856781006, "learning_rate": 1.972678752443913e-05, "loss": 0.7341, "step": 4949 }, { "epoch": 0.15205971799834117, "grad_norm": 0.3640218675136566, "learning_rate": 1.97266753177257e-05, "loss": 0.5982, "step": 4950 }, { "epoch": 0.15209043713329032, "grad_norm": 0.3596055507659912, "learning_rate": 1.9726563088294942e-05, "loss": 0.6268, "step": 4951 }, { "epoch": 0.1521211562682395, "grad_norm": 0.3377299904823303, "learning_rate": 1.9726450836147115e-05, "loss": 0.669, "step": 4952 }, { "epoch": 0.15215187540318864, "grad_norm": 0.3247532546520233, "learning_rate": 1.972633856128248e-05, "loss": 0.5829, "step": 4953 }, { "epoch": 0.15218259453813782, "grad_norm": 0.343746155500412, "learning_rate": 1.9726226263701308e-05, "loss": 0.6164, "step": 4954 }, { "epoch": 0.15221331367308696, "grad_norm": 0.3226998448371887, "learning_rate": 1.972611394340385e-05, "loss": 0.6133, "step": 4955 }, { "epoch": 0.15224403280803614, "grad_norm": 0.36335867643356323, "learning_rate": 1.9726001600390377e-05, "loss": 0.6216, "step": 4956 }, { "epoch": 0.1522747519429853, "grad_norm": 0.346691370010376, "learning_rate": 1.9725889234661144e-05, "loss": 0.6003, "step": 4957 }, { "epoch": 0.15230547107793443, "grad_norm": 0.32536736130714417, "learning_rate": 1.972577684621642e-05, "loss": 0.5976, "step": 4958 }, { "epoch": 0.1523361902128836, "grad_norm": 0.3481276333332062, "learning_rate": 1.9725664435056465e-05, "loss": 0.5523, "step": 4959 }, { "epoch": 0.15236690934783276, "grad_norm": 0.4066382646560669, "learning_rate": 1.972555200118154e-05, "loss": 0.5911, "step": 4960 }, { "epoch": 0.15239762848278193, "grad_norm": 0.34515976905822754, "learning_rate": 1.9725439544591915e-05, "loss": 0.5923, "step": 4961 }, { "epoch": 0.15242834761773108, "grad_norm": 0.3588673770427704, "learning_rate": 1.972532706528784e-05, "loss": 0.6143, "step": 4962 }, { "epoch": 0.15245906675268026, "grad_norm": 0.34160295128822327, "learning_rate": 1.9725214563269585e-05, "loss": 0.7171, "step": 4963 }, { "epoch": 0.1524897858876294, "grad_norm": 0.35982197523117065, "learning_rate": 1.9725102038537415e-05, "loss": 0.6319, "step": 4964 }, { "epoch": 0.15252050502257855, "grad_norm": 0.3422704041004181, "learning_rate": 1.9724989491091588e-05, "loss": 0.5777, "step": 4965 }, { "epoch": 0.15255122415752773, "grad_norm": 0.35097262263298035, "learning_rate": 1.972487692093237e-05, "loss": 0.662, "step": 4966 }, { "epoch": 0.15258194329247687, "grad_norm": 0.3383050262928009, "learning_rate": 1.972476432806002e-05, "loss": 0.6296, "step": 4967 }, { "epoch": 0.15261266242742605, "grad_norm": 0.3413738012313843, "learning_rate": 1.9724651712474808e-05, "loss": 0.6212, "step": 4968 }, { "epoch": 0.1526433815623752, "grad_norm": 0.3678542971611023, "learning_rate": 1.972453907417699e-05, "loss": 0.6, "step": 4969 }, { "epoch": 0.15267410069732437, "grad_norm": 0.35124602913856506, "learning_rate": 1.9724426413166834e-05, "loss": 0.5901, "step": 4970 }, { "epoch": 0.15270481983227352, "grad_norm": 0.33392852544784546, "learning_rate": 1.97243137294446e-05, "loss": 0.5496, "step": 4971 }, { "epoch": 0.1527355389672227, "grad_norm": 0.3229723274707794, "learning_rate": 1.972420102301055e-05, "loss": 0.5744, "step": 4972 }, { "epoch": 0.15276625810217184, "grad_norm": 0.5563663244247437, "learning_rate": 1.9724088293864952e-05, "loss": 0.5271, "step": 4973 }, { "epoch": 0.152796977237121, "grad_norm": 0.341386616230011, "learning_rate": 1.9723975542008066e-05, "loss": 0.5892, "step": 4974 }, { "epoch": 0.15282769637207017, "grad_norm": 0.6006923913955688, "learning_rate": 1.9723862767440157e-05, "loss": 0.7016, "step": 4975 }, { "epoch": 0.1528584155070193, "grad_norm": 0.34445998072624207, "learning_rate": 1.9723749970161485e-05, "loss": 0.5739, "step": 4976 }, { "epoch": 0.1528891346419685, "grad_norm": 0.31865936517715454, "learning_rate": 1.9723637150172318e-05, "loss": 0.5886, "step": 4977 }, { "epoch": 0.15291985377691764, "grad_norm": 0.35978955030441284, "learning_rate": 1.9723524307472917e-05, "loss": 0.5543, "step": 4978 }, { "epoch": 0.1529505729118668, "grad_norm": 0.4027895927429199, "learning_rate": 1.9723411442063542e-05, "loss": 0.6055, "step": 4979 }, { "epoch": 0.15298129204681596, "grad_norm": 0.35369014739990234, "learning_rate": 1.9723298553944466e-05, "loss": 0.5632, "step": 4980 }, { "epoch": 0.15301201118176513, "grad_norm": 0.3629381060600281, "learning_rate": 1.9723185643115945e-05, "loss": 0.612, "step": 4981 }, { "epoch": 0.15304273031671428, "grad_norm": 0.35895636677742004, "learning_rate": 1.9723072709578244e-05, "loss": 0.6089, "step": 4982 }, { "epoch": 0.15307344945166343, "grad_norm": 0.3464110195636749, "learning_rate": 1.9722959753331627e-05, "loss": 0.6546, "step": 4983 }, { "epoch": 0.1531041685866126, "grad_norm": 0.3057441711425781, "learning_rate": 1.972284677437636e-05, "loss": 0.5315, "step": 4984 }, { "epoch": 0.15313488772156175, "grad_norm": 0.3098561465740204, "learning_rate": 1.9722733772712708e-05, "loss": 0.6316, "step": 4985 }, { "epoch": 0.15316560685651093, "grad_norm": 0.3206138610839844, "learning_rate": 1.972262074834093e-05, "loss": 0.6093, "step": 4986 }, { "epoch": 0.15319632599146008, "grad_norm": 0.34347784519195557, "learning_rate": 1.972250770126129e-05, "loss": 0.5443, "step": 4987 }, { "epoch": 0.15322704512640925, "grad_norm": 0.35404878854751587, "learning_rate": 1.9722394631474056e-05, "loss": 0.5499, "step": 4988 }, { "epoch": 0.1532577642613584, "grad_norm": 0.3880816698074341, "learning_rate": 1.972228153897949e-05, "loss": 0.6642, "step": 4989 }, { "epoch": 0.15328848339630757, "grad_norm": 0.3267437219619751, "learning_rate": 1.9722168423777856e-05, "loss": 0.5389, "step": 4990 }, { "epoch": 0.15331920253125672, "grad_norm": 0.27207812666893005, "learning_rate": 1.972205528586942e-05, "loss": 0.5188, "step": 4991 }, { "epoch": 0.15334992166620587, "grad_norm": 0.3556445837020874, "learning_rate": 1.9721942125254445e-05, "loss": 0.604, "step": 4992 }, { "epoch": 0.15338064080115504, "grad_norm": 0.36455896496772766, "learning_rate": 1.9721828941933195e-05, "loss": 0.6505, "step": 4993 }, { "epoch": 0.1534113599361042, "grad_norm": 0.3516797721385956, "learning_rate": 1.9721715735905934e-05, "loss": 0.6061, "step": 4994 }, { "epoch": 0.15344207907105337, "grad_norm": 0.3720945715904236, "learning_rate": 1.9721602507172927e-05, "loss": 0.6657, "step": 4995 }, { "epoch": 0.15347279820600251, "grad_norm": 0.32599520683288574, "learning_rate": 1.9721489255734437e-05, "loss": 0.5692, "step": 4996 }, { "epoch": 0.1535035173409517, "grad_norm": 0.3009876012802124, "learning_rate": 1.972137598159073e-05, "loss": 0.541, "step": 4997 }, { "epoch": 0.15353423647590084, "grad_norm": 0.32991501688957214, "learning_rate": 1.972126268474207e-05, "loss": 0.6418, "step": 4998 }, { "epoch": 0.15356495561084998, "grad_norm": 0.3624096214771271, "learning_rate": 1.972114936518873e-05, "loss": 0.6493, "step": 4999 }, { "epoch": 0.15359567474579916, "grad_norm": 0.345829576253891, "learning_rate": 1.972103602293096e-05, "loss": 0.6285, "step": 5000 }, { "epoch": 0.1536263938807483, "grad_norm": 0.31113511323928833, "learning_rate": 1.972092265796903e-05, "loss": 0.665, "step": 5001 }, { "epoch": 0.15365711301569748, "grad_norm": 0.31589585542678833, "learning_rate": 1.9720809270303207e-05, "loss": 0.5583, "step": 5002 }, { "epoch": 0.15368783215064663, "grad_norm": 0.31817612051963806, "learning_rate": 1.972069585993376e-05, "loss": 0.6137, "step": 5003 }, { "epoch": 0.1537185512855958, "grad_norm": 0.35874807834625244, "learning_rate": 1.9720582426860942e-05, "loss": 0.6342, "step": 5004 }, { "epoch": 0.15374927042054495, "grad_norm": 0.3284189999103546, "learning_rate": 1.972046897108503e-05, "loss": 0.6091, "step": 5005 }, { "epoch": 0.15377998955549413, "grad_norm": 0.3909201920032501, "learning_rate": 1.9720355492606283e-05, "loss": 0.6204, "step": 5006 }, { "epoch": 0.15381070869044328, "grad_norm": 0.3562530279159546, "learning_rate": 1.9720241991424963e-05, "loss": 0.6546, "step": 5007 }, { "epoch": 0.15384142782539242, "grad_norm": 0.3294016420841217, "learning_rate": 1.9720128467541344e-05, "loss": 0.6939, "step": 5008 }, { "epoch": 0.1538721469603416, "grad_norm": 0.38732486963272095, "learning_rate": 1.972001492095568e-05, "loss": 0.529, "step": 5009 }, { "epoch": 0.15390286609529075, "grad_norm": 0.3355622887611389, "learning_rate": 1.971990135166825e-05, "loss": 0.5951, "step": 5010 }, { "epoch": 0.15393358523023992, "grad_norm": 0.3148454427719116, "learning_rate": 1.9719787759679305e-05, "loss": 0.5542, "step": 5011 }, { "epoch": 0.15396430436518907, "grad_norm": 0.317731648683548, "learning_rate": 1.971967414498912e-05, "loss": 0.643, "step": 5012 }, { "epoch": 0.15399502350013825, "grad_norm": 0.3330687880516052, "learning_rate": 1.9719560507597955e-05, "loss": 0.6598, "step": 5013 }, { "epoch": 0.1540257426350874, "grad_norm": 0.35995063185691833, "learning_rate": 1.971944684750608e-05, "loss": 0.6476, "step": 5014 }, { "epoch": 0.15405646177003657, "grad_norm": 0.31915372610092163, "learning_rate": 1.9719333164713762e-05, "loss": 0.6064, "step": 5015 }, { "epoch": 0.15408718090498572, "grad_norm": 0.32760220766067505, "learning_rate": 1.9719219459221258e-05, "loss": 0.5773, "step": 5016 }, { "epoch": 0.15411790003993486, "grad_norm": 0.3440483808517456, "learning_rate": 1.9719105731028837e-05, "loss": 0.6683, "step": 5017 }, { "epoch": 0.15414861917488404, "grad_norm": 0.3330777585506439, "learning_rate": 1.971899198013677e-05, "loss": 0.5291, "step": 5018 }, { "epoch": 0.15417933830983319, "grad_norm": 0.35699528455734253, "learning_rate": 1.9718878206545313e-05, "loss": 0.5431, "step": 5019 }, { "epoch": 0.15421005744478236, "grad_norm": 0.29825013875961304, "learning_rate": 1.971876441025474e-05, "loss": 0.5114, "step": 5020 }, { "epoch": 0.1542407765797315, "grad_norm": 0.3256905674934387, "learning_rate": 1.9718650591265313e-05, "loss": 0.5708, "step": 5021 }, { "epoch": 0.15427149571468068, "grad_norm": 0.3094388246536255, "learning_rate": 1.9718536749577303e-05, "loss": 0.5954, "step": 5022 }, { "epoch": 0.15430221484962983, "grad_norm": 0.3380347788333893, "learning_rate": 1.9718422885190963e-05, "loss": 0.66, "step": 5023 }, { "epoch": 0.154332933984579, "grad_norm": 0.3990163207054138, "learning_rate": 1.9718308998106575e-05, "loss": 0.5513, "step": 5024 }, { "epoch": 0.15436365311952815, "grad_norm": 0.35449719429016113, "learning_rate": 1.9718195088324394e-05, "loss": 0.6397, "step": 5025 }, { "epoch": 0.1543943722544773, "grad_norm": 0.3179326057434082, "learning_rate": 1.9718081155844696e-05, "loss": 0.6421, "step": 5026 }, { "epoch": 0.15442509138942648, "grad_norm": 0.3849300146102905, "learning_rate": 1.9717967200667733e-05, "loss": 0.614, "step": 5027 }, { "epoch": 0.15445581052437563, "grad_norm": 0.3502308428287506, "learning_rate": 1.9717853222793783e-05, "loss": 0.6502, "step": 5028 }, { "epoch": 0.1544865296593248, "grad_norm": 0.4148201048374176, "learning_rate": 1.9717739222223105e-05, "loss": 0.6605, "step": 5029 }, { "epoch": 0.15451724879427395, "grad_norm": 0.3516957461833954, "learning_rate": 1.971762519895597e-05, "loss": 0.644, "step": 5030 }, { "epoch": 0.15454796792922312, "grad_norm": 0.3283853828907013, "learning_rate": 1.971751115299264e-05, "loss": 0.5842, "step": 5031 }, { "epoch": 0.15457868706417227, "grad_norm": 0.36647510528564453, "learning_rate": 1.9717397084333392e-05, "loss": 0.612, "step": 5032 }, { "epoch": 0.15460940619912145, "grad_norm": 0.3753528594970703, "learning_rate": 1.9717282992978478e-05, "loss": 0.6232, "step": 5033 }, { "epoch": 0.1546401253340706, "grad_norm": 0.4416557252407074, "learning_rate": 1.9717168878928173e-05, "loss": 0.4763, "step": 5034 }, { "epoch": 0.15467084446901974, "grad_norm": 0.3052119314670563, "learning_rate": 1.9717054742182742e-05, "loss": 0.5685, "step": 5035 }, { "epoch": 0.15470156360396892, "grad_norm": 0.3113239109516144, "learning_rate": 1.971694058274245e-05, "loss": 0.6127, "step": 5036 }, { "epoch": 0.15473228273891806, "grad_norm": 0.3531402051448822, "learning_rate": 1.9716826400607565e-05, "loss": 0.5812, "step": 5037 }, { "epoch": 0.15476300187386724, "grad_norm": 0.33439117670059204, "learning_rate": 1.971671219577835e-05, "loss": 0.5915, "step": 5038 }, { "epoch": 0.1547937210088164, "grad_norm": 0.3257230520248413, "learning_rate": 1.9716597968255077e-05, "loss": 0.5965, "step": 5039 }, { "epoch": 0.15482444014376556, "grad_norm": 0.3451187014579773, "learning_rate": 1.9716483718038012e-05, "loss": 0.7093, "step": 5040 }, { "epoch": 0.1548551592787147, "grad_norm": 0.32665252685546875, "learning_rate": 1.971636944512742e-05, "loss": 0.5646, "step": 5041 }, { "epoch": 0.15488587841366386, "grad_norm": 0.32626980543136597, "learning_rate": 1.971625514952357e-05, "loss": 0.6131, "step": 5042 }, { "epoch": 0.15491659754861303, "grad_norm": 0.3340093493461609, "learning_rate": 1.9716140831226728e-05, "loss": 0.6249, "step": 5043 }, { "epoch": 0.15494731668356218, "grad_norm": 0.35985082387924194, "learning_rate": 1.9716026490237157e-05, "loss": 0.7364, "step": 5044 }, { "epoch": 0.15497803581851136, "grad_norm": 0.45398813486099243, "learning_rate": 1.9715912126555133e-05, "loss": 0.5563, "step": 5045 }, { "epoch": 0.1550087549534605, "grad_norm": 0.3353818953037262, "learning_rate": 1.9715797740180914e-05, "loss": 0.6421, "step": 5046 }, { "epoch": 0.15503947408840968, "grad_norm": 0.3066404163837433, "learning_rate": 1.9715683331114773e-05, "loss": 0.5191, "step": 5047 }, { "epoch": 0.15507019322335883, "grad_norm": 0.3753024637699127, "learning_rate": 1.9715568899356968e-05, "loss": 0.6097, "step": 5048 }, { "epoch": 0.155100912358308, "grad_norm": 0.32660579681396484, "learning_rate": 1.971545444490778e-05, "loss": 0.5891, "step": 5049 }, { "epoch": 0.15513163149325715, "grad_norm": 0.37823933362960815, "learning_rate": 1.971533996776747e-05, "loss": 0.6066, "step": 5050 }, { "epoch": 0.1551623506282063, "grad_norm": 0.29149770736694336, "learning_rate": 1.9715225467936303e-05, "loss": 0.5542, "step": 5051 }, { "epoch": 0.15519306976315547, "grad_norm": 0.3353050649166107, "learning_rate": 1.9715110945414548e-05, "loss": 0.5777, "step": 5052 }, { "epoch": 0.15522378889810462, "grad_norm": 0.3575619161128998, "learning_rate": 1.9714996400202474e-05, "loss": 0.5967, "step": 5053 }, { "epoch": 0.1552545080330538, "grad_norm": 0.39224323630332947, "learning_rate": 1.971488183230035e-05, "loss": 0.5886, "step": 5054 }, { "epoch": 0.15528522716800294, "grad_norm": 0.3113049864768982, "learning_rate": 1.9714767241708434e-05, "loss": 0.5912, "step": 5055 }, { "epoch": 0.15531594630295212, "grad_norm": 0.39248034358024597, "learning_rate": 1.9714652628427006e-05, "loss": 0.6635, "step": 5056 }, { "epoch": 0.15534666543790127, "grad_norm": 0.3156565725803375, "learning_rate": 1.9714537992456324e-05, "loss": 0.5928, "step": 5057 }, { "epoch": 0.15537738457285044, "grad_norm": 0.33467355370521545, "learning_rate": 1.9714423333796665e-05, "loss": 0.6518, "step": 5058 }, { "epoch": 0.1554081037077996, "grad_norm": 0.33010849356651306, "learning_rate": 1.971430865244829e-05, "loss": 0.6289, "step": 5059 }, { "epoch": 0.15543882284274874, "grad_norm": 0.3161852955818176, "learning_rate": 1.9714193948411467e-05, "loss": 0.5874, "step": 5060 }, { "epoch": 0.1554695419776979, "grad_norm": 0.34696653485298157, "learning_rate": 1.9714079221686467e-05, "loss": 0.6134, "step": 5061 }, { "epoch": 0.15550026111264706, "grad_norm": 0.34161293506622314, "learning_rate": 1.9713964472273556e-05, "loss": 0.5698, "step": 5062 }, { "epoch": 0.15553098024759623, "grad_norm": 0.3741774559020996, "learning_rate": 1.9713849700173004e-05, "loss": 0.6451, "step": 5063 }, { "epoch": 0.15556169938254538, "grad_norm": 0.363383412361145, "learning_rate": 1.9713734905385074e-05, "loss": 0.5928, "step": 5064 }, { "epoch": 0.15559241851749456, "grad_norm": 0.4020008146762848, "learning_rate": 1.9713620087910045e-05, "loss": 0.5885, "step": 5065 }, { "epoch": 0.1556231376524437, "grad_norm": 0.37068241834640503, "learning_rate": 1.9713505247748173e-05, "loss": 0.6265, "step": 5066 }, { "epoch": 0.15565385678739288, "grad_norm": 0.32770898938179016, "learning_rate": 1.9713390384899733e-05, "loss": 0.5934, "step": 5067 }, { "epoch": 0.15568457592234203, "grad_norm": 0.3148091733455658, "learning_rate": 1.971327549936499e-05, "loss": 0.5917, "step": 5068 }, { "epoch": 0.15571529505729118, "grad_norm": 0.330154687166214, "learning_rate": 1.9713160591144213e-05, "loss": 0.668, "step": 5069 }, { "epoch": 0.15574601419224035, "grad_norm": 0.3466147184371948, "learning_rate": 1.971304566023767e-05, "loss": 0.614, "step": 5070 }, { "epoch": 0.1557767333271895, "grad_norm": 0.4033340811729431, "learning_rate": 1.9712930706645633e-05, "loss": 0.5869, "step": 5071 }, { "epoch": 0.15580745246213867, "grad_norm": 0.42030107975006104, "learning_rate": 1.9712815730368365e-05, "loss": 0.6261, "step": 5072 }, { "epoch": 0.15583817159708782, "grad_norm": 0.33607107400894165, "learning_rate": 1.971270073140614e-05, "loss": 0.6059, "step": 5073 }, { "epoch": 0.155868890732037, "grad_norm": 0.3669484257698059, "learning_rate": 1.9712585709759224e-05, "loss": 0.5982, "step": 5074 }, { "epoch": 0.15589960986698614, "grad_norm": 0.3347281813621521, "learning_rate": 1.9712470665427887e-05, "loss": 0.6588, "step": 5075 }, { "epoch": 0.1559303290019353, "grad_norm": 0.37605470418930054, "learning_rate": 1.9712355598412394e-05, "loss": 0.5734, "step": 5076 }, { "epoch": 0.15596104813688447, "grad_norm": 0.3508245348930359, "learning_rate": 1.971224050871302e-05, "loss": 0.6483, "step": 5077 }, { "epoch": 0.15599176727183361, "grad_norm": 0.3361445665359497, "learning_rate": 1.9712125396330025e-05, "loss": 0.6425, "step": 5078 }, { "epoch": 0.1560224864067828, "grad_norm": 0.31319549679756165, "learning_rate": 1.9712010261263685e-05, "loss": 0.5854, "step": 5079 }, { "epoch": 0.15605320554173194, "grad_norm": 0.3893001079559326, "learning_rate": 1.9711895103514267e-05, "loss": 0.6927, "step": 5080 }, { "epoch": 0.1560839246766811, "grad_norm": 0.30647194385528564, "learning_rate": 1.971177992308204e-05, "loss": 0.6038, "step": 5081 }, { "epoch": 0.15611464381163026, "grad_norm": 0.3465379476547241, "learning_rate": 1.9711664719967273e-05, "loss": 0.632, "step": 5082 }, { "epoch": 0.15614536294657944, "grad_norm": 0.3015036880970001, "learning_rate": 1.9711549494170233e-05, "loss": 0.6324, "step": 5083 }, { "epoch": 0.15617608208152858, "grad_norm": 0.3154418468475342, "learning_rate": 1.9711434245691192e-05, "loss": 0.619, "step": 5084 }, { "epoch": 0.15620680121647773, "grad_norm": 0.38356342911720276, "learning_rate": 1.9711318974530418e-05, "loss": 0.5953, "step": 5085 }, { "epoch": 0.1562375203514269, "grad_norm": 0.3928440511226654, "learning_rate": 1.971120368068818e-05, "loss": 0.6491, "step": 5086 }, { "epoch": 0.15626823948637605, "grad_norm": 0.36644038558006287, "learning_rate": 1.971108836416475e-05, "loss": 0.6708, "step": 5087 }, { "epoch": 0.15629895862132523, "grad_norm": 0.3821943700313568, "learning_rate": 1.9710973024960388e-05, "loss": 0.5586, "step": 5088 }, { "epoch": 0.15632967775627438, "grad_norm": 0.361045241355896, "learning_rate": 1.971085766307538e-05, "loss": 0.6688, "step": 5089 }, { "epoch": 0.15636039689122355, "grad_norm": 0.3347843885421753, "learning_rate": 1.971074227850998e-05, "loss": 0.5927, "step": 5090 }, { "epoch": 0.1563911160261727, "grad_norm": 0.31018415093421936, "learning_rate": 1.971062687126446e-05, "loss": 0.5543, "step": 5091 }, { "epoch": 0.15642183516112187, "grad_norm": 0.334972083568573, "learning_rate": 1.97105114413391e-05, "loss": 0.539, "step": 5092 }, { "epoch": 0.15645255429607102, "grad_norm": 0.36420905590057373, "learning_rate": 1.971039598873416e-05, "loss": 0.6469, "step": 5093 }, { "epoch": 0.15648327343102017, "grad_norm": 0.38430678844451904, "learning_rate": 1.971028051344991e-05, "loss": 0.5372, "step": 5094 }, { "epoch": 0.15651399256596935, "grad_norm": 0.39892882108688354, "learning_rate": 1.9710165015486623e-05, "loss": 0.6915, "step": 5095 }, { "epoch": 0.1565447117009185, "grad_norm": 0.3162928819656372, "learning_rate": 1.9710049494844566e-05, "loss": 0.5924, "step": 5096 }, { "epoch": 0.15657543083586767, "grad_norm": 0.3141995370388031, "learning_rate": 1.9709933951524012e-05, "loss": 0.6327, "step": 5097 }, { "epoch": 0.15660614997081682, "grad_norm": 0.3645155727863312, "learning_rate": 1.9709818385525228e-05, "loss": 0.4943, "step": 5098 }, { "epoch": 0.156636869105766, "grad_norm": 0.32271870970726013, "learning_rate": 1.9709702796848486e-05, "loss": 0.5524, "step": 5099 }, { "epoch": 0.15666758824071514, "grad_norm": 0.3616364300251007, "learning_rate": 1.9709587185494055e-05, "loss": 0.6197, "step": 5100 }, { "epoch": 0.15669830737566431, "grad_norm": 0.3578072786331177, "learning_rate": 1.970947155146221e-05, "loss": 0.5822, "step": 5101 }, { "epoch": 0.15672902651061346, "grad_norm": 0.2915003299713135, "learning_rate": 1.9709355894753206e-05, "loss": 0.587, "step": 5102 }, { "epoch": 0.1567597456455626, "grad_norm": 0.3141416609287262, "learning_rate": 1.970924021536733e-05, "loss": 0.5388, "step": 5103 }, { "epoch": 0.15679046478051178, "grad_norm": 0.35077422857284546, "learning_rate": 1.9709124513304842e-05, "loss": 0.6054, "step": 5104 }, { "epoch": 0.15682118391546093, "grad_norm": 0.3118748664855957, "learning_rate": 1.9709008788566016e-05, "loss": 0.5744, "step": 5105 }, { "epoch": 0.1568519030504101, "grad_norm": 0.3269415497779846, "learning_rate": 1.9708893041151125e-05, "loss": 0.5567, "step": 5106 }, { "epoch": 0.15688262218535926, "grad_norm": 0.3921496868133545, "learning_rate": 1.9708777271060434e-05, "loss": 0.5777, "step": 5107 }, { "epoch": 0.15691334132030843, "grad_norm": 0.3422585129737854, "learning_rate": 1.9708661478294216e-05, "loss": 0.5412, "step": 5108 }, { "epoch": 0.15694406045525758, "grad_norm": 0.3489620089530945, "learning_rate": 1.970854566285274e-05, "loss": 0.5813, "step": 5109 }, { "epoch": 0.15697477959020675, "grad_norm": 0.3350701332092285, "learning_rate": 1.970842982473628e-05, "loss": 0.6031, "step": 5110 }, { "epoch": 0.1570054987251559, "grad_norm": 0.36141687631607056, "learning_rate": 1.97083139639451e-05, "loss": 0.6232, "step": 5111 }, { "epoch": 0.15703621786010505, "grad_norm": 0.35578489303588867, "learning_rate": 1.970819808047948e-05, "loss": 0.6217, "step": 5112 }, { "epoch": 0.15706693699505422, "grad_norm": 0.3484112620353699, "learning_rate": 1.970808217433968e-05, "loss": 0.6598, "step": 5113 }, { "epoch": 0.15709765613000337, "grad_norm": 0.3109627664089203, "learning_rate": 1.970796624552598e-05, "loss": 0.5889, "step": 5114 }, { "epoch": 0.15712837526495255, "grad_norm": 0.3086998462677002, "learning_rate": 1.9707850294038646e-05, "loss": 0.6138, "step": 5115 }, { "epoch": 0.1571590943999017, "grad_norm": 0.36381208896636963, "learning_rate": 1.9707734319877948e-05, "loss": 0.6587, "step": 5116 }, { "epoch": 0.15718981353485087, "grad_norm": 0.36470386385917664, "learning_rate": 1.9707618323044163e-05, "loss": 0.6568, "step": 5117 }, { "epoch": 0.15722053266980002, "grad_norm": 0.3373723030090332, "learning_rate": 1.9707502303537553e-05, "loss": 0.4709, "step": 5118 }, { "epoch": 0.15725125180474916, "grad_norm": 0.33749860525131226, "learning_rate": 1.9707386261358394e-05, "loss": 0.5927, "step": 5119 }, { "epoch": 0.15728197093969834, "grad_norm": 1.1777713298797607, "learning_rate": 1.9707270196506956e-05, "loss": 0.6035, "step": 5120 }, { "epoch": 0.1573126900746475, "grad_norm": 0.29923778772354126, "learning_rate": 1.9707154108983512e-05, "loss": 0.5358, "step": 5121 }, { "epoch": 0.15734340920959666, "grad_norm": 0.30905428528785706, "learning_rate": 1.9707037998788333e-05, "loss": 0.5973, "step": 5122 }, { "epoch": 0.1573741283445458, "grad_norm": 0.5176726579666138, "learning_rate": 1.9706921865921687e-05, "loss": 0.5694, "step": 5123 }, { "epoch": 0.15740484747949499, "grad_norm": 0.3763086795806885, "learning_rate": 1.9706805710383845e-05, "loss": 0.659, "step": 5124 }, { "epoch": 0.15743556661444413, "grad_norm": 0.32315120100975037, "learning_rate": 1.9706689532175085e-05, "loss": 0.5902, "step": 5125 }, { "epoch": 0.1574662857493933, "grad_norm": 0.3604733943939209, "learning_rate": 1.9706573331295668e-05, "loss": 0.6004, "step": 5126 }, { "epoch": 0.15749700488434246, "grad_norm": 0.33654603362083435, "learning_rate": 1.9706457107745875e-05, "loss": 0.5894, "step": 5127 }, { "epoch": 0.1575277240192916, "grad_norm": 0.31940239667892456, "learning_rate": 1.9706340861525973e-05, "loss": 0.6127, "step": 5128 }, { "epoch": 0.15755844315424078, "grad_norm": 0.3555581867694855, "learning_rate": 1.9706224592636234e-05, "loss": 0.5403, "step": 5129 }, { "epoch": 0.15758916228918993, "grad_norm": 0.3392537236213684, "learning_rate": 1.9706108301076932e-05, "loss": 0.6875, "step": 5130 }, { "epoch": 0.1576198814241391, "grad_norm": 0.3268117308616638, "learning_rate": 1.970599198684833e-05, "loss": 0.6422, "step": 5131 }, { "epoch": 0.15765060055908825, "grad_norm": 0.3811015486717224, "learning_rate": 1.970587564995071e-05, "loss": 0.5602, "step": 5132 }, { "epoch": 0.15768131969403743, "grad_norm": 0.37915781140327454, "learning_rate": 1.970575929038434e-05, "loss": 0.5601, "step": 5133 }, { "epoch": 0.15771203882898657, "grad_norm": 0.3932828903198242, "learning_rate": 1.9705642908149493e-05, "loss": 0.6335, "step": 5134 }, { "epoch": 0.15774275796393575, "grad_norm": 0.3161843717098236, "learning_rate": 1.9705526503246432e-05, "loss": 0.627, "step": 5135 }, { "epoch": 0.1577734770988849, "grad_norm": 0.3467511236667633, "learning_rate": 1.970541007567544e-05, "loss": 0.6328, "step": 5136 }, { "epoch": 0.15780419623383404, "grad_norm": 0.34904566407203674, "learning_rate": 1.970529362543679e-05, "loss": 0.6308, "step": 5137 }, { "epoch": 0.15783491536878322, "grad_norm": 0.31158873438835144, "learning_rate": 1.9705177152530745e-05, "loss": 0.5902, "step": 5138 }, { "epoch": 0.15786563450373237, "grad_norm": 0.3390234410762787, "learning_rate": 1.9705060656957578e-05, "loss": 0.6838, "step": 5139 }, { "epoch": 0.15789635363868154, "grad_norm": 0.34048932790756226, "learning_rate": 1.9704944138717565e-05, "loss": 0.6215, "step": 5140 }, { "epoch": 0.1579270727736307, "grad_norm": 0.30955857038497925, "learning_rate": 1.9704827597810977e-05, "loss": 0.635, "step": 5141 }, { "epoch": 0.15795779190857986, "grad_norm": 0.36451244354248047, "learning_rate": 1.970471103423809e-05, "loss": 0.6074, "step": 5142 }, { "epoch": 0.157988511043529, "grad_norm": 0.5044623017311096, "learning_rate": 1.970459444799917e-05, "loss": 0.6485, "step": 5143 }, { "epoch": 0.1580192301784782, "grad_norm": 0.3349839150905609, "learning_rate": 1.9704477839094495e-05, "loss": 0.5895, "step": 5144 }, { "epoch": 0.15804994931342733, "grad_norm": 0.3308391273021698, "learning_rate": 1.970436120752433e-05, "loss": 0.6138, "step": 5145 }, { "epoch": 0.15808066844837648, "grad_norm": 0.3300028443336487, "learning_rate": 1.9704244553288952e-05, "loss": 0.6453, "step": 5146 }, { "epoch": 0.15811138758332566, "grad_norm": 0.3379002809524536, "learning_rate": 1.9704127876388636e-05, "loss": 0.5822, "step": 5147 }, { "epoch": 0.1581421067182748, "grad_norm": 0.39057376980781555, "learning_rate": 1.970401117682365e-05, "loss": 0.651, "step": 5148 }, { "epoch": 0.15817282585322398, "grad_norm": 0.3065847158432007, "learning_rate": 1.9703894454594268e-05, "loss": 0.5743, "step": 5149 }, { "epoch": 0.15820354498817313, "grad_norm": 0.3382980227470398, "learning_rate": 1.9703777709700764e-05, "loss": 0.5844, "step": 5150 }, { "epoch": 0.1582342641231223, "grad_norm": 0.36961451172828674, "learning_rate": 1.9703660942143407e-05, "loss": 0.6351, "step": 5151 }, { "epoch": 0.15826498325807145, "grad_norm": 0.3272973597049713, "learning_rate": 1.9703544151922473e-05, "loss": 0.5643, "step": 5152 }, { "epoch": 0.15829570239302063, "grad_norm": 0.3178774118423462, "learning_rate": 1.9703427339038238e-05, "loss": 0.5442, "step": 5153 }, { "epoch": 0.15832642152796977, "grad_norm": 0.33646345138549805, "learning_rate": 1.9703310503490965e-05, "loss": 0.5512, "step": 5154 }, { "epoch": 0.15835714066291892, "grad_norm": 0.3227737247943878, "learning_rate": 1.9703193645280936e-05, "loss": 0.5735, "step": 5155 }, { "epoch": 0.1583878597978681, "grad_norm": 0.357884019613266, "learning_rate": 1.970307676440842e-05, "loss": 0.5542, "step": 5156 }, { "epoch": 0.15841857893281724, "grad_norm": 0.35693418979644775, "learning_rate": 1.9702959860873688e-05, "loss": 0.6174, "step": 5157 }, { "epoch": 0.15844929806776642, "grad_norm": 0.30237433314323425, "learning_rate": 1.9702842934677017e-05, "loss": 0.6371, "step": 5158 }, { "epoch": 0.15848001720271557, "grad_norm": 0.34544384479522705, "learning_rate": 1.970272598581868e-05, "loss": 0.566, "step": 5159 }, { "epoch": 0.15851073633766474, "grad_norm": 0.5389071702957153, "learning_rate": 1.970260901429895e-05, "loss": 0.6852, "step": 5160 }, { "epoch": 0.1585414554726139, "grad_norm": 0.35351991653442383, "learning_rate": 1.9702492020118098e-05, "loss": 0.597, "step": 5161 }, { "epoch": 0.15857217460756304, "grad_norm": 0.2935900390148163, "learning_rate": 1.9702375003276396e-05, "loss": 0.5487, "step": 5162 }, { "epoch": 0.1586028937425122, "grad_norm": 0.3843997120857239, "learning_rate": 1.9702257963774123e-05, "loss": 0.5173, "step": 5163 }, { "epoch": 0.15863361287746136, "grad_norm": 0.3258801996707916, "learning_rate": 1.9702140901611547e-05, "loss": 0.5648, "step": 5164 }, { "epoch": 0.15866433201241054, "grad_norm": 0.3837225139141083, "learning_rate": 1.9702023816788943e-05, "loss": 0.5596, "step": 5165 }, { "epoch": 0.15869505114735968, "grad_norm": 0.33142024278640747, "learning_rate": 1.9701906709306584e-05, "loss": 0.6292, "step": 5166 }, { "epoch": 0.15872577028230886, "grad_norm": 0.34624502062797546, "learning_rate": 1.9701789579164746e-05, "loss": 0.6918, "step": 5167 }, { "epoch": 0.158756489417258, "grad_norm": 0.3280618190765381, "learning_rate": 1.9701672426363702e-05, "loss": 0.5754, "step": 5168 }, { "epoch": 0.15878720855220718, "grad_norm": 0.34918448328971863, "learning_rate": 1.9701555250903724e-05, "loss": 0.6759, "step": 5169 }, { "epoch": 0.15881792768715633, "grad_norm": 0.3929654061794281, "learning_rate": 1.9701438052785082e-05, "loss": 0.5321, "step": 5170 }, { "epoch": 0.15884864682210548, "grad_norm": 0.3324804902076721, "learning_rate": 1.970132083200806e-05, "loss": 0.6444, "step": 5171 }, { "epoch": 0.15887936595705465, "grad_norm": 0.32850444316864014, "learning_rate": 1.970120358857292e-05, "loss": 0.594, "step": 5172 }, { "epoch": 0.1589100850920038, "grad_norm": 0.347822904586792, "learning_rate": 1.9701086322479943e-05, "loss": 0.7273, "step": 5173 }, { "epoch": 0.15894080422695298, "grad_norm": 0.3416129946708679, "learning_rate": 1.9700969033729406e-05, "loss": 0.5489, "step": 5174 }, { "epoch": 0.15897152336190212, "grad_norm": 0.3403720557689667, "learning_rate": 1.970085172232157e-05, "loss": 0.5663, "step": 5175 }, { "epoch": 0.1590022424968513, "grad_norm": 0.3209337592124939, "learning_rate": 1.9700734388256727e-05, "loss": 0.5954, "step": 5176 }, { "epoch": 0.15903296163180045, "grad_norm": 0.3270992934703827, "learning_rate": 1.9700617031535134e-05, "loss": 0.5603, "step": 5177 }, { "epoch": 0.15906368076674962, "grad_norm": 0.31808924674987793, "learning_rate": 1.9700499652157076e-05, "loss": 0.6405, "step": 5178 }, { "epoch": 0.15909439990169877, "grad_norm": 0.33045193552970886, "learning_rate": 1.970038225012282e-05, "loss": 0.5658, "step": 5179 }, { "epoch": 0.15912511903664792, "grad_norm": 0.3482549488544464, "learning_rate": 1.9700264825432648e-05, "loss": 0.6767, "step": 5180 }, { "epoch": 0.1591558381715971, "grad_norm": 0.4217830002307892, "learning_rate": 1.970014737808683e-05, "loss": 0.6136, "step": 5181 }, { "epoch": 0.15918655730654624, "grad_norm": 0.3292510211467743, "learning_rate": 1.970002990808564e-05, "loss": 0.501, "step": 5182 }, { "epoch": 0.15921727644149541, "grad_norm": 0.36666372418403625, "learning_rate": 1.969991241542935e-05, "loss": 0.538, "step": 5183 }, { "epoch": 0.15924799557644456, "grad_norm": 0.3636276423931122, "learning_rate": 1.9699794900118238e-05, "loss": 0.6474, "step": 5184 }, { "epoch": 0.15927871471139374, "grad_norm": 0.344793438911438, "learning_rate": 1.969967736215258e-05, "loss": 0.6585, "step": 5185 }, { "epoch": 0.15930943384634288, "grad_norm": 0.33774876594543457, "learning_rate": 1.9699559801532643e-05, "loss": 0.7183, "step": 5186 }, { "epoch": 0.15934015298129206, "grad_norm": 0.37210583686828613, "learning_rate": 1.969944221825871e-05, "loss": 0.5378, "step": 5187 }, { "epoch": 0.1593708721162412, "grad_norm": 0.3302317261695862, "learning_rate": 1.9699324612331053e-05, "loss": 0.5836, "step": 5188 }, { "epoch": 0.15940159125119036, "grad_norm": 0.32586872577667236, "learning_rate": 1.9699206983749946e-05, "loss": 0.5532, "step": 5189 }, { "epoch": 0.15943231038613953, "grad_norm": 0.32500937581062317, "learning_rate": 1.969908933251566e-05, "loss": 0.6612, "step": 5190 }, { "epoch": 0.15946302952108868, "grad_norm": 0.3125622570514679, "learning_rate": 1.9698971658628476e-05, "loss": 0.543, "step": 5191 }, { "epoch": 0.15949374865603785, "grad_norm": 0.30098849534988403, "learning_rate": 1.969885396208867e-05, "loss": 0.5245, "step": 5192 }, { "epoch": 0.159524467790987, "grad_norm": 0.35671812295913696, "learning_rate": 1.9698736242896506e-05, "loss": 0.6387, "step": 5193 }, { "epoch": 0.15955518692593618, "grad_norm": 0.3325496017932892, "learning_rate": 1.969861850105227e-05, "loss": 0.647, "step": 5194 }, { "epoch": 0.15958590606088532, "grad_norm": 0.37960296869277954, "learning_rate": 1.9698500736556237e-05, "loss": 0.638, "step": 5195 }, { "epoch": 0.15961662519583447, "grad_norm": 0.37326857447624207, "learning_rate": 1.969838294940867e-05, "loss": 0.6183, "step": 5196 }, { "epoch": 0.15964734433078365, "grad_norm": 0.3570660650730133, "learning_rate": 1.969826513960986e-05, "loss": 0.5941, "step": 5197 }, { "epoch": 0.1596780634657328, "grad_norm": 0.35007134079933167, "learning_rate": 1.9698147307160068e-05, "loss": 0.682, "step": 5198 }, { "epoch": 0.15970878260068197, "grad_norm": 0.3070717751979828, "learning_rate": 1.9698029452059583e-05, "loss": 0.6004, "step": 5199 }, { "epoch": 0.15973950173563112, "grad_norm": 0.3476990759372711, "learning_rate": 1.9697911574308667e-05, "loss": 0.67, "step": 5200 }, { "epoch": 0.1597702208705803, "grad_norm": 0.31259071826934814, "learning_rate": 1.9697793673907603e-05, "loss": 0.6478, "step": 5201 }, { "epoch": 0.15980094000552944, "grad_norm": 0.3569611608982086, "learning_rate": 1.9697675750856664e-05, "loss": 0.6071, "step": 5202 }, { "epoch": 0.15983165914047862, "grad_norm": 0.3261544704437256, "learning_rate": 1.9697557805156128e-05, "loss": 0.5586, "step": 5203 }, { "epoch": 0.15986237827542776, "grad_norm": 0.3263084590435028, "learning_rate": 1.9697439836806267e-05, "loss": 0.5498, "step": 5204 }, { "epoch": 0.1598930974103769, "grad_norm": 0.37663936614990234, "learning_rate": 1.9697321845807358e-05, "loss": 0.6367, "step": 5205 }, { "epoch": 0.1599238165453261, "grad_norm": 0.3402377963066101, "learning_rate": 1.9697203832159675e-05, "loss": 0.5698, "step": 5206 }, { "epoch": 0.15995453568027523, "grad_norm": 0.3652283251285553, "learning_rate": 1.9697085795863494e-05, "loss": 0.6567, "step": 5207 }, { "epoch": 0.1599852548152244, "grad_norm": 0.3536292314529419, "learning_rate": 1.9696967736919095e-05, "loss": 0.6023, "step": 5208 }, { "epoch": 0.16001597395017356, "grad_norm": 0.3380635976791382, "learning_rate": 1.9696849655326753e-05, "loss": 0.5804, "step": 5209 }, { "epoch": 0.16004669308512273, "grad_norm": 0.32408276200294495, "learning_rate": 1.969673155108674e-05, "loss": 0.5797, "step": 5210 }, { "epoch": 0.16007741222007188, "grad_norm": 0.40607455372810364, "learning_rate": 1.969661342419933e-05, "loss": 0.6235, "step": 5211 }, { "epoch": 0.16010813135502105, "grad_norm": 0.31204289197921753, "learning_rate": 1.9696495274664805e-05, "loss": 0.5893, "step": 5212 }, { "epoch": 0.1601388504899702, "grad_norm": 0.31977295875549316, "learning_rate": 1.9696377102483435e-05, "loss": 0.6201, "step": 5213 }, { "epoch": 0.16016956962491935, "grad_norm": 0.40177756547927856, "learning_rate": 1.96962589076555e-05, "loss": 0.6806, "step": 5214 }, { "epoch": 0.16020028875986853, "grad_norm": 0.3245067000389099, "learning_rate": 1.969614069018128e-05, "loss": 0.5598, "step": 5215 }, { "epoch": 0.16023100789481767, "grad_norm": 0.3404003083705902, "learning_rate": 1.9696022450061042e-05, "loss": 0.5933, "step": 5216 }, { "epoch": 0.16026172702976685, "grad_norm": 0.36772772669792175, "learning_rate": 1.9695904187295065e-05, "loss": 0.6136, "step": 5217 }, { "epoch": 0.160292446164716, "grad_norm": 0.3485218286514282, "learning_rate": 1.969578590188363e-05, "loss": 0.6708, "step": 5218 }, { "epoch": 0.16032316529966517, "grad_norm": 0.34387528896331787, "learning_rate": 1.9695667593827008e-05, "loss": 0.5947, "step": 5219 }, { "epoch": 0.16035388443461432, "grad_norm": 0.31417205929756165, "learning_rate": 1.9695549263125478e-05, "loss": 0.5538, "step": 5220 }, { "epoch": 0.1603846035695635, "grad_norm": 0.3125837445259094, "learning_rate": 1.9695430909779315e-05, "loss": 0.5514, "step": 5221 }, { "epoch": 0.16041532270451264, "grad_norm": 0.31472256779670715, "learning_rate": 1.9695312533788797e-05, "loss": 0.6102, "step": 5222 }, { "epoch": 0.1604460418394618, "grad_norm": 0.3471154272556305, "learning_rate": 1.9695194135154198e-05, "loss": 0.5869, "step": 5223 }, { "epoch": 0.16047676097441096, "grad_norm": 0.3256840705871582, "learning_rate": 1.96950757138758e-05, "loss": 0.5688, "step": 5224 }, { "epoch": 0.1605074801093601, "grad_norm": 0.40161705017089844, "learning_rate": 1.969495726995387e-05, "loss": 0.5926, "step": 5225 }, { "epoch": 0.1605381992443093, "grad_norm": 0.31321895122528076, "learning_rate": 1.969483880338869e-05, "loss": 0.5618, "step": 5226 }, { "epoch": 0.16056891837925844, "grad_norm": 0.3343587815761566, "learning_rate": 1.9694720314180544e-05, "loss": 0.587, "step": 5227 }, { "epoch": 0.1605996375142076, "grad_norm": 0.3324110507965088, "learning_rate": 1.9694601802329695e-05, "loss": 0.5383, "step": 5228 }, { "epoch": 0.16063035664915676, "grad_norm": 0.3130726218223572, "learning_rate": 1.969448326783643e-05, "loss": 0.5457, "step": 5229 }, { "epoch": 0.16066107578410593, "grad_norm": 0.3425898253917694, "learning_rate": 1.9694364710701016e-05, "loss": 0.6507, "step": 5230 }, { "epoch": 0.16069179491905508, "grad_norm": 0.3453551232814789, "learning_rate": 1.9694246130923746e-05, "loss": 0.6408, "step": 5231 }, { "epoch": 0.16072251405400423, "grad_norm": 0.32624587416648865, "learning_rate": 1.969412752850488e-05, "loss": 0.6749, "step": 5232 }, { "epoch": 0.1607532331889534, "grad_norm": 0.328003466129303, "learning_rate": 1.9694008903444705e-05, "loss": 0.6267, "step": 5233 }, { "epoch": 0.16078395232390255, "grad_norm": 0.3431280553340912, "learning_rate": 1.9693890255743493e-05, "loss": 0.5572, "step": 5234 }, { "epoch": 0.16081467145885173, "grad_norm": 0.3098984956741333, "learning_rate": 1.9693771585401524e-05, "loss": 0.6087, "step": 5235 }, { "epoch": 0.16084539059380087, "grad_norm": 0.333173930644989, "learning_rate": 1.9693652892419076e-05, "loss": 0.6089, "step": 5236 }, { "epoch": 0.16087610972875005, "grad_norm": 0.32636842131614685, "learning_rate": 1.9693534176796423e-05, "loss": 0.5653, "step": 5237 }, { "epoch": 0.1609068288636992, "grad_norm": 0.4108021557331085, "learning_rate": 1.9693415438533844e-05, "loss": 0.5422, "step": 5238 }, { "epoch": 0.16093754799864834, "grad_norm": 0.3470809757709503, "learning_rate": 1.969329667763162e-05, "loss": 0.6326, "step": 5239 }, { "epoch": 0.16096826713359752, "grad_norm": 0.3575906753540039, "learning_rate": 1.9693177894090022e-05, "loss": 0.559, "step": 5240 }, { "epoch": 0.16099898626854667, "grad_norm": 0.3617265820503235, "learning_rate": 1.969305908790933e-05, "loss": 0.5868, "step": 5241 }, { "epoch": 0.16102970540349584, "grad_norm": 0.3126714825630188, "learning_rate": 1.969294025908982e-05, "loss": 0.5321, "step": 5242 }, { "epoch": 0.161060424538445, "grad_norm": 0.34919944405555725, "learning_rate": 1.969282140763177e-05, "loss": 0.5749, "step": 5243 }, { "epoch": 0.16109114367339417, "grad_norm": 0.30407243967056274, "learning_rate": 1.9692702533535463e-05, "loss": 0.5066, "step": 5244 }, { "epoch": 0.1611218628083433, "grad_norm": 0.33554431796073914, "learning_rate": 1.9692583636801167e-05, "loss": 0.6208, "step": 5245 }, { "epoch": 0.1611525819432925, "grad_norm": 0.3609890043735504, "learning_rate": 1.969246471742917e-05, "loss": 0.5412, "step": 5246 }, { "epoch": 0.16118330107824164, "grad_norm": 0.37021157145500183, "learning_rate": 1.9692345775419745e-05, "loss": 0.6252, "step": 5247 }, { "epoch": 0.16121402021319078, "grad_norm": 0.32604336738586426, "learning_rate": 1.9692226810773166e-05, "loss": 0.6503, "step": 5248 }, { "epoch": 0.16124473934813996, "grad_norm": 0.31392809748649597, "learning_rate": 1.9692107823489714e-05, "loss": 0.5369, "step": 5249 }, { "epoch": 0.1612754584830891, "grad_norm": 0.3235829472541809, "learning_rate": 1.969198881356967e-05, "loss": 0.5472, "step": 5250 }, { "epoch": 0.16130617761803828, "grad_norm": 0.34102678298950195, "learning_rate": 1.9691869781013308e-05, "loss": 0.6374, "step": 5251 }, { "epoch": 0.16133689675298743, "grad_norm": 0.3147267997264862, "learning_rate": 1.969175072582091e-05, "loss": 0.6006, "step": 5252 }, { "epoch": 0.1613676158879366, "grad_norm": 0.3273099362850189, "learning_rate": 1.969163164799275e-05, "loss": 0.6422, "step": 5253 }, { "epoch": 0.16139833502288575, "grad_norm": 0.37581002712249756, "learning_rate": 1.9691512547529106e-05, "loss": 0.6048, "step": 5254 }, { "epoch": 0.16142905415783493, "grad_norm": 0.3420572578907013, "learning_rate": 1.9691393424430256e-05, "loss": 0.5918, "step": 5255 }, { "epoch": 0.16145977329278408, "grad_norm": 0.3604351580142975, "learning_rate": 1.969127427869648e-05, "loss": 0.5722, "step": 5256 }, { "epoch": 0.16149049242773322, "grad_norm": 0.39591073989868164, "learning_rate": 1.969115511032806e-05, "loss": 0.6721, "step": 5257 }, { "epoch": 0.1615212115626824, "grad_norm": 0.3498663604259491, "learning_rate": 1.969103591932527e-05, "loss": 0.5485, "step": 5258 }, { "epoch": 0.16155193069763155, "grad_norm": 0.35169386863708496, "learning_rate": 1.9690916705688385e-05, "loss": 0.5314, "step": 5259 }, { "epoch": 0.16158264983258072, "grad_norm": 0.3233790695667267, "learning_rate": 1.969079746941769e-05, "loss": 0.5882, "step": 5260 }, { "epoch": 0.16161336896752987, "grad_norm": 0.3261513411998749, "learning_rate": 1.9690678210513462e-05, "loss": 0.5039, "step": 5261 }, { "epoch": 0.16164408810247904, "grad_norm": 0.32704079151153564, "learning_rate": 1.9690558928975974e-05, "loss": 0.5915, "step": 5262 }, { "epoch": 0.1616748072374282, "grad_norm": 0.321974515914917, "learning_rate": 1.9690439624805512e-05, "loss": 0.6081, "step": 5263 }, { "epoch": 0.16170552637237737, "grad_norm": 0.35222843289375305, "learning_rate": 1.969032029800235e-05, "loss": 0.5861, "step": 5264 }, { "epoch": 0.16173624550732651, "grad_norm": 0.3516051173210144, "learning_rate": 1.969020094856677e-05, "loss": 0.6016, "step": 5265 }, { "epoch": 0.16176696464227566, "grad_norm": 0.39189010858535767, "learning_rate": 1.9690081576499048e-05, "loss": 0.5806, "step": 5266 }, { "epoch": 0.16179768377722484, "grad_norm": 0.33755064010620117, "learning_rate": 1.9689962181799463e-05, "loss": 0.577, "step": 5267 }, { "epoch": 0.16182840291217399, "grad_norm": 0.3604521155357361, "learning_rate": 1.9689842764468293e-05, "loss": 0.651, "step": 5268 }, { "epoch": 0.16185912204712316, "grad_norm": 0.3424067497253418, "learning_rate": 1.9689723324505822e-05, "loss": 0.5492, "step": 5269 }, { "epoch": 0.1618898411820723, "grad_norm": 0.357449471950531, "learning_rate": 1.9689603861912325e-05, "loss": 0.5656, "step": 5270 }, { "epoch": 0.16192056031702148, "grad_norm": 0.3356103003025055, "learning_rate": 1.968948437668808e-05, "loss": 0.6312, "step": 5271 }, { "epoch": 0.16195127945197063, "grad_norm": 0.3489040434360504, "learning_rate": 1.9689364868833366e-05, "loss": 0.6335, "step": 5272 }, { "epoch": 0.16198199858691978, "grad_norm": 0.3454354405403137, "learning_rate": 1.9689245338348467e-05, "loss": 0.5801, "step": 5273 }, { "epoch": 0.16201271772186895, "grad_norm": 0.6527231931686401, "learning_rate": 1.9689125785233657e-05, "loss": 0.6563, "step": 5274 }, { "epoch": 0.1620434368568181, "grad_norm": 0.35746994614601135, "learning_rate": 1.968900620948922e-05, "loss": 0.6738, "step": 5275 }, { "epoch": 0.16207415599176728, "grad_norm": 0.3397464156150818, "learning_rate": 1.968888661111543e-05, "loss": 0.6318, "step": 5276 }, { "epoch": 0.16210487512671642, "grad_norm": 0.3128187656402588, "learning_rate": 1.9688766990112567e-05, "loss": 0.5682, "step": 5277 }, { "epoch": 0.1621355942616656, "grad_norm": 0.3579995036125183, "learning_rate": 1.9688647346480913e-05, "loss": 0.6502, "step": 5278 }, { "epoch": 0.16216631339661475, "grad_norm": 0.31388410925865173, "learning_rate": 1.968852768022075e-05, "loss": 0.5634, "step": 5279 }, { "epoch": 0.16219703253156392, "grad_norm": 0.4248453378677368, "learning_rate": 1.968840799133235e-05, "loss": 0.5504, "step": 5280 }, { "epoch": 0.16222775166651307, "grad_norm": 0.393790602684021, "learning_rate": 1.9688288279815996e-05, "loss": 0.6433, "step": 5281 }, { "epoch": 0.16225847080146222, "grad_norm": 0.3591558039188385, "learning_rate": 1.9688168545671972e-05, "loss": 0.6283, "step": 5282 }, { "epoch": 0.1622891899364114, "grad_norm": 0.4068864583969116, "learning_rate": 1.9688048788900547e-05, "loss": 0.5808, "step": 5283 }, { "epoch": 0.16231990907136054, "grad_norm": 0.35760825872421265, "learning_rate": 1.9687929009502013e-05, "loss": 0.5443, "step": 5284 }, { "epoch": 0.16235062820630972, "grad_norm": 0.34620776772499084, "learning_rate": 1.9687809207476644e-05, "loss": 0.6164, "step": 5285 }, { "epoch": 0.16238134734125886, "grad_norm": 0.3223918378353119, "learning_rate": 1.968768938282472e-05, "loss": 0.6362, "step": 5286 }, { "epoch": 0.16241206647620804, "grad_norm": 0.3275776505470276, "learning_rate": 1.9687569535546518e-05, "loss": 0.6025, "step": 5287 }, { "epoch": 0.1624427856111572, "grad_norm": 0.36523669958114624, "learning_rate": 1.9687449665642324e-05, "loss": 0.6503, "step": 5288 }, { "epoch": 0.16247350474610636, "grad_norm": 0.3069891929626465, "learning_rate": 1.9687329773112412e-05, "loss": 0.5954, "step": 5289 }, { "epoch": 0.1625042238810555, "grad_norm": 0.3452269732952118, "learning_rate": 1.9687209857957068e-05, "loss": 0.6042, "step": 5290 }, { "epoch": 0.16253494301600466, "grad_norm": 0.33941105008125305, "learning_rate": 1.9687089920176566e-05, "loss": 0.5673, "step": 5291 }, { "epoch": 0.16256566215095383, "grad_norm": 0.3286815285682678, "learning_rate": 1.968696995977119e-05, "loss": 0.6643, "step": 5292 }, { "epoch": 0.16259638128590298, "grad_norm": 0.3441813886165619, "learning_rate": 1.9686849976741217e-05, "loss": 0.5481, "step": 5293 }, { "epoch": 0.16262710042085216, "grad_norm": 0.3541944622993469, "learning_rate": 1.9686729971086935e-05, "loss": 0.529, "step": 5294 }, { "epoch": 0.1626578195558013, "grad_norm": 0.52463299036026, "learning_rate": 1.9686609942808613e-05, "loss": 0.6299, "step": 5295 }, { "epoch": 0.16268853869075048, "grad_norm": 0.3742150068283081, "learning_rate": 1.968648989190654e-05, "loss": 0.6135, "step": 5296 }, { "epoch": 0.16271925782569963, "grad_norm": 0.3055885136127472, "learning_rate": 1.9686369818380994e-05, "loss": 0.5655, "step": 5297 }, { "epoch": 0.1627499769606488, "grad_norm": 0.4754386246204376, "learning_rate": 1.9686249722232253e-05, "loss": 0.6414, "step": 5298 }, { "epoch": 0.16278069609559795, "grad_norm": 0.3428970277309418, "learning_rate": 1.96861296034606e-05, "loss": 0.6113, "step": 5299 }, { "epoch": 0.1628114152305471, "grad_norm": 0.3269561529159546, "learning_rate": 1.9686009462066314e-05, "loss": 0.6106, "step": 5300 }, { "epoch": 0.16284213436549627, "grad_norm": 0.32629409432411194, "learning_rate": 1.9685889298049675e-05, "loss": 0.5803, "step": 5301 }, { "epoch": 0.16287285350044542, "grad_norm": 0.39829862117767334, "learning_rate": 1.9685769111410968e-05, "loss": 0.5574, "step": 5302 }, { "epoch": 0.1629035726353946, "grad_norm": 0.3965721130371094, "learning_rate": 1.968564890215047e-05, "loss": 0.5197, "step": 5303 }, { "epoch": 0.16293429177034374, "grad_norm": 0.3512623906135559, "learning_rate": 1.968552867026846e-05, "loss": 0.6745, "step": 5304 }, { "epoch": 0.16296501090529292, "grad_norm": 0.36277469992637634, "learning_rate": 1.9685408415765222e-05, "loss": 0.6313, "step": 5305 }, { "epoch": 0.16299573004024207, "grad_norm": 0.32855215668678284, "learning_rate": 1.968528813864104e-05, "loss": 0.5459, "step": 5306 }, { "epoch": 0.16302644917519124, "grad_norm": 0.4192831814289093, "learning_rate": 1.968516783889619e-05, "loss": 0.5718, "step": 5307 }, { "epoch": 0.1630571683101404, "grad_norm": 0.35166653990745544, "learning_rate": 1.9685047516530955e-05, "loss": 0.564, "step": 5308 }, { "epoch": 0.16308788744508954, "grad_norm": 0.3353976607322693, "learning_rate": 1.968492717154561e-05, "loss": 0.4826, "step": 5309 }, { "epoch": 0.1631186065800387, "grad_norm": 0.3151121735572815, "learning_rate": 1.9684806803940442e-05, "loss": 0.6614, "step": 5310 }, { "epoch": 0.16314932571498786, "grad_norm": 0.37897735834121704, "learning_rate": 1.9684686413715736e-05, "loss": 0.6307, "step": 5311 }, { "epoch": 0.16318004484993703, "grad_norm": 0.35527166724205017, "learning_rate": 1.9684566000871768e-05, "loss": 0.6225, "step": 5312 }, { "epoch": 0.16321076398488618, "grad_norm": 0.32356253266334534, "learning_rate": 1.9684445565408818e-05, "loss": 0.5813, "step": 5313 }, { "epoch": 0.16324148311983536, "grad_norm": 0.32807084918022156, "learning_rate": 1.9684325107327168e-05, "loss": 0.5394, "step": 5314 }, { "epoch": 0.1632722022547845, "grad_norm": 0.33022332191467285, "learning_rate": 1.96842046266271e-05, "loss": 0.5257, "step": 5315 }, { "epoch": 0.16330292138973365, "grad_norm": 0.34857019782066345, "learning_rate": 1.9684084123308898e-05, "loss": 0.6127, "step": 5316 }, { "epoch": 0.16333364052468283, "grad_norm": 0.36707398295402527, "learning_rate": 1.968396359737284e-05, "loss": 0.5762, "step": 5317 }, { "epoch": 0.16336435965963197, "grad_norm": 0.3125094175338745, "learning_rate": 1.9683843048819212e-05, "loss": 0.5887, "step": 5318 }, { "epoch": 0.16339507879458115, "grad_norm": 0.330702006816864, "learning_rate": 1.968372247764829e-05, "loss": 0.6494, "step": 5319 }, { "epoch": 0.1634257979295303, "grad_norm": 0.3455737829208374, "learning_rate": 1.9683601883860356e-05, "loss": 0.5347, "step": 5320 }, { "epoch": 0.16345651706447947, "grad_norm": 0.5314297676086426, "learning_rate": 1.968348126745569e-05, "loss": 0.562, "step": 5321 }, { "epoch": 0.16348723619942862, "grad_norm": 0.34794801473617554, "learning_rate": 1.9683360628434583e-05, "loss": 0.606, "step": 5322 }, { "epoch": 0.1635179553343778, "grad_norm": 0.3585067689418793, "learning_rate": 1.9683239966797312e-05, "loss": 0.6376, "step": 5323 }, { "epoch": 0.16354867446932694, "grad_norm": 0.36627197265625, "learning_rate": 1.9683119282544157e-05, "loss": 0.5998, "step": 5324 }, { "epoch": 0.1635793936042761, "grad_norm": 0.32843565940856934, "learning_rate": 1.9682998575675398e-05, "loss": 0.6057, "step": 5325 }, { "epoch": 0.16361011273922527, "grad_norm": 0.3344746530056, "learning_rate": 1.9682877846191323e-05, "loss": 0.6403, "step": 5326 }, { "epoch": 0.16364083187417441, "grad_norm": 0.3308068811893463, "learning_rate": 1.9682757094092207e-05, "loss": 0.6088, "step": 5327 }, { "epoch": 0.1636715510091236, "grad_norm": 0.3679450452327728, "learning_rate": 1.9682636319378335e-05, "loss": 0.6283, "step": 5328 }, { "epoch": 0.16370227014407274, "grad_norm": 0.3255029618740082, "learning_rate": 1.968251552204999e-05, "loss": 0.5368, "step": 5329 }, { "epoch": 0.1637329892790219, "grad_norm": 0.3226368725299835, "learning_rate": 1.9682394702107457e-05, "loss": 0.5748, "step": 5330 }, { "epoch": 0.16376370841397106, "grad_norm": 0.3381098806858063, "learning_rate": 1.9682273859551016e-05, "loss": 0.6592, "step": 5331 }, { "epoch": 0.16379442754892024, "grad_norm": 0.3346593677997589, "learning_rate": 1.9682152994380943e-05, "loss": 0.5996, "step": 5332 }, { "epoch": 0.16382514668386938, "grad_norm": 0.32243385910987854, "learning_rate": 1.9682032106597528e-05, "loss": 0.5643, "step": 5333 }, { "epoch": 0.16385586581881853, "grad_norm": 0.3202296197414398, "learning_rate": 1.9681911196201053e-05, "loss": 0.5786, "step": 5334 }, { "epoch": 0.1638865849537677, "grad_norm": 0.2858325242996216, "learning_rate": 1.9681790263191796e-05, "loss": 0.4511, "step": 5335 }, { "epoch": 0.16391730408871685, "grad_norm": 0.35394930839538574, "learning_rate": 1.968166930757004e-05, "loss": 0.5689, "step": 5336 }, { "epoch": 0.16394802322366603, "grad_norm": 0.3471541404724121, "learning_rate": 1.9681548329336072e-05, "loss": 0.6396, "step": 5337 }, { "epoch": 0.16397874235861518, "grad_norm": 0.331368625164032, "learning_rate": 1.968142732849017e-05, "loss": 0.5676, "step": 5338 }, { "epoch": 0.16400946149356435, "grad_norm": 0.3250707983970642, "learning_rate": 1.9681306305032618e-05, "loss": 0.583, "step": 5339 }, { "epoch": 0.1640401806285135, "grad_norm": 0.34573280811309814, "learning_rate": 1.96811852589637e-05, "loss": 0.6224, "step": 5340 }, { "epoch": 0.16407089976346267, "grad_norm": 0.345918744802475, "learning_rate": 1.9681064190283697e-05, "loss": 0.6119, "step": 5341 }, { "epoch": 0.16410161889841182, "grad_norm": 0.3374018669128418, "learning_rate": 1.9680943098992894e-05, "loss": 0.6389, "step": 5342 }, { "epoch": 0.16413233803336097, "grad_norm": 0.35697877407073975, "learning_rate": 1.9680821985091573e-05, "loss": 0.5927, "step": 5343 }, { "epoch": 0.16416305716831014, "grad_norm": 0.47392159700393677, "learning_rate": 1.9680700848580015e-05, "loss": 0.6313, "step": 5344 }, { "epoch": 0.1641937763032593, "grad_norm": 0.314201295375824, "learning_rate": 1.9680579689458502e-05, "loss": 0.5934, "step": 5345 }, { "epoch": 0.16422449543820847, "grad_norm": 0.3263356387615204, "learning_rate": 1.968045850772732e-05, "loss": 0.5391, "step": 5346 }, { "epoch": 0.16425521457315762, "grad_norm": 0.3759841024875641, "learning_rate": 1.968033730338675e-05, "loss": 0.6163, "step": 5347 }, { "epoch": 0.1642859337081068, "grad_norm": 0.3118003010749817, "learning_rate": 1.9680216076437082e-05, "loss": 0.514, "step": 5348 }, { "epoch": 0.16431665284305594, "grad_norm": 0.34171655774116516, "learning_rate": 1.968009482687859e-05, "loss": 0.6204, "step": 5349 }, { "epoch": 0.16434737197800509, "grad_norm": 0.43404650688171387, "learning_rate": 1.9679973554711558e-05, "loss": 0.7406, "step": 5350 }, { "epoch": 0.16437809111295426, "grad_norm": 0.33806777000427246, "learning_rate": 1.967985225993627e-05, "loss": 0.6496, "step": 5351 }, { "epoch": 0.1644088102479034, "grad_norm": 0.31538257002830505, "learning_rate": 1.9679730942553018e-05, "loss": 0.5972, "step": 5352 }, { "epoch": 0.16443952938285258, "grad_norm": 0.32359957695007324, "learning_rate": 1.9679609602562076e-05, "loss": 0.5445, "step": 5353 }, { "epoch": 0.16447024851780173, "grad_norm": 0.35439279675483704, "learning_rate": 1.967948823996373e-05, "loss": 0.6748, "step": 5354 }, { "epoch": 0.1645009676527509, "grad_norm": 0.35975921154022217, "learning_rate": 1.967936685475826e-05, "loss": 0.6578, "step": 5355 }, { "epoch": 0.16453168678770005, "grad_norm": 0.34926027059555054, "learning_rate": 1.9679245446945953e-05, "loss": 0.5568, "step": 5356 }, { "epoch": 0.16456240592264923, "grad_norm": 0.3480025827884674, "learning_rate": 1.9679124016527095e-05, "loss": 0.539, "step": 5357 }, { "epoch": 0.16459312505759838, "grad_norm": 0.33309102058410645, "learning_rate": 1.9679002563501964e-05, "loss": 0.5534, "step": 5358 }, { "epoch": 0.16462384419254752, "grad_norm": 0.36215001344680786, "learning_rate": 1.967888108787085e-05, "loss": 0.5564, "step": 5359 }, { "epoch": 0.1646545633274967, "grad_norm": 0.3058740198612213, "learning_rate": 1.9678759589634027e-05, "loss": 0.6158, "step": 5360 }, { "epoch": 0.16468528246244585, "grad_norm": 0.38918182253837585, "learning_rate": 1.9678638068791793e-05, "loss": 0.6584, "step": 5361 }, { "epoch": 0.16471600159739502, "grad_norm": 0.32340502738952637, "learning_rate": 1.9678516525344418e-05, "loss": 0.6106, "step": 5362 }, { "epoch": 0.16474672073234417, "grad_norm": 0.3745855689048767, "learning_rate": 1.9678394959292197e-05, "loss": 0.6757, "step": 5363 }, { "epoch": 0.16477743986729335, "grad_norm": 0.3551986515522003, "learning_rate": 1.9678273370635404e-05, "loss": 0.5826, "step": 5364 }, { "epoch": 0.1648081590022425, "grad_norm": 0.4287644624710083, "learning_rate": 1.967815175937433e-05, "loss": 0.6953, "step": 5365 }, { "epoch": 0.16483887813719167, "grad_norm": 0.3573298156261444, "learning_rate": 1.967803012550926e-05, "loss": 0.5906, "step": 5366 }, { "epoch": 0.16486959727214082, "grad_norm": 0.36167728900909424, "learning_rate": 1.9677908469040465e-05, "loss": 0.5747, "step": 5367 }, { "epoch": 0.16490031640708996, "grad_norm": 0.3276723325252533, "learning_rate": 1.9677786789968248e-05, "loss": 0.5837, "step": 5368 }, { "epoch": 0.16493103554203914, "grad_norm": 0.35728615522384644, "learning_rate": 1.967766508829288e-05, "loss": 0.6359, "step": 5369 }, { "epoch": 0.1649617546769883, "grad_norm": 0.3153449594974518, "learning_rate": 1.967754336401465e-05, "loss": 0.5975, "step": 5370 }, { "epoch": 0.16499247381193746, "grad_norm": 0.4196654260158539, "learning_rate": 1.967742161713384e-05, "loss": 0.6195, "step": 5371 }, { "epoch": 0.1650231929468866, "grad_norm": 0.36502978205680847, "learning_rate": 1.967729984765074e-05, "loss": 0.6329, "step": 5372 }, { "epoch": 0.16505391208183579, "grad_norm": 0.31954309344291687, "learning_rate": 1.9677178055565624e-05, "loss": 0.5561, "step": 5373 }, { "epoch": 0.16508463121678493, "grad_norm": 0.34217333793640137, "learning_rate": 1.967705624087879e-05, "loss": 0.6465, "step": 5374 }, { "epoch": 0.1651153503517341, "grad_norm": 0.4199885427951813, "learning_rate": 1.9676934403590512e-05, "loss": 0.4585, "step": 5375 }, { "epoch": 0.16514606948668326, "grad_norm": 0.33183538913726807, "learning_rate": 1.9676812543701074e-05, "loss": 0.6063, "step": 5376 }, { "epoch": 0.1651767886216324, "grad_norm": 0.3799321949481964, "learning_rate": 1.967669066121077e-05, "loss": 0.658, "step": 5377 }, { "epoch": 0.16520750775658158, "grad_norm": 0.3886784315109253, "learning_rate": 1.9676568756119878e-05, "loss": 0.6458, "step": 5378 }, { "epoch": 0.16523822689153073, "grad_norm": 0.33512598276138306, "learning_rate": 1.967644682842868e-05, "loss": 0.5215, "step": 5379 }, { "epoch": 0.1652689460264799, "grad_norm": 0.3505151867866516, "learning_rate": 1.967632487813747e-05, "loss": 0.6205, "step": 5380 }, { "epoch": 0.16529966516142905, "grad_norm": 0.3699437379837036, "learning_rate": 1.9676202905246525e-05, "loss": 0.6516, "step": 5381 }, { "epoch": 0.16533038429637822, "grad_norm": 0.3599522113800049, "learning_rate": 1.9676080909756135e-05, "loss": 0.6274, "step": 5382 }, { "epoch": 0.16536110343132737, "grad_norm": 0.33870628476142883, "learning_rate": 1.967595889166658e-05, "loss": 0.6828, "step": 5383 }, { "epoch": 0.16539182256627655, "grad_norm": 0.32093527913093567, "learning_rate": 1.9675836850978146e-05, "loss": 0.6441, "step": 5384 }, { "epoch": 0.1654225417012257, "grad_norm": 0.31124845147132874, "learning_rate": 1.967571478769112e-05, "loss": 0.5633, "step": 5385 }, { "epoch": 0.16545326083617484, "grad_norm": 0.3664240539073944, "learning_rate": 1.9675592701805787e-05, "loss": 0.6131, "step": 5386 }, { "epoch": 0.16548397997112402, "grad_norm": 0.2987099587917328, "learning_rate": 1.9675470593322432e-05, "loss": 0.4995, "step": 5387 }, { "epoch": 0.16551469910607317, "grad_norm": 0.3330497443675995, "learning_rate": 1.967534846224134e-05, "loss": 0.5884, "step": 5388 }, { "epoch": 0.16554541824102234, "grad_norm": 0.3859827518463135, "learning_rate": 1.9675226308562795e-05, "loss": 0.658, "step": 5389 }, { "epoch": 0.1655761373759715, "grad_norm": 0.3449532091617584, "learning_rate": 1.9675104132287083e-05, "loss": 0.6513, "step": 5390 }, { "epoch": 0.16560685651092066, "grad_norm": 0.3169204294681549, "learning_rate": 1.967498193341449e-05, "loss": 0.6196, "step": 5391 }, { "epoch": 0.1656375756458698, "grad_norm": 0.39059674739837646, "learning_rate": 1.96748597119453e-05, "loss": 0.6361, "step": 5392 }, { "epoch": 0.16566829478081896, "grad_norm": 0.3559875786304474, "learning_rate": 1.96747374678798e-05, "loss": 0.5599, "step": 5393 }, { "epoch": 0.16569901391576813, "grad_norm": 0.3604243993759155, "learning_rate": 1.9674615201218278e-05, "loss": 0.6343, "step": 5394 }, { "epoch": 0.16572973305071728, "grad_norm": 0.3353118300437927, "learning_rate": 1.9674492911961012e-05, "loss": 0.5582, "step": 5395 }, { "epoch": 0.16576045218566646, "grad_norm": 0.3450133800506592, "learning_rate": 1.967437060010829e-05, "loss": 0.6132, "step": 5396 }, { "epoch": 0.1657911713206156, "grad_norm": 0.48463043570518494, "learning_rate": 1.967424826566041e-05, "loss": 0.5874, "step": 5397 }, { "epoch": 0.16582189045556478, "grad_norm": 0.3199705183506012, "learning_rate": 1.9674125908617637e-05, "loss": 0.5652, "step": 5398 }, { "epoch": 0.16585260959051393, "grad_norm": 0.3407282531261444, "learning_rate": 1.9674003528980275e-05, "loss": 0.588, "step": 5399 }, { "epoch": 0.1658833287254631, "grad_norm": 0.3423719108104706, "learning_rate": 1.9673881126748597e-05, "loss": 0.5919, "step": 5400 }, { "epoch": 0.16591404786041225, "grad_norm": 0.32458770275115967, "learning_rate": 1.9673758701922894e-05, "loss": 0.5446, "step": 5401 }, { "epoch": 0.1659447669953614, "grad_norm": 0.3258867859840393, "learning_rate": 1.9673636254503456e-05, "loss": 0.5691, "step": 5402 }, { "epoch": 0.16597548613031057, "grad_norm": 0.31917309761047363, "learning_rate": 1.967351378449056e-05, "loss": 0.6108, "step": 5403 }, { "epoch": 0.16600620526525972, "grad_norm": 0.35296937823295593, "learning_rate": 1.9673391291884503e-05, "loss": 0.7027, "step": 5404 }, { "epoch": 0.1660369244002089, "grad_norm": 0.3144349455833435, "learning_rate": 1.9673268776685563e-05, "loss": 0.573, "step": 5405 }, { "epoch": 0.16606764353515804, "grad_norm": 0.3367253243923187, "learning_rate": 1.9673146238894024e-05, "loss": 0.5272, "step": 5406 }, { "epoch": 0.16609836267010722, "grad_norm": 0.3842020034790039, "learning_rate": 1.9673023678510178e-05, "loss": 0.6339, "step": 5407 }, { "epoch": 0.16612908180505637, "grad_norm": 0.3236764967441559, "learning_rate": 1.967290109553431e-05, "loss": 0.5413, "step": 5408 }, { "epoch": 0.16615980094000554, "grad_norm": 0.34252655506134033, "learning_rate": 1.9672778489966712e-05, "loss": 0.6174, "step": 5409 }, { "epoch": 0.1661905200749547, "grad_norm": 0.342131108045578, "learning_rate": 1.9672655861807657e-05, "loss": 0.5708, "step": 5410 }, { "epoch": 0.16622123920990384, "grad_norm": 0.3338090777397156, "learning_rate": 1.967253321105744e-05, "loss": 0.6129, "step": 5411 }, { "epoch": 0.166251958344853, "grad_norm": 0.41813674569129944, "learning_rate": 1.9672410537716348e-05, "loss": 0.6114, "step": 5412 }, { "epoch": 0.16628267747980216, "grad_norm": 0.3538258969783783, "learning_rate": 1.9672287841784662e-05, "loss": 0.6083, "step": 5413 }, { "epoch": 0.16631339661475134, "grad_norm": 0.3479083776473999, "learning_rate": 1.967216512326268e-05, "loss": 0.5931, "step": 5414 }, { "epoch": 0.16634411574970048, "grad_norm": 0.3640499711036682, "learning_rate": 1.9672042382150673e-05, "loss": 0.5552, "step": 5415 }, { "epoch": 0.16637483488464966, "grad_norm": 0.3335842490196228, "learning_rate": 1.967191961844894e-05, "loss": 0.5285, "step": 5416 }, { "epoch": 0.1664055540195988, "grad_norm": 0.3161357641220093, "learning_rate": 1.967179683215776e-05, "loss": 0.5748, "step": 5417 }, { "epoch": 0.16643627315454798, "grad_norm": 0.30205774307250977, "learning_rate": 1.9671674023277427e-05, "loss": 0.5408, "step": 5418 }, { "epoch": 0.16646699228949713, "grad_norm": 0.4041442275047302, "learning_rate": 1.9671551191808218e-05, "loss": 0.5877, "step": 5419 }, { "epoch": 0.16649771142444628, "grad_norm": 0.5493183732032776, "learning_rate": 1.967142833775043e-05, "loss": 0.6753, "step": 5420 }, { "epoch": 0.16652843055939545, "grad_norm": 0.32911431789398193, "learning_rate": 1.9671305461104343e-05, "loss": 0.6112, "step": 5421 }, { "epoch": 0.1665591496943446, "grad_norm": 0.32975608110427856, "learning_rate": 1.9671182561870252e-05, "loss": 0.5404, "step": 5422 }, { "epoch": 0.16658986882929377, "grad_norm": 0.3181585967540741, "learning_rate": 1.9671059640048432e-05, "loss": 0.6146, "step": 5423 }, { "epoch": 0.16662058796424292, "grad_norm": 0.35299283266067505, "learning_rate": 1.967093669563918e-05, "loss": 0.6335, "step": 5424 }, { "epoch": 0.1666513070991921, "grad_norm": 0.3515104055404663, "learning_rate": 1.967081372864278e-05, "loss": 0.5294, "step": 5425 }, { "epoch": 0.16668202623414125, "grad_norm": 0.3566954433917999, "learning_rate": 1.967069073905952e-05, "loss": 0.6426, "step": 5426 }, { "epoch": 0.1667127453690904, "grad_norm": 0.35962820053100586, "learning_rate": 1.9670567726889686e-05, "loss": 0.5502, "step": 5427 }, { "epoch": 0.16674346450403957, "grad_norm": 0.3539987802505493, "learning_rate": 1.9670444692133567e-05, "loss": 0.6813, "step": 5428 }, { "epoch": 0.16677418363898872, "grad_norm": 0.38511890172958374, "learning_rate": 1.967032163479145e-05, "loss": 0.6322, "step": 5429 }, { "epoch": 0.1668049027739379, "grad_norm": 0.40917596220970154, "learning_rate": 1.9670198554863614e-05, "loss": 0.5033, "step": 5430 }, { "epoch": 0.16683562190888704, "grad_norm": 0.32460641860961914, "learning_rate": 1.9670075452350358e-05, "loss": 0.6393, "step": 5431 }, { "epoch": 0.1668663410438362, "grad_norm": 0.3231023848056793, "learning_rate": 1.966995232725197e-05, "loss": 0.6322, "step": 5432 }, { "epoch": 0.16689706017878536, "grad_norm": 0.3566509187221527, "learning_rate": 1.9669829179568727e-05, "loss": 0.6558, "step": 5433 }, { "epoch": 0.16692777931373454, "grad_norm": 0.352836012840271, "learning_rate": 1.9669706009300925e-05, "loss": 0.4982, "step": 5434 }, { "epoch": 0.16695849844868368, "grad_norm": 0.3491121828556061, "learning_rate": 1.966958281644885e-05, "loss": 0.6232, "step": 5435 }, { "epoch": 0.16698921758363283, "grad_norm": 0.3234473168849945, "learning_rate": 1.9669459601012787e-05, "loss": 0.5554, "step": 5436 }, { "epoch": 0.167019936718582, "grad_norm": 0.338652640581131, "learning_rate": 1.9669336362993025e-05, "loss": 0.5528, "step": 5437 }, { "epoch": 0.16705065585353115, "grad_norm": 0.3754640221595764, "learning_rate": 1.9669213102389854e-05, "loss": 0.5635, "step": 5438 }, { "epoch": 0.16708137498848033, "grad_norm": 0.3180187940597534, "learning_rate": 1.9669089819203562e-05, "loss": 0.6251, "step": 5439 }, { "epoch": 0.16711209412342948, "grad_norm": 0.3795541822910309, "learning_rate": 1.9668966513434436e-05, "loss": 0.6047, "step": 5440 }, { "epoch": 0.16714281325837865, "grad_norm": 0.37644296884536743, "learning_rate": 1.966884318508276e-05, "loss": 0.5493, "step": 5441 }, { "epoch": 0.1671735323933278, "grad_norm": 0.3289998769760132, "learning_rate": 1.966871983414883e-05, "loss": 0.5862, "step": 5442 }, { "epoch": 0.16720425152827698, "grad_norm": 0.3375934958457947, "learning_rate": 1.966859646063292e-05, "loss": 0.5548, "step": 5443 }, { "epoch": 0.16723497066322612, "grad_norm": 0.3771961033344269, "learning_rate": 1.9668473064535338e-05, "loss": 0.5479, "step": 5444 }, { "epoch": 0.16726568979817527, "grad_norm": 0.3798579275608063, "learning_rate": 1.966834964585636e-05, "loss": 0.5935, "step": 5445 }, { "epoch": 0.16729640893312445, "grad_norm": 0.3980357050895691, "learning_rate": 1.9668226204596275e-05, "loss": 0.6286, "step": 5446 }, { "epoch": 0.1673271280680736, "grad_norm": 0.3453928232192993, "learning_rate": 1.966810274075537e-05, "loss": 0.6282, "step": 5447 }, { "epoch": 0.16735784720302277, "grad_norm": 0.33319106698036194, "learning_rate": 1.966797925433394e-05, "loss": 0.5813, "step": 5448 }, { "epoch": 0.16738856633797192, "grad_norm": 0.6843703985214233, "learning_rate": 1.9667855745332268e-05, "loss": 0.5798, "step": 5449 }, { "epoch": 0.1674192854729211, "grad_norm": 0.35616663098335266, "learning_rate": 1.966773221375064e-05, "loss": 0.6086, "step": 5450 }, { "epoch": 0.16745000460787024, "grad_norm": 0.396986722946167, "learning_rate": 1.9667608659589353e-05, "loss": 0.6329, "step": 5451 }, { "epoch": 0.16748072374281942, "grad_norm": 0.4249308407306671, "learning_rate": 1.966748508284869e-05, "loss": 0.6636, "step": 5452 }, { "epoch": 0.16751144287776856, "grad_norm": 0.33221033215522766, "learning_rate": 1.9667361483528938e-05, "loss": 0.5285, "step": 5453 }, { "epoch": 0.1675421620127177, "grad_norm": 0.3347570598125458, "learning_rate": 1.966723786163039e-05, "loss": 0.6131, "step": 5454 }, { "epoch": 0.16757288114766689, "grad_norm": 1.1101282835006714, "learning_rate": 1.966711421715333e-05, "loss": 0.6017, "step": 5455 }, { "epoch": 0.16760360028261603, "grad_norm": 0.30101072788238525, "learning_rate": 1.9666990550098052e-05, "loss": 0.4967, "step": 5456 }, { "epoch": 0.1676343194175652, "grad_norm": 0.3163009285926819, "learning_rate": 1.9666866860464843e-05, "loss": 0.5292, "step": 5457 }, { "epoch": 0.16766503855251436, "grad_norm": 0.3248298168182373, "learning_rate": 1.966674314825399e-05, "loss": 0.5991, "step": 5458 }, { "epoch": 0.16769575768746353, "grad_norm": 0.3347892463207245, "learning_rate": 1.9666619413465785e-05, "loss": 0.5758, "step": 5459 }, { "epoch": 0.16772647682241268, "grad_norm": 0.3290795385837555, "learning_rate": 1.966649565610051e-05, "loss": 0.6311, "step": 5460 }, { "epoch": 0.16775719595736185, "grad_norm": 0.34801584482192993, "learning_rate": 1.966637187615847e-05, "loss": 0.5713, "step": 5461 }, { "epoch": 0.167787915092311, "grad_norm": 0.34441784024238586, "learning_rate": 1.9666248073639936e-05, "loss": 0.6524, "step": 5462 }, { "epoch": 0.16781863422726015, "grad_norm": 0.3603041470050812, "learning_rate": 1.9666124248545203e-05, "loss": 0.6266, "step": 5463 }, { "epoch": 0.16784935336220932, "grad_norm": 0.36286452412605286, "learning_rate": 1.9666000400874562e-05, "loss": 0.5828, "step": 5464 }, { "epoch": 0.16788007249715847, "grad_norm": 0.4100978672504425, "learning_rate": 1.9665876530628308e-05, "loss": 0.5383, "step": 5465 }, { "epoch": 0.16791079163210765, "grad_norm": 0.4050084948539734, "learning_rate": 1.9665752637806718e-05, "loss": 0.5945, "step": 5466 }, { "epoch": 0.1679415107670568, "grad_norm": 0.4664899408817291, "learning_rate": 1.966562872241009e-05, "loss": 0.5991, "step": 5467 }, { "epoch": 0.16797222990200597, "grad_norm": 0.30774539709091187, "learning_rate": 1.966550478443871e-05, "loss": 0.6616, "step": 5468 }, { "epoch": 0.16800294903695512, "grad_norm": 0.36028748750686646, "learning_rate": 1.9665380823892873e-05, "loss": 0.5744, "step": 5469 }, { "epoch": 0.16803366817190427, "grad_norm": 0.34222137928009033, "learning_rate": 1.9665256840772858e-05, "loss": 0.5334, "step": 5470 }, { "epoch": 0.16806438730685344, "grad_norm": 0.3483627736568451, "learning_rate": 1.9665132835078964e-05, "loss": 0.5762, "step": 5471 }, { "epoch": 0.1680951064418026, "grad_norm": 0.3024025559425354, "learning_rate": 1.9665008806811477e-05, "loss": 0.569, "step": 5472 }, { "epoch": 0.16812582557675176, "grad_norm": 0.33766546845436096, "learning_rate": 1.9664884755970686e-05, "loss": 0.6068, "step": 5473 }, { "epoch": 0.1681565447117009, "grad_norm": 0.3804508447647095, "learning_rate": 1.9664760682556885e-05, "loss": 0.6683, "step": 5474 }, { "epoch": 0.1681872638466501, "grad_norm": 0.31862974166870117, "learning_rate": 1.9664636586570357e-05, "loss": 0.6039, "step": 5475 }, { "epoch": 0.16821798298159923, "grad_norm": 0.5267075896263123, "learning_rate": 1.9664512468011393e-05, "loss": 0.6237, "step": 5476 }, { "epoch": 0.1682487021165484, "grad_norm": 0.3678168058395386, "learning_rate": 1.9664388326880292e-05, "loss": 0.6489, "step": 5477 }, { "epoch": 0.16827942125149756, "grad_norm": 0.3613138198852539, "learning_rate": 1.9664264163177332e-05, "loss": 0.6148, "step": 5478 }, { "epoch": 0.1683101403864467, "grad_norm": 0.3144119679927826, "learning_rate": 1.966413997690281e-05, "loss": 0.6042, "step": 5479 }, { "epoch": 0.16834085952139588, "grad_norm": 0.3248876631259918, "learning_rate": 1.9664015768057013e-05, "loss": 0.6415, "step": 5480 }, { "epoch": 0.16837157865634503, "grad_norm": 0.3442743718624115, "learning_rate": 1.9663891536640233e-05, "loss": 0.6253, "step": 5481 }, { "epoch": 0.1684022977912942, "grad_norm": 0.3040180802345276, "learning_rate": 1.9663767282652756e-05, "loss": 0.5976, "step": 5482 }, { "epoch": 0.16843301692624335, "grad_norm": 0.34871551394462585, "learning_rate": 1.966364300609488e-05, "loss": 0.6093, "step": 5483 }, { "epoch": 0.16846373606119253, "grad_norm": 0.3716799020767212, "learning_rate": 1.966351870696689e-05, "loss": 0.6747, "step": 5484 }, { "epoch": 0.16849445519614167, "grad_norm": 0.3259848654270172, "learning_rate": 1.966339438526908e-05, "loss": 0.6017, "step": 5485 }, { "epoch": 0.16852517433109085, "grad_norm": 0.30608105659484863, "learning_rate": 1.966327004100173e-05, "loss": 0.5608, "step": 5486 }, { "epoch": 0.16855589346604, "grad_norm": 0.3442974090576172, "learning_rate": 1.9663145674165145e-05, "loss": 0.6228, "step": 5487 }, { "epoch": 0.16858661260098914, "grad_norm": 0.29958638548851013, "learning_rate": 1.9663021284759604e-05, "loss": 0.5074, "step": 5488 }, { "epoch": 0.16861733173593832, "grad_norm": 0.35637542605400085, "learning_rate": 1.9662896872785405e-05, "loss": 0.5675, "step": 5489 }, { "epoch": 0.16864805087088747, "grad_norm": 0.3653794825077057, "learning_rate": 1.9662772438242834e-05, "loss": 0.6587, "step": 5490 }, { "epoch": 0.16867877000583664, "grad_norm": 0.3165616989135742, "learning_rate": 1.966264798113218e-05, "loss": 0.6352, "step": 5491 }, { "epoch": 0.1687094891407858, "grad_norm": 0.3383413255214691, "learning_rate": 1.9662523501453743e-05, "loss": 0.6319, "step": 5492 }, { "epoch": 0.16874020827573497, "grad_norm": 0.3425038754940033, "learning_rate": 1.9662398999207805e-05, "loss": 0.623, "step": 5493 }, { "epoch": 0.1687709274106841, "grad_norm": 0.3209141790866852, "learning_rate": 1.9662274474394658e-05, "loss": 0.5514, "step": 5494 }, { "epoch": 0.1688016465456333, "grad_norm": 0.3829180598258972, "learning_rate": 1.9662149927014596e-05, "loss": 0.5182, "step": 5495 }, { "epoch": 0.16883236568058244, "grad_norm": 0.36745816469192505, "learning_rate": 1.9662025357067905e-05, "loss": 0.6362, "step": 5496 }, { "epoch": 0.16886308481553158, "grad_norm": 0.32287704944610596, "learning_rate": 1.966190076455488e-05, "loss": 0.5635, "step": 5497 }, { "epoch": 0.16889380395048076, "grad_norm": 0.38495224714279175, "learning_rate": 1.9661776149475816e-05, "loss": 0.6491, "step": 5498 }, { "epoch": 0.1689245230854299, "grad_norm": 0.38525599241256714, "learning_rate": 1.9661651511830993e-05, "loss": 0.6353, "step": 5499 }, { "epoch": 0.16895524222037908, "grad_norm": 0.3129827082157135, "learning_rate": 1.966152685162071e-05, "loss": 0.5633, "step": 5500 }, { "epoch": 0.16898596135532823, "grad_norm": 0.32940658926963806, "learning_rate": 1.9661402168845257e-05, "loss": 0.6109, "step": 5501 }, { "epoch": 0.1690166804902774, "grad_norm": 0.3468550741672516, "learning_rate": 1.9661277463504924e-05, "loss": 0.5997, "step": 5502 }, { "epoch": 0.16904739962522655, "grad_norm": 0.3326278626918793, "learning_rate": 1.9661152735600003e-05, "loss": 0.5902, "step": 5503 }, { "epoch": 0.1690781187601757, "grad_norm": 0.38214775919914246, "learning_rate": 1.9661027985130784e-05, "loss": 0.5965, "step": 5504 }, { "epoch": 0.16910883789512487, "grad_norm": 0.3150218427181244, "learning_rate": 1.9660903212097563e-05, "loss": 0.6574, "step": 5505 }, { "epoch": 0.16913955703007402, "grad_norm": 0.3302452266216278, "learning_rate": 1.9660778416500625e-05, "loss": 0.5942, "step": 5506 }, { "epoch": 0.1691702761650232, "grad_norm": 0.34165868163108826, "learning_rate": 1.9660653598340263e-05, "loss": 0.579, "step": 5507 }, { "epoch": 0.16920099529997235, "grad_norm": 0.36234378814697266, "learning_rate": 1.966052875761677e-05, "loss": 0.5981, "step": 5508 }, { "epoch": 0.16923171443492152, "grad_norm": 0.32809343934059143, "learning_rate": 1.966040389433044e-05, "loss": 0.5654, "step": 5509 }, { "epoch": 0.16926243356987067, "grad_norm": 0.36838576197624207, "learning_rate": 1.966027900848156e-05, "loss": 0.7033, "step": 5510 }, { "epoch": 0.16929315270481984, "grad_norm": 0.39379027485847473, "learning_rate": 1.9660154100070423e-05, "loss": 0.4851, "step": 5511 }, { "epoch": 0.169323871839769, "grad_norm": 0.35460153222084045, "learning_rate": 1.966002916909732e-05, "loss": 0.668, "step": 5512 }, { "epoch": 0.16935459097471814, "grad_norm": 0.39369797706604004, "learning_rate": 1.9659904215562552e-05, "loss": 0.5624, "step": 5513 }, { "epoch": 0.16938531010966731, "grad_norm": 0.30127206444740295, "learning_rate": 1.96597792394664e-05, "loss": 0.5292, "step": 5514 }, { "epoch": 0.16941602924461646, "grad_norm": 0.35094118118286133, "learning_rate": 1.9659654240809155e-05, "loss": 0.5666, "step": 5515 }, { "epoch": 0.16944674837956564, "grad_norm": 0.4313194453716278, "learning_rate": 1.9659529219591116e-05, "loss": 0.5425, "step": 5516 }, { "epoch": 0.16947746751451478, "grad_norm": 0.3164213299751282, "learning_rate": 1.9659404175812572e-05, "loss": 0.5671, "step": 5517 }, { "epoch": 0.16950818664946396, "grad_norm": 0.4806155264377594, "learning_rate": 1.965927910947381e-05, "loss": 0.5423, "step": 5518 }, { "epoch": 0.1695389057844131, "grad_norm": 0.34958770871162415, "learning_rate": 1.9659154020575132e-05, "loss": 0.592, "step": 5519 }, { "epoch": 0.16956962491936228, "grad_norm": 0.3365020155906677, "learning_rate": 1.9659028909116823e-05, "loss": 0.5583, "step": 5520 }, { "epoch": 0.16960034405431143, "grad_norm": 0.4219428300857544, "learning_rate": 1.965890377509918e-05, "loss": 0.5329, "step": 5521 }, { "epoch": 0.16963106318926058, "grad_norm": 0.3420490324497223, "learning_rate": 1.965877861852249e-05, "loss": 0.5823, "step": 5522 }, { "epoch": 0.16966178232420975, "grad_norm": 0.35515448451042175, "learning_rate": 1.965865343938705e-05, "loss": 0.5393, "step": 5523 }, { "epoch": 0.1696925014591589, "grad_norm": 0.3513754606246948, "learning_rate": 1.9658528237693147e-05, "loss": 0.4994, "step": 5524 }, { "epoch": 0.16972322059410808, "grad_norm": 0.4636346399784088, "learning_rate": 1.965840301344108e-05, "loss": 0.6757, "step": 5525 }, { "epoch": 0.16975393972905722, "grad_norm": 0.3626987040042877, "learning_rate": 1.9658277766631138e-05, "loss": 0.5566, "step": 5526 }, { "epoch": 0.1697846588640064, "grad_norm": 0.27770087122917175, "learning_rate": 1.9658152497263614e-05, "loss": 0.552, "step": 5527 }, { "epoch": 0.16981537799895555, "grad_norm": 0.35424143075942993, "learning_rate": 1.96580272053388e-05, "loss": 0.6079, "step": 5528 }, { "epoch": 0.16984609713390472, "grad_norm": 0.34435805678367615, "learning_rate": 1.9657901890856988e-05, "loss": 0.6248, "step": 5529 }, { "epoch": 0.16987681626885387, "grad_norm": 0.32738181948661804, "learning_rate": 1.9657776553818474e-05, "loss": 0.6076, "step": 5530 }, { "epoch": 0.16990753540380302, "grad_norm": 0.3548453450202942, "learning_rate": 1.9657651194223543e-05, "loss": 0.625, "step": 5531 }, { "epoch": 0.1699382545387522, "grad_norm": 0.32058414816856384, "learning_rate": 1.9657525812072497e-05, "loss": 0.6169, "step": 5532 }, { "epoch": 0.16996897367370134, "grad_norm": 0.3727938234806061, "learning_rate": 1.9657400407365625e-05, "loss": 0.6555, "step": 5533 }, { "epoch": 0.16999969280865052, "grad_norm": 0.32011500000953674, "learning_rate": 1.9657274980103217e-05, "loss": 0.5646, "step": 5534 }, { "epoch": 0.17003041194359966, "grad_norm": 0.3470783233642578, "learning_rate": 1.9657149530285572e-05, "loss": 0.6156, "step": 5535 }, { "epoch": 0.17006113107854884, "grad_norm": 0.3156416714191437, "learning_rate": 1.965702405791298e-05, "loss": 0.5514, "step": 5536 }, { "epoch": 0.17009185021349799, "grad_norm": 0.3582402169704437, "learning_rate": 1.9656898562985733e-05, "loss": 0.5452, "step": 5537 }, { "epoch": 0.17012256934844716, "grad_norm": 0.30616992712020874, "learning_rate": 1.9656773045504125e-05, "loss": 0.5283, "step": 5538 }, { "epoch": 0.1701532884833963, "grad_norm": 0.3308858871459961, "learning_rate": 1.9656647505468452e-05, "loss": 0.5851, "step": 5539 }, { "epoch": 0.17018400761834546, "grad_norm": 0.3321027457714081, "learning_rate": 1.9656521942879002e-05, "loss": 0.5211, "step": 5540 }, { "epoch": 0.17021472675329463, "grad_norm": 0.34497690200805664, "learning_rate": 1.9656396357736067e-05, "loss": 0.6227, "step": 5541 }, { "epoch": 0.17024544588824378, "grad_norm": 0.36517247557640076, "learning_rate": 1.9656270750039948e-05, "loss": 0.6266, "step": 5542 }, { "epoch": 0.17027616502319295, "grad_norm": 0.3612678050994873, "learning_rate": 1.9656145119790933e-05, "loss": 0.6609, "step": 5543 }, { "epoch": 0.1703068841581421, "grad_norm": 0.3385012447834015, "learning_rate": 1.9656019466989316e-05, "loss": 0.5795, "step": 5544 }, { "epoch": 0.17033760329309128, "grad_norm": 0.34869012236595154, "learning_rate": 1.9655893791635393e-05, "loss": 0.6222, "step": 5545 }, { "epoch": 0.17036832242804043, "grad_norm": 0.3779274523258209, "learning_rate": 1.9655768093729456e-05, "loss": 0.6475, "step": 5546 }, { "epoch": 0.17039904156298957, "grad_norm": 0.4023529589176178, "learning_rate": 1.9655642373271794e-05, "loss": 0.5915, "step": 5547 }, { "epoch": 0.17042976069793875, "grad_norm": 0.31498199701309204, "learning_rate": 1.965551663026271e-05, "loss": 0.5787, "step": 5548 }, { "epoch": 0.1704604798328879, "grad_norm": 0.8628299832344055, "learning_rate": 1.965539086470249e-05, "loss": 0.6005, "step": 5549 }, { "epoch": 0.17049119896783707, "grad_norm": 0.3211572468280792, "learning_rate": 1.965526507659143e-05, "loss": 0.5627, "step": 5550 }, { "epoch": 0.17052191810278622, "grad_norm": 0.33536428213119507, "learning_rate": 1.9655139265929827e-05, "loss": 0.6693, "step": 5551 }, { "epoch": 0.1705526372377354, "grad_norm": 0.33465224504470825, "learning_rate": 1.9655013432717968e-05, "loss": 0.5415, "step": 5552 }, { "epoch": 0.17058335637268454, "grad_norm": 0.38165777921676636, "learning_rate": 1.9654887576956155e-05, "loss": 0.712, "step": 5553 }, { "epoch": 0.17061407550763372, "grad_norm": 0.35175400972366333, "learning_rate": 1.9654761698644674e-05, "loss": 0.5735, "step": 5554 }, { "epoch": 0.17064479464258286, "grad_norm": 0.3485037684440613, "learning_rate": 1.9654635797783825e-05, "loss": 0.5848, "step": 5555 }, { "epoch": 0.170675513777532, "grad_norm": 0.32226577401161194, "learning_rate": 1.9654509874373898e-05, "loss": 0.5662, "step": 5556 }, { "epoch": 0.1707062329124812, "grad_norm": 0.34683430194854736, "learning_rate": 1.9654383928415188e-05, "loss": 0.579, "step": 5557 }, { "epoch": 0.17073695204743033, "grad_norm": 0.3476565480232239, "learning_rate": 1.9654257959907994e-05, "loss": 0.6512, "step": 5558 }, { "epoch": 0.1707676711823795, "grad_norm": 0.3678407073020935, "learning_rate": 1.9654131968852606e-05, "loss": 0.6936, "step": 5559 }, { "epoch": 0.17079839031732866, "grad_norm": 0.34487271308898926, "learning_rate": 1.9654005955249316e-05, "loss": 0.6476, "step": 5560 }, { "epoch": 0.17082910945227783, "grad_norm": 0.3642725944519043, "learning_rate": 1.9653879919098423e-05, "loss": 0.5894, "step": 5561 }, { "epoch": 0.17085982858722698, "grad_norm": 0.3281816244125366, "learning_rate": 1.9653753860400214e-05, "loss": 0.6368, "step": 5562 }, { "epoch": 0.17089054772217616, "grad_norm": 0.36691707372665405, "learning_rate": 1.9653627779154995e-05, "loss": 0.6099, "step": 5563 }, { "epoch": 0.1709212668571253, "grad_norm": 0.39117464423179626, "learning_rate": 1.9653501675363052e-05, "loss": 0.6431, "step": 5564 }, { "epoch": 0.17095198599207445, "grad_norm": 0.3332730829715729, "learning_rate": 1.965337554902468e-05, "loss": 0.5611, "step": 5565 }, { "epoch": 0.17098270512702363, "grad_norm": 0.35758838057518005, "learning_rate": 1.9653249400140178e-05, "loss": 0.5759, "step": 5566 }, { "epoch": 0.17101342426197277, "grad_norm": 0.3162406384944916, "learning_rate": 1.9653123228709836e-05, "loss": 0.5963, "step": 5567 }, { "epoch": 0.17104414339692195, "grad_norm": 0.32974550127983093, "learning_rate": 1.9652997034733953e-05, "loss": 0.5524, "step": 5568 }, { "epoch": 0.1710748625318711, "grad_norm": 0.3820188343524933, "learning_rate": 1.9652870818212818e-05, "loss": 0.5305, "step": 5569 }, { "epoch": 0.17110558166682027, "grad_norm": 0.351349413394928, "learning_rate": 1.965274457914673e-05, "loss": 0.5954, "step": 5570 }, { "epoch": 0.17113630080176942, "grad_norm": 0.3260761499404907, "learning_rate": 1.965261831753598e-05, "loss": 0.6611, "step": 5571 }, { "epoch": 0.1711670199367186, "grad_norm": 0.3345634937286377, "learning_rate": 1.965249203338087e-05, "loss": 0.5799, "step": 5572 }, { "epoch": 0.17119773907166774, "grad_norm": 0.34981727600097656, "learning_rate": 1.965236572668169e-05, "loss": 0.5858, "step": 5573 }, { "epoch": 0.1712284582066169, "grad_norm": 0.37465599179267883, "learning_rate": 1.9652239397438736e-05, "loss": 0.616, "step": 5574 }, { "epoch": 0.17125917734156607, "grad_norm": 0.3405710756778717, "learning_rate": 1.96521130456523e-05, "loss": 0.5776, "step": 5575 }, { "epoch": 0.1712898964765152, "grad_norm": 0.35614854097366333, "learning_rate": 1.9651986671322684e-05, "loss": 0.6096, "step": 5576 }, { "epoch": 0.1713206156114644, "grad_norm": 0.3522805869579315, "learning_rate": 1.965186027445018e-05, "loss": 0.5824, "step": 5577 }, { "epoch": 0.17135133474641354, "grad_norm": 0.32862916588783264, "learning_rate": 1.9651733855035078e-05, "loss": 0.5915, "step": 5578 }, { "epoch": 0.1713820538813627, "grad_norm": 0.3735373318195343, "learning_rate": 1.965160741307768e-05, "loss": 0.7005, "step": 5579 }, { "epoch": 0.17141277301631186, "grad_norm": 0.3380180299282074, "learning_rate": 1.9651480948578278e-05, "loss": 0.5852, "step": 5580 }, { "epoch": 0.17144349215126103, "grad_norm": 0.3365321755409241, "learning_rate": 1.9651354461537167e-05, "loss": 0.5574, "step": 5581 }, { "epoch": 0.17147421128621018, "grad_norm": 0.35373666882514954, "learning_rate": 1.9651227951954644e-05, "loss": 0.5954, "step": 5582 }, { "epoch": 0.17150493042115933, "grad_norm": 0.31934884190559387, "learning_rate": 1.9651101419831004e-05, "loss": 0.583, "step": 5583 }, { "epoch": 0.1715356495561085, "grad_norm": 0.3511893153190613, "learning_rate": 1.9650974865166545e-05, "loss": 0.5808, "step": 5584 }, { "epoch": 0.17156636869105765, "grad_norm": 0.34635064005851746, "learning_rate": 1.9650848287961556e-05, "loss": 0.6995, "step": 5585 }, { "epoch": 0.17159708782600683, "grad_norm": 0.37466922402381897, "learning_rate": 1.9650721688216342e-05, "loss": 0.5633, "step": 5586 }, { "epoch": 0.17162780696095598, "grad_norm": 0.31966257095336914, "learning_rate": 1.965059506593119e-05, "loss": 0.5755, "step": 5587 }, { "epoch": 0.17165852609590515, "grad_norm": 0.3190842866897583, "learning_rate": 1.9650468421106397e-05, "loss": 0.5986, "step": 5588 }, { "epoch": 0.1716892452308543, "grad_norm": 0.30955755710601807, "learning_rate": 1.9650341753742266e-05, "loss": 0.5614, "step": 5589 }, { "epoch": 0.17171996436580345, "grad_norm": 0.37814977765083313, "learning_rate": 1.9650215063839086e-05, "loss": 0.5732, "step": 5590 }, { "epoch": 0.17175068350075262, "grad_norm": 0.3915259838104248, "learning_rate": 1.9650088351397155e-05, "loss": 0.5641, "step": 5591 }, { "epoch": 0.17178140263570177, "grad_norm": 0.37087368965148926, "learning_rate": 1.964996161641677e-05, "loss": 0.6211, "step": 5592 }, { "epoch": 0.17181212177065094, "grad_norm": 0.3033600151538849, "learning_rate": 1.964983485889822e-05, "loss": 0.5395, "step": 5593 }, { "epoch": 0.1718428409056001, "grad_norm": 0.3242299258708954, "learning_rate": 1.964970807884181e-05, "loss": 0.5849, "step": 5594 }, { "epoch": 0.17187356004054927, "grad_norm": 0.34584879875183105, "learning_rate": 1.9649581276247836e-05, "loss": 0.5214, "step": 5595 }, { "epoch": 0.17190427917549841, "grad_norm": 0.33015429973602295, "learning_rate": 1.964945445111659e-05, "loss": 0.5971, "step": 5596 }, { "epoch": 0.1719349983104476, "grad_norm": 0.3855443596839905, "learning_rate": 1.9649327603448368e-05, "loss": 0.6004, "step": 5597 }, { "epoch": 0.17196571744539674, "grad_norm": 0.31823456287384033, "learning_rate": 1.9649200733243464e-05, "loss": 0.6093, "step": 5598 }, { "epoch": 0.17199643658034589, "grad_norm": 0.337131530046463, "learning_rate": 1.9649073840502185e-05, "loss": 0.5629, "step": 5599 }, { "epoch": 0.17202715571529506, "grad_norm": 0.34101352095603943, "learning_rate": 1.9648946925224815e-05, "loss": 0.5843, "step": 5600 }, { "epoch": 0.1720578748502442, "grad_norm": 0.3649226725101471, "learning_rate": 1.9648819987411656e-05, "loss": 0.5723, "step": 5601 }, { "epoch": 0.17208859398519338, "grad_norm": 0.3183785378932953, "learning_rate": 1.9648693027063006e-05, "loss": 0.6286, "step": 5602 }, { "epoch": 0.17211931312014253, "grad_norm": 0.38478654623031616, "learning_rate": 1.964856604417916e-05, "loss": 0.6409, "step": 5603 }, { "epoch": 0.1721500322550917, "grad_norm": 0.40133506059646606, "learning_rate": 1.964843903876041e-05, "loss": 0.6786, "step": 5604 }, { "epoch": 0.17218075139004085, "grad_norm": 0.32408133149147034, "learning_rate": 1.964831201080706e-05, "loss": 0.5962, "step": 5605 }, { "epoch": 0.17221147052499003, "grad_norm": 0.37347209453582764, "learning_rate": 1.96481849603194e-05, "loss": 0.6239, "step": 5606 }, { "epoch": 0.17224218965993918, "grad_norm": 0.35333937406539917, "learning_rate": 1.9648057887297737e-05, "loss": 0.6208, "step": 5607 }, { "epoch": 0.17227290879488832, "grad_norm": 0.33590003848075867, "learning_rate": 1.9647930791742354e-05, "loss": 0.6243, "step": 5608 }, { "epoch": 0.1723036279298375, "grad_norm": 0.33774659037590027, "learning_rate": 1.964780367365356e-05, "loss": 0.6297, "step": 5609 }, { "epoch": 0.17233434706478665, "grad_norm": 0.3409034311771393, "learning_rate": 1.9647676533031646e-05, "loss": 0.6314, "step": 5610 }, { "epoch": 0.17236506619973582, "grad_norm": 0.30403509736061096, "learning_rate": 1.9647549369876908e-05, "loss": 0.5888, "step": 5611 }, { "epoch": 0.17239578533468497, "grad_norm": 0.45810380578041077, "learning_rate": 1.9647422184189647e-05, "loss": 0.6147, "step": 5612 }, { "epoch": 0.17242650446963415, "grad_norm": 0.3310184180736542, "learning_rate": 1.9647294975970153e-05, "loss": 0.5985, "step": 5613 }, { "epoch": 0.1724572236045833, "grad_norm": 0.34987056255340576, "learning_rate": 1.964716774521873e-05, "loss": 0.5666, "step": 5614 }, { "epoch": 0.17248794273953247, "grad_norm": 0.4518604278564453, "learning_rate": 1.9647040491935675e-05, "loss": 0.55, "step": 5615 }, { "epoch": 0.17251866187448162, "grad_norm": 0.3221533000469208, "learning_rate": 1.9646913216121282e-05, "loss": 0.588, "step": 5616 }, { "epoch": 0.17254938100943076, "grad_norm": 0.35560575127601624, "learning_rate": 1.964678591777585e-05, "loss": 0.5107, "step": 5617 }, { "epoch": 0.17258010014437994, "grad_norm": 0.3450881242752075, "learning_rate": 1.9646658596899674e-05, "loss": 0.5672, "step": 5618 }, { "epoch": 0.1726108192793291, "grad_norm": 0.37284907698631287, "learning_rate": 1.9646531253493055e-05, "loss": 0.6562, "step": 5619 }, { "epoch": 0.17264153841427826, "grad_norm": 0.36055877804756165, "learning_rate": 1.9646403887556288e-05, "loss": 0.5545, "step": 5620 }, { "epoch": 0.1726722575492274, "grad_norm": 0.37546882033348083, "learning_rate": 1.964627649908967e-05, "loss": 0.6487, "step": 5621 }, { "epoch": 0.17270297668417658, "grad_norm": 0.32225096225738525, "learning_rate": 1.96461490880935e-05, "loss": 0.6672, "step": 5622 }, { "epoch": 0.17273369581912573, "grad_norm": 0.3978888690471649, "learning_rate": 1.9646021654568073e-05, "loss": 0.6324, "step": 5623 }, { "epoch": 0.17276441495407488, "grad_norm": 0.3681177496910095, "learning_rate": 1.9645894198513692e-05, "loss": 0.5847, "step": 5624 }, { "epoch": 0.17279513408902406, "grad_norm": 0.3708992600440979, "learning_rate": 1.964576671993065e-05, "loss": 0.5986, "step": 5625 }, { "epoch": 0.1728258532239732, "grad_norm": 0.34115391969680786, "learning_rate": 1.964563921881925e-05, "loss": 0.5365, "step": 5626 }, { "epoch": 0.17285657235892238, "grad_norm": 0.42487725615501404, "learning_rate": 1.964551169517978e-05, "loss": 0.6385, "step": 5627 }, { "epoch": 0.17288729149387153, "grad_norm": 0.32360678911209106, "learning_rate": 1.9645384149012546e-05, "loss": 0.5662, "step": 5628 }, { "epoch": 0.1729180106288207, "grad_norm": 0.35036876797676086, "learning_rate": 1.9645256580317843e-05, "loss": 0.5994, "step": 5629 }, { "epoch": 0.17294872976376985, "grad_norm": 0.3198074698448181, "learning_rate": 1.964512898909597e-05, "loss": 0.5302, "step": 5630 }, { "epoch": 0.17297944889871902, "grad_norm": 0.3233049213886261, "learning_rate": 1.964500137534723e-05, "loss": 0.5657, "step": 5631 }, { "epoch": 0.17301016803366817, "grad_norm": 0.3180990219116211, "learning_rate": 1.9644873739071912e-05, "loss": 0.6293, "step": 5632 }, { "epoch": 0.17304088716861732, "grad_norm": 0.3301975429058075, "learning_rate": 1.9644746080270315e-05, "loss": 0.6084, "step": 5633 }, { "epoch": 0.1730716063035665, "grad_norm": 0.3184007704257965, "learning_rate": 1.9644618398942744e-05, "loss": 0.5745, "step": 5634 }, { "epoch": 0.17310232543851564, "grad_norm": 0.3604748845100403, "learning_rate": 1.9644490695089488e-05, "loss": 0.6986, "step": 5635 }, { "epoch": 0.17313304457346482, "grad_norm": 0.3489220142364502, "learning_rate": 1.9644362968710854e-05, "loss": 0.5871, "step": 5636 }, { "epoch": 0.17316376370841396, "grad_norm": 0.35768797993659973, "learning_rate": 1.9644235219807137e-05, "loss": 0.6393, "step": 5637 }, { "epoch": 0.17319448284336314, "grad_norm": 0.31330159306526184, "learning_rate": 1.9644107448378632e-05, "loss": 0.6005, "step": 5638 }, { "epoch": 0.1732252019783123, "grad_norm": 0.3264901340007782, "learning_rate": 1.9643979654425644e-05, "loss": 0.5653, "step": 5639 }, { "epoch": 0.17325592111326146, "grad_norm": 0.36978164315223694, "learning_rate": 1.9643851837948467e-05, "loss": 0.6447, "step": 5640 }, { "epoch": 0.1732866402482106, "grad_norm": 0.30458885431289673, "learning_rate": 1.96437239989474e-05, "loss": 0.5918, "step": 5641 }, { "epoch": 0.17331735938315976, "grad_norm": 0.3346673548221588, "learning_rate": 1.9643596137422742e-05, "loss": 0.6009, "step": 5642 }, { "epoch": 0.17334807851810893, "grad_norm": 0.3385678827762604, "learning_rate": 1.9643468253374793e-05, "loss": 0.6466, "step": 5643 }, { "epoch": 0.17337879765305808, "grad_norm": 0.35234975814819336, "learning_rate": 1.964334034680385e-05, "loss": 0.57, "step": 5644 }, { "epoch": 0.17340951678800726, "grad_norm": 0.3654470145702362, "learning_rate": 1.964321241771021e-05, "loss": 0.5914, "step": 5645 }, { "epoch": 0.1734402359229564, "grad_norm": 0.38410162925720215, "learning_rate": 1.9643084466094175e-05, "loss": 0.5635, "step": 5646 }, { "epoch": 0.17347095505790558, "grad_norm": 0.34213224053382874, "learning_rate": 1.9642956491956046e-05, "loss": 0.6927, "step": 5647 }, { "epoch": 0.17350167419285473, "grad_norm": 0.30476120114326477, "learning_rate": 1.9642828495296117e-05, "loss": 0.5445, "step": 5648 }, { "epoch": 0.1735323933278039, "grad_norm": 0.3183961510658264, "learning_rate": 1.9642700476114686e-05, "loss": 0.4894, "step": 5649 }, { "epoch": 0.17356311246275305, "grad_norm": 0.3175993859767914, "learning_rate": 1.9642572434412056e-05, "loss": 0.542, "step": 5650 }, { "epoch": 0.1735938315977022, "grad_norm": 0.3211466372013092, "learning_rate": 1.9642444370188523e-05, "loss": 0.5393, "step": 5651 }, { "epoch": 0.17362455073265137, "grad_norm": 0.7235485315322876, "learning_rate": 1.964231628344439e-05, "loss": 0.6612, "step": 5652 }, { "epoch": 0.17365526986760052, "grad_norm": 0.3482934534549713, "learning_rate": 1.9642188174179956e-05, "loss": 0.6013, "step": 5653 }, { "epoch": 0.1736859890025497, "grad_norm": 0.35524192452430725, "learning_rate": 1.9642060042395514e-05, "loss": 0.5851, "step": 5654 }, { "epoch": 0.17371670813749884, "grad_norm": 0.3642285466194153, "learning_rate": 1.964193188809137e-05, "loss": 0.5974, "step": 5655 }, { "epoch": 0.17374742727244802, "grad_norm": 0.308187872171402, "learning_rate": 1.9641803711267817e-05, "loss": 0.5251, "step": 5656 }, { "epoch": 0.17377814640739717, "grad_norm": 0.3165641725063324, "learning_rate": 1.9641675511925162e-05, "loss": 0.554, "step": 5657 }, { "epoch": 0.17380886554234634, "grad_norm": 0.3169659972190857, "learning_rate": 1.96415472900637e-05, "loss": 0.5436, "step": 5658 }, { "epoch": 0.1738395846772955, "grad_norm": 0.3664347231388092, "learning_rate": 1.964141904568373e-05, "loss": 0.58, "step": 5659 }, { "epoch": 0.17387030381224464, "grad_norm": 0.35595306754112244, "learning_rate": 1.9641290778785555e-05, "loss": 0.6153, "step": 5660 }, { "epoch": 0.1739010229471938, "grad_norm": 0.34305647015571594, "learning_rate": 1.9641162489369467e-05, "loss": 0.5924, "step": 5661 }, { "epoch": 0.17393174208214296, "grad_norm": 0.4135643243789673, "learning_rate": 1.9641034177435774e-05, "loss": 0.5844, "step": 5662 }, { "epoch": 0.17396246121709213, "grad_norm": 0.572309672832489, "learning_rate": 1.964090584298477e-05, "loss": 0.6661, "step": 5663 }, { "epoch": 0.17399318035204128, "grad_norm": 0.3188706040382385, "learning_rate": 1.964077748601676e-05, "loss": 0.6969, "step": 5664 }, { "epoch": 0.17402389948699046, "grad_norm": 0.34014591574668884, "learning_rate": 1.9640649106532042e-05, "loss": 0.5931, "step": 5665 }, { "epoch": 0.1740546186219396, "grad_norm": 0.3337884545326233, "learning_rate": 1.964052070453091e-05, "loss": 0.5671, "step": 5666 }, { "epoch": 0.17408533775688875, "grad_norm": 0.30931007862091064, "learning_rate": 1.964039228001367e-05, "loss": 0.5535, "step": 5667 }, { "epoch": 0.17411605689183793, "grad_norm": 0.33421021699905396, "learning_rate": 1.9640263832980624e-05, "loss": 0.5733, "step": 5668 }, { "epoch": 0.17414677602678708, "grad_norm": 0.33214837312698364, "learning_rate": 1.9640135363432064e-05, "loss": 0.6399, "step": 5669 }, { "epoch": 0.17417749516173625, "grad_norm": 0.3570050299167633, "learning_rate": 1.9640006871368297e-05, "loss": 0.6372, "step": 5670 }, { "epoch": 0.1742082142966854, "grad_norm": 0.3707194924354553, "learning_rate": 1.963987835678962e-05, "loss": 0.519, "step": 5671 }, { "epoch": 0.17423893343163457, "grad_norm": 0.31791940331459045, "learning_rate": 1.9639749819696336e-05, "loss": 0.5546, "step": 5672 }, { "epoch": 0.17426965256658372, "grad_norm": 0.35124343633651733, "learning_rate": 1.9639621260088742e-05, "loss": 0.5627, "step": 5673 }, { "epoch": 0.1743003717015329, "grad_norm": 0.31733763217926025, "learning_rate": 1.9639492677967138e-05, "loss": 0.5697, "step": 5674 }, { "epoch": 0.17433109083648204, "grad_norm": 0.3432777225971222, "learning_rate": 1.9639364073331825e-05, "loss": 0.585, "step": 5675 }, { "epoch": 0.1743618099714312, "grad_norm": 0.3301388621330261, "learning_rate": 1.9639235446183105e-05, "loss": 0.618, "step": 5676 }, { "epoch": 0.17439252910638037, "grad_norm": 0.4949572682380676, "learning_rate": 1.963910679652128e-05, "loss": 0.6066, "step": 5677 }, { "epoch": 0.17442324824132951, "grad_norm": 0.34608033299446106, "learning_rate": 1.9638978124346645e-05, "loss": 0.638, "step": 5678 }, { "epoch": 0.1744539673762787, "grad_norm": 0.32037535309791565, "learning_rate": 1.9638849429659502e-05, "loss": 0.5943, "step": 5679 }, { "epoch": 0.17448468651122784, "grad_norm": 0.4516572952270508, "learning_rate": 1.9638720712460158e-05, "loss": 0.4953, "step": 5680 }, { "epoch": 0.174515405646177, "grad_norm": 0.31835776567459106, "learning_rate": 1.9638591972748904e-05, "loss": 0.5968, "step": 5681 }, { "epoch": 0.17454612478112616, "grad_norm": 0.33262524008750916, "learning_rate": 1.9638463210526045e-05, "loss": 0.5356, "step": 5682 }, { "epoch": 0.17457684391607534, "grad_norm": 0.3662828505039215, "learning_rate": 1.9638334425791886e-05, "loss": 0.5648, "step": 5683 }, { "epoch": 0.17460756305102448, "grad_norm": 0.33483803272247314, "learning_rate": 1.9638205618546723e-05, "loss": 0.5617, "step": 5684 }, { "epoch": 0.17463828218597363, "grad_norm": 0.3551047146320343, "learning_rate": 1.9638076788790855e-05, "loss": 0.62, "step": 5685 }, { "epoch": 0.1746690013209228, "grad_norm": 0.3223101496696472, "learning_rate": 1.9637947936524586e-05, "loss": 0.6434, "step": 5686 }, { "epoch": 0.17469972045587195, "grad_norm": 0.33438369631767273, "learning_rate": 1.963781906174822e-05, "loss": 0.5946, "step": 5687 }, { "epoch": 0.17473043959082113, "grad_norm": 0.34336990118026733, "learning_rate": 1.963769016446205e-05, "loss": 0.5769, "step": 5688 }, { "epoch": 0.17476115872577028, "grad_norm": 0.36977240443229675, "learning_rate": 1.9637561244666385e-05, "loss": 0.616, "step": 5689 }, { "epoch": 0.17479187786071945, "grad_norm": 0.32806625962257385, "learning_rate": 1.963743230236152e-05, "loss": 0.6503, "step": 5690 }, { "epoch": 0.1748225969956686, "grad_norm": 0.3659497797489166, "learning_rate": 1.963730333754776e-05, "loss": 0.5782, "step": 5691 }, { "epoch": 0.17485331613061778, "grad_norm": 0.3406502902507782, "learning_rate": 1.9637174350225404e-05, "loss": 0.5474, "step": 5692 }, { "epoch": 0.17488403526556692, "grad_norm": 0.35167160630226135, "learning_rate": 1.9637045340394757e-05, "loss": 0.6464, "step": 5693 }, { "epoch": 0.17491475440051607, "grad_norm": 0.292737752199173, "learning_rate": 1.9636916308056114e-05, "loss": 0.6144, "step": 5694 }, { "epoch": 0.17494547353546525, "grad_norm": 0.31501293182373047, "learning_rate": 1.9636787253209782e-05, "loss": 0.6104, "step": 5695 }, { "epoch": 0.1749761926704144, "grad_norm": 0.3093787133693695, "learning_rate": 1.963665817585606e-05, "loss": 0.5192, "step": 5696 }, { "epoch": 0.17500691180536357, "grad_norm": 0.3553764224052429, "learning_rate": 1.963652907599525e-05, "loss": 0.603, "step": 5697 }, { "epoch": 0.17503763094031272, "grad_norm": 0.3240797221660614, "learning_rate": 1.9636399953627654e-05, "loss": 0.5767, "step": 5698 }, { "epoch": 0.1750683500752619, "grad_norm": 0.3481632173061371, "learning_rate": 1.963627080875357e-05, "loss": 0.5308, "step": 5699 }, { "epoch": 0.17509906921021104, "grad_norm": 0.31709152460098267, "learning_rate": 1.9636141641373306e-05, "loss": 0.527, "step": 5700 }, { "epoch": 0.1751297883451602, "grad_norm": 0.3323947787284851, "learning_rate": 1.9636012451487158e-05, "loss": 0.598, "step": 5701 }, { "epoch": 0.17516050748010936, "grad_norm": 0.5247969627380371, "learning_rate": 1.963588323909543e-05, "loss": 0.6457, "step": 5702 }, { "epoch": 0.1751912266150585, "grad_norm": 0.3346257209777832, "learning_rate": 1.9635754004198424e-05, "loss": 0.6205, "step": 5703 }, { "epoch": 0.17522194575000768, "grad_norm": 0.44319528341293335, "learning_rate": 1.963562474679644e-05, "loss": 0.5461, "step": 5704 }, { "epoch": 0.17525266488495683, "grad_norm": 0.3716953694820404, "learning_rate": 1.9635495466889787e-05, "loss": 0.5719, "step": 5705 }, { "epoch": 0.175283384019906, "grad_norm": 0.4499550759792328, "learning_rate": 1.9635366164478757e-05, "loss": 0.5485, "step": 5706 }, { "epoch": 0.17531410315485516, "grad_norm": 0.3294755518436432, "learning_rate": 1.9635236839563658e-05, "loss": 0.6875, "step": 5707 }, { "epoch": 0.17534482228980433, "grad_norm": 0.35993674397468567, "learning_rate": 1.963510749214479e-05, "loss": 0.6014, "step": 5708 }, { "epoch": 0.17537554142475348, "grad_norm": 0.5128867626190186, "learning_rate": 1.9634978122222454e-05, "loss": 0.6441, "step": 5709 }, { "epoch": 0.17540626055970263, "grad_norm": 0.32538819313049316, "learning_rate": 1.9634848729796955e-05, "loss": 0.6026, "step": 5710 }, { "epoch": 0.1754369796946518, "grad_norm": 0.3805907964706421, "learning_rate": 1.9634719314868596e-05, "loss": 0.6854, "step": 5711 }, { "epoch": 0.17546769882960095, "grad_norm": 0.3352387845516205, "learning_rate": 1.9634589877437676e-05, "loss": 0.5689, "step": 5712 }, { "epoch": 0.17549841796455012, "grad_norm": 0.3458799719810486, "learning_rate": 1.9634460417504495e-05, "loss": 0.5613, "step": 5713 }, { "epoch": 0.17552913709949927, "grad_norm": 0.35942038893699646, "learning_rate": 1.9634330935069362e-05, "loss": 0.5759, "step": 5714 }, { "epoch": 0.17555985623444845, "grad_norm": 0.32010528445243835, "learning_rate": 1.9634201430132575e-05, "loss": 0.5603, "step": 5715 }, { "epoch": 0.1755905753693976, "grad_norm": 0.33179616928100586, "learning_rate": 1.963407190269444e-05, "loss": 0.5816, "step": 5716 }, { "epoch": 0.17562129450434677, "grad_norm": 0.3136884570121765, "learning_rate": 1.9633942352755257e-05, "loss": 0.5191, "step": 5717 }, { "epoch": 0.17565201363929592, "grad_norm": 0.3244498670101166, "learning_rate": 1.9633812780315324e-05, "loss": 0.62, "step": 5718 }, { "epoch": 0.17568273277424507, "grad_norm": 0.7702068090438843, "learning_rate": 1.963368318537495e-05, "loss": 0.5505, "step": 5719 }, { "epoch": 0.17571345190919424, "grad_norm": 0.30794650316238403, "learning_rate": 1.9633553567934442e-05, "loss": 0.5486, "step": 5720 }, { "epoch": 0.1757441710441434, "grad_norm": 0.3979911804199219, "learning_rate": 1.963342392799409e-05, "loss": 0.5905, "step": 5721 }, { "epoch": 0.17577489017909256, "grad_norm": 0.3544567823410034, "learning_rate": 1.9633294265554208e-05, "loss": 0.5599, "step": 5722 }, { "epoch": 0.1758056093140417, "grad_norm": 0.3533678352832794, "learning_rate": 1.963316458061509e-05, "loss": 0.6137, "step": 5723 }, { "epoch": 0.1758363284489909, "grad_norm": 0.34221887588500977, "learning_rate": 1.963303487317705e-05, "loss": 0.6296, "step": 5724 }, { "epoch": 0.17586704758394003, "grad_norm": 1.1069225072860718, "learning_rate": 1.963290514324038e-05, "loss": 0.6194, "step": 5725 }, { "epoch": 0.1758977667188892, "grad_norm": 1.52217435836792, "learning_rate": 1.9632775390805387e-05, "loss": 0.5882, "step": 5726 }, { "epoch": 0.17592848585383836, "grad_norm": 0.3795982897281647, "learning_rate": 1.9632645615872376e-05, "loss": 0.656, "step": 5727 }, { "epoch": 0.1759592049887875, "grad_norm": 0.386619508266449, "learning_rate": 1.9632515818441648e-05, "loss": 0.5244, "step": 5728 }, { "epoch": 0.17598992412373668, "grad_norm": 0.30955347418785095, "learning_rate": 1.9632385998513505e-05, "loss": 0.5999, "step": 5729 }, { "epoch": 0.17602064325868583, "grad_norm": 0.338687926530838, "learning_rate": 1.963225615608825e-05, "loss": 0.5358, "step": 5730 }, { "epoch": 0.176051362393635, "grad_norm": 0.3480183482170105, "learning_rate": 1.9632126291166192e-05, "loss": 0.6566, "step": 5731 }, { "epoch": 0.17608208152858415, "grad_norm": 0.30929237604141235, "learning_rate": 1.963199640374763e-05, "loss": 0.5711, "step": 5732 }, { "epoch": 0.17611280066353333, "grad_norm": 0.32771462202072144, "learning_rate": 1.9631866493832868e-05, "loss": 0.6524, "step": 5733 }, { "epoch": 0.17614351979848247, "grad_norm": 0.32370367646217346, "learning_rate": 1.9631736561422205e-05, "loss": 0.5426, "step": 5734 }, { "epoch": 0.17617423893343165, "grad_norm": 0.32531702518463135, "learning_rate": 1.9631606606515953e-05, "loss": 0.5537, "step": 5735 }, { "epoch": 0.1762049580683808, "grad_norm": 0.3675678074359894, "learning_rate": 1.963147662911441e-05, "loss": 0.6304, "step": 5736 }, { "epoch": 0.17623567720332994, "grad_norm": 0.30853405594825745, "learning_rate": 1.963134662921788e-05, "loss": 0.5944, "step": 5737 }, { "epoch": 0.17626639633827912, "grad_norm": 0.3208543360233307, "learning_rate": 1.9631216606826668e-05, "loss": 0.6433, "step": 5738 }, { "epoch": 0.17629711547322827, "grad_norm": 0.34031128883361816, "learning_rate": 1.963108656194108e-05, "loss": 0.5917, "step": 5739 }, { "epoch": 0.17632783460817744, "grad_norm": 0.32263973355293274, "learning_rate": 1.963095649456141e-05, "loss": 0.5781, "step": 5740 }, { "epoch": 0.1763585537431266, "grad_norm": 0.3225199282169342, "learning_rate": 1.963082640468797e-05, "loss": 0.6047, "step": 5741 }, { "epoch": 0.17638927287807576, "grad_norm": 0.3392980992794037, "learning_rate": 1.9630696292321066e-05, "loss": 0.6107, "step": 5742 }, { "epoch": 0.1764199920130249, "grad_norm": 0.33307161927223206, "learning_rate": 1.9630566157460997e-05, "loss": 0.5975, "step": 5743 }, { "epoch": 0.17645071114797406, "grad_norm": 0.34703290462493896, "learning_rate": 1.9630436000108067e-05, "loss": 0.5709, "step": 5744 }, { "epoch": 0.17648143028292324, "grad_norm": 0.3018288016319275, "learning_rate": 1.9630305820262584e-05, "loss": 0.5509, "step": 5745 }, { "epoch": 0.17651214941787238, "grad_norm": 0.562344491481781, "learning_rate": 1.9630175617924845e-05, "loss": 0.6119, "step": 5746 }, { "epoch": 0.17654286855282156, "grad_norm": 0.33096569776535034, "learning_rate": 1.963004539309516e-05, "loss": 0.607, "step": 5747 }, { "epoch": 0.1765735876877707, "grad_norm": 0.3747745156288147, "learning_rate": 1.9629915145773832e-05, "loss": 0.6524, "step": 5748 }, { "epoch": 0.17660430682271988, "grad_norm": 0.31846925616264343, "learning_rate": 1.9629784875961165e-05, "loss": 0.6015, "step": 5749 }, { "epoch": 0.17663502595766903, "grad_norm": 0.33582842350006104, "learning_rate": 1.962965458365746e-05, "loss": 0.5182, "step": 5750 }, { "epoch": 0.1766657450926182, "grad_norm": 0.33399876952171326, "learning_rate": 1.962952426886303e-05, "loss": 0.581, "step": 5751 }, { "epoch": 0.17669646422756735, "grad_norm": 0.3014501631259918, "learning_rate": 1.962939393157817e-05, "loss": 0.6583, "step": 5752 }, { "epoch": 0.1767271833625165, "grad_norm": 0.3024846911430359, "learning_rate": 1.9629263571803185e-05, "loss": 0.5818, "step": 5753 }, { "epoch": 0.17675790249746567, "grad_norm": 0.3692941963672638, "learning_rate": 1.9629133189538386e-05, "loss": 0.6187, "step": 5754 }, { "epoch": 0.17678862163241482, "grad_norm": 0.32692742347717285, "learning_rate": 1.9629002784784076e-05, "loss": 0.5921, "step": 5755 }, { "epoch": 0.176819340767364, "grad_norm": 0.36419326066970825, "learning_rate": 1.962887235754055e-05, "loss": 0.6278, "step": 5756 }, { "epoch": 0.17685005990231314, "grad_norm": 0.3319969177246094, "learning_rate": 1.9628741907808126e-05, "loss": 0.5802, "step": 5757 }, { "epoch": 0.17688077903726232, "grad_norm": 0.3312731981277466, "learning_rate": 1.96286114355871e-05, "loss": 0.5754, "step": 5758 }, { "epoch": 0.17691149817221147, "grad_norm": 0.40532538294792175, "learning_rate": 1.9628480940877783e-05, "loss": 0.5676, "step": 5759 }, { "epoch": 0.17694221730716064, "grad_norm": 0.33322012424468994, "learning_rate": 1.9628350423680476e-05, "loss": 0.6138, "step": 5760 }, { "epoch": 0.1769729364421098, "grad_norm": 0.35307636857032776, "learning_rate": 1.962821988399548e-05, "loss": 0.6125, "step": 5761 }, { "epoch": 0.17700365557705894, "grad_norm": 0.35598546266555786, "learning_rate": 1.962808932182311e-05, "loss": 0.6228, "step": 5762 }, { "epoch": 0.1770343747120081, "grad_norm": 0.32881391048431396, "learning_rate": 1.962795873716366e-05, "loss": 0.6115, "step": 5763 }, { "epoch": 0.17706509384695726, "grad_norm": 0.3414410352706909, "learning_rate": 1.9627828130017442e-05, "loss": 0.6018, "step": 5764 }, { "epoch": 0.17709581298190644, "grad_norm": 0.31860968470573425, "learning_rate": 1.9627697500384758e-05, "loss": 0.6478, "step": 5765 }, { "epoch": 0.17712653211685558, "grad_norm": 0.38471007347106934, "learning_rate": 1.9627566848265914e-05, "loss": 0.5775, "step": 5766 }, { "epoch": 0.17715725125180476, "grad_norm": 0.35019633173942566, "learning_rate": 1.9627436173661215e-05, "loss": 0.5961, "step": 5767 }, { "epoch": 0.1771879703867539, "grad_norm": 0.35821962356567383, "learning_rate": 1.962730547657097e-05, "loss": 0.6364, "step": 5768 }, { "epoch": 0.17721868952170308, "grad_norm": 0.34582415223121643, "learning_rate": 1.962717475699548e-05, "loss": 0.6402, "step": 5769 }, { "epoch": 0.17724940865665223, "grad_norm": 0.3670668601989746, "learning_rate": 1.9627044014935048e-05, "loss": 0.6279, "step": 5770 }, { "epoch": 0.17728012779160138, "grad_norm": 0.3274271786212921, "learning_rate": 1.9626913250389982e-05, "loss": 0.5495, "step": 5771 }, { "epoch": 0.17731084692655055, "grad_norm": 0.3731943368911743, "learning_rate": 1.962678246336059e-05, "loss": 0.5867, "step": 5772 }, { "epoch": 0.1773415660614997, "grad_norm": 0.3146427571773529, "learning_rate": 1.9626651653847174e-05, "loss": 0.6156, "step": 5773 }, { "epoch": 0.17737228519644888, "grad_norm": 0.34232228994369507, "learning_rate": 1.962652082185004e-05, "loss": 0.5687, "step": 5774 }, { "epoch": 0.17740300433139802, "grad_norm": 0.3602336049079895, "learning_rate": 1.9626389967369497e-05, "loss": 0.6391, "step": 5775 }, { "epoch": 0.1774337234663472, "grad_norm": 0.32230815291404724, "learning_rate": 1.962625909040585e-05, "loss": 0.6528, "step": 5776 }, { "epoch": 0.17746444260129635, "grad_norm": 0.3278484046459198, "learning_rate": 1.96261281909594e-05, "loss": 0.4783, "step": 5777 }, { "epoch": 0.1774951617362455, "grad_norm": 0.3238432705402374, "learning_rate": 1.962599726903045e-05, "loss": 0.5446, "step": 5778 }, { "epoch": 0.17752588087119467, "grad_norm": 0.38078105449676514, "learning_rate": 1.9625866324619317e-05, "loss": 0.5439, "step": 5779 }, { "epoch": 0.17755660000614382, "grad_norm": 0.33114346861839294, "learning_rate": 1.96257353577263e-05, "loss": 0.6469, "step": 5780 }, { "epoch": 0.177587319141093, "grad_norm": 0.33132344484329224, "learning_rate": 1.9625604368351703e-05, "loss": 0.5814, "step": 5781 }, { "epoch": 0.17761803827604214, "grad_norm": 0.3414350748062134, "learning_rate": 1.9625473356495835e-05, "loss": 0.6917, "step": 5782 }, { "epoch": 0.17764875741099131, "grad_norm": 0.33242037892341614, "learning_rate": 1.962534232215901e-05, "loss": 0.6502, "step": 5783 }, { "epoch": 0.17767947654594046, "grad_norm": 0.33265525102615356, "learning_rate": 1.9625211265341514e-05, "loss": 0.5909, "step": 5784 }, { "epoch": 0.17771019568088964, "grad_norm": 0.35636553168296814, "learning_rate": 1.9625080186043673e-05, "loss": 0.6224, "step": 5785 }, { "epoch": 0.17774091481583879, "grad_norm": 0.3760618269443512, "learning_rate": 1.962494908426578e-05, "loss": 0.5654, "step": 5786 }, { "epoch": 0.17777163395078793, "grad_norm": 0.3437193036079407, "learning_rate": 1.962481796000815e-05, "loss": 0.5761, "step": 5787 }, { "epoch": 0.1778023530857371, "grad_norm": 0.3655988276004791, "learning_rate": 1.9624686813271085e-05, "loss": 0.6108, "step": 5788 }, { "epoch": 0.17783307222068626, "grad_norm": 0.36786213517189026, "learning_rate": 1.962455564405489e-05, "loss": 0.613, "step": 5789 }, { "epoch": 0.17786379135563543, "grad_norm": 0.33519890904426575, "learning_rate": 1.9624424452359877e-05, "loss": 0.5835, "step": 5790 }, { "epoch": 0.17789451049058458, "grad_norm": 0.2975212335586548, "learning_rate": 1.962429323818634e-05, "loss": 0.4777, "step": 5791 }, { "epoch": 0.17792522962553375, "grad_norm": 0.34186840057373047, "learning_rate": 1.9624162001534604e-05, "loss": 0.5957, "step": 5792 }, { "epoch": 0.1779559487604829, "grad_norm": 0.3320673108100891, "learning_rate": 1.9624030742404957e-05, "loss": 0.5466, "step": 5793 }, { "epoch": 0.17798666789543208, "grad_norm": 0.2832508683204651, "learning_rate": 1.9623899460797718e-05, "loss": 0.5283, "step": 5794 }, { "epoch": 0.17801738703038122, "grad_norm": 0.3308786153793335, "learning_rate": 1.9623768156713193e-05, "loss": 0.5386, "step": 5795 }, { "epoch": 0.17804810616533037, "grad_norm": 0.35667458176612854, "learning_rate": 1.9623636830151684e-05, "loss": 0.6213, "step": 5796 }, { "epoch": 0.17807882530027955, "grad_norm": 0.3427836298942566, "learning_rate": 1.9623505481113497e-05, "loss": 0.5794, "step": 5797 }, { "epoch": 0.1781095444352287, "grad_norm": 0.35619425773620605, "learning_rate": 1.962337410959894e-05, "loss": 0.6414, "step": 5798 }, { "epoch": 0.17814026357017787, "grad_norm": 0.3518011271953583, "learning_rate": 1.962324271560832e-05, "loss": 0.5827, "step": 5799 }, { "epoch": 0.17817098270512702, "grad_norm": 0.3717316687107086, "learning_rate": 1.962311129914195e-05, "loss": 0.6244, "step": 5800 }, { "epoch": 0.1782017018400762, "grad_norm": 0.31487390398979187, "learning_rate": 1.9622979860200123e-05, "loss": 0.6052, "step": 5801 }, { "epoch": 0.17823242097502534, "grad_norm": 0.39106839895248413, "learning_rate": 1.962284839878316e-05, "loss": 0.5659, "step": 5802 }, { "epoch": 0.17826314010997452, "grad_norm": 0.37291470170021057, "learning_rate": 1.9622716914891364e-05, "loss": 0.5705, "step": 5803 }, { "epoch": 0.17829385924492366, "grad_norm": 0.34869319200515747, "learning_rate": 1.9622585408525037e-05, "loss": 0.6373, "step": 5804 }, { "epoch": 0.1783245783798728, "grad_norm": 0.3345906138420105, "learning_rate": 1.962245387968449e-05, "loss": 0.5867, "step": 5805 }, { "epoch": 0.178355297514822, "grad_norm": 0.30773136019706726, "learning_rate": 1.9622322328370032e-05, "loss": 0.6047, "step": 5806 }, { "epoch": 0.17838601664977113, "grad_norm": 0.2998821437358856, "learning_rate": 1.962219075458197e-05, "loss": 0.5655, "step": 5807 }, { "epoch": 0.1784167357847203, "grad_norm": 0.3182363510131836, "learning_rate": 1.9622059158320606e-05, "loss": 0.6185, "step": 5808 }, { "epoch": 0.17844745491966946, "grad_norm": 0.3550657629966736, "learning_rate": 1.962192753958625e-05, "loss": 0.5999, "step": 5809 }, { "epoch": 0.17847817405461863, "grad_norm": 0.3329761326313019, "learning_rate": 1.9621795898379212e-05, "loss": 0.6839, "step": 5810 }, { "epoch": 0.17850889318956778, "grad_norm": 0.3710383474826813, "learning_rate": 1.96216642346998e-05, "loss": 0.6066, "step": 5811 }, { "epoch": 0.17853961232451696, "grad_norm": 0.3317925035953522, "learning_rate": 1.9621532548548314e-05, "loss": 0.5652, "step": 5812 }, { "epoch": 0.1785703314594661, "grad_norm": 0.31721335649490356, "learning_rate": 1.9621400839925072e-05, "loss": 0.5387, "step": 5813 }, { "epoch": 0.17860105059441525, "grad_norm": 0.3430209457874298, "learning_rate": 1.962126910883037e-05, "loss": 0.6194, "step": 5814 }, { "epoch": 0.17863176972936443, "grad_norm": 0.38024261593818665, "learning_rate": 1.962113735526453e-05, "loss": 0.6136, "step": 5815 }, { "epoch": 0.17866248886431357, "grad_norm": 0.33810368180274963, "learning_rate": 1.9621005579227846e-05, "loss": 0.5892, "step": 5816 }, { "epoch": 0.17869320799926275, "grad_norm": 0.3623286783695221, "learning_rate": 1.9620873780720632e-05, "loss": 0.5121, "step": 5817 }, { "epoch": 0.1787239271342119, "grad_norm": 0.33343350887298584, "learning_rate": 1.9620741959743195e-05, "loss": 0.5485, "step": 5818 }, { "epoch": 0.17875464626916107, "grad_norm": 0.3569234609603882, "learning_rate": 1.962061011629585e-05, "loss": 0.5808, "step": 5819 }, { "epoch": 0.17878536540411022, "grad_norm": 0.3416958451271057, "learning_rate": 1.962047825037889e-05, "loss": 0.591, "step": 5820 }, { "epoch": 0.17881608453905937, "grad_norm": 0.3570687174797058, "learning_rate": 1.9620346361992634e-05, "loss": 0.5956, "step": 5821 }, { "epoch": 0.17884680367400854, "grad_norm": 0.36399132013320923, "learning_rate": 1.962021445113739e-05, "loss": 0.6093, "step": 5822 }, { "epoch": 0.1788775228089577, "grad_norm": 0.37462317943573, "learning_rate": 1.9620082517813456e-05, "loss": 0.6687, "step": 5823 }, { "epoch": 0.17890824194390686, "grad_norm": 0.34316208958625793, "learning_rate": 1.9619950562021155e-05, "loss": 0.6088, "step": 5824 }, { "epoch": 0.178938961078856, "grad_norm": 0.36561399698257446, "learning_rate": 1.9619818583760786e-05, "loss": 0.7036, "step": 5825 }, { "epoch": 0.1789696802138052, "grad_norm": 0.3519943058490753, "learning_rate": 1.9619686583032655e-05, "loss": 0.624, "step": 5826 }, { "epoch": 0.17900039934875434, "grad_norm": 0.38444215059280396, "learning_rate": 1.9619554559837077e-05, "loss": 0.6326, "step": 5827 }, { "epoch": 0.1790311184837035, "grad_norm": 0.32651039958000183, "learning_rate": 1.9619422514174357e-05, "loss": 0.6166, "step": 5828 }, { "epoch": 0.17906183761865266, "grad_norm": 0.3256683945655823, "learning_rate": 1.9619290446044805e-05, "loss": 0.614, "step": 5829 }, { "epoch": 0.1790925567536018, "grad_norm": 0.37137094140052795, "learning_rate": 1.961915835544873e-05, "loss": 0.6044, "step": 5830 }, { "epoch": 0.17912327588855098, "grad_norm": 0.32280024886131287, "learning_rate": 1.9619026242386434e-05, "loss": 0.6139, "step": 5831 }, { "epoch": 0.17915399502350013, "grad_norm": 0.32023465633392334, "learning_rate": 1.9618894106858234e-05, "loss": 0.6148, "step": 5832 }, { "epoch": 0.1791847141584493, "grad_norm": 0.32320868968963623, "learning_rate": 1.9618761948864433e-05, "loss": 0.5297, "step": 5833 }, { "epoch": 0.17921543329339845, "grad_norm": 0.3266300559043884, "learning_rate": 1.961862976840534e-05, "loss": 0.5805, "step": 5834 }, { "epoch": 0.17924615242834763, "grad_norm": 0.342905730009079, "learning_rate": 1.961849756548127e-05, "loss": 0.6467, "step": 5835 }, { "epoch": 0.17927687156329677, "grad_norm": 0.33277425169944763, "learning_rate": 1.9618365340092523e-05, "loss": 0.5448, "step": 5836 }, { "epoch": 0.17930759069824595, "grad_norm": 0.35650634765625, "learning_rate": 1.9618233092239416e-05, "loss": 0.5961, "step": 5837 }, { "epoch": 0.1793383098331951, "grad_norm": 0.3802134096622467, "learning_rate": 1.9618100821922253e-05, "loss": 0.6688, "step": 5838 }, { "epoch": 0.17936902896814425, "grad_norm": 0.3339088261127472, "learning_rate": 1.9617968529141343e-05, "loss": 0.5828, "step": 5839 }, { "epoch": 0.17939974810309342, "grad_norm": 0.32241731882095337, "learning_rate": 1.9617836213896994e-05, "loss": 0.6489, "step": 5840 }, { "epoch": 0.17943046723804257, "grad_norm": 0.3094799816608429, "learning_rate": 1.9617703876189517e-05, "loss": 0.5752, "step": 5841 }, { "epoch": 0.17946118637299174, "grad_norm": 0.38574811816215515, "learning_rate": 1.9617571516019225e-05, "loss": 0.6168, "step": 5842 }, { "epoch": 0.1794919055079409, "grad_norm": 0.3265090584754944, "learning_rate": 1.961743913338642e-05, "loss": 0.5556, "step": 5843 }, { "epoch": 0.17952262464289007, "grad_norm": 0.34094005823135376, "learning_rate": 1.9617306728291416e-05, "loss": 0.6067, "step": 5844 }, { "epoch": 0.1795533437778392, "grad_norm": 0.35036829113960266, "learning_rate": 1.961717430073452e-05, "loss": 0.6297, "step": 5845 }, { "epoch": 0.1795840629127884, "grad_norm": 0.32529789209365845, "learning_rate": 1.9617041850716037e-05, "loss": 0.5409, "step": 5846 }, { "epoch": 0.17961478204773754, "grad_norm": 0.3696594536304474, "learning_rate": 1.9616909378236288e-05, "loss": 0.5857, "step": 5847 }, { "epoch": 0.17964550118268668, "grad_norm": 0.35495445132255554, "learning_rate": 1.9616776883295572e-05, "loss": 0.6142, "step": 5848 }, { "epoch": 0.17967622031763586, "grad_norm": 0.3458004295825958, "learning_rate": 1.9616644365894203e-05, "loss": 0.5518, "step": 5849 }, { "epoch": 0.179706939452585, "grad_norm": 0.3345881700515747, "learning_rate": 1.9616511826032488e-05, "loss": 0.6094, "step": 5850 }, { "epoch": 0.17973765858753418, "grad_norm": 0.3390195071697235, "learning_rate": 1.961637926371074e-05, "loss": 0.5731, "step": 5851 }, { "epoch": 0.17976837772248333, "grad_norm": 0.4023333191871643, "learning_rate": 1.9616246678929263e-05, "loss": 0.6031, "step": 5852 }, { "epoch": 0.1797990968574325, "grad_norm": 0.3400143086910248, "learning_rate": 1.9616114071688374e-05, "loss": 0.6328, "step": 5853 }, { "epoch": 0.17982981599238165, "grad_norm": 0.3326917886734009, "learning_rate": 1.961598144198838e-05, "loss": 0.55, "step": 5854 }, { "epoch": 0.1798605351273308, "grad_norm": 0.33303266763687134, "learning_rate": 1.9615848789829584e-05, "loss": 0.576, "step": 5855 }, { "epoch": 0.17989125426227998, "grad_norm": 0.3302069306373596, "learning_rate": 1.9615716115212306e-05, "loss": 0.5525, "step": 5856 }, { "epoch": 0.17992197339722912, "grad_norm": 0.4240674078464508, "learning_rate": 1.961558341813685e-05, "loss": 0.5361, "step": 5857 }, { "epoch": 0.1799526925321783, "grad_norm": 0.32118913531303406, "learning_rate": 1.9615450698603527e-05, "loss": 0.625, "step": 5858 }, { "epoch": 0.17998341166712745, "grad_norm": 0.3132808208465576, "learning_rate": 1.961531795661265e-05, "loss": 0.4957, "step": 5859 }, { "epoch": 0.18001413080207662, "grad_norm": 0.40022581815719604, "learning_rate": 1.9615185192164525e-05, "loss": 0.6497, "step": 5860 }, { "epoch": 0.18004484993702577, "grad_norm": 0.38109326362609863, "learning_rate": 1.961505240525946e-05, "loss": 0.6693, "step": 5861 }, { "epoch": 0.18007556907197494, "grad_norm": 0.3428618013858795, "learning_rate": 1.9614919595897778e-05, "loss": 0.5728, "step": 5862 }, { "epoch": 0.1801062882069241, "grad_norm": 0.3328629434108734, "learning_rate": 1.961478676407977e-05, "loss": 0.6043, "step": 5863 }, { "epoch": 0.18013700734187324, "grad_norm": 0.41157472133636475, "learning_rate": 1.961465390980576e-05, "loss": 0.5155, "step": 5864 }, { "epoch": 0.18016772647682242, "grad_norm": 0.37496158480644226, "learning_rate": 1.9614521033076055e-05, "loss": 0.6073, "step": 5865 }, { "epoch": 0.18019844561177156, "grad_norm": 0.34933289885520935, "learning_rate": 1.9614388133890966e-05, "loss": 0.713, "step": 5866 }, { "epoch": 0.18022916474672074, "grad_norm": 0.3635798692703247, "learning_rate": 1.96142552122508e-05, "loss": 0.6292, "step": 5867 }, { "epoch": 0.18025988388166989, "grad_norm": 0.3239299952983856, "learning_rate": 1.9614122268155865e-05, "loss": 0.5984, "step": 5868 }, { "epoch": 0.18029060301661906, "grad_norm": 0.582528829574585, "learning_rate": 1.9613989301606483e-05, "loss": 0.6081, "step": 5869 }, { "epoch": 0.1803213221515682, "grad_norm": 0.324512779712677, "learning_rate": 1.9613856312602952e-05, "loss": 0.5772, "step": 5870 }, { "epoch": 0.18035204128651738, "grad_norm": 0.3973548114299774, "learning_rate": 1.9613723301145594e-05, "loss": 0.6242, "step": 5871 }, { "epoch": 0.18038276042146653, "grad_norm": 0.34347814321517944, "learning_rate": 1.961359026723471e-05, "loss": 0.5784, "step": 5872 }, { "epoch": 0.18041347955641568, "grad_norm": 0.3494655191898346, "learning_rate": 1.9613457210870614e-05, "loss": 0.6223, "step": 5873 }, { "epoch": 0.18044419869136485, "grad_norm": 0.3337405025959015, "learning_rate": 1.961332413205362e-05, "loss": 0.5837, "step": 5874 }, { "epoch": 0.180474917826314, "grad_norm": 0.3243715167045593, "learning_rate": 1.9613191030784033e-05, "loss": 0.5431, "step": 5875 }, { "epoch": 0.18050563696126318, "grad_norm": 0.36223435401916504, "learning_rate": 1.9613057907062168e-05, "loss": 0.6252, "step": 5876 }, { "epoch": 0.18053635609621232, "grad_norm": 0.32095077633857727, "learning_rate": 1.9612924760888337e-05, "loss": 0.623, "step": 5877 }, { "epoch": 0.1805670752311615, "grad_norm": 0.3077680170536041, "learning_rate": 1.9612791592262847e-05, "loss": 0.5054, "step": 5878 }, { "epoch": 0.18059779436611065, "grad_norm": 0.3415769636631012, "learning_rate": 1.961265840118601e-05, "loss": 0.557, "step": 5879 }, { "epoch": 0.18062851350105982, "grad_norm": 0.32173171639442444, "learning_rate": 1.961252518765814e-05, "loss": 0.5987, "step": 5880 }, { "epoch": 0.18065923263600897, "grad_norm": 0.35862433910369873, "learning_rate": 1.9612391951679545e-05, "loss": 0.6746, "step": 5881 }, { "epoch": 0.18068995177095812, "grad_norm": 0.38454461097717285, "learning_rate": 1.9612258693250538e-05, "loss": 0.5577, "step": 5882 }, { "epoch": 0.1807206709059073, "grad_norm": 0.3392890989780426, "learning_rate": 1.961212541237143e-05, "loss": 0.5605, "step": 5883 }, { "epoch": 0.18075139004085644, "grad_norm": 0.37195006012916565, "learning_rate": 1.9611992109042532e-05, "loss": 0.7073, "step": 5884 }, { "epoch": 0.18078210917580562, "grad_norm": 0.38451117277145386, "learning_rate": 1.9611858783264153e-05, "loss": 0.5983, "step": 5885 }, { "epoch": 0.18081282831075476, "grad_norm": 0.432390034198761, "learning_rate": 1.961172543503661e-05, "loss": 0.6308, "step": 5886 }, { "epoch": 0.18084354744570394, "grad_norm": 0.7474333047866821, "learning_rate": 1.9611592064360208e-05, "loss": 0.6818, "step": 5887 }, { "epoch": 0.1808742665806531, "grad_norm": 0.36927178502082825, "learning_rate": 1.9611458671235262e-05, "loss": 0.6154, "step": 5888 }, { "epoch": 0.18090498571560226, "grad_norm": 0.3362596333026886, "learning_rate": 1.961132525566208e-05, "loss": 0.5609, "step": 5889 }, { "epoch": 0.1809357048505514, "grad_norm": 0.3263687193393707, "learning_rate": 1.9611191817640984e-05, "loss": 0.6408, "step": 5890 }, { "epoch": 0.18096642398550056, "grad_norm": 0.36071231961250305, "learning_rate": 1.961105835717227e-05, "loss": 0.6418, "step": 5891 }, { "epoch": 0.18099714312044973, "grad_norm": 0.32565635442733765, "learning_rate": 1.9610924874256262e-05, "loss": 0.5349, "step": 5892 }, { "epoch": 0.18102786225539888, "grad_norm": 0.32535284757614136, "learning_rate": 1.9610791368893268e-05, "loss": 0.5298, "step": 5893 }, { "epoch": 0.18105858139034806, "grad_norm": 0.3679327964782715, "learning_rate": 1.96106578410836e-05, "loss": 0.6625, "step": 5894 }, { "epoch": 0.1810893005252972, "grad_norm": 0.3955814242362976, "learning_rate": 1.9610524290827567e-05, "loss": 0.5782, "step": 5895 }, { "epoch": 0.18112001966024638, "grad_norm": 0.32898372411727905, "learning_rate": 1.9610390718125484e-05, "loss": 0.5602, "step": 5896 }, { "epoch": 0.18115073879519553, "grad_norm": 0.39398542046546936, "learning_rate": 1.9610257122977664e-05, "loss": 0.673, "step": 5897 }, { "epoch": 0.18118145793014467, "grad_norm": 0.30498769879341125, "learning_rate": 1.9610123505384417e-05, "loss": 0.5152, "step": 5898 }, { "epoch": 0.18121217706509385, "grad_norm": 0.36935940384864807, "learning_rate": 1.9609989865346054e-05, "loss": 0.6393, "step": 5899 }, { "epoch": 0.181242896200043, "grad_norm": 0.3456687033176422, "learning_rate": 1.9609856202862893e-05, "loss": 0.5531, "step": 5900 }, { "epoch": 0.18127361533499217, "grad_norm": 0.3408152163028717, "learning_rate": 1.9609722517935236e-05, "loss": 0.6154, "step": 5901 }, { "epoch": 0.18130433446994132, "grad_norm": 0.3023316562175751, "learning_rate": 1.96095888105634e-05, "loss": 0.6215, "step": 5902 }, { "epoch": 0.1813350536048905, "grad_norm": 0.33503296971321106, "learning_rate": 1.9609455080747705e-05, "loss": 0.6091, "step": 5903 }, { "epoch": 0.18136577273983964, "grad_norm": 0.3858528435230255, "learning_rate": 1.960932132848845e-05, "loss": 0.6319, "step": 5904 }, { "epoch": 0.18139649187478882, "grad_norm": 0.36531034111976624, "learning_rate": 1.9609187553785956e-05, "loss": 0.5362, "step": 5905 }, { "epoch": 0.18142721100973797, "grad_norm": 0.3235782980918884, "learning_rate": 1.960905375664054e-05, "loss": 0.5558, "step": 5906 }, { "epoch": 0.1814579301446871, "grad_norm": 0.36617445945739746, "learning_rate": 1.96089199370525e-05, "loss": 0.5911, "step": 5907 }, { "epoch": 0.1814886492796363, "grad_norm": 0.35041365027427673, "learning_rate": 1.9608786095022158e-05, "loss": 0.477, "step": 5908 }, { "epoch": 0.18151936841458544, "grad_norm": 0.598553478717804, "learning_rate": 1.960865223054982e-05, "loss": 0.5853, "step": 5909 }, { "epoch": 0.1815500875495346, "grad_norm": 0.3344889283180237, "learning_rate": 1.9608518343635815e-05, "loss": 0.6025, "step": 5910 }, { "epoch": 0.18158080668448376, "grad_norm": 0.40220779180526733, "learning_rate": 1.9608384434280436e-05, "loss": 0.6635, "step": 5911 }, { "epoch": 0.18161152581943293, "grad_norm": 0.3518521189689636, "learning_rate": 1.9608250502484007e-05, "loss": 0.6024, "step": 5912 }, { "epoch": 0.18164224495438208, "grad_norm": 0.3279089331626892, "learning_rate": 1.960811654824684e-05, "loss": 0.5663, "step": 5913 }, { "epoch": 0.18167296408933126, "grad_norm": 0.3207658529281616, "learning_rate": 1.960798257156924e-05, "loss": 0.6182, "step": 5914 }, { "epoch": 0.1817036832242804, "grad_norm": 0.3372577428817749, "learning_rate": 1.960784857245153e-05, "loss": 0.5652, "step": 5915 }, { "epoch": 0.18173440235922955, "grad_norm": 0.31637251377105713, "learning_rate": 1.9607714550894017e-05, "loss": 0.6118, "step": 5916 }, { "epoch": 0.18176512149417873, "grad_norm": 0.31511443853378296, "learning_rate": 1.960758050689702e-05, "loss": 0.578, "step": 5917 }, { "epoch": 0.18179584062912788, "grad_norm": 0.31717532873153687, "learning_rate": 1.960744644046084e-05, "loss": 0.5353, "step": 5918 }, { "epoch": 0.18182655976407705, "grad_norm": 0.5572600960731506, "learning_rate": 1.9607312351585803e-05, "loss": 0.6296, "step": 5919 }, { "epoch": 0.1818572788990262, "grad_norm": 0.3623197674751282, "learning_rate": 1.9607178240272216e-05, "loss": 0.607, "step": 5920 }, { "epoch": 0.18188799803397537, "grad_norm": 0.31170743703842163, "learning_rate": 1.960704410652039e-05, "loss": 0.5196, "step": 5921 }, { "epoch": 0.18191871716892452, "grad_norm": 0.372835248708725, "learning_rate": 1.9606909950330646e-05, "loss": 0.6503, "step": 5922 }, { "epoch": 0.1819494363038737, "grad_norm": 0.3382375240325928, "learning_rate": 1.9606775771703294e-05, "loss": 0.6235, "step": 5923 }, { "epoch": 0.18198015543882284, "grad_norm": 0.30778074264526367, "learning_rate": 1.960664157063864e-05, "loss": 0.6161, "step": 5924 }, { "epoch": 0.182010874573772, "grad_norm": 0.3268645107746124, "learning_rate": 1.960650734713701e-05, "loss": 0.5314, "step": 5925 }, { "epoch": 0.18204159370872117, "grad_norm": 0.34165048599243164, "learning_rate": 1.9606373101198708e-05, "loss": 0.6355, "step": 5926 }, { "epoch": 0.18207231284367031, "grad_norm": 0.39230114221572876, "learning_rate": 1.960623883282405e-05, "loss": 0.6112, "step": 5927 }, { "epoch": 0.1821030319786195, "grad_norm": 0.34133443236351013, "learning_rate": 1.9606104542013353e-05, "loss": 0.5847, "step": 5928 }, { "epoch": 0.18213375111356864, "grad_norm": 0.3248072564601898, "learning_rate": 1.9605970228766926e-05, "loss": 0.5309, "step": 5929 }, { "epoch": 0.1821644702485178, "grad_norm": 0.3303869366645813, "learning_rate": 1.9605835893085085e-05, "loss": 0.5912, "step": 5930 }, { "epoch": 0.18219518938346696, "grad_norm": 0.32488423585891724, "learning_rate": 1.9605701534968144e-05, "loss": 0.531, "step": 5931 }, { "epoch": 0.1822259085184161, "grad_norm": 0.33871930837631226, "learning_rate": 1.9605567154416418e-05, "loss": 0.4753, "step": 5932 }, { "epoch": 0.18225662765336528, "grad_norm": 0.3275834023952484, "learning_rate": 1.9605432751430217e-05, "loss": 0.6243, "step": 5933 }, { "epoch": 0.18228734678831443, "grad_norm": 0.35276320576667786, "learning_rate": 1.9605298326009857e-05, "loss": 0.6332, "step": 5934 }, { "epoch": 0.1823180659232636, "grad_norm": 0.3676069676876068, "learning_rate": 1.960516387815565e-05, "loss": 0.5366, "step": 5935 }, { "epoch": 0.18234878505821275, "grad_norm": 0.36111029982566833, "learning_rate": 1.9605029407867915e-05, "loss": 0.614, "step": 5936 }, { "epoch": 0.18237950419316193, "grad_norm": 0.37595903873443604, "learning_rate": 1.9604894915146962e-05, "loss": 0.5569, "step": 5937 }, { "epoch": 0.18241022332811108, "grad_norm": 0.33277520537376404, "learning_rate": 1.9604760399993105e-05, "loss": 0.5579, "step": 5938 }, { "epoch": 0.18244094246306025, "grad_norm": 0.31410112977027893, "learning_rate": 1.960462586240666e-05, "loss": 0.6531, "step": 5939 }, { "epoch": 0.1824716615980094, "grad_norm": 0.32106536626815796, "learning_rate": 1.9604491302387944e-05, "loss": 0.5256, "step": 5940 }, { "epoch": 0.18250238073295855, "grad_norm": 0.34931644797325134, "learning_rate": 1.960435671993726e-05, "loss": 0.5767, "step": 5941 }, { "epoch": 0.18253309986790772, "grad_norm": 0.3302990198135376, "learning_rate": 1.9604222115054936e-05, "loss": 0.5814, "step": 5942 }, { "epoch": 0.18256381900285687, "grad_norm": 0.3591599762439728, "learning_rate": 1.960408748774128e-05, "loss": 0.5521, "step": 5943 }, { "epoch": 0.18259453813780605, "grad_norm": 0.3435882329940796, "learning_rate": 1.9603952837996606e-05, "loss": 0.5469, "step": 5944 }, { "epoch": 0.1826252572727552, "grad_norm": 0.35276541113853455, "learning_rate": 1.960381816582123e-05, "loss": 0.5818, "step": 5945 }, { "epoch": 0.18265597640770437, "grad_norm": 0.3985418379306793, "learning_rate": 1.9603683471215467e-05, "loss": 0.5575, "step": 5946 }, { "epoch": 0.18268669554265352, "grad_norm": 0.33579277992248535, "learning_rate": 1.9603548754179628e-05, "loss": 0.5025, "step": 5947 }, { "epoch": 0.1827174146776027, "grad_norm": 0.34423503279685974, "learning_rate": 1.960341401471403e-05, "loss": 0.6923, "step": 5948 }, { "epoch": 0.18274813381255184, "grad_norm": 0.33499976992607117, "learning_rate": 1.960327925281899e-05, "loss": 0.5764, "step": 5949 }, { "epoch": 0.18277885294750099, "grad_norm": 0.33701130747795105, "learning_rate": 1.960314446849482e-05, "loss": 0.5487, "step": 5950 }, { "epoch": 0.18280957208245016, "grad_norm": 0.33594974875450134, "learning_rate": 1.9603009661741834e-05, "loss": 0.6243, "step": 5951 }, { "epoch": 0.1828402912173993, "grad_norm": 0.3753688931465149, "learning_rate": 1.960287483256035e-05, "loss": 0.653, "step": 5952 }, { "epoch": 0.18287101035234848, "grad_norm": 0.32881298661231995, "learning_rate": 1.960273998095068e-05, "loss": 0.5434, "step": 5953 }, { "epoch": 0.18290172948729763, "grad_norm": 0.36371633410453796, "learning_rate": 1.960260510691314e-05, "loss": 0.6551, "step": 5954 }, { "epoch": 0.1829324486222468, "grad_norm": 0.3733353912830353, "learning_rate": 1.9602470210448045e-05, "loss": 0.5304, "step": 5955 }, { "epoch": 0.18296316775719595, "grad_norm": 0.37299397587776184, "learning_rate": 1.9602335291555713e-05, "loss": 0.6461, "step": 5956 }, { "epoch": 0.18299388689214513, "grad_norm": 1.3790874481201172, "learning_rate": 1.9602200350236455e-05, "loss": 0.5752, "step": 5957 }, { "epoch": 0.18302460602709428, "grad_norm": 0.3504164218902588, "learning_rate": 1.9602065386490587e-05, "loss": 0.5882, "step": 5958 }, { "epoch": 0.18305532516204343, "grad_norm": 0.36477646231651306, "learning_rate": 1.960193040031842e-05, "loss": 0.6851, "step": 5959 }, { "epoch": 0.1830860442969926, "grad_norm": 0.33003389835357666, "learning_rate": 1.9601795391720282e-05, "loss": 0.6002, "step": 5960 }, { "epoch": 0.18311676343194175, "grad_norm": 0.36007821559906006, "learning_rate": 1.9601660360696478e-05, "loss": 0.5664, "step": 5961 }, { "epoch": 0.18314748256689092, "grad_norm": 0.5056244134902954, "learning_rate": 1.960152530724732e-05, "loss": 0.6184, "step": 5962 }, { "epoch": 0.18317820170184007, "grad_norm": 0.3341716229915619, "learning_rate": 1.9601390231373136e-05, "loss": 0.6319, "step": 5963 }, { "epoch": 0.18320892083678925, "grad_norm": 0.33340179920196533, "learning_rate": 1.9601255133074234e-05, "loss": 0.6361, "step": 5964 }, { "epoch": 0.1832396399717384, "grad_norm": 0.5584313869476318, "learning_rate": 1.960112001235093e-05, "loss": 0.6569, "step": 5965 }, { "epoch": 0.18327035910668757, "grad_norm": 0.3839835226535797, "learning_rate": 1.9600984869203534e-05, "loss": 0.6231, "step": 5966 }, { "epoch": 0.18330107824163672, "grad_norm": 0.3001989424228668, "learning_rate": 1.960084970363237e-05, "loss": 0.5293, "step": 5967 }, { "epoch": 0.18333179737658586, "grad_norm": 0.3685603141784668, "learning_rate": 1.9600714515637756e-05, "loss": 0.6185, "step": 5968 }, { "epoch": 0.18336251651153504, "grad_norm": 0.3608408570289612, "learning_rate": 1.9600579305219997e-05, "loss": 0.595, "step": 5969 }, { "epoch": 0.1833932356464842, "grad_norm": 0.4347168505191803, "learning_rate": 1.9600444072379416e-05, "loss": 0.5467, "step": 5970 }, { "epoch": 0.18342395478143336, "grad_norm": 0.32234281301498413, "learning_rate": 1.9600308817116328e-05, "loss": 0.5524, "step": 5971 }, { "epoch": 0.1834546739163825, "grad_norm": 0.32878902554512024, "learning_rate": 1.9600173539431047e-05, "loss": 0.5782, "step": 5972 }, { "epoch": 0.18348539305133169, "grad_norm": 0.3932367265224457, "learning_rate": 1.960003823932389e-05, "loss": 0.6992, "step": 5973 }, { "epoch": 0.18351611218628083, "grad_norm": 0.346364289522171, "learning_rate": 1.9599902916795178e-05, "loss": 0.6001, "step": 5974 }, { "epoch": 0.18354683132122998, "grad_norm": 0.3349640369415283, "learning_rate": 1.9599767571845215e-05, "loss": 0.6401, "step": 5975 }, { "epoch": 0.18357755045617916, "grad_norm": 0.3838599920272827, "learning_rate": 1.959963220447433e-05, "loss": 0.5242, "step": 5976 }, { "epoch": 0.1836082695911283, "grad_norm": 0.4727429747581482, "learning_rate": 1.959949681468283e-05, "loss": 0.6651, "step": 5977 }, { "epoch": 0.18363898872607748, "grad_norm": 0.3628028631210327, "learning_rate": 1.9599361402471036e-05, "loss": 0.6427, "step": 5978 }, { "epoch": 0.18366970786102663, "grad_norm": 0.32610416412353516, "learning_rate": 1.9599225967839265e-05, "loss": 0.5755, "step": 5979 }, { "epoch": 0.1837004269959758, "grad_norm": 0.32578665018081665, "learning_rate": 1.9599090510787833e-05, "loss": 0.6041, "step": 5980 }, { "epoch": 0.18373114613092495, "grad_norm": 0.3499810993671417, "learning_rate": 1.959895503131705e-05, "loss": 0.5967, "step": 5981 }, { "epoch": 0.18376186526587412, "grad_norm": 0.39366087317466736, "learning_rate": 1.959881952942724e-05, "loss": 0.5533, "step": 5982 }, { "epoch": 0.18379258440082327, "grad_norm": 0.49017149209976196, "learning_rate": 1.9598684005118717e-05, "loss": 0.5961, "step": 5983 }, { "epoch": 0.18382330353577242, "grad_norm": 0.3632493019104004, "learning_rate": 1.959854845839179e-05, "loss": 0.5753, "step": 5984 }, { "epoch": 0.1838540226707216, "grad_norm": 0.34930703043937683, "learning_rate": 1.959841288924679e-05, "loss": 0.668, "step": 5985 }, { "epoch": 0.18388474180567074, "grad_norm": 0.32629671692848206, "learning_rate": 1.9598277297684026e-05, "loss": 0.6289, "step": 5986 }, { "epoch": 0.18391546094061992, "grad_norm": 0.33979693055152893, "learning_rate": 1.9598141683703813e-05, "loss": 0.6037, "step": 5987 }, { "epoch": 0.18394618007556907, "grad_norm": 0.3243813216686249, "learning_rate": 1.959800604730647e-05, "loss": 0.519, "step": 5988 }, { "epoch": 0.18397689921051824, "grad_norm": 0.3732922077178955, "learning_rate": 1.9597870388492314e-05, "loss": 0.5793, "step": 5989 }, { "epoch": 0.1840076183454674, "grad_norm": 0.3717234432697296, "learning_rate": 1.9597734707261662e-05, "loss": 0.5592, "step": 5990 }, { "epoch": 0.18403833748041656, "grad_norm": 0.3210951089859009, "learning_rate": 1.959759900361483e-05, "loss": 0.5277, "step": 5991 }, { "epoch": 0.1840690566153657, "grad_norm": 0.38201984763145447, "learning_rate": 1.9597463277552133e-05, "loss": 0.6161, "step": 5992 }, { "epoch": 0.18409977575031486, "grad_norm": 0.33403468132019043, "learning_rate": 1.9597327529073892e-05, "loss": 0.5948, "step": 5993 }, { "epoch": 0.18413049488526403, "grad_norm": 0.32484742999076843, "learning_rate": 1.9597191758180422e-05, "loss": 0.5988, "step": 5994 }, { "epoch": 0.18416121402021318, "grad_norm": 0.34606826305389404, "learning_rate": 1.959705596487204e-05, "loss": 0.6283, "step": 5995 }, { "epoch": 0.18419193315516236, "grad_norm": 0.43828001618385315, "learning_rate": 1.9596920149149068e-05, "loss": 0.5985, "step": 5996 }, { "epoch": 0.1842226522901115, "grad_norm": 0.32886090874671936, "learning_rate": 1.9596784311011814e-05, "loss": 0.6088, "step": 5997 }, { "epoch": 0.18425337142506068, "grad_norm": 0.3961910307407379, "learning_rate": 1.95966484504606e-05, "loss": 0.5457, "step": 5998 }, { "epoch": 0.18428409056000983, "grad_norm": 0.36022260785102844, "learning_rate": 1.9596512567495748e-05, "loss": 0.6573, "step": 5999 }, { "epoch": 0.184314809694959, "grad_norm": 0.32407817244529724, "learning_rate": 1.9596376662117566e-05, "loss": 0.592, "step": 6000 }, { "epoch": 0.18434552882990815, "grad_norm": 0.3651948869228363, "learning_rate": 1.9596240734326377e-05, "loss": 0.6727, "step": 6001 }, { "epoch": 0.1843762479648573, "grad_norm": 0.35053667426109314, "learning_rate": 1.9596104784122497e-05, "loss": 0.6056, "step": 6002 }, { "epoch": 0.18440696709980647, "grad_norm": 0.41368481516838074, "learning_rate": 1.9595968811506247e-05, "loss": 0.6218, "step": 6003 }, { "epoch": 0.18443768623475562, "grad_norm": 0.3544885814189911, "learning_rate": 1.959583281647794e-05, "loss": 0.5593, "step": 6004 }, { "epoch": 0.1844684053697048, "grad_norm": 0.38088977336883545, "learning_rate": 1.959569679903789e-05, "loss": 0.5893, "step": 6005 }, { "epoch": 0.18449912450465394, "grad_norm": 0.36738619208335876, "learning_rate": 1.9595560759186428e-05, "loss": 0.5658, "step": 6006 }, { "epoch": 0.18452984363960312, "grad_norm": 0.38347238302230835, "learning_rate": 1.959542469692386e-05, "loss": 0.5553, "step": 6007 }, { "epoch": 0.18456056277455227, "grad_norm": 0.46234166622161865, "learning_rate": 1.9595288612250507e-05, "loss": 0.6165, "step": 6008 }, { "epoch": 0.18459128190950141, "grad_norm": 0.32287266850471497, "learning_rate": 1.959515250516669e-05, "loss": 0.5364, "step": 6009 }, { "epoch": 0.1846220010444506, "grad_norm": 0.3290674686431885, "learning_rate": 1.959501637567272e-05, "loss": 0.6304, "step": 6010 }, { "epoch": 0.18465272017939974, "grad_norm": 0.3726661205291748, "learning_rate": 1.9594880223768923e-05, "loss": 0.6575, "step": 6011 }, { "epoch": 0.1846834393143489, "grad_norm": 0.3764941394329071, "learning_rate": 1.9594744049455612e-05, "loss": 0.5763, "step": 6012 }, { "epoch": 0.18471415844929806, "grad_norm": 0.3217504918575287, "learning_rate": 1.9594607852733106e-05, "loss": 0.6161, "step": 6013 }, { "epoch": 0.18474487758424724, "grad_norm": 0.3317289352416992, "learning_rate": 1.9594471633601723e-05, "loss": 0.6825, "step": 6014 }, { "epoch": 0.18477559671919638, "grad_norm": 0.32416126132011414, "learning_rate": 1.9594335392061782e-05, "loss": 0.5056, "step": 6015 }, { "epoch": 0.18480631585414556, "grad_norm": 0.3297175467014313, "learning_rate": 1.95941991281136e-05, "loss": 0.5265, "step": 6016 }, { "epoch": 0.1848370349890947, "grad_norm": 0.3464498519897461, "learning_rate": 1.9594062841757496e-05, "loss": 0.6491, "step": 6017 }, { "epoch": 0.18486775412404385, "grad_norm": 0.3648136854171753, "learning_rate": 1.959392653299379e-05, "loss": 0.6917, "step": 6018 }, { "epoch": 0.18489847325899303, "grad_norm": 0.34108299016952515, "learning_rate": 1.9593790201822796e-05, "loss": 0.6821, "step": 6019 }, { "epoch": 0.18492919239394218, "grad_norm": 0.3357943296432495, "learning_rate": 1.9593653848244834e-05, "loss": 0.596, "step": 6020 }, { "epoch": 0.18495991152889135, "grad_norm": 0.3445607125759125, "learning_rate": 1.959351747226023e-05, "loss": 0.7116, "step": 6021 }, { "epoch": 0.1849906306638405, "grad_norm": 0.3208570182323456, "learning_rate": 1.959338107386929e-05, "loss": 0.5918, "step": 6022 }, { "epoch": 0.18502134979878967, "grad_norm": 0.35185733437538147, "learning_rate": 1.9593244653072337e-05, "loss": 0.5936, "step": 6023 }, { "epoch": 0.18505206893373882, "grad_norm": 0.3490707278251648, "learning_rate": 1.9593108209869695e-05, "loss": 0.6256, "step": 6024 }, { "epoch": 0.185082788068688, "grad_norm": 0.35565385222435, "learning_rate": 1.959297174426168e-05, "loss": 0.6095, "step": 6025 }, { "epoch": 0.18511350720363715, "grad_norm": 0.3533627986907959, "learning_rate": 1.9592835256248604e-05, "loss": 0.5492, "step": 6026 }, { "epoch": 0.1851442263385863, "grad_norm": 0.33954399824142456, "learning_rate": 1.9592698745830797e-05, "loss": 0.5717, "step": 6027 }, { "epoch": 0.18517494547353547, "grad_norm": 0.4030175805091858, "learning_rate": 1.9592562213008568e-05, "loss": 0.6415, "step": 6028 }, { "epoch": 0.18520566460848462, "grad_norm": 0.35756099224090576, "learning_rate": 1.959242565778224e-05, "loss": 0.5971, "step": 6029 }, { "epoch": 0.1852363837434338, "grad_norm": 0.3449825942516327, "learning_rate": 1.9592289080152135e-05, "loss": 0.5691, "step": 6030 }, { "epoch": 0.18526710287838294, "grad_norm": 0.35720646381378174, "learning_rate": 1.959215248011857e-05, "loss": 0.5682, "step": 6031 }, { "epoch": 0.18529782201333211, "grad_norm": 0.33691126108169556, "learning_rate": 1.9592015857681858e-05, "loss": 0.5717, "step": 6032 }, { "epoch": 0.18532854114828126, "grad_norm": 0.37356656789779663, "learning_rate": 1.9591879212842328e-05, "loss": 0.6284, "step": 6033 }, { "epoch": 0.18535926028323044, "grad_norm": 0.3681046962738037, "learning_rate": 1.9591742545600294e-05, "loss": 0.5986, "step": 6034 }, { "epoch": 0.18538997941817958, "grad_norm": 0.34624966979026794, "learning_rate": 1.959160585595607e-05, "loss": 0.6333, "step": 6035 }, { "epoch": 0.18542069855312873, "grad_norm": 0.3814019560813904, "learning_rate": 1.9591469143909982e-05, "loss": 0.55, "step": 6036 }, { "epoch": 0.1854514176880779, "grad_norm": 0.3457910418510437, "learning_rate": 1.959133240946235e-05, "loss": 0.5442, "step": 6037 }, { "epoch": 0.18548213682302706, "grad_norm": 0.3134711682796478, "learning_rate": 1.959119565261349e-05, "loss": 0.6376, "step": 6038 }, { "epoch": 0.18551285595797623, "grad_norm": 0.3596256971359253, "learning_rate": 1.9591058873363727e-05, "loss": 0.6035, "step": 6039 }, { "epoch": 0.18554357509292538, "grad_norm": 0.325874000787735, "learning_rate": 1.9590922071713373e-05, "loss": 0.5621, "step": 6040 }, { "epoch": 0.18557429422787455, "grad_norm": 0.3768215477466583, "learning_rate": 1.9590785247662748e-05, "loss": 0.6441, "step": 6041 }, { "epoch": 0.1856050133628237, "grad_norm": 0.3726167380809784, "learning_rate": 1.9590648401212178e-05, "loss": 0.5633, "step": 6042 }, { "epoch": 0.18563573249777288, "grad_norm": 0.3255572021007538, "learning_rate": 1.9590511532361978e-05, "loss": 0.6068, "step": 6043 }, { "epoch": 0.18566645163272202, "grad_norm": 0.48673972487449646, "learning_rate": 1.959037464111247e-05, "loss": 0.5899, "step": 6044 }, { "epoch": 0.18569717076767117, "grad_norm": 0.33345523476600647, "learning_rate": 1.959023772746397e-05, "loss": 0.639, "step": 6045 }, { "epoch": 0.18572788990262035, "grad_norm": 0.35434991121292114, "learning_rate": 1.9590100791416802e-05, "loss": 0.651, "step": 6046 }, { "epoch": 0.1857586090375695, "grad_norm": 0.35615307092666626, "learning_rate": 1.9589963832971278e-05, "loss": 0.6094, "step": 6047 }, { "epoch": 0.18578932817251867, "grad_norm": 0.3168114423751831, "learning_rate": 1.958982685212773e-05, "loss": 0.5372, "step": 6048 }, { "epoch": 0.18582004730746782, "grad_norm": 0.4172329902648926, "learning_rate": 1.9589689848886472e-05, "loss": 0.6486, "step": 6049 }, { "epoch": 0.185850766442417, "grad_norm": 0.33396264910697937, "learning_rate": 1.958955282324782e-05, "loss": 0.5853, "step": 6050 }, { "epoch": 0.18588148557736614, "grad_norm": 0.36648014187812805, "learning_rate": 1.95894157752121e-05, "loss": 0.6776, "step": 6051 }, { "epoch": 0.1859122047123153, "grad_norm": 0.3806343078613281, "learning_rate": 1.9589278704779627e-05, "loss": 0.6324, "step": 6052 }, { "epoch": 0.18594292384726446, "grad_norm": 0.3150605857372284, "learning_rate": 1.9589141611950727e-05, "loss": 0.5749, "step": 6053 }, { "epoch": 0.1859736429822136, "grad_norm": 0.330610454082489, "learning_rate": 1.9589004496725717e-05, "loss": 0.5241, "step": 6054 }, { "epoch": 0.18600436211716279, "grad_norm": 0.33437326550483704, "learning_rate": 1.9588867359104915e-05, "loss": 0.6162, "step": 6055 }, { "epoch": 0.18603508125211193, "grad_norm": 0.38292360305786133, "learning_rate": 1.9588730199088643e-05, "loss": 0.6434, "step": 6056 }, { "epoch": 0.1860658003870611, "grad_norm": 0.38716617226600647, "learning_rate": 1.9588593016677223e-05, "loss": 0.5892, "step": 6057 }, { "epoch": 0.18609651952201026, "grad_norm": 0.32561278343200684, "learning_rate": 1.9588455811870977e-05, "loss": 0.5285, "step": 6058 }, { "epoch": 0.18612723865695943, "grad_norm": 0.3736150562763214, "learning_rate": 1.958831858467022e-05, "loss": 0.6387, "step": 6059 }, { "epoch": 0.18615795779190858, "grad_norm": 0.35929790139198303, "learning_rate": 1.9588181335075275e-05, "loss": 0.6215, "step": 6060 }, { "epoch": 0.18618867692685773, "grad_norm": 0.3317405581474304, "learning_rate": 1.9588044063086465e-05, "loss": 0.5695, "step": 6061 }, { "epoch": 0.1862193960618069, "grad_norm": 0.3674466907978058, "learning_rate": 1.9587906768704106e-05, "loss": 0.6723, "step": 6062 }, { "epoch": 0.18625011519675605, "grad_norm": 0.3293972313404083, "learning_rate": 1.9587769451928524e-05, "loss": 0.6196, "step": 6063 }, { "epoch": 0.18628083433170523, "grad_norm": 0.3501985967159271, "learning_rate": 1.9587632112760034e-05, "loss": 0.61, "step": 6064 }, { "epoch": 0.18631155346665437, "grad_norm": 0.34458476305007935, "learning_rate": 1.958749475119896e-05, "loss": 0.5451, "step": 6065 }, { "epoch": 0.18634227260160355, "grad_norm": 0.3603653013706207, "learning_rate": 1.9587357367245626e-05, "loss": 0.5064, "step": 6066 }, { "epoch": 0.1863729917365527, "grad_norm": 0.46409639716148376, "learning_rate": 1.958721996090035e-05, "loss": 0.5923, "step": 6067 }, { "epoch": 0.18640371087150187, "grad_norm": 0.3251858651638031, "learning_rate": 1.9587082532163447e-05, "loss": 0.6004, "step": 6068 }, { "epoch": 0.18643443000645102, "grad_norm": 0.35332563519477844, "learning_rate": 1.9586945081035242e-05, "loss": 0.5918, "step": 6069 }, { "epoch": 0.18646514914140017, "grad_norm": 0.3431587815284729, "learning_rate": 1.958680760751606e-05, "loss": 0.6096, "step": 6070 }, { "epoch": 0.18649586827634934, "grad_norm": 0.403231143951416, "learning_rate": 1.958667011160622e-05, "loss": 0.507, "step": 6071 }, { "epoch": 0.1865265874112985, "grad_norm": 0.3182508051395416, "learning_rate": 1.958653259330604e-05, "loss": 0.5548, "step": 6072 }, { "epoch": 0.18655730654624766, "grad_norm": 0.39731693267822266, "learning_rate": 1.958639505261585e-05, "loss": 0.6122, "step": 6073 }, { "epoch": 0.1865880256811968, "grad_norm": 0.36136332154273987, "learning_rate": 1.9586257489535958e-05, "loss": 0.6078, "step": 6074 }, { "epoch": 0.186618744816146, "grad_norm": 0.3177822530269623, "learning_rate": 1.9586119904066695e-05, "loss": 0.529, "step": 6075 }, { "epoch": 0.18664946395109513, "grad_norm": 0.33696550130844116, "learning_rate": 1.958598229620838e-05, "loss": 0.5727, "step": 6076 }, { "epoch": 0.1866801830860443, "grad_norm": 0.3250274658203125, "learning_rate": 1.9585844665961334e-05, "loss": 0.641, "step": 6077 }, { "epoch": 0.18671090222099346, "grad_norm": 0.3523365557193756, "learning_rate": 1.9585707013325875e-05, "loss": 0.6453, "step": 6078 }, { "epoch": 0.1867416213559426, "grad_norm": 0.35939228534698486, "learning_rate": 1.958556933830233e-05, "loss": 0.5481, "step": 6079 }, { "epoch": 0.18677234049089178, "grad_norm": 0.34270229935646057, "learning_rate": 1.9585431640891022e-05, "loss": 0.6337, "step": 6080 }, { "epoch": 0.18680305962584093, "grad_norm": 0.4800567030906677, "learning_rate": 1.9585293921092265e-05, "loss": 0.6359, "step": 6081 }, { "epoch": 0.1868337787607901, "grad_norm": 0.3224259316921234, "learning_rate": 1.9585156178906382e-05, "loss": 0.5776, "step": 6082 }, { "epoch": 0.18686449789573925, "grad_norm": 0.33675694465637207, "learning_rate": 1.9585018414333704e-05, "loss": 0.6243, "step": 6083 }, { "epoch": 0.18689521703068843, "grad_norm": 0.3566984236240387, "learning_rate": 1.958488062737454e-05, "loss": 0.6291, "step": 6084 }, { "epoch": 0.18692593616563757, "grad_norm": 0.4642364978790283, "learning_rate": 1.9584742818029222e-05, "loss": 0.5623, "step": 6085 }, { "epoch": 0.18695665530058675, "grad_norm": 0.3475342392921448, "learning_rate": 1.9584604986298066e-05, "loss": 0.5485, "step": 6086 }, { "epoch": 0.1869873744355359, "grad_norm": 0.34220871329307556, "learning_rate": 1.9584467132181394e-05, "loss": 0.594, "step": 6087 }, { "epoch": 0.18701809357048504, "grad_norm": 0.3802169859409332, "learning_rate": 1.9584329255679532e-05, "loss": 0.52, "step": 6088 }, { "epoch": 0.18704881270543422, "grad_norm": 0.3333503305912018, "learning_rate": 1.95841913567928e-05, "loss": 0.5966, "step": 6089 }, { "epoch": 0.18707953184038337, "grad_norm": 0.3343625068664551, "learning_rate": 1.9584053435521518e-05, "loss": 0.6348, "step": 6090 }, { "epoch": 0.18711025097533254, "grad_norm": 0.3174566924571991, "learning_rate": 1.958391549186601e-05, "loss": 0.5434, "step": 6091 }, { "epoch": 0.1871409701102817, "grad_norm": 0.31437018513679504, "learning_rate": 1.95837775258266e-05, "loss": 0.5693, "step": 6092 }, { "epoch": 0.18717168924523087, "grad_norm": 0.38108348846435547, "learning_rate": 1.9583639537403608e-05, "loss": 0.6713, "step": 6093 }, { "epoch": 0.18720240838018, "grad_norm": 0.3565461337566376, "learning_rate": 1.9583501526597353e-05, "loss": 0.6449, "step": 6094 }, { "epoch": 0.18723312751512916, "grad_norm": 0.3443508744239807, "learning_rate": 1.9583363493408166e-05, "loss": 0.5979, "step": 6095 }, { "epoch": 0.18726384665007834, "grad_norm": 0.4205133616924286, "learning_rate": 1.958322543783636e-05, "loss": 0.5543, "step": 6096 }, { "epoch": 0.18729456578502748, "grad_norm": 0.3275512754917145, "learning_rate": 1.9583087359882263e-05, "loss": 0.5706, "step": 6097 }, { "epoch": 0.18732528491997666, "grad_norm": 0.3141806721687317, "learning_rate": 1.95829492595462e-05, "loss": 0.606, "step": 6098 }, { "epoch": 0.1873560040549258, "grad_norm": 0.34745439887046814, "learning_rate": 1.9582811136828484e-05, "loss": 0.585, "step": 6099 }, { "epoch": 0.18738672318987498, "grad_norm": 0.3476618826389313, "learning_rate": 1.9582672991729446e-05, "loss": 0.6234, "step": 6100 }, { "epoch": 0.18741744232482413, "grad_norm": 0.6344875693321228, "learning_rate": 1.9582534824249405e-05, "loss": 0.5197, "step": 6101 }, { "epoch": 0.1874481614597733, "grad_norm": 0.34326761960983276, "learning_rate": 1.9582396634388687e-05, "loss": 0.5949, "step": 6102 }, { "epoch": 0.18747888059472245, "grad_norm": 0.3500531017780304, "learning_rate": 1.958225842214761e-05, "loss": 0.6098, "step": 6103 }, { "epoch": 0.1875095997296716, "grad_norm": 0.4469769299030304, "learning_rate": 1.95821201875265e-05, "loss": 0.5991, "step": 6104 }, { "epoch": 0.18754031886462078, "grad_norm": 0.374724417924881, "learning_rate": 1.9581981930525677e-05, "loss": 0.557, "step": 6105 }, { "epoch": 0.18757103799956992, "grad_norm": 0.3137761652469635, "learning_rate": 1.958184365114547e-05, "loss": 0.5678, "step": 6106 }, { "epoch": 0.1876017571345191, "grad_norm": 0.3454790711402893, "learning_rate": 1.9581705349386196e-05, "loss": 0.6253, "step": 6107 }, { "epoch": 0.18763247626946825, "grad_norm": 0.40637028217315674, "learning_rate": 1.9581567025248177e-05, "loss": 0.5625, "step": 6108 }, { "epoch": 0.18766319540441742, "grad_norm": 0.32152625918388367, "learning_rate": 1.9581428678731744e-05, "loss": 0.5665, "step": 6109 }, { "epoch": 0.18769391453936657, "grad_norm": 0.3547438979148865, "learning_rate": 1.9581290309837213e-05, "loss": 0.6029, "step": 6110 }, { "epoch": 0.18772463367431574, "grad_norm": 0.38398775458335876, "learning_rate": 1.958115191856491e-05, "loss": 0.6115, "step": 6111 }, { "epoch": 0.1877553528092649, "grad_norm": 0.330425888299942, "learning_rate": 1.9581013504915157e-05, "loss": 0.6206, "step": 6112 }, { "epoch": 0.18778607194421404, "grad_norm": 0.3661397099494934, "learning_rate": 1.9580875068888277e-05, "loss": 0.6086, "step": 6113 }, { "epoch": 0.18781679107916321, "grad_norm": 0.3559572100639343, "learning_rate": 1.9580736610484596e-05, "loss": 0.6568, "step": 6114 }, { "epoch": 0.18784751021411236, "grad_norm": 0.32535356283187866, "learning_rate": 1.9580598129704435e-05, "loss": 0.6172, "step": 6115 }, { "epoch": 0.18787822934906154, "grad_norm": 0.3145642578601837, "learning_rate": 1.9580459626548117e-05, "loss": 0.5614, "step": 6116 }, { "epoch": 0.18790894848401068, "grad_norm": 0.35676515102386475, "learning_rate": 1.9580321101015967e-05, "loss": 0.6049, "step": 6117 }, { "epoch": 0.18793966761895986, "grad_norm": 0.3532150983810425, "learning_rate": 1.958018255310831e-05, "loss": 0.6234, "step": 6118 }, { "epoch": 0.187970386753909, "grad_norm": 0.3239244818687439, "learning_rate": 1.9580043982825464e-05, "loss": 0.5334, "step": 6119 }, { "epoch": 0.18800110588885818, "grad_norm": 0.3213599920272827, "learning_rate": 1.957990539016776e-05, "loss": 0.6456, "step": 6120 }, { "epoch": 0.18803182502380733, "grad_norm": 0.3204590082168579, "learning_rate": 1.9579766775135512e-05, "loss": 0.6241, "step": 6121 }, { "epoch": 0.18806254415875648, "grad_norm": 0.3527487516403198, "learning_rate": 1.9579628137729053e-05, "loss": 0.5988, "step": 6122 }, { "epoch": 0.18809326329370565, "grad_norm": 0.3620675802230835, "learning_rate": 1.957948947794871e-05, "loss": 0.6018, "step": 6123 }, { "epoch": 0.1881239824286548, "grad_norm": 0.3765774369239807, "learning_rate": 1.957935079579479e-05, "loss": 0.6001, "step": 6124 }, { "epoch": 0.18815470156360398, "grad_norm": 0.3395152986049652, "learning_rate": 1.957921209126763e-05, "loss": 0.6415, "step": 6125 }, { "epoch": 0.18818542069855312, "grad_norm": 0.3655926287174225, "learning_rate": 1.9579073364367556e-05, "loss": 0.6441, "step": 6126 }, { "epoch": 0.1882161398335023, "grad_norm": 0.3575213551521301, "learning_rate": 1.9578934615094882e-05, "loss": 0.4742, "step": 6127 }, { "epoch": 0.18824685896845145, "grad_norm": 0.7882621884346008, "learning_rate": 1.957879584344994e-05, "loss": 0.5544, "step": 6128 }, { "epoch": 0.1882775781034006, "grad_norm": 0.36553460359573364, "learning_rate": 1.9578657049433048e-05, "loss": 0.6132, "step": 6129 }, { "epoch": 0.18830829723834977, "grad_norm": 0.34742921590805054, "learning_rate": 1.9578518233044536e-05, "loss": 0.594, "step": 6130 }, { "epoch": 0.18833901637329892, "grad_norm": 0.3391280472278595, "learning_rate": 1.9578379394284724e-05, "loss": 0.6626, "step": 6131 }, { "epoch": 0.1883697355082481, "grad_norm": 0.4611179232597351, "learning_rate": 1.957824053315394e-05, "loss": 0.6273, "step": 6132 }, { "epoch": 0.18840045464319724, "grad_norm": 0.34477928280830383, "learning_rate": 1.957810164965251e-05, "loss": 0.6868, "step": 6133 }, { "epoch": 0.18843117377814642, "grad_norm": 0.35330110788345337, "learning_rate": 1.9577962743780746e-05, "loss": 0.6466, "step": 6134 }, { "epoch": 0.18846189291309556, "grad_norm": 0.3290139138698578, "learning_rate": 1.957782381553899e-05, "loss": 0.7312, "step": 6135 }, { "epoch": 0.18849261204804474, "grad_norm": 0.3132963180541992, "learning_rate": 1.957768486492755e-05, "loss": 0.5251, "step": 6136 }, { "epoch": 0.1885233311829939, "grad_norm": 0.3891313672065735, "learning_rate": 1.957754589194676e-05, "loss": 0.6431, "step": 6137 }, { "epoch": 0.18855405031794303, "grad_norm": 0.4019964933395386, "learning_rate": 1.9577406896596948e-05, "loss": 0.6326, "step": 6138 }, { "epoch": 0.1885847694528922, "grad_norm": 0.3696160316467285, "learning_rate": 1.9577267878878428e-05, "loss": 0.6113, "step": 6139 }, { "epoch": 0.18861548858784136, "grad_norm": 0.4576997458934784, "learning_rate": 1.957712883879153e-05, "loss": 0.5711, "step": 6140 }, { "epoch": 0.18864620772279053, "grad_norm": 0.3472917973995209, "learning_rate": 1.9576989776336578e-05, "loss": 0.5558, "step": 6141 }, { "epoch": 0.18867692685773968, "grad_norm": 0.37351468205451965, "learning_rate": 1.9576850691513902e-05, "loss": 0.7325, "step": 6142 }, { "epoch": 0.18870764599268885, "grad_norm": 0.3232671320438385, "learning_rate": 1.957671158432382e-05, "loss": 0.5995, "step": 6143 }, { "epoch": 0.188738365127638, "grad_norm": 0.31099098920822144, "learning_rate": 1.9576572454766655e-05, "loss": 0.6131, "step": 6144 }, { "epoch": 0.18876908426258718, "grad_norm": 0.3682072162628174, "learning_rate": 1.9576433302842744e-05, "loss": 0.5894, "step": 6145 }, { "epoch": 0.18879980339753633, "grad_norm": 0.40763911604881287, "learning_rate": 1.95762941285524e-05, "loss": 0.5584, "step": 6146 }, { "epoch": 0.18883052253248547, "grad_norm": 0.3583739101886749, "learning_rate": 1.9576154931895953e-05, "loss": 0.5638, "step": 6147 }, { "epoch": 0.18886124166743465, "grad_norm": 0.3466792404651642, "learning_rate": 1.9576015712873728e-05, "loss": 0.67, "step": 6148 }, { "epoch": 0.1888919608023838, "grad_norm": 0.3706235885620117, "learning_rate": 1.9575876471486048e-05, "loss": 0.5006, "step": 6149 }, { "epoch": 0.18892267993733297, "grad_norm": 0.32185789942741394, "learning_rate": 1.957573720773324e-05, "loss": 0.5934, "step": 6150 }, { "epoch": 0.18895339907228212, "grad_norm": 0.2972692847251892, "learning_rate": 1.957559792161563e-05, "loss": 0.5718, "step": 6151 }, { "epoch": 0.1889841182072313, "grad_norm": 0.3270997107028961, "learning_rate": 1.957545861313354e-05, "loss": 0.5904, "step": 6152 }, { "epoch": 0.18901483734218044, "grad_norm": 0.3368877172470093, "learning_rate": 1.95753192822873e-05, "loss": 0.5508, "step": 6153 }, { "epoch": 0.18904555647712962, "grad_norm": 0.33197614550590515, "learning_rate": 1.957517992907723e-05, "loss": 0.5707, "step": 6154 }, { "epoch": 0.18907627561207876, "grad_norm": 0.3469868004322052, "learning_rate": 1.9575040553503665e-05, "loss": 0.5548, "step": 6155 }, { "epoch": 0.1891069947470279, "grad_norm": 0.3339208662509918, "learning_rate": 1.957490115556692e-05, "loss": 0.6025, "step": 6156 }, { "epoch": 0.1891377138819771, "grad_norm": 0.3403169810771942, "learning_rate": 1.9574761735267324e-05, "loss": 0.5898, "step": 6157 }, { "epoch": 0.18916843301692624, "grad_norm": 0.32612285017967224, "learning_rate": 1.95746222926052e-05, "loss": 0.6212, "step": 6158 }, { "epoch": 0.1891991521518754, "grad_norm": 0.3538394868373871, "learning_rate": 1.9574482827580884e-05, "loss": 0.587, "step": 6159 }, { "epoch": 0.18922987128682456, "grad_norm": 0.36765214800834656, "learning_rate": 1.957434334019469e-05, "loss": 0.6342, "step": 6160 }, { "epoch": 0.18926059042177373, "grad_norm": 0.3214438855648041, "learning_rate": 1.9574203830446952e-05, "loss": 0.4788, "step": 6161 }, { "epoch": 0.18929130955672288, "grad_norm": 0.3274264931678772, "learning_rate": 1.957406429833799e-05, "loss": 0.6452, "step": 6162 }, { "epoch": 0.18932202869167206, "grad_norm": 0.36095449328422546, "learning_rate": 1.957392474386813e-05, "loss": 0.6745, "step": 6163 }, { "epoch": 0.1893527478266212, "grad_norm": 0.3711467683315277, "learning_rate": 1.9573785167037704e-05, "loss": 0.6417, "step": 6164 }, { "epoch": 0.18938346696157035, "grad_norm": 0.343354731798172, "learning_rate": 1.9573645567847034e-05, "loss": 0.5863, "step": 6165 }, { "epoch": 0.18941418609651953, "grad_norm": 0.33489856123924255, "learning_rate": 1.9573505946296442e-05, "loss": 0.5786, "step": 6166 }, { "epoch": 0.18944490523146867, "grad_norm": 0.33364781737327576, "learning_rate": 1.9573366302386264e-05, "loss": 0.6131, "step": 6167 }, { "epoch": 0.18947562436641785, "grad_norm": 0.36831608414649963, "learning_rate": 1.957322663611682e-05, "loss": 0.6778, "step": 6168 }, { "epoch": 0.189506343501367, "grad_norm": 0.3462827503681183, "learning_rate": 1.957308694748843e-05, "loss": 0.6051, "step": 6169 }, { "epoch": 0.18953706263631617, "grad_norm": 0.3540433645248413, "learning_rate": 1.957294723650143e-05, "loss": 0.6335, "step": 6170 }, { "epoch": 0.18956778177126532, "grad_norm": 0.31336236000061035, "learning_rate": 1.9572807503156147e-05, "loss": 0.5793, "step": 6171 }, { "epoch": 0.18959850090621447, "grad_norm": 0.32031166553497314, "learning_rate": 1.95726677474529e-05, "loss": 0.5009, "step": 6172 }, { "epoch": 0.18962922004116364, "grad_norm": 0.3557322323322296, "learning_rate": 1.957252796939202e-05, "loss": 0.5869, "step": 6173 }, { "epoch": 0.1896599391761128, "grad_norm": 0.4297019839286804, "learning_rate": 1.957238816897383e-05, "loss": 0.5939, "step": 6174 }, { "epoch": 0.18969065831106197, "grad_norm": 0.33955585956573486, "learning_rate": 1.9572248346198663e-05, "loss": 0.6065, "step": 6175 }, { "epoch": 0.1897213774460111, "grad_norm": 0.3240797221660614, "learning_rate": 1.9572108501066836e-05, "loss": 0.565, "step": 6176 }, { "epoch": 0.1897520965809603, "grad_norm": 0.354186087846756, "learning_rate": 1.9571968633578687e-05, "loss": 0.6309, "step": 6177 }, { "epoch": 0.18978281571590944, "grad_norm": 0.3319218158721924, "learning_rate": 1.9571828743734533e-05, "loss": 0.5899, "step": 6178 }, { "epoch": 0.1898135348508586, "grad_norm": 0.3251549303531647, "learning_rate": 1.957168883153471e-05, "loss": 0.5708, "step": 6179 }, { "epoch": 0.18984425398580776, "grad_norm": 0.3802778720855713, "learning_rate": 1.9571548896979534e-05, "loss": 0.5327, "step": 6180 }, { "epoch": 0.1898749731207569, "grad_norm": 0.3435022830963135, "learning_rate": 1.9571408940069337e-05, "loss": 0.5913, "step": 6181 }, { "epoch": 0.18990569225570608, "grad_norm": 0.31950774788856506, "learning_rate": 1.9571268960804447e-05, "loss": 0.5515, "step": 6182 }, { "epoch": 0.18993641139065523, "grad_norm": 1.2712351083755493, "learning_rate": 1.957112895918519e-05, "loss": 0.5646, "step": 6183 }, { "epoch": 0.1899671305256044, "grad_norm": 0.31936290860176086, "learning_rate": 1.9570988935211893e-05, "loss": 0.6678, "step": 6184 }, { "epoch": 0.18999784966055355, "grad_norm": 0.33725520968437195, "learning_rate": 1.9570848888884883e-05, "loss": 0.6853, "step": 6185 }, { "epoch": 0.19002856879550273, "grad_norm": 0.3345917761325836, "learning_rate": 1.9570708820204487e-05, "loss": 0.6064, "step": 6186 }, { "epoch": 0.19005928793045188, "grad_norm": 0.3717612326145172, "learning_rate": 1.9570568729171028e-05, "loss": 0.5422, "step": 6187 }, { "epoch": 0.19009000706540105, "grad_norm": 0.3518766164779663, "learning_rate": 1.9570428615784843e-05, "loss": 0.6344, "step": 6188 }, { "epoch": 0.1901207262003502, "grad_norm": 0.37836116552352905, "learning_rate": 1.957028848004625e-05, "loss": 0.5748, "step": 6189 }, { "epoch": 0.19015144533529935, "grad_norm": 0.5411062240600586, "learning_rate": 1.9570148321955586e-05, "loss": 0.5706, "step": 6190 }, { "epoch": 0.19018216447024852, "grad_norm": 0.4320856034755707, "learning_rate": 1.9570008141513167e-05, "loss": 0.5279, "step": 6191 }, { "epoch": 0.19021288360519767, "grad_norm": 0.3990332782268524, "learning_rate": 1.9569867938719324e-05, "loss": 0.6178, "step": 6192 }, { "epoch": 0.19024360274014684, "grad_norm": 0.3663780391216278, "learning_rate": 1.956972771357439e-05, "loss": 0.5577, "step": 6193 }, { "epoch": 0.190274321875096, "grad_norm": 0.3522452712059021, "learning_rate": 1.9569587466078688e-05, "loss": 0.5629, "step": 6194 }, { "epoch": 0.19030504101004517, "grad_norm": 0.3253523111343384, "learning_rate": 1.9569447196232544e-05, "loss": 0.6081, "step": 6195 }, { "epoch": 0.19033576014499431, "grad_norm": 0.3596937656402588, "learning_rate": 1.9569306904036288e-05, "loss": 0.5449, "step": 6196 }, { "epoch": 0.1903664792799435, "grad_norm": 0.33318009972572327, "learning_rate": 1.9569166589490248e-05, "loss": 0.6308, "step": 6197 }, { "epoch": 0.19039719841489264, "grad_norm": 0.337901771068573, "learning_rate": 1.9569026252594755e-05, "loss": 0.5527, "step": 6198 }, { "epoch": 0.19042791754984179, "grad_norm": 0.4085325002670288, "learning_rate": 1.9568885893350126e-05, "loss": 0.5376, "step": 6199 }, { "epoch": 0.19045863668479096, "grad_norm": 0.3023369312286377, "learning_rate": 1.95687455117567e-05, "loss": 0.5271, "step": 6200 }, { "epoch": 0.1904893558197401, "grad_norm": 0.31457123160362244, "learning_rate": 1.9568605107814803e-05, "loss": 0.5544, "step": 6201 }, { "epoch": 0.19052007495468928, "grad_norm": 0.44319379329681396, "learning_rate": 1.9568464681524755e-05, "loss": 0.5876, "step": 6202 }, { "epoch": 0.19055079408963843, "grad_norm": 0.384607195854187, "learning_rate": 1.956832423288689e-05, "loss": 0.5848, "step": 6203 }, { "epoch": 0.1905815132245876, "grad_norm": 0.4057403802871704, "learning_rate": 1.956818376190154e-05, "loss": 0.6232, "step": 6204 }, { "epoch": 0.19061223235953675, "grad_norm": 0.32678085565567017, "learning_rate": 1.9568043268569025e-05, "loss": 0.6102, "step": 6205 }, { "epoch": 0.1906429514944859, "grad_norm": 0.3516559898853302, "learning_rate": 1.956790275288968e-05, "loss": 0.6027, "step": 6206 }, { "epoch": 0.19067367062943508, "grad_norm": 0.3640667200088501, "learning_rate": 1.9567762214863824e-05, "loss": 0.6147, "step": 6207 }, { "epoch": 0.19070438976438422, "grad_norm": 0.32667532563209534, "learning_rate": 1.9567621654491794e-05, "loss": 0.5758, "step": 6208 }, { "epoch": 0.1907351088993334, "grad_norm": 0.36968910694122314, "learning_rate": 1.9567481071773916e-05, "loss": 0.5562, "step": 6209 }, { "epoch": 0.19076582803428255, "grad_norm": 0.35286203026771545, "learning_rate": 1.956734046671052e-05, "loss": 0.6255, "step": 6210 }, { "epoch": 0.19079654716923172, "grad_norm": 0.31759607791900635, "learning_rate": 1.956719983930193e-05, "loss": 0.5675, "step": 6211 }, { "epoch": 0.19082726630418087, "grad_norm": 0.3365676701068878, "learning_rate": 1.9567059189548476e-05, "loss": 0.6168, "step": 6212 }, { "epoch": 0.19085798543913005, "grad_norm": 0.3232490122318268, "learning_rate": 1.956691851745049e-05, "loss": 0.5718, "step": 6213 }, { "epoch": 0.1908887045740792, "grad_norm": 0.36063340306282043, "learning_rate": 1.9566777823008292e-05, "loss": 0.5519, "step": 6214 }, { "epoch": 0.19091942370902834, "grad_norm": 0.32125377655029297, "learning_rate": 1.956663710622222e-05, "loss": 0.6289, "step": 6215 }, { "epoch": 0.19095014284397752, "grad_norm": 0.3388078212738037, "learning_rate": 1.95664963670926e-05, "loss": 0.7172, "step": 6216 }, { "epoch": 0.19098086197892666, "grad_norm": 0.3145463764667511, "learning_rate": 1.9566355605619758e-05, "loss": 0.6003, "step": 6217 }, { "epoch": 0.19101158111387584, "grad_norm": 0.35213831067085266, "learning_rate": 1.9566214821804024e-05, "loss": 0.6097, "step": 6218 }, { "epoch": 0.191042300248825, "grad_norm": 0.35210782289505005, "learning_rate": 1.956607401564573e-05, "loss": 0.6225, "step": 6219 }, { "epoch": 0.19107301938377416, "grad_norm": 0.3248279392719269, "learning_rate": 1.9565933187145196e-05, "loss": 0.6619, "step": 6220 }, { "epoch": 0.1911037385187233, "grad_norm": 0.32524311542510986, "learning_rate": 1.9565792336302763e-05, "loss": 0.5821, "step": 6221 }, { "epoch": 0.19113445765367248, "grad_norm": 0.35859349370002747, "learning_rate": 1.956565146311875e-05, "loss": 0.5356, "step": 6222 }, { "epoch": 0.19116517678862163, "grad_norm": 0.3532502055168152, "learning_rate": 1.9565510567593493e-05, "loss": 0.6362, "step": 6223 }, { "epoch": 0.19119589592357078, "grad_norm": 0.3316311538219452, "learning_rate": 1.9565369649727314e-05, "loss": 0.6522, "step": 6224 }, { "epoch": 0.19122661505851996, "grad_norm": 0.3410476744174957, "learning_rate": 1.956522870952055e-05, "loss": 0.6055, "step": 6225 }, { "epoch": 0.1912573341934691, "grad_norm": 0.3383321464061737, "learning_rate": 1.9565087746973526e-05, "loss": 0.5783, "step": 6226 }, { "epoch": 0.19128805332841828, "grad_norm": 0.35049957036972046, "learning_rate": 1.956494676208657e-05, "loss": 0.5664, "step": 6227 }, { "epoch": 0.19131877246336743, "grad_norm": 0.3359719514846802, "learning_rate": 1.9564805754860014e-05, "loss": 0.5797, "step": 6228 }, { "epoch": 0.1913494915983166, "grad_norm": 0.35982653498649597, "learning_rate": 1.9564664725294188e-05, "loss": 0.5774, "step": 6229 }, { "epoch": 0.19138021073326575, "grad_norm": 0.337742418050766, "learning_rate": 1.9564523673389416e-05, "loss": 0.6192, "step": 6230 }, { "epoch": 0.19141092986821492, "grad_norm": 0.3423040509223938, "learning_rate": 1.9564382599146034e-05, "loss": 0.5362, "step": 6231 }, { "epoch": 0.19144164900316407, "grad_norm": 0.35927677154541016, "learning_rate": 1.9564241502564363e-05, "loss": 0.6206, "step": 6232 }, { "epoch": 0.19147236813811322, "grad_norm": 0.34046781063079834, "learning_rate": 1.9564100383644744e-05, "loss": 0.5926, "step": 6233 }, { "epoch": 0.1915030872730624, "grad_norm": 0.3323444128036499, "learning_rate": 1.95639592423875e-05, "loss": 0.6482, "step": 6234 }, { "epoch": 0.19153380640801154, "grad_norm": 1.0870851278305054, "learning_rate": 1.9563818078792956e-05, "loss": 0.5905, "step": 6235 }, { "epoch": 0.19156452554296072, "grad_norm": 0.33346718549728394, "learning_rate": 1.956367689286145e-05, "loss": 0.6516, "step": 6236 }, { "epoch": 0.19159524467790987, "grad_norm": 0.36201006174087524, "learning_rate": 1.9563535684593308e-05, "loss": 0.5973, "step": 6237 }, { "epoch": 0.19162596381285904, "grad_norm": 0.4010887145996094, "learning_rate": 1.9563394453988863e-05, "loss": 0.6137, "step": 6238 }, { "epoch": 0.1916566829478082, "grad_norm": 0.3818175494670868, "learning_rate": 1.956325320104844e-05, "loss": 0.5855, "step": 6239 }, { "epoch": 0.19168740208275736, "grad_norm": 0.31991758942604065, "learning_rate": 1.9563111925772374e-05, "loss": 0.5542, "step": 6240 }, { "epoch": 0.1917181212177065, "grad_norm": 0.31085509061813354, "learning_rate": 1.956297062816099e-05, "loss": 0.5462, "step": 6241 }, { "epoch": 0.19174884035265566, "grad_norm": 0.5243284106254578, "learning_rate": 1.9562829308214623e-05, "loss": 0.5698, "step": 6242 }, { "epoch": 0.19177955948760483, "grad_norm": 0.33481141924858093, "learning_rate": 1.9562687965933594e-05, "loss": 0.5645, "step": 6243 }, { "epoch": 0.19181027862255398, "grad_norm": 0.3309917151927948, "learning_rate": 1.9562546601318244e-05, "loss": 0.5747, "step": 6244 }, { "epoch": 0.19184099775750316, "grad_norm": 0.3845181465148926, "learning_rate": 1.95624052143689e-05, "loss": 0.6407, "step": 6245 }, { "epoch": 0.1918717168924523, "grad_norm": 0.3252814710140228, "learning_rate": 1.956226380508589e-05, "loss": 0.6121, "step": 6246 }, { "epoch": 0.19190243602740148, "grad_norm": 0.37284672260284424, "learning_rate": 1.9562122373469543e-05, "loss": 0.5947, "step": 6247 }, { "epoch": 0.19193315516235063, "grad_norm": 0.3408557176589966, "learning_rate": 1.9561980919520193e-05, "loss": 0.6201, "step": 6248 }, { "epoch": 0.19196387429729977, "grad_norm": 0.3703811764717102, "learning_rate": 1.9561839443238165e-05, "loss": 0.6068, "step": 6249 }, { "epoch": 0.19199459343224895, "grad_norm": 0.36165958642959595, "learning_rate": 1.9561697944623795e-05, "loss": 0.6618, "step": 6250 }, { "epoch": 0.1920253125671981, "grad_norm": 0.37223976850509644, "learning_rate": 1.9561556423677415e-05, "loss": 0.6576, "step": 6251 }, { "epoch": 0.19205603170214727, "grad_norm": 0.46847522258758545, "learning_rate": 1.956141488039935e-05, "loss": 0.6044, "step": 6252 }, { "epoch": 0.19208675083709642, "grad_norm": 0.5338642001152039, "learning_rate": 1.9561273314789933e-05, "loss": 0.6373, "step": 6253 }, { "epoch": 0.1921174699720456, "grad_norm": 0.3535264730453491, "learning_rate": 1.9561131726849495e-05, "loss": 0.5457, "step": 6254 }, { "epoch": 0.19214818910699474, "grad_norm": 0.3301853835582733, "learning_rate": 1.9560990116578365e-05, "loss": 0.538, "step": 6255 }, { "epoch": 0.19217890824194392, "grad_norm": 0.3705619275569916, "learning_rate": 1.9560848483976874e-05, "loss": 0.7194, "step": 6256 }, { "epoch": 0.19220962737689307, "grad_norm": 0.3376169204711914, "learning_rate": 1.9560706829045356e-05, "loss": 0.6667, "step": 6257 }, { "epoch": 0.19224034651184221, "grad_norm": 0.39924898743629456, "learning_rate": 1.9560565151784138e-05, "loss": 0.6504, "step": 6258 }, { "epoch": 0.1922710656467914, "grad_norm": 0.30536970496177673, "learning_rate": 1.9560423452193553e-05, "loss": 0.6106, "step": 6259 }, { "epoch": 0.19230178478174054, "grad_norm": 0.3104843497276306, "learning_rate": 1.9560281730273932e-05, "loss": 0.557, "step": 6260 }, { "epoch": 0.1923325039166897, "grad_norm": 0.3345290720462799, "learning_rate": 1.9560139986025605e-05, "loss": 0.5948, "step": 6261 }, { "epoch": 0.19236322305163886, "grad_norm": 1.3528789281845093, "learning_rate": 1.95599982194489e-05, "loss": 0.6056, "step": 6262 }, { "epoch": 0.19239394218658804, "grad_norm": 0.3465305268764496, "learning_rate": 1.9559856430544155e-05, "loss": 0.5924, "step": 6263 }, { "epoch": 0.19242466132153718, "grad_norm": 0.3358083665370941, "learning_rate": 1.9559714619311697e-05, "loss": 0.6008, "step": 6264 }, { "epoch": 0.19245538045648636, "grad_norm": 0.409743994474411, "learning_rate": 1.955957278575186e-05, "loss": 0.6569, "step": 6265 }, { "epoch": 0.1924860995914355, "grad_norm": 0.3236980140209198, "learning_rate": 1.955943092986497e-05, "loss": 0.6599, "step": 6266 }, { "epoch": 0.19251681872638465, "grad_norm": 0.3399198353290558, "learning_rate": 1.955928905165136e-05, "loss": 0.6111, "step": 6267 }, { "epoch": 0.19254753786133383, "grad_norm": 0.3521079123020172, "learning_rate": 1.9559147151111365e-05, "loss": 0.6501, "step": 6268 }, { "epoch": 0.19257825699628298, "grad_norm": 0.3668191432952881, "learning_rate": 1.9559005228245317e-05, "loss": 0.573, "step": 6269 }, { "epoch": 0.19260897613123215, "grad_norm": 0.438985139131546, "learning_rate": 1.955886328305354e-05, "loss": 0.6145, "step": 6270 }, { "epoch": 0.1926396952661813, "grad_norm": 0.3567039370536804, "learning_rate": 1.9558721315536372e-05, "loss": 0.545, "step": 6271 }, { "epoch": 0.19267041440113047, "grad_norm": 0.30235716700553894, "learning_rate": 1.9558579325694145e-05, "loss": 0.5157, "step": 6272 }, { "epoch": 0.19270113353607962, "grad_norm": 0.3147967457771301, "learning_rate": 1.9558437313527186e-05, "loss": 0.6051, "step": 6273 }, { "epoch": 0.1927318526710288, "grad_norm": 0.3339538872241974, "learning_rate": 1.955829527903583e-05, "loss": 0.5998, "step": 6274 }, { "epoch": 0.19276257180597794, "grad_norm": 0.33632832765579224, "learning_rate": 1.9558153222220405e-05, "loss": 0.6124, "step": 6275 }, { "epoch": 0.1927932909409271, "grad_norm": 0.44229355454444885, "learning_rate": 1.955801114308125e-05, "loss": 0.6218, "step": 6276 }, { "epoch": 0.19282401007587627, "grad_norm": 0.37654978036880493, "learning_rate": 1.955786904161869e-05, "loss": 0.5917, "step": 6277 }, { "epoch": 0.19285472921082542, "grad_norm": 0.4572194516658783, "learning_rate": 1.955772691783306e-05, "loss": 0.6571, "step": 6278 }, { "epoch": 0.1928854483457746, "grad_norm": 0.33117055892944336, "learning_rate": 1.955758477172469e-05, "loss": 0.5707, "step": 6279 }, { "epoch": 0.19291616748072374, "grad_norm": 0.3766689598560333, "learning_rate": 1.9557442603293916e-05, "loss": 0.6496, "step": 6280 }, { "epoch": 0.1929468866156729, "grad_norm": 0.3931124210357666, "learning_rate": 1.9557300412541064e-05, "loss": 0.622, "step": 6281 }, { "epoch": 0.19297760575062206, "grad_norm": 0.3235599994659424, "learning_rate": 1.9557158199466472e-05, "loss": 0.5176, "step": 6282 }, { "epoch": 0.1930083248855712, "grad_norm": 0.32710957527160645, "learning_rate": 1.9557015964070468e-05, "loss": 0.5808, "step": 6283 }, { "epoch": 0.19303904402052038, "grad_norm": 0.32394564151763916, "learning_rate": 1.9556873706353387e-05, "loss": 0.5374, "step": 6284 }, { "epoch": 0.19306976315546953, "grad_norm": 0.31914472579956055, "learning_rate": 1.9556731426315562e-05, "loss": 0.5724, "step": 6285 }, { "epoch": 0.1931004822904187, "grad_norm": 0.3280538320541382, "learning_rate": 1.955658912395732e-05, "loss": 0.682, "step": 6286 }, { "epoch": 0.19313120142536785, "grad_norm": 0.39125245809555054, "learning_rate": 1.9556446799279e-05, "loss": 0.5253, "step": 6287 }, { "epoch": 0.19316192056031703, "grad_norm": 0.33672550320625305, "learning_rate": 1.9556304452280925e-05, "loss": 0.7119, "step": 6288 }, { "epoch": 0.19319263969526618, "grad_norm": 0.33820539712905884, "learning_rate": 1.955616208296344e-05, "loss": 0.5715, "step": 6289 }, { "epoch": 0.19322335883021535, "grad_norm": 0.3492792248725891, "learning_rate": 1.9556019691326864e-05, "loss": 0.6281, "step": 6290 }, { "epoch": 0.1932540779651645, "grad_norm": 0.38903066515922546, "learning_rate": 1.955587727737154e-05, "loss": 0.5875, "step": 6291 }, { "epoch": 0.19328479710011365, "grad_norm": 0.33967384696006775, "learning_rate": 1.95557348410978e-05, "loss": 0.5391, "step": 6292 }, { "epoch": 0.19331551623506282, "grad_norm": 0.40521663427352905, "learning_rate": 1.9555592382505972e-05, "loss": 0.5577, "step": 6293 }, { "epoch": 0.19334623537001197, "grad_norm": 0.3108349144458771, "learning_rate": 1.955544990159639e-05, "loss": 0.5428, "step": 6294 }, { "epoch": 0.19337695450496115, "grad_norm": 0.39852166175842285, "learning_rate": 1.9555307398369388e-05, "loss": 0.5051, "step": 6295 }, { "epoch": 0.1934076736399103, "grad_norm": 0.3580273687839508, "learning_rate": 1.9555164872825297e-05, "loss": 0.5783, "step": 6296 }, { "epoch": 0.19343839277485947, "grad_norm": 0.32443422079086304, "learning_rate": 1.9555022324964453e-05, "loss": 0.5493, "step": 6297 }, { "epoch": 0.19346911190980862, "grad_norm": 0.3639150857925415, "learning_rate": 1.9554879754787187e-05, "loss": 0.6136, "step": 6298 }, { "epoch": 0.1934998310447578, "grad_norm": 0.3212595283985138, "learning_rate": 1.9554737162293832e-05, "loss": 0.5479, "step": 6299 }, { "epoch": 0.19353055017970694, "grad_norm": 0.32214656472206116, "learning_rate": 1.9554594547484724e-05, "loss": 0.5083, "step": 6300 }, { "epoch": 0.1935612693146561, "grad_norm": 0.373786598443985, "learning_rate": 1.955445191036019e-05, "loss": 0.6738, "step": 6301 }, { "epoch": 0.19359198844960526, "grad_norm": 0.3506597578525543, "learning_rate": 1.9554309250920563e-05, "loss": 0.6125, "step": 6302 }, { "epoch": 0.1936227075845544, "grad_norm": 0.3565698266029358, "learning_rate": 1.955416656916618e-05, "loss": 0.5538, "step": 6303 }, { "epoch": 0.19365342671950359, "grad_norm": 0.41609033942222595, "learning_rate": 1.9554023865097377e-05, "loss": 0.4999, "step": 6304 }, { "epoch": 0.19368414585445273, "grad_norm": 0.36770111322402954, "learning_rate": 1.9553881138714487e-05, "loss": 0.5977, "step": 6305 }, { "epoch": 0.1937148649894019, "grad_norm": 0.3982047736644745, "learning_rate": 1.9553738390017834e-05, "loss": 0.5977, "step": 6306 }, { "epoch": 0.19374558412435106, "grad_norm": 0.3077632486820221, "learning_rate": 1.955359561900776e-05, "loss": 0.5296, "step": 6307 }, { "epoch": 0.19377630325930023, "grad_norm": 0.4014892578125, "learning_rate": 1.9553452825684597e-05, "loss": 0.5763, "step": 6308 }, { "epoch": 0.19380702239424938, "grad_norm": 0.35175758600234985, "learning_rate": 1.9553310010048676e-05, "loss": 0.6052, "step": 6309 }, { "epoch": 0.19383774152919853, "grad_norm": 0.3417648673057556, "learning_rate": 1.9553167172100335e-05, "loss": 0.6335, "step": 6310 }, { "epoch": 0.1938684606641477, "grad_norm": 0.36610114574432373, "learning_rate": 1.9553024311839902e-05, "loss": 0.5628, "step": 6311 }, { "epoch": 0.19389917979909685, "grad_norm": 0.37169015407562256, "learning_rate": 1.9552881429267714e-05, "loss": 0.6149, "step": 6312 }, { "epoch": 0.19392989893404602, "grad_norm": 0.3705911338329315, "learning_rate": 1.955273852438411e-05, "loss": 0.605, "step": 6313 }, { "epoch": 0.19396061806899517, "grad_norm": 0.41803619265556335, "learning_rate": 1.9552595597189408e-05, "loss": 0.5507, "step": 6314 }, { "epoch": 0.19399133720394435, "grad_norm": 0.3338092565536499, "learning_rate": 1.9552452647683958e-05, "loss": 0.6374, "step": 6315 }, { "epoch": 0.1940220563388935, "grad_norm": 0.3624483346939087, "learning_rate": 1.9552309675868087e-05, "loss": 0.604, "step": 6316 }, { "epoch": 0.19405277547384267, "grad_norm": 0.32347074151039124, "learning_rate": 1.955216668174213e-05, "loss": 0.5208, "step": 6317 }, { "epoch": 0.19408349460879182, "grad_norm": 0.3934149146080017, "learning_rate": 1.9552023665306417e-05, "loss": 0.5954, "step": 6318 }, { "epoch": 0.19411421374374097, "grad_norm": 0.3136445879936218, "learning_rate": 1.955188062656129e-05, "loss": 0.5701, "step": 6319 }, { "epoch": 0.19414493287869014, "grad_norm": 0.38582557439804077, "learning_rate": 1.955173756550708e-05, "loss": 0.6738, "step": 6320 }, { "epoch": 0.1941756520136393, "grad_norm": 0.4027613401412964, "learning_rate": 1.9551594482144118e-05, "loss": 0.574, "step": 6321 }, { "epoch": 0.19420637114858846, "grad_norm": 0.3727557361125946, "learning_rate": 1.9551451376472738e-05, "loss": 0.5771, "step": 6322 }, { "epoch": 0.1942370902835376, "grad_norm": 0.3793748915195465, "learning_rate": 1.955130824849328e-05, "loss": 0.5729, "step": 6323 }, { "epoch": 0.1942678094184868, "grad_norm": 0.4408375322818756, "learning_rate": 1.9551165098206068e-05, "loss": 0.5529, "step": 6324 }, { "epoch": 0.19429852855343593, "grad_norm": 0.32268160581588745, "learning_rate": 1.9551021925611445e-05, "loss": 0.5119, "step": 6325 }, { "epoch": 0.19432924768838508, "grad_norm": 0.36897581815719604, "learning_rate": 1.9550878730709746e-05, "loss": 0.5919, "step": 6326 }, { "epoch": 0.19435996682333426, "grad_norm": 0.3252463936805725, "learning_rate": 1.95507355135013e-05, "loss": 0.5706, "step": 6327 }, { "epoch": 0.1943906859582834, "grad_norm": 0.3508155643939972, "learning_rate": 1.9550592273986448e-05, "loss": 0.5904, "step": 6328 }, { "epoch": 0.19442140509323258, "grad_norm": 0.3795883059501648, "learning_rate": 1.9550449012165517e-05, "loss": 0.5997, "step": 6329 }, { "epoch": 0.19445212422818173, "grad_norm": 0.3807603120803833, "learning_rate": 1.955030572803885e-05, "loss": 0.5137, "step": 6330 }, { "epoch": 0.1944828433631309, "grad_norm": 0.33746466040611267, "learning_rate": 1.9550162421606774e-05, "loss": 0.6012, "step": 6331 }, { "epoch": 0.19451356249808005, "grad_norm": 0.3324538469314575, "learning_rate": 1.9550019092869627e-05, "loss": 0.5481, "step": 6332 }, { "epoch": 0.19454428163302923, "grad_norm": 0.4570996165275574, "learning_rate": 1.954987574182774e-05, "loss": 0.5346, "step": 6333 }, { "epoch": 0.19457500076797837, "grad_norm": 0.37248119711875916, "learning_rate": 1.9549732368481454e-05, "loss": 0.6624, "step": 6334 }, { "epoch": 0.19460571990292752, "grad_norm": 0.5224558711051941, "learning_rate": 1.9549588972831103e-05, "loss": 0.572, "step": 6335 }, { "epoch": 0.1946364390378767, "grad_norm": 0.44954895973205566, "learning_rate": 1.954944555487702e-05, "loss": 0.7234, "step": 6336 }, { "epoch": 0.19466715817282584, "grad_norm": 0.3798839747905731, "learning_rate": 1.954930211461954e-05, "loss": 0.6387, "step": 6337 }, { "epoch": 0.19469787730777502, "grad_norm": 0.3576046824455261, "learning_rate": 1.954915865205899e-05, "loss": 0.6306, "step": 6338 }, { "epoch": 0.19472859644272417, "grad_norm": 0.3532197177410126, "learning_rate": 1.9549015167195725e-05, "loss": 0.5668, "step": 6339 }, { "epoch": 0.19475931557767334, "grad_norm": 0.39078983664512634, "learning_rate": 1.954887166003006e-05, "loss": 0.5656, "step": 6340 }, { "epoch": 0.1947900347126225, "grad_norm": 0.33794912695884705, "learning_rate": 1.954872813056234e-05, "loss": 0.5433, "step": 6341 }, { "epoch": 0.19482075384757166, "grad_norm": 0.3429848253726959, "learning_rate": 1.95485845787929e-05, "loss": 0.5833, "step": 6342 }, { "epoch": 0.1948514729825208, "grad_norm": 0.33688682317733765, "learning_rate": 1.9548441004722076e-05, "loss": 0.5662, "step": 6343 }, { "epoch": 0.19488219211746996, "grad_norm": 0.4458933174610138, "learning_rate": 1.95482974083502e-05, "loss": 0.638, "step": 6344 }, { "epoch": 0.19491291125241914, "grad_norm": 0.35366082191467285, "learning_rate": 1.9548153789677604e-05, "loss": 0.6756, "step": 6345 }, { "epoch": 0.19494363038736828, "grad_norm": 0.32455724477767944, "learning_rate": 1.9548010148704634e-05, "loss": 0.5505, "step": 6346 }, { "epoch": 0.19497434952231746, "grad_norm": 0.3431819677352905, "learning_rate": 1.9547866485431614e-05, "loss": 0.5775, "step": 6347 }, { "epoch": 0.1950050686572666, "grad_norm": 0.35958194732666016, "learning_rate": 1.954772279985889e-05, "loss": 0.628, "step": 6348 }, { "epoch": 0.19503578779221578, "grad_norm": 0.3164888918399811, "learning_rate": 1.954757909198679e-05, "loss": 0.5972, "step": 6349 }, { "epoch": 0.19506650692716493, "grad_norm": 0.3278234899044037, "learning_rate": 1.954743536181565e-05, "loss": 0.537, "step": 6350 }, { "epoch": 0.1950972260621141, "grad_norm": 0.3471832275390625, "learning_rate": 1.9547291609345813e-05, "loss": 0.5177, "step": 6351 }, { "epoch": 0.19512794519706325, "grad_norm": 0.3593100607395172, "learning_rate": 1.9547147834577605e-05, "loss": 0.5691, "step": 6352 }, { "epoch": 0.1951586643320124, "grad_norm": 0.3849337697029114, "learning_rate": 1.954700403751137e-05, "loss": 0.4631, "step": 6353 }, { "epoch": 0.19518938346696157, "grad_norm": 0.3261682689189911, "learning_rate": 1.9546860218147438e-05, "loss": 0.5741, "step": 6354 }, { "epoch": 0.19522010260191072, "grad_norm": 0.3060195744037628, "learning_rate": 1.954671637648615e-05, "loss": 0.5603, "step": 6355 }, { "epoch": 0.1952508217368599, "grad_norm": 0.355910986661911, "learning_rate": 1.9546572512527836e-05, "loss": 0.6092, "step": 6356 }, { "epoch": 0.19528154087180905, "grad_norm": 0.5905948877334595, "learning_rate": 1.9546428626272837e-05, "loss": 0.5583, "step": 6357 }, { "epoch": 0.19531226000675822, "grad_norm": 1.1826802492141724, "learning_rate": 1.9546284717721488e-05, "loss": 0.5888, "step": 6358 }, { "epoch": 0.19534297914170737, "grad_norm": 0.7341731190681458, "learning_rate": 1.954614078687412e-05, "loss": 0.5553, "step": 6359 }, { "epoch": 0.19537369827665652, "grad_norm": 0.3650178909301758, "learning_rate": 1.954599683373108e-05, "loss": 0.568, "step": 6360 }, { "epoch": 0.1954044174116057, "grad_norm": 0.3204135298728943, "learning_rate": 1.9545852858292692e-05, "loss": 0.5553, "step": 6361 }, { "epoch": 0.19543513654655484, "grad_norm": 0.32405468821525574, "learning_rate": 1.95457088605593e-05, "loss": 0.616, "step": 6362 }, { "epoch": 0.195465855681504, "grad_norm": 0.49710774421691895, "learning_rate": 1.954556484053124e-05, "loss": 0.6547, "step": 6363 }, { "epoch": 0.19549657481645316, "grad_norm": 0.3416456878185272, "learning_rate": 1.9545420798208842e-05, "loss": 0.5682, "step": 6364 }, { "epoch": 0.19552729395140234, "grad_norm": 0.3704783320426941, "learning_rate": 1.9545276733592452e-05, "loss": 0.6406, "step": 6365 }, { "epoch": 0.19555801308635148, "grad_norm": 0.3300439417362213, "learning_rate": 1.95451326466824e-05, "loss": 0.5627, "step": 6366 }, { "epoch": 0.19558873222130066, "grad_norm": 0.4302448332309723, "learning_rate": 1.9544988537479025e-05, "loss": 0.6634, "step": 6367 }, { "epoch": 0.1956194513562498, "grad_norm": 0.352956622838974, "learning_rate": 1.9544844405982658e-05, "loss": 0.7083, "step": 6368 }, { "epoch": 0.19565017049119895, "grad_norm": 0.3296522796154022, "learning_rate": 1.9544700252193648e-05, "loss": 0.5451, "step": 6369 }, { "epoch": 0.19568088962614813, "grad_norm": 0.3564494848251343, "learning_rate": 1.9544556076112316e-05, "loss": 0.5638, "step": 6370 }, { "epoch": 0.19571160876109728, "grad_norm": 0.30844566226005554, "learning_rate": 1.9544411877739012e-05, "loss": 0.6065, "step": 6371 }, { "epoch": 0.19574232789604645, "grad_norm": 0.3313705623149872, "learning_rate": 1.9544267657074067e-05, "loss": 0.6008, "step": 6372 }, { "epoch": 0.1957730470309956, "grad_norm": 0.35705164074897766, "learning_rate": 1.9544123414117817e-05, "loss": 0.5919, "step": 6373 }, { "epoch": 0.19580376616594478, "grad_norm": 0.3362959325313568, "learning_rate": 1.95439791488706e-05, "loss": 0.5642, "step": 6374 }, { "epoch": 0.19583448530089392, "grad_norm": 0.31940168142318726, "learning_rate": 1.9543834861332754e-05, "loss": 0.6267, "step": 6375 }, { "epoch": 0.1958652044358431, "grad_norm": 0.3309680223464966, "learning_rate": 1.9543690551504616e-05, "loss": 0.6019, "step": 6376 }, { "epoch": 0.19589592357079225, "grad_norm": 0.35244888067245483, "learning_rate": 1.954354621938652e-05, "loss": 0.6355, "step": 6377 }, { "epoch": 0.1959266427057414, "grad_norm": 0.38712674379348755, "learning_rate": 1.9543401864978807e-05, "loss": 0.5817, "step": 6378 }, { "epoch": 0.19595736184069057, "grad_norm": 0.3223326504230499, "learning_rate": 1.954325748828181e-05, "loss": 0.6073, "step": 6379 }, { "epoch": 0.19598808097563972, "grad_norm": 0.3413582742214203, "learning_rate": 1.954311308929587e-05, "loss": 0.5926, "step": 6380 }, { "epoch": 0.1960188001105889, "grad_norm": 0.3492734432220459, "learning_rate": 1.9542968668021324e-05, "loss": 0.6875, "step": 6381 }, { "epoch": 0.19604951924553804, "grad_norm": 0.37131163477897644, "learning_rate": 1.9542824224458508e-05, "loss": 0.5967, "step": 6382 }, { "epoch": 0.19608023838048722, "grad_norm": 0.327243447303772, "learning_rate": 1.954267975860776e-05, "loss": 0.6687, "step": 6383 }, { "epoch": 0.19611095751543636, "grad_norm": 0.37095773220062256, "learning_rate": 1.9542535270469415e-05, "loss": 0.6012, "step": 6384 }, { "epoch": 0.19614167665038554, "grad_norm": 0.3541910946369171, "learning_rate": 1.9542390760043816e-05, "loss": 0.6475, "step": 6385 }, { "epoch": 0.19617239578533469, "grad_norm": 0.31742486357688904, "learning_rate": 1.9542246227331293e-05, "loss": 0.6208, "step": 6386 }, { "epoch": 0.19620311492028383, "grad_norm": 0.3359781801700592, "learning_rate": 1.9542101672332188e-05, "loss": 0.5008, "step": 6387 }, { "epoch": 0.196233834055233, "grad_norm": 0.34380149841308594, "learning_rate": 1.954195709504684e-05, "loss": 0.5761, "step": 6388 }, { "epoch": 0.19626455319018216, "grad_norm": 0.31248006224632263, "learning_rate": 1.9541812495475584e-05, "loss": 0.5409, "step": 6389 }, { "epoch": 0.19629527232513133, "grad_norm": 0.3833654522895813, "learning_rate": 1.9541667873618756e-05, "loss": 0.5462, "step": 6390 }, { "epoch": 0.19632599146008048, "grad_norm": 0.35575249791145325, "learning_rate": 1.95415232294767e-05, "loss": 0.6042, "step": 6391 }, { "epoch": 0.19635671059502965, "grad_norm": 0.3209853172302246, "learning_rate": 1.9541378563049748e-05, "loss": 0.586, "step": 6392 }, { "epoch": 0.1963874297299788, "grad_norm": 0.33042851090431213, "learning_rate": 1.9541233874338242e-05, "loss": 0.647, "step": 6393 }, { "epoch": 0.19641814886492798, "grad_norm": 0.33431529998779297, "learning_rate": 1.9541089163342516e-05, "loss": 0.6227, "step": 6394 }, { "epoch": 0.19644886799987712, "grad_norm": 0.3249675929546356, "learning_rate": 1.9540944430062908e-05, "loss": 0.612, "step": 6395 }, { "epoch": 0.19647958713482627, "grad_norm": 0.3468773365020752, "learning_rate": 1.9540799674499764e-05, "loss": 0.5587, "step": 6396 }, { "epoch": 0.19651030626977545, "grad_norm": 0.3681642711162567, "learning_rate": 1.9540654896653413e-05, "loss": 0.6126, "step": 6397 }, { "epoch": 0.1965410254047246, "grad_norm": 0.4214549958705902, "learning_rate": 1.9540510096524193e-05, "loss": 0.5925, "step": 6398 }, { "epoch": 0.19657174453967377, "grad_norm": 0.34049078822135925, "learning_rate": 1.954036527411245e-05, "loss": 0.6023, "step": 6399 }, { "epoch": 0.19660246367462292, "grad_norm": 0.35471540689468384, "learning_rate": 1.9540220429418516e-05, "loss": 0.6169, "step": 6400 }, { "epoch": 0.1966331828095721, "grad_norm": 0.37032195925712585, "learning_rate": 1.954007556244273e-05, "loss": 0.6038, "step": 6401 }, { "epoch": 0.19666390194452124, "grad_norm": 0.5136650800704956, "learning_rate": 1.9539930673185433e-05, "loss": 0.6014, "step": 6402 }, { "epoch": 0.1966946210794704, "grad_norm": 0.3367775082588196, "learning_rate": 1.953978576164696e-05, "loss": 0.6358, "step": 6403 }, { "epoch": 0.19672534021441956, "grad_norm": 0.37105515599250793, "learning_rate": 1.9539640827827652e-05, "loss": 0.4775, "step": 6404 }, { "epoch": 0.1967560593493687, "grad_norm": 0.38934525847435, "learning_rate": 1.9539495871727845e-05, "loss": 0.6729, "step": 6405 }, { "epoch": 0.1967867784843179, "grad_norm": 0.3666454553604126, "learning_rate": 1.953935089334788e-05, "loss": 0.5843, "step": 6406 }, { "epoch": 0.19681749761926703, "grad_norm": 0.33023911714553833, "learning_rate": 1.9539205892688096e-05, "loss": 0.5685, "step": 6407 }, { "epoch": 0.1968482167542162, "grad_norm": 0.3335317373275757, "learning_rate": 1.953906086974883e-05, "loss": 0.5852, "step": 6408 }, { "epoch": 0.19687893588916536, "grad_norm": 0.3735830783843994, "learning_rate": 1.953891582453042e-05, "loss": 0.577, "step": 6409 }, { "epoch": 0.19690965502411453, "grad_norm": 0.38877856731414795, "learning_rate": 1.9538770757033208e-05, "loss": 0.6166, "step": 6410 }, { "epoch": 0.19694037415906368, "grad_norm": 0.3454803228378296, "learning_rate": 1.953862566725753e-05, "loss": 0.6363, "step": 6411 }, { "epoch": 0.19697109329401283, "grad_norm": 0.33244481682777405, "learning_rate": 1.953848055520372e-05, "loss": 0.5693, "step": 6412 }, { "epoch": 0.197001812428962, "grad_norm": 0.32584109902381897, "learning_rate": 1.953833542087213e-05, "loss": 0.6304, "step": 6413 }, { "epoch": 0.19703253156391115, "grad_norm": 0.3966572880744934, "learning_rate": 1.953819026426309e-05, "loss": 0.6978, "step": 6414 }, { "epoch": 0.19706325069886033, "grad_norm": 0.4159698486328125, "learning_rate": 1.953804508537694e-05, "loss": 0.5565, "step": 6415 }, { "epoch": 0.19709396983380947, "grad_norm": 0.3767610788345337, "learning_rate": 1.9537899884214016e-05, "loss": 0.6341, "step": 6416 }, { "epoch": 0.19712468896875865, "grad_norm": 0.3294268846511841, "learning_rate": 1.9537754660774666e-05, "loss": 0.583, "step": 6417 }, { "epoch": 0.1971554081037078, "grad_norm": 0.3436012268066406, "learning_rate": 1.953760941505922e-05, "loss": 0.6083, "step": 6418 }, { "epoch": 0.19718612723865697, "grad_norm": 0.33407509326934814, "learning_rate": 1.9537464147068025e-05, "loss": 0.5573, "step": 6419 }, { "epoch": 0.19721684637360612, "grad_norm": 0.38554853200912476, "learning_rate": 1.953731885680141e-05, "loss": 0.5887, "step": 6420 }, { "epoch": 0.19724756550855527, "grad_norm": 0.3414930999279022, "learning_rate": 1.953717354425973e-05, "loss": 0.7791, "step": 6421 }, { "epoch": 0.19727828464350444, "grad_norm": 0.3390360176563263, "learning_rate": 1.953702820944331e-05, "loss": 0.5811, "step": 6422 }, { "epoch": 0.1973090037784536, "grad_norm": 0.34445905685424805, "learning_rate": 1.9536882852352494e-05, "loss": 0.6269, "step": 6423 }, { "epoch": 0.19733972291340277, "grad_norm": 0.3204452395439148, "learning_rate": 1.9536737472987623e-05, "loss": 0.575, "step": 6424 }, { "epoch": 0.1973704420483519, "grad_norm": 0.3205937445163727, "learning_rate": 1.9536592071349034e-05, "loss": 0.5432, "step": 6425 }, { "epoch": 0.1974011611833011, "grad_norm": 0.3321768641471863, "learning_rate": 1.953644664743707e-05, "loss": 0.6257, "step": 6426 }, { "epoch": 0.19743188031825024, "grad_norm": 0.3452436029911041, "learning_rate": 1.9536301201252068e-05, "loss": 0.6241, "step": 6427 }, { "epoch": 0.1974625994531994, "grad_norm": 0.37319841980934143, "learning_rate": 1.9536155732794367e-05, "loss": 0.6531, "step": 6428 }, { "epoch": 0.19749331858814856, "grad_norm": 0.30958208441734314, "learning_rate": 1.953601024206431e-05, "loss": 0.5188, "step": 6429 }, { "epoch": 0.1975240377230977, "grad_norm": 0.33685338497161865, "learning_rate": 1.9535864729062236e-05, "loss": 0.5196, "step": 6430 }, { "epoch": 0.19755475685804688, "grad_norm": 0.46763330698013306, "learning_rate": 1.9535719193788486e-05, "loss": 0.5875, "step": 6431 }, { "epoch": 0.19758547599299603, "grad_norm": 0.39087626338005066, "learning_rate": 1.9535573636243394e-05, "loss": 0.6501, "step": 6432 }, { "epoch": 0.1976161951279452, "grad_norm": 0.32197001576423645, "learning_rate": 1.9535428056427305e-05, "loss": 0.5968, "step": 6433 }, { "epoch": 0.19764691426289435, "grad_norm": 0.30515867471694946, "learning_rate": 1.9535282454340555e-05, "loss": 0.6203, "step": 6434 }, { "epoch": 0.19767763339784353, "grad_norm": 0.3168692886829376, "learning_rate": 1.953513682998349e-05, "loss": 0.6163, "step": 6435 }, { "epoch": 0.19770835253279267, "grad_norm": 0.3341432511806488, "learning_rate": 1.9534991183356447e-05, "loss": 0.6112, "step": 6436 }, { "epoch": 0.19773907166774182, "grad_norm": 0.34830594062805176, "learning_rate": 1.9534845514459766e-05, "loss": 0.5733, "step": 6437 }, { "epoch": 0.197769790802691, "grad_norm": 0.3227662444114685, "learning_rate": 1.9534699823293787e-05, "loss": 0.5671, "step": 6438 }, { "epoch": 0.19780050993764015, "grad_norm": 0.3683364689350128, "learning_rate": 1.953455410985885e-05, "loss": 0.6314, "step": 6439 }, { "epoch": 0.19783122907258932, "grad_norm": 0.35587361454963684, "learning_rate": 1.9534408374155296e-05, "loss": 0.649, "step": 6440 }, { "epoch": 0.19786194820753847, "grad_norm": 0.34121397137641907, "learning_rate": 1.9534262616183465e-05, "loss": 0.5189, "step": 6441 }, { "epoch": 0.19789266734248764, "grad_norm": 0.3259091079235077, "learning_rate": 1.95341168359437e-05, "loss": 0.6323, "step": 6442 }, { "epoch": 0.1979233864774368, "grad_norm": 0.39886727929115295, "learning_rate": 1.9533971033436338e-05, "loss": 0.5936, "step": 6443 }, { "epoch": 0.19795410561238597, "grad_norm": 0.3315318822860718, "learning_rate": 1.9533825208661722e-05, "loss": 0.5729, "step": 6444 }, { "epoch": 0.19798482474733511, "grad_norm": 0.34765633940696716, "learning_rate": 1.953367936162019e-05, "loss": 0.5443, "step": 6445 }, { "epoch": 0.19801554388228426, "grad_norm": 0.3341686427593231, "learning_rate": 1.9533533492312085e-05, "loss": 0.5111, "step": 6446 }, { "epoch": 0.19804626301723344, "grad_norm": 0.8042997121810913, "learning_rate": 1.9533387600737744e-05, "loss": 0.6, "step": 6447 }, { "epoch": 0.19807698215218258, "grad_norm": 0.339678019285202, "learning_rate": 1.9533241686897516e-05, "loss": 0.5725, "step": 6448 }, { "epoch": 0.19810770128713176, "grad_norm": 0.375617116689682, "learning_rate": 1.953309575079173e-05, "loss": 0.6224, "step": 6449 }, { "epoch": 0.1981384204220809, "grad_norm": 0.5088332891464233, "learning_rate": 1.9532949792420737e-05, "loss": 0.6416, "step": 6450 }, { "epoch": 0.19816913955703008, "grad_norm": 0.38662755489349365, "learning_rate": 1.9532803811784873e-05, "loss": 0.5835, "step": 6451 }, { "epoch": 0.19819985869197923, "grad_norm": 0.3311774432659149, "learning_rate": 1.953265780888448e-05, "loss": 0.6123, "step": 6452 }, { "epoch": 0.1982305778269284, "grad_norm": 0.3402464985847473, "learning_rate": 1.9532511783719897e-05, "loss": 0.5265, "step": 6453 }, { "epoch": 0.19826129696187755, "grad_norm": 0.3653241693973541, "learning_rate": 1.953236573629147e-05, "loss": 0.6542, "step": 6454 }, { "epoch": 0.1982920160968267, "grad_norm": 0.3488057255744934, "learning_rate": 1.953221966659954e-05, "loss": 0.6252, "step": 6455 }, { "epoch": 0.19832273523177588, "grad_norm": 0.31706684827804565, "learning_rate": 1.9532073574644436e-05, "loss": 0.5761, "step": 6456 }, { "epoch": 0.19835345436672502, "grad_norm": 0.3225887715816498, "learning_rate": 1.9531927460426513e-05, "loss": 0.5736, "step": 6457 }, { "epoch": 0.1983841735016742, "grad_norm": 0.33263248205184937, "learning_rate": 1.9531781323946107e-05, "loss": 0.6117, "step": 6458 }, { "epoch": 0.19841489263662335, "grad_norm": 0.3264133930206299, "learning_rate": 1.9531635165203565e-05, "loss": 0.6037, "step": 6459 }, { "epoch": 0.19844561177157252, "grad_norm": 0.32914844155311584, "learning_rate": 1.9531488984199218e-05, "loss": 0.62, "step": 6460 }, { "epoch": 0.19847633090652167, "grad_norm": 0.325336754322052, "learning_rate": 1.9531342780933412e-05, "loss": 0.6141, "step": 6461 }, { "epoch": 0.19850705004147084, "grad_norm": 0.899788498878479, "learning_rate": 1.9531196555406496e-05, "loss": 0.5482, "step": 6462 }, { "epoch": 0.19853776917642, "grad_norm": 0.3570587635040283, "learning_rate": 1.95310503076188e-05, "loss": 0.6205, "step": 6463 }, { "epoch": 0.19856848831136914, "grad_norm": 0.32597875595092773, "learning_rate": 1.9530904037570672e-05, "loss": 0.5587, "step": 6464 }, { "epoch": 0.19859920744631832, "grad_norm": 0.35033726692199707, "learning_rate": 1.953075774526245e-05, "loss": 0.6204, "step": 6465 }, { "epoch": 0.19862992658126746, "grad_norm": 0.3471260964870453, "learning_rate": 1.9530611430694477e-05, "loss": 0.6511, "step": 6466 }, { "epoch": 0.19866064571621664, "grad_norm": 0.3436495065689087, "learning_rate": 1.9530465093867097e-05, "loss": 0.5785, "step": 6467 }, { "epoch": 0.19869136485116579, "grad_norm": 0.4071119725704193, "learning_rate": 1.953031873478065e-05, "loss": 0.5434, "step": 6468 }, { "epoch": 0.19872208398611496, "grad_norm": 0.3284960091114044, "learning_rate": 1.953017235343548e-05, "loss": 0.6406, "step": 6469 }, { "epoch": 0.1987528031210641, "grad_norm": 0.31884050369262695, "learning_rate": 1.9530025949831925e-05, "loss": 0.5618, "step": 6470 }, { "epoch": 0.19878352225601328, "grad_norm": 0.2948976457118988, "learning_rate": 1.952987952397033e-05, "loss": 0.5145, "step": 6471 }, { "epoch": 0.19881424139096243, "grad_norm": 0.31598225235939026, "learning_rate": 1.9529733075851032e-05, "loss": 0.556, "step": 6472 }, { "epoch": 0.19884496052591158, "grad_norm": 0.37477636337280273, "learning_rate": 1.952958660547438e-05, "loss": 0.5891, "step": 6473 }, { "epoch": 0.19887567966086075, "grad_norm": 0.34696149826049805, "learning_rate": 1.9529440112840712e-05, "loss": 0.5315, "step": 6474 }, { "epoch": 0.1989063987958099, "grad_norm": 0.32079872488975525, "learning_rate": 1.9529293597950373e-05, "loss": 0.4944, "step": 6475 }, { "epoch": 0.19893711793075908, "grad_norm": 0.3651439845561981, "learning_rate": 1.95291470608037e-05, "loss": 0.587, "step": 6476 }, { "epoch": 0.19896783706570823, "grad_norm": 0.35026660561561584, "learning_rate": 1.952900050140104e-05, "loss": 0.6038, "step": 6477 }, { "epoch": 0.1989985562006574, "grad_norm": 0.40626609325408936, "learning_rate": 1.9528853919742733e-05, "loss": 0.5608, "step": 6478 }, { "epoch": 0.19902927533560655, "grad_norm": 0.34212860465049744, "learning_rate": 1.9528707315829124e-05, "loss": 0.5661, "step": 6479 }, { "epoch": 0.1990599944705557, "grad_norm": 0.33822762966156006, "learning_rate": 1.9528560689660553e-05, "loss": 0.6121, "step": 6480 }, { "epoch": 0.19909071360550487, "grad_norm": 0.32257241010665894, "learning_rate": 1.952841404123736e-05, "loss": 0.5324, "step": 6481 }, { "epoch": 0.19912143274045402, "grad_norm": 0.37860241532325745, "learning_rate": 1.9528267370559896e-05, "loss": 0.5966, "step": 6482 }, { "epoch": 0.1991521518754032, "grad_norm": 0.3262556493282318, "learning_rate": 1.9528120677628494e-05, "loss": 0.5697, "step": 6483 }, { "epoch": 0.19918287101035234, "grad_norm": 0.3663138151168823, "learning_rate": 1.9527973962443503e-05, "loss": 0.5925, "step": 6484 }, { "epoch": 0.19921359014530152, "grad_norm": 0.314195454120636, "learning_rate": 1.9527827225005263e-05, "loss": 0.617, "step": 6485 }, { "epoch": 0.19924430928025066, "grad_norm": 0.31242072582244873, "learning_rate": 1.952768046531412e-05, "loss": 0.5457, "step": 6486 }, { "epoch": 0.19927502841519984, "grad_norm": 0.3955460786819458, "learning_rate": 1.952753368337041e-05, "loss": 0.5233, "step": 6487 }, { "epoch": 0.199305747550149, "grad_norm": 0.3515368700027466, "learning_rate": 1.952738687917448e-05, "loss": 0.7396, "step": 6488 }, { "epoch": 0.19933646668509813, "grad_norm": 0.346470445394516, "learning_rate": 1.9527240052726674e-05, "loss": 0.5776, "step": 6489 }, { "epoch": 0.1993671858200473, "grad_norm": 0.3209567070007324, "learning_rate": 1.952709320402733e-05, "loss": 0.6791, "step": 6490 }, { "epoch": 0.19939790495499646, "grad_norm": 0.3613731861114502, "learning_rate": 1.9526946333076798e-05, "loss": 0.5042, "step": 6491 }, { "epoch": 0.19942862408994563, "grad_norm": 0.3113194406032562, "learning_rate": 1.9526799439875415e-05, "loss": 0.5702, "step": 6492 }, { "epoch": 0.19945934322489478, "grad_norm": 0.3620237112045288, "learning_rate": 1.9526652524423528e-05, "loss": 0.6182, "step": 6493 }, { "epoch": 0.19949006235984396, "grad_norm": 0.35268092155456543, "learning_rate": 1.952650558672148e-05, "loss": 0.5627, "step": 6494 }, { "epoch": 0.1995207814947931, "grad_norm": 0.4094632565975189, "learning_rate": 1.952635862676961e-05, "loss": 0.6612, "step": 6495 }, { "epoch": 0.19955150062974228, "grad_norm": 0.3358514904975891, "learning_rate": 1.9526211644568267e-05, "loss": 0.6107, "step": 6496 }, { "epoch": 0.19958221976469143, "grad_norm": 0.3390800654888153, "learning_rate": 1.952606464011779e-05, "loss": 0.6081, "step": 6497 }, { "epoch": 0.19961293889964057, "grad_norm": 1.4707797765731812, "learning_rate": 1.9525917613418525e-05, "loss": 0.5629, "step": 6498 }, { "epoch": 0.19964365803458975, "grad_norm": 0.36978116631507874, "learning_rate": 1.9525770564470813e-05, "loss": 0.6267, "step": 6499 }, { "epoch": 0.1996743771695389, "grad_norm": 0.30593085289001465, "learning_rate": 1.9525623493274998e-05, "loss": 0.5504, "step": 6500 }, { "epoch": 0.19970509630448807, "grad_norm": 0.33329063653945923, "learning_rate": 1.9525476399831422e-05, "loss": 0.6537, "step": 6501 }, { "epoch": 0.19973581543943722, "grad_norm": 0.37859615683555603, "learning_rate": 1.952532928414043e-05, "loss": 0.664, "step": 6502 }, { "epoch": 0.1997665345743864, "grad_norm": 0.3044280409812927, "learning_rate": 1.9525182146202374e-05, "loss": 0.5611, "step": 6503 }, { "epoch": 0.19979725370933554, "grad_norm": 0.3246181607246399, "learning_rate": 1.9525034986017583e-05, "loss": 0.637, "step": 6504 }, { "epoch": 0.19982797284428472, "grad_norm": 0.3161492645740509, "learning_rate": 1.9524887803586413e-05, "loss": 0.529, "step": 6505 }, { "epoch": 0.19985869197923387, "grad_norm": 0.3204783499240875, "learning_rate": 1.9524740598909196e-05, "loss": 0.5051, "step": 6506 }, { "epoch": 0.199889411114183, "grad_norm": 0.33148109912872314, "learning_rate": 1.9524593371986286e-05, "loss": 0.5714, "step": 6507 }, { "epoch": 0.1999201302491322, "grad_norm": 0.3077986538410187, "learning_rate": 1.9524446122818024e-05, "loss": 0.5282, "step": 6508 }, { "epoch": 0.19995084938408134, "grad_norm": 0.3595099449157715, "learning_rate": 1.9524298851404752e-05, "loss": 0.7234, "step": 6509 }, { "epoch": 0.1999815685190305, "grad_norm": 0.3267649710178375, "learning_rate": 1.9524151557746813e-05, "loss": 0.5599, "step": 6510 }, { "epoch": 0.20001228765397966, "grad_norm": 0.34195294976234436, "learning_rate": 1.952400424184455e-05, "loss": 0.6082, "step": 6511 }, { "epoch": 0.20004300678892883, "grad_norm": 0.3701915442943573, "learning_rate": 1.9523856903698317e-05, "loss": 0.6498, "step": 6512 }, { "epoch": 0.20007372592387798, "grad_norm": 0.32870742678642273, "learning_rate": 1.952370954330845e-05, "loss": 0.6347, "step": 6513 }, { "epoch": 0.20010444505882716, "grad_norm": 0.3228437006473541, "learning_rate": 1.952356216067529e-05, "loss": 0.5496, "step": 6514 }, { "epoch": 0.2001351641937763, "grad_norm": 0.46836817264556885, "learning_rate": 1.952341475579919e-05, "loss": 0.6847, "step": 6515 }, { "epoch": 0.20016588332872545, "grad_norm": 0.34423258900642395, "learning_rate": 1.9523267328680483e-05, "loss": 0.6491, "step": 6516 }, { "epoch": 0.20019660246367463, "grad_norm": 0.30061933398246765, "learning_rate": 1.9523119879319527e-05, "loss": 0.5364, "step": 6517 }, { "epoch": 0.20022732159862378, "grad_norm": 0.3542007505893707, "learning_rate": 1.9522972407716656e-05, "loss": 0.6873, "step": 6518 }, { "epoch": 0.20025804073357295, "grad_norm": 0.32702869176864624, "learning_rate": 1.952282491387222e-05, "loss": 0.5673, "step": 6519 }, { "epoch": 0.2002887598685221, "grad_norm": 0.35464000701904297, "learning_rate": 1.952267739778656e-05, "loss": 0.6202, "step": 6520 }, { "epoch": 0.20031947900347127, "grad_norm": 0.3338501751422882, "learning_rate": 1.952252985946002e-05, "loss": 0.6467, "step": 6521 }, { "epoch": 0.20035019813842042, "grad_norm": 0.35147497057914734, "learning_rate": 1.952238229889295e-05, "loss": 0.5925, "step": 6522 }, { "epoch": 0.20038091727336957, "grad_norm": 0.33275121450424194, "learning_rate": 1.952223471608569e-05, "loss": 0.6181, "step": 6523 }, { "epoch": 0.20041163640831874, "grad_norm": 0.33873251080513, "learning_rate": 1.9522087111038584e-05, "loss": 0.6058, "step": 6524 }, { "epoch": 0.2004423555432679, "grad_norm": 0.4144977331161499, "learning_rate": 1.952193948375198e-05, "loss": 0.6776, "step": 6525 }, { "epoch": 0.20047307467821707, "grad_norm": 0.33509498834609985, "learning_rate": 1.952179183422622e-05, "loss": 0.666, "step": 6526 }, { "epoch": 0.20050379381316621, "grad_norm": 0.3504704236984253, "learning_rate": 1.952164416246165e-05, "loss": 0.5727, "step": 6527 }, { "epoch": 0.2005345129481154, "grad_norm": 1.9860402345657349, "learning_rate": 1.9521496468458617e-05, "loss": 0.5252, "step": 6528 }, { "epoch": 0.20056523208306454, "grad_norm": 0.3603843152523041, "learning_rate": 1.9521348752217465e-05, "loss": 0.6278, "step": 6529 }, { "epoch": 0.2005959512180137, "grad_norm": 0.364622563123703, "learning_rate": 1.9521201013738537e-05, "loss": 0.6363, "step": 6530 }, { "epoch": 0.20062667035296286, "grad_norm": 0.34830495715141296, "learning_rate": 1.952105325302218e-05, "loss": 0.6091, "step": 6531 }, { "epoch": 0.200657389487912, "grad_norm": 0.3232707977294922, "learning_rate": 1.9520905470068735e-05, "loss": 0.6263, "step": 6532 }, { "epoch": 0.20068810862286118, "grad_norm": 0.34429657459259033, "learning_rate": 1.9520757664878553e-05, "loss": 0.6265, "step": 6533 }, { "epoch": 0.20071882775781033, "grad_norm": 0.3794332444667816, "learning_rate": 1.9520609837451973e-05, "loss": 0.6412, "step": 6534 }, { "epoch": 0.2007495468927595, "grad_norm": 0.3291994631290436, "learning_rate": 1.952046198778935e-05, "loss": 0.551, "step": 6535 }, { "epoch": 0.20078026602770865, "grad_norm": 34.656639099121094, "learning_rate": 1.9520314115891016e-05, "loss": 0.547, "step": 6536 }, { "epoch": 0.20081098516265783, "grad_norm": 0.3402886390686035, "learning_rate": 1.952016622175733e-05, "loss": 0.5751, "step": 6537 }, { "epoch": 0.20084170429760698, "grad_norm": 0.33503004908561707, "learning_rate": 1.9520018305388627e-05, "loss": 0.6392, "step": 6538 }, { "epoch": 0.20087242343255615, "grad_norm": 0.3164066672325134, "learning_rate": 1.9519870366785257e-05, "loss": 0.5239, "step": 6539 }, { "epoch": 0.2009031425675053, "grad_norm": 0.3417780101299286, "learning_rate": 1.9519722405947563e-05, "loss": 0.5826, "step": 6540 }, { "epoch": 0.20093386170245445, "grad_norm": 0.37783390283584595, "learning_rate": 1.9519574422875894e-05, "loss": 0.6373, "step": 6541 }, { "epoch": 0.20096458083740362, "grad_norm": 0.32468944787979126, "learning_rate": 1.9519426417570596e-05, "loss": 0.5515, "step": 6542 }, { "epoch": 0.20099529997235277, "grad_norm": 0.3381183445453644, "learning_rate": 1.9519278390032012e-05, "loss": 0.6284, "step": 6543 }, { "epoch": 0.20102601910730195, "grad_norm": 0.3537866473197937, "learning_rate": 1.951913034026049e-05, "loss": 0.546, "step": 6544 }, { "epoch": 0.2010567382422511, "grad_norm": 0.34445920586586, "learning_rate": 1.951898226825637e-05, "loss": 0.6233, "step": 6545 }, { "epoch": 0.20108745737720027, "grad_norm": 0.32523414492607117, "learning_rate": 1.951883417402001e-05, "loss": 0.634, "step": 6546 }, { "epoch": 0.20111817651214942, "grad_norm": 0.36353418231010437, "learning_rate": 1.951868605755174e-05, "loss": 0.6551, "step": 6547 }, { "epoch": 0.2011488956470986, "grad_norm": 0.36003002524375916, "learning_rate": 1.9518537918851913e-05, "loss": 0.5799, "step": 6548 }, { "epoch": 0.20117961478204774, "grad_norm": 0.37651318311691284, "learning_rate": 1.9518389757920883e-05, "loss": 0.6421, "step": 6549 }, { "epoch": 0.2012103339169969, "grad_norm": 0.3531327247619629, "learning_rate": 1.951824157475898e-05, "loss": 0.6096, "step": 6550 }, { "epoch": 0.20124105305194606, "grad_norm": 0.3249610960483551, "learning_rate": 1.9518093369366568e-05, "loss": 0.5997, "step": 6551 }, { "epoch": 0.2012717721868952, "grad_norm": 0.33028489351272583, "learning_rate": 1.951794514174398e-05, "loss": 0.5798, "step": 6552 }, { "epoch": 0.20130249132184438, "grad_norm": 0.3379921019077301, "learning_rate": 1.951779689189157e-05, "loss": 0.6205, "step": 6553 }, { "epoch": 0.20133321045679353, "grad_norm": 0.5564730763435364, "learning_rate": 1.9517648619809673e-05, "loss": 0.5276, "step": 6554 }, { "epoch": 0.2013639295917427, "grad_norm": 0.3519015908241272, "learning_rate": 1.951750032549865e-05, "loss": 0.5724, "step": 6555 }, { "epoch": 0.20139464872669186, "grad_norm": 0.38281768560409546, "learning_rate": 1.9517352008958836e-05, "loss": 0.5122, "step": 6556 }, { "epoch": 0.201425367861641, "grad_norm": 0.33385083079338074, "learning_rate": 1.9517203670190582e-05, "loss": 0.5824, "step": 6557 }, { "epoch": 0.20145608699659018, "grad_norm": 0.4947761595249176, "learning_rate": 1.9517055309194236e-05, "loss": 0.6711, "step": 6558 }, { "epoch": 0.20148680613153933, "grad_norm": 0.341400146484375, "learning_rate": 1.9516906925970142e-05, "loss": 0.6294, "step": 6559 }, { "epoch": 0.2015175252664885, "grad_norm": 0.42718857526779175, "learning_rate": 1.951675852051865e-05, "loss": 0.6557, "step": 6560 }, { "epoch": 0.20154824440143765, "grad_norm": 0.307817667722702, "learning_rate": 1.9516610092840103e-05, "loss": 0.5752, "step": 6561 }, { "epoch": 0.20157896353638682, "grad_norm": 0.328447163105011, "learning_rate": 1.9516461642934845e-05, "loss": 0.5781, "step": 6562 }, { "epoch": 0.20160968267133597, "grad_norm": 0.29990145564079285, "learning_rate": 1.951631317080323e-05, "loss": 0.5708, "step": 6563 }, { "epoch": 0.20164040180628515, "grad_norm": 0.34242022037506104, "learning_rate": 1.9516164676445597e-05, "loss": 0.5446, "step": 6564 }, { "epoch": 0.2016711209412343, "grad_norm": 0.3540363013744354, "learning_rate": 1.9516016159862298e-05, "loss": 0.6032, "step": 6565 }, { "epoch": 0.20170184007618344, "grad_norm": 0.33882150053977966, "learning_rate": 1.951586762105368e-05, "loss": 0.6284, "step": 6566 }, { "epoch": 0.20173255921113262, "grad_norm": 0.49800148606300354, "learning_rate": 1.951571906002009e-05, "loss": 0.5379, "step": 6567 }, { "epoch": 0.20176327834608176, "grad_norm": 0.3575042188167572, "learning_rate": 1.951557047676187e-05, "loss": 0.606, "step": 6568 }, { "epoch": 0.20179399748103094, "grad_norm": 0.32699647545814514, "learning_rate": 1.951542187127937e-05, "loss": 0.5326, "step": 6569 }, { "epoch": 0.2018247166159801, "grad_norm": 0.32276055216789246, "learning_rate": 1.9515273243572942e-05, "loss": 0.5517, "step": 6570 }, { "epoch": 0.20185543575092926, "grad_norm": 0.3375779092311859, "learning_rate": 1.9515124593642927e-05, "loss": 0.6021, "step": 6571 }, { "epoch": 0.2018861548858784, "grad_norm": 0.35713785886764526, "learning_rate": 1.9514975921489675e-05, "loss": 0.526, "step": 6572 }, { "epoch": 0.20191687402082759, "grad_norm": 0.32916179299354553, "learning_rate": 1.951482722711353e-05, "loss": 0.5266, "step": 6573 }, { "epoch": 0.20194759315577673, "grad_norm": 0.3126731514930725, "learning_rate": 1.9514678510514843e-05, "loss": 0.484, "step": 6574 }, { "epoch": 0.20197831229072588, "grad_norm": 0.4116491675376892, "learning_rate": 1.951452977169396e-05, "loss": 0.6451, "step": 6575 }, { "epoch": 0.20200903142567506, "grad_norm": 0.3394833207130432, "learning_rate": 1.9514381010651228e-05, "loss": 0.5863, "step": 6576 }, { "epoch": 0.2020397505606242, "grad_norm": 0.37388962507247925, "learning_rate": 1.9514232227386992e-05, "loss": 0.498, "step": 6577 }, { "epoch": 0.20207046969557338, "grad_norm": 0.33847320079803467, "learning_rate": 1.9514083421901606e-05, "loss": 0.5785, "step": 6578 }, { "epoch": 0.20210118883052253, "grad_norm": 0.3102242350578308, "learning_rate": 1.951393459419541e-05, "loss": 0.5755, "step": 6579 }, { "epoch": 0.2021319079654717, "grad_norm": 0.3905925750732422, "learning_rate": 1.9513785744268757e-05, "loss": 0.4821, "step": 6580 }, { "epoch": 0.20216262710042085, "grad_norm": 0.3499460518360138, "learning_rate": 1.9513636872121996e-05, "loss": 0.5091, "step": 6581 }, { "epoch": 0.20219334623537003, "grad_norm": 0.3212675452232361, "learning_rate": 1.9513487977755467e-05, "loss": 0.5528, "step": 6582 }, { "epoch": 0.20222406537031917, "grad_norm": 0.33409202098846436, "learning_rate": 1.9513339061169526e-05, "loss": 0.6892, "step": 6583 }, { "epoch": 0.20225478450526832, "grad_norm": 0.33074238896369934, "learning_rate": 1.9513190122364515e-05, "loss": 0.4905, "step": 6584 }, { "epoch": 0.2022855036402175, "grad_norm": 0.3438633680343628, "learning_rate": 1.9513041161340786e-05, "loss": 0.5986, "step": 6585 }, { "epoch": 0.20231622277516664, "grad_norm": 0.29840245842933655, "learning_rate": 1.951289217809868e-05, "loss": 0.5467, "step": 6586 }, { "epoch": 0.20234694191011582, "grad_norm": 0.3647548258304596, "learning_rate": 1.9512743172638557e-05, "loss": 0.5311, "step": 6587 }, { "epoch": 0.20237766104506497, "grad_norm": 0.3430781364440918, "learning_rate": 1.951259414496075e-05, "loss": 0.638, "step": 6588 }, { "epoch": 0.20240838018001414, "grad_norm": 0.3473239839076996, "learning_rate": 1.951244509506562e-05, "loss": 0.5819, "step": 6589 }, { "epoch": 0.2024390993149633, "grad_norm": 0.3721223473548889, "learning_rate": 1.951229602295351e-05, "loss": 0.6251, "step": 6590 }, { "epoch": 0.20246981844991246, "grad_norm": 0.3801223039627075, "learning_rate": 1.951214692862477e-05, "loss": 0.6804, "step": 6591 }, { "epoch": 0.2025005375848616, "grad_norm": 0.3340519964694977, "learning_rate": 1.9511997812079744e-05, "loss": 0.5758, "step": 6592 }, { "epoch": 0.20253125671981076, "grad_norm": 0.3462814688682556, "learning_rate": 1.9511848673318785e-05, "loss": 0.5186, "step": 6593 }, { "epoch": 0.20256197585475993, "grad_norm": 0.32819464802742004, "learning_rate": 1.9511699512342236e-05, "loss": 0.6257, "step": 6594 }, { "epoch": 0.20259269498970908, "grad_norm": 0.36146801710128784, "learning_rate": 1.9511550329150452e-05, "loss": 0.6132, "step": 6595 }, { "epoch": 0.20262341412465826, "grad_norm": 0.34507298469543457, "learning_rate": 1.9511401123743775e-05, "loss": 0.5839, "step": 6596 }, { "epoch": 0.2026541332596074, "grad_norm": 0.3447723388671875, "learning_rate": 1.9511251896122555e-05, "loss": 0.6621, "step": 6597 }, { "epoch": 0.20268485239455658, "grad_norm": 0.3409908711910248, "learning_rate": 1.9511102646287144e-05, "loss": 0.6518, "step": 6598 }, { "epoch": 0.20271557152950573, "grad_norm": 0.34025144577026367, "learning_rate": 1.951095337423789e-05, "loss": 0.5333, "step": 6599 }, { "epoch": 0.20274629066445488, "grad_norm": 0.6077988147735596, "learning_rate": 1.9510804079975137e-05, "loss": 0.5311, "step": 6600 }, { "epoch": 0.20277700979940405, "grad_norm": 0.4779179096221924, "learning_rate": 1.9510654763499243e-05, "loss": 0.5392, "step": 6601 }, { "epoch": 0.2028077289343532, "grad_norm": 0.3389855921268463, "learning_rate": 1.9510505424810543e-05, "loss": 0.5694, "step": 6602 }, { "epoch": 0.20283844806930237, "grad_norm": 0.34442436695098877, "learning_rate": 1.95103560639094e-05, "loss": 0.6686, "step": 6603 }, { "epoch": 0.20286916720425152, "grad_norm": 0.36375540494918823, "learning_rate": 1.951020668079615e-05, "loss": 0.5489, "step": 6604 }, { "epoch": 0.2028998863392007, "grad_norm": 0.3026214838027954, "learning_rate": 1.951005727547115e-05, "loss": 0.5585, "step": 6605 }, { "epoch": 0.20293060547414984, "grad_norm": 0.360333651304245, "learning_rate": 1.9509907847934753e-05, "loss": 0.6205, "step": 6606 }, { "epoch": 0.20296132460909902, "grad_norm": 0.35661768913269043, "learning_rate": 1.9509758398187294e-05, "loss": 0.6256, "step": 6607 }, { "epoch": 0.20299204374404817, "grad_norm": 0.36203309893608093, "learning_rate": 1.9509608926229134e-05, "loss": 0.5413, "step": 6608 }, { "epoch": 0.20302276287899731, "grad_norm": 0.3284439742565155, "learning_rate": 1.9509459432060618e-05, "loss": 0.5359, "step": 6609 }, { "epoch": 0.2030534820139465, "grad_norm": 0.3345634937286377, "learning_rate": 1.9509309915682096e-05, "loss": 0.5685, "step": 6610 }, { "epoch": 0.20308420114889564, "grad_norm": 0.35636061429977417, "learning_rate": 1.9509160377093915e-05, "loss": 0.6086, "step": 6611 }, { "epoch": 0.2031149202838448, "grad_norm": 0.35787469148635864, "learning_rate": 1.950901081629643e-05, "loss": 0.5346, "step": 6612 }, { "epoch": 0.20314563941879396, "grad_norm": 0.3651569187641144, "learning_rate": 1.950886123328998e-05, "loss": 0.6436, "step": 6613 }, { "epoch": 0.20317635855374314, "grad_norm": 0.3492932915687561, "learning_rate": 1.9508711628074922e-05, "loss": 0.5594, "step": 6614 }, { "epoch": 0.20320707768869228, "grad_norm": 0.6106798648834229, "learning_rate": 1.9508562000651606e-05, "loss": 0.6266, "step": 6615 }, { "epoch": 0.20323779682364146, "grad_norm": 0.3525736331939697, "learning_rate": 1.9508412351020374e-05, "loss": 0.5914, "step": 6616 }, { "epoch": 0.2032685159585906, "grad_norm": 0.3383195102214813, "learning_rate": 1.9508262679181585e-05, "loss": 0.5512, "step": 6617 }, { "epoch": 0.20329923509353975, "grad_norm": 0.5529532432556152, "learning_rate": 1.9508112985135588e-05, "loss": 0.6105, "step": 6618 }, { "epoch": 0.20332995422848893, "grad_norm": 0.33137595653533936, "learning_rate": 1.9507963268882727e-05, "loss": 0.6788, "step": 6619 }, { "epoch": 0.20336067336343808, "grad_norm": 0.43586328625679016, "learning_rate": 1.950781353042335e-05, "loss": 0.5824, "step": 6620 }, { "epoch": 0.20339139249838725, "grad_norm": 0.4539675712585449, "learning_rate": 1.950766376975781e-05, "loss": 0.6391, "step": 6621 }, { "epoch": 0.2034221116333364, "grad_norm": 0.33548209071159363, "learning_rate": 1.950751398688646e-05, "loss": 0.5677, "step": 6622 }, { "epoch": 0.20345283076828558, "grad_norm": 0.32829052209854126, "learning_rate": 1.9507364181809653e-05, "loss": 0.6013, "step": 6623 }, { "epoch": 0.20348354990323472, "grad_norm": 0.32593879103660583, "learning_rate": 1.9507214354527724e-05, "loss": 0.584, "step": 6624 }, { "epoch": 0.2035142690381839, "grad_norm": 0.31636694073677063, "learning_rate": 1.9507064505041036e-05, "loss": 0.5987, "step": 6625 }, { "epoch": 0.20354498817313305, "grad_norm": 0.3085525929927826, "learning_rate": 1.9506914633349933e-05, "loss": 0.5357, "step": 6626 }, { "epoch": 0.2035757073080822, "grad_norm": 0.3349277079105377, "learning_rate": 1.9506764739454768e-05, "loss": 0.5676, "step": 6627 }, { "epoch": 0.20360642644303137, "grad_norm": 0.3635837435722351, "learning_rate": 1.9506614823355892e-05, "loss": 0.6065, "step": 6628 }, { "epoch": 0.20363714557798052, "grad_norm": 0.3902001678943634, "learning_rate": 1.950646488505365e-05, "loss": 0.6437, "step": 6629 }, { "epoch": 0.2036678647129297, "grad_norm": 0.32659780979156494, "learning_rate": 1.9506314924548396e-05, "loss": 0.5532, "step": 6630 }, { "epoch": 0.20369858384787884, "grad_norm": 0.3552607297897339, "learning_rate": 1.950616494184048e-05, "loss": 0.5745, "step": 6631 }, { "epoch": 0.20372930298282801, "grad_norm": 0.3416440784931183, "learning_rate": 1.9506014936930252e-05, "loss": 0.525, "step": 6632 }, { "epoch": 0.20376002211777716, "grad_norm": 0.36332592368125916, "learning_rate": 1.9505864909818062e-05, "loss": 0.6524, "step": 6633 }, { "epoch": 0.2037907412527263, "grad_norm": 0.4168027937412262, "learning_rate": 1.9505714860504258e-05, "loss": 0.5858, "step": 6634 }, { "epoch": 0.20382146038767548, "grad_norm": 0.3072514832019806, "learning_rate": 1.9505564788989198e-05, "loss": 0.5645, "step": 6635 }, { "epoch": 0.20385217952262463, "grad_norm": 0.3746810257434845, "learning_rate": 1.9505414695273225e-05, "loss": 0.5339, "step": 6636 }, { "epoch": 0.2038828986575738, "grad_norm": 0.78830885887146, "learning_rate": 1.9505264579356695e-05, "loss": 0.6362, "step": 6637 }, { "epoch": 0.20391361779252296, "grad_norm": 0.3537925183773041, "learning_rate": 1.9505114441239953e-05, "loss": 0.5514, "step": 6638 }, { "epoch": 0.20394433692747213, "grad_norm": 0.32247769832611084, "learning_rate": 1.950496428092335e-05, "loss": 0.5818, "step": 6639 }, { "epoch": 0.20397505606242128, "grad_norm": 0.3861912190914154, "learning_rate": 1.9504814098407243e-05, "loss": 0.585, "step": 6640 }, { "epoch": 0.20400577519737045, "grad_norm": 0.37115198373794556, "learning_rate": 1.9504663893691976e-05, "loss": 0.5404, "step": 6641 }, { "epoch": 0.2040364943323196, "grad_norm": 0.3092114329338074, "learning_rate": 1.9504513666777904e-05, "loss": 0.6405, "step": 6642 }, { "epoch": 0.20406721346726875, "grad_norm": 0.3151083290576935, "learning_rate": 1.9504363417665378e-05, "loss": 0.614, "step": 6643 }, { "epoch": 0.20409793260221792, "grad_norm": 0.3517356812953949, "learning_rate": 1.9504213146354748e-05, "loss": 0.657, "step": 6644 }, { "epoch": 0.20412865173716707, "grad_norm": 0.34967732429504395, "learning_rate": 1.9504062852846363e-05, "loss": 0.5926, "step": 6645 }, { "epoch": 0.20415937087211625, "grad_norm": 0.3604682683944702, "learning_rate": 1.950391253714057e-05, "loss": 0.5631, "step": 6646 }, { "epoch": 0.2041900900070654, "grad_norm": 1.7231206893920898, "learning_rate": 1.9503762199237735e-05, "loss": 0.5797, "step": 6647 }, { "epoch": 0.20422080914201457, "grad_norm": 0.3516538143157959, "learning_rate": 1.9503611839138194e-05, "loss": 0.6041, "step": 6648 }, { "epoch": 0.20425152827696372, "grad_norm": 0.37410831451416016, "learning_rate": 1.9503461456842304e-05, "loss": 0.5154, "step": 6649 }, { "epoch": 0.2042822474119129, "grad_norm": 0.3604680001735687, "learning_rate": 1.9503311052350417e-05, "loss": 0.6308, "step": 6650 }, { "epoch": 0.20431296654686204, "grad_norm": 0.3361424207687378, "learning_rate": 1.9503160625662886e-05, "loss": 0.6071, "step": 6651 }, { "epoch": 0.2043436856818112, "grad_norm": 0.41705843806266785, "learning_rate": 1.9503010176780055e-05, "loss": 0.672, "step": 6652 }, { "epoch": 0.20437440481676036, "grad_norm": 0.3414507806301117, "learning_rate": 1.9502859705702285e-05, "loss": 0.623, "step": 6653 }, { "epoch": 0.2044051239517095, "grad_norm": 0.34335505962371826, "learning_rate": 1.9502709212429917e-05, "loss": 0.608, "step": 6654 }, { "epoch": 0.2044358430866587, "grad_norm": 0.2996540367603302, "learning_rate": 1.950255869696331e-05, "loss": 0.5344, "step": 6655 }, { "epoch": 0.20446656222160783, "grad_norm": 0.3551822006702423, "learning_rate": 1.9502408159302818e-05, "loss": 0.5313, "step": 6656 }, { "epoch": 0.204497281356557, "grad_norm": 0.37401479482650757, "learning_rate": 1.950225759944878e-05, "loss": 0.6485, "step": 6657 }, { "epoch": 0.20452800049150616, "grad_norm": 0.354064404964447, "learning_rate": 1.950210701740156e-05, "loss": 0.6792, "step": 6658 }, { "epoch": 0.20455871962645533, "grad_norm": 0.4036164879798889, "learning_rate": 1.9501956413161506e-05, "loss": 0.634, "step": 6659 }, { "epoch": 0.20458943876140448, "grad_norm": 0.3556685745716095, "learning_rate": 1.950180578672897e-05, "loss": 0.5781, "step": 6660 }, { "epoch": 0.20462015789635363, "grad_norm": 0.3513967990875244, "learning_rate": 1.95016551381043e-05, "loss": 0.5582, "step": 6661 }, { "epoch": 0.2046508770313028, "grad_norm": 0.3364560306072235, "learning_rate": 1.9501504467287854e-05, "loss": 0.629, "step": 6662 }, { "epoch": 0.20468159616625195, "grad_norm": 0.3356948494911194, "learning_rate": 1.9501353774279977e-05, "loss": 0.5625, "step": 6663 }, { "epoch": 0.20471231530120113, "grad_norm": 0.3466673195362091, "learning_rate": 1.950120305908103e-05, "loss": 0.6096, "step": 6664 }, { "epoch": 0.20474303443615027, "grad_norm": 0.3192053437232971, "learning_rate": 1.9501052321691357e-05, "loss": 0.6211, "step": 6665 }, { "epoch": 0.20477375357109945, "grad_norm": 0.3372943699359894, "learning_rate": 1.9500901562111312e-05, "loss": 0.5525, "step": 6666 }, { "epoch": 0.2048044727060486, "grad_norm": 0.3789060115814209, "learning_rate": 1.9500750780341247e-05, "loss": 0.5996, "step": 6667 }, { "epoch": 0.20483519184099777, "grad_norm": 0.32060423493385315, "learning_rate": 1.9500599976381517e-05, "loss": 0.5865, "step": 6668 }, { "epoch": 0.20486591097594692, "grad_norm": 0.37170183658599854, "learning_rate": 1.9500449150232472e-05, "loss": 0.6511, "step": 6669 }, { "epoch": 0.20489663011089607, "grad_norm": 0.3629882335662842, "learning_rate": 1.9500298301894463e-05, "loss": 0.5579, "step": 6670 }, { "epoch": 0.20492734924584524, "grad_norm": 0.35207095742225647, "learning_rate": 1.9500147431367847e-05, "loss": 0.5543, "step": 6671 }, { "epoch": 0.2049580683807944, "grad_norm": 0.31447380781173706, "learning_rate": 1.9499996538652966e-05, "loss": 0.5636, "step": 6672 }, { "epoch": 0.20498878751574356, "grad_norm": 0.3467654585838318, "learning_rate": 1.949984562375019e-05, "loss": 0.5454, "step": 6673 }, { "epoch": 0.2050195066506927, "grad_norm": 0.3276960849761963, "learning_rate": 1.9499694686659855e-05, "loss": 0.6612, "step": 6674 }, { "epoch": 0.2050502257856419, "grad_norm": 0.421615868806839, "learning_rate": 1.949954372738232e-05, "loss": 0.5989, "step": 6675 }, { "epoch": 0.20508094492059104, "grad_norm": 0.33358386158943176, "learning_rate": 1.9499392745917935e-05, "loss": 0.4902, "step": 6676 }, { "epoch": 0.20511166405554018, "grad_norm": 0.3267568051815033, "learning_rate": 1.9499241742267057e-05, "loss": 0.5212, "step": 6677 }, { "epoch": 0.20514238319048936, "grad_norm": 0.3408997654914856, "learning_rate": 1.949909071643004e-05, "loss": 0.6018, "step": 6678 }, { "epoch": 0.2051731023254385, "grad_norm": 0.3418179452419281, "learning_rate": 1.9498939668407228e-05, "loss": 0.6623, "step": 6679 }, { "epoch": 0.20520382146038768, "grad_norm": 0.34425777196884155, "learning_rate": 1.949878859819898e-05, "loss": 0.5995, "step": 6680 }, { "epoch": 0.20523454059533683, "grad_norm": 0.35966745018959045, "learning_rate": 1.949863750580565e-05, "loss": 0.5284, "step": 6681 }, { "epoch": 0.205265259730286, "grad_norm": 0.37085482478141785, "learning_rate": 1.9498486391227586e-05, "loss": 0.6566, "step": 6682 }, { "epoch": 0.20529597886523515, "grad_norm": 0.3512696623802185, "learning_rate": 1.949833525446515e-05, "loss": 0.6425, "step": 6683 }, { "epoch": 0.20532669800018433, "grad_norm": 0.3685568571090698, "learning_rate": 1.949818409551868e-05, "loss": 0.5535, "step": 6684 }, { "epoch": 0.20535741713513347, "grad_norm": 0.4878056049346924, "learning_rate": 1.9498032914388543e-05, "loss": 0.6469, "step": 6685 }, { "epoch": 0.20538813627008262, "grad_norm": 0.333373486995697, "learning_rate": 1.9497881711075086e-05, "loss": 0.5711, "step": 6686 }, { "epoch": 0.2054188554050318, "grad_norm": 0.3257802128791809, "learning_rate": 1.9497730485578662e-05, "loss": 0.6063, "step": 6687 }, { "epoch": 0.20544957453998094, "grad_norm": 0.33832573890686035, "learning_rate": 1.9497579237899625e-05, "loss": 0.6447, "step": 6688 }, { "epoch": 0.20548029367493012, "grad_norm": 0.3269714117050171, "learning_rate": 1.949742796803833e-05, "loss": 0.5458, "step": 6689 }, { "epoch": 0.20551101280987927, "grad_norm": 0.4572744071483612, "learning_rate": 1.9497276675995126e-05, "loss": 0.6623, "step": 6690 }, { "epoch": 0.20554173194482844, "grad_norm": 0.3461678922176361, "learning_rate": 1.949712536177037e-05, "loss": 0.6177, "step": 6691 }, { "epoch": 0.2055724510797776, "grad_norm": 0.365709125995636, "learning_rate": 1.9496974025364417e-05, "loss": 0.5919, "step": 6692 }, { "epoch": 0.20560317021472677, "grad_norm": 0.3600093424320221, "learning_rate": 1.9496822666777614e-05, "loss": 0.598, "step": 6693 }, { "epoch": 0.2056338893496759, "grad_norm": 0.3331211805343628, "learning_rate": 1.949667128601032e-05, "loss": 0.5732, "step": 6694 }, { "epoch": 0.20566460848462506, "grad_norm": 0.3261450529098511, "learning_rate": 1.9496519883062887e-05, "loss": 0.5509, "step": 6695 }, { "epoch": 0.20569532761957424, "grad_norm": 0.3141263425350189, "learning_rate": 1.9496368457935672e-05, "loss": 0.5989, "step": 6696 }, { "epoch": 0.20572604675452338, "grad_norm": 0.3514042794704437, "learning_rate": 1.949621701062902e-05, "loss": 0.5906, "step": 6697 }, { "epoch": 0.20575676588947256, "grad_norm": 0.34993547201156616, "learning_rate": 1.9496065541143294e-05, "loss": 0.57, "step": 6698 }, { "epoch": 0.2057874850244217, "grad_norm": 0.32500821352005005, "learning_rate": 1.9495914049478843e-05, "loss": 0.5551, "step": 6699 }, { "epoch": 0.20581820415937088, "grad_norm": 0.44382303953170776, "learning_rate": 1.949576253563602e-05, "loss": 0.5861, "step": 6700 }, { "epoch": 0.20584892329432003, "grad_norm": 0.33887532353401184, "learning_rate": 1.949561099961518e-05, "loss": 0.5753, "step": 6701 }, { "epoch": 0.2058796424292692, "grad_norm": 0.3592597246170044, "learning_rate": 1.9495459441416676e-05, "loss": 0.5994, "step": 6702 }, { "epoch": 0.20591036156421835, "grad_norm": 0.32306522130966187, "learning_rate": 1.9495307861040865e-05, "loss": 0.5242, "step": 6703 }, { "epoch": 0.2059410806991675, "grad_norm": 0.3629536032676697, "learning_rate": 1.94951562584881e-05, "loss": 0.6439, "step": 6704 }, { "epoch": 0.20597179983411668, "grad_norm": 0.9498761892318726, "learning_rate": 1.9495004633758738e-05, "loss": 0.5557, "step": 6705 }, { "epoch": 0.20600251896906582, "grad_norm": 0.3656226694583893, "learning_rate": 1.9494852986853124e-05, "loss": 0.6494, "step": 6706 }, { "epoch": 0.206033238104015, "grad_norm": 0.33172282576560974, "learning_rate": 1.949470131777162e-05, "loss": 0.4837, "step": 6707 }, { "epoch": 0.20606395723896415, "grad_norm": 0.5927642583847046, "learning_rate": 1.9494549626514576e-05, "loss": 0.598, "step": 6708 }, { "epoch": 0.20609467637391332, "grad_norm": 0.32943350076675415, "learning_rate": 1.949439791308235e-05, "loss": 0.5841, "step": 6709 }, { "epoch": 0.20612539550886247, "grad_norm": 0.3079349994659424, "learning_rate": 1.9494246177475296e-05, "loss": 0.4915, "step": 6710 }, { "epoch": 0.20615611464381162, "grad_norm": 0.33529531955718994, "learning_rate": 1.9494094419693767e-05, "loss": 0.5619, "step": 6711 }, { "epoch": 0.2061868337787608, "grad_norm": 0.34418290853500366, "learning_rate": 1.949394263973811e-05, "loss": 0.5565, "step": 6712 }, { "epoch": 0.20621755291370994, "grad_norm": 0.33632805943489075, "learning_rate": 1.94937908376087e-05, "loss": 0.5229, "step": 6713 }, { "epoch": 0.20624827204865911, "grad_norm": 0.3438440263271332, "learning_rate": 1.9493639013305867e-05, "loss": 0.6661, "step": 6714 }, { "epoch": 0.20627899118360826, "grad_norm": 0.314663827419281, "learning_rate": 1.949348716682998e-05, "loss": 0.5961, "step": 6715 }, { "epoch": 0.20630971031855744, "grad_norm": 0.3189111649990082, "learning_rate": 1.9493335298181396e-05, "loss": 0.529, "step": 6716 }, { "epoch": 0.20634042945350659, "grad_norm": 0.3500652313232422, "learning_rate": 1.9493183407360457e-05, "loss": 0.5742, "step": 6717 }, { "epoch": 0.20637114858845576, "grad_norm": 0.3489617109298706, "learning_rate": 1.949303149436753e-05, "loss": 0.6067, "step": 6718 }, { "epoch": 0.2064018677234049, "grad_norm": 0.3479386866092682, "learning_rate": 1.9492879559202963e-05, "loss": 0.6256, "step": 6719 }, { "epoch": 0.20643258685835406, "grad_norm": 0.33005088567733765, "learning_rate": 1.9492727601867115e-05, "loss": 0.5821, "step": 6720 }, { "epoch": 0.20646330599330323, "grad_norm": 0.2937559485435486, "learning_rate": 1.9492575622360337e-05, "loss": 0.5816, "step": 6721 }, { "epoch": 0.20649402512825238, "grad_norm": 0.3080694377422333, "learning_rate": 1.9492423620682985e-05, "loss": 0.5551, "step": 6722 }, { "epoch": 0.20652474426320155, "grad_norm": 0.455829918384552, "learning_rate": 1.9492271596835414e-05, "loss": 0.6135, "step": 6723 }, { "epoch": 0.2065554633981507, "grad_norm": 0.34799233078956604, "learning_rate": 1.949211955081798e-05, "loss": 0.5795, "step": 6724 }, { "epoch": 0.20658618253309988, "grad_norm": 0.35359013080596924, "learning_rate": 1.949196748263104e-05, "loss": 0.6636, "step": 6725 }, { "epoch": 0.20661690166804902, "grad_norm": 0.2990283668041229, "learning_rate": 1.9491815392274947e-05, "loss": 0.5921, "step": 6726 }, { "epoch": 0.2066476208029982, "grad_norm": 0.328865110874176, "learning_rate": 1.9491663279750057e-05, "loss": 0.6448, "step": 6727 }, { "epoch": 0.20667833993794735, "grad_norm": 0.3477078080177307, "learning_rate": 1.949151114505672e-05, "loss": 0.5982, "step": 6728 }, { "epoch": 0.2067090590728965, "grad_norm": 0.4893229007720947, "learning_rate": 1.94913589881953e-05, "loss": 0.5703, "step": 6729 }, { "epoch": 0.20673977820784567, "grad_norm": 0.33547571301460266, "learning_rate": 1.9491206809166147e-05, "loss": 0.5753, "step": 6730 }, { "epoch": 0.20677049734279482, "grad_norm": 0.40696045756340027, "learning_rate": 1.949105460796962e-05, "loss": 0.5449, "step": 6731 }, { "epoch": 0.206801216477744, "grad_norm": 0.3171321153640747, "learning_rate": 1.9490902384606066e-05, "loss": 0.5575, "step": 6732 }, { "epoch": 0.20683193561269314, "grad_norm": 0.3086630403995514, "learning_rate": 1.949075013907585e-05, "loss": 0.5874, "step": 6733 }, { "epoch": 0.20686265474764232, "grad_norm": 0.4153421223163605, "learning_rate": 1.9490597871379325e-05, "loss": 0.5058, "step": 6734 }, { "epoch": 0.20689337388259146, "grad_norm": 0.34306612610816956, "learning_rate": 1.9490445581516845e-05, "loss": 0.5239, "step": 6735 }, { "epoch": 0.20692409301754064, "grad_norm": 0.3167012631893158, "learning_rate": 1.949029326948877e-05, "loss": 0.628, "step": 6736 }, { "epoch": 0.2069548121524898, "grad_norm": 0.3090924918651581, "learning_rate": 1.949014093529545e-05, "loss": 0.6109, "step": 6737 }, { "epoch": 0.20698553128743893, "grad_norm": 0.3655340373516083, "learning_rate": 1.9489988578937236e-05, "loss": 0.5591, "step": 6738 }, { "epoch": 0.2070162504223881, "grad_norm": 0.3268328309059143, "learning_rate": 1.94898362004145e-05, "loss": 0.5176, "step": 6739 }, { "epoch": 0.20704696955733726, "grad_norm": 0.33985400199890137, "learning_rate": 1.9489683799727584e-05, "loss": 0.5664, "step": 6740 }, { "epoch": 0.20707768869228643, "grad_norm": 0.35410717129707336, "learning_rate": 1.9489531376876853e-05, "loss": 0.6021, "step": 6741 }, { "epoch": 0.20710840782723558, "grad_norm": 0.33364248275756836, "learning_rate": 1.9489378931862653e-05, "loss": 0.6565, "step": 6742 }, { "epoch": 0.20713912696218476, "grad_norm": 0.35372763872146606, "learning_rate": 1.9489226464685352e-05, "loss": 0.6165, "step": 6743 }, { "epoch": 0.2071698460971339, "grad_norm": 0.3580309748649597, "learning_rate": 1.9489073975345295e-05, "loss": 0.4611, "step": 6744 }, { "epoch": 0.20720056523208308, "grad_norm": 0.33830949664115906, "learning_rate": 1.9488921463842848e-05, "loss": 0.5947, "step": 6745 }, { "epoch": 0.20723128436703223, "grad_norm": 0.3285672068595886, "learning_rate": 1.9488768930178357e-05, "loss": 0.6037, "step": 6746 }, { "epoch": 0.20726200350198137, "grad_norm": 0.36356693506240845, "learning_rate": 1.9488616374352187e-05, "loss": 0.6071, "step": 6747 }, { "epoch": 0.20729272263693055, "grad_norm": 0.3461548089981079, "learning_rate": 1.948846379636469e-05, "loss": 0.6122, "step": 6748 }, { "epoch": 0.2073234417718797, "grad_norm": 0.35301148891448975, "learning_rate": 1.9488311196216224e-05, "loss": 0.657, "step": 6749 }, { "epoch": 0.20735416090682887, "grad_norm": 0.3461911678314209, "learning_rate": 1.9488158573907142e-05, "loss": 0.6252, "step": 6750 }, { "epoch": 0.20738488004177802, "grad_norm": 0.3336573839187622, "learning_rate": 1.9488005929437802e-05, "loss": 0.5627, "step": 6751 }, { "epoch": 0.2074155991767272, "grad_norm": 0.32154688239097595, "learning_rate": 1.9487853262808565e-05, "loss": 0.6428, "step": 6752 }, { "epoch": 0.20744631831167634, "grad_norm": 0.3433793783187866, "learning_rate": 1.9487700574019785e-05, "loss": 0.5338, "step": 6753 }, { "epoch": 0.2074770374466255, "grad_norm": 0.3497168719768524, "learning_rate": 1.9487547863071816e-05, "loss": 0.6273, "step": 6754 }, { "epoch": 0.20750775658157466, "grad_norm": 0.39206165075302124, "learning_rate": 1.948739512996501e-05, "loss": 0.6121, "step": 6755 }, { "epoch": 0.2075384757165238, "grad_norm": 0.34223899245262146, "learning_rate": 1.948724237469974e-05, "loss": 0.6694, "step": 6756 }, { "epoch": 0.207569194851473, "grad_norm": 0.3360214829444885, "learning_rate": 1.9487089597276347e-05, "loss": 0.5222, "step": 6757 }, { "epoch": 0.20759991398642214, "grad_norm": 0.32551413774490356, "learning_rate": 1.9486936797695196e-05, "loss": 0.5571, "step": 6758 }, { "epoch": 0.2076306331213713, "grad_norm": 0.31696659326553345, "learning_rate": 1.9486783975956642e-05, "loss": 0.5926, "step": 6759 }, { "epoch": 0.20766135225632046, "grad_norm": 0.3646683990955353, "learning_rate": 1.948663113206104e-05, "loss": 0.6432, "step": 6760 }, { "epoch": 0.20769207139126963, "grad_norm": 0.36895596981048584, "learning_rate": 1.948647826600875e-05, "loss": 0.5775, "step": 6761 }, { "epoch": 0.20772279052621878, "grad_norm": 0.32395535707473755, "learning_rate": 1.9486325377800125e-05, "loss": 0.6439, "step": 6762 }, { "epoch": 0.20775350966116793, "grad_norm": 0.32173487544059753, "learning_rate": 1.9486172467435525e-05, "loss": 0.5906, "step": 6763 }, { "epoch": 0.2077842287961171, "grad_norm": 0.3746422529220581, "learning_rate": 1.9486019534915307e-05, "loss": 0.651, "step": 6764 }, { "epoch": 0.20781494793106625, "grad_norm": 0.32395410537719727, "learning_rate": 1.9485866580239832e-05, "loss": 0.5481, "step": 6765 }, { "epoch": 0.20784566706601543, "grad_norm": 0.36253607273101807, "learning_rate": 1.9485713603409447e-05, "loss": 0.5655, "step": 6766 }, { "epoch": 0.20787638620096457, "grad_norm": 0.3335859775543213, "learning_rate": 1.948556060442452e-05, "loss": 0.4681, "step": 6767 }, { "epoch": 0.20790710533591375, "grad_norm": 0.3325766623020172, "learning_rate": 1.94854075832854e-05, "loss": 0.6084, "step": 6768 }, { "epoch": 0.2079378244708629, "grad_norm": 0.3553239703178406, "learning_rate": 1.948525453999245e-05, "loss": 0.4982, "step": 6769 }, { "epoch": 0.20796854360581207, "grad_norm": 0.36132708191871643, "learning_rate": 1.9485101474546028e-05, "loss": 0.5845, "step": 6770 }, { "epoch": 0.20799926274076122, "grad_norm": 0.3245886564254761, "learning_rate": 1.9484948386946485e-05, "loss": 0.5711, "step": 6771 }, { "epoch": 0.20802998187571037, "grad_norm": 0.3502661883831024, "learning_rate": 1.9484795277194185e-05, "loss": 0.6252, "step": 6772 }, { "epoch": 0.20806070101065954, "grad_norm": 0.3425042927265167, "learning_rate": 1.948464214528948e-05, "loss": 0.6092, "step": 6773 }, { "epoch": 0.2080914201456087, "grad_norm": 0.3363659381866455, "learning_rate": 1.9484488991232734e-05, "loss": 0.6569, "step": 6774 }, { "epoch": 0.20812213928055787, "grad_norm": 0.4108564555644989, "learning_rate": 1.9484335815024302e-05, "loss": 0.6118, "step": 6775 }, { "epoch": 0.208152858415507, "grad_norm": 0.3707883358001709, "learning_rate": 1.948418261666454e-05, "loss": 0.6176, "step": 6776 }, { "epoch": 0.2081835775504562, "grad_norm": 0.35873401165008545, "learning_rate": 1.9484029396153807e-05, "loss": 0.6354, "step": 6777 }, { "epoch": 0.20821429668540534, "grad_norm": 0.35628142952919006, "learning_rate": 1.9483876153492463e-05, "loss": 0.6387, "step": 6778 }, { "epoch": 0.2082450158203545, "grad_norm": 0.3002642095088959, "learning_rate": 1.948372288868086e-05, "loss": 0.5412, "step": 6779 }, { "epoch": 0.20827573495530366, "grad_norm": 0.35992079973220825, "learning_rate": 1.9483569601719364e-05, "loss": 0.6691, "step": 6780 }, { "epoch": 0.2083064540902528, "grad_norm": 0.33275890350341797, "learning_rate": 1.9483416292608325e-05, "loss": 0.621, "step": 6781 }, { "epoch": 0.20833717322520198, "grad_norm": 0.36708688735961914, "learning_rate": 1.9483262961348107e-05, "loss": 0.5959, "step": 6782 }, { "epoch": 0.20836789236015113, "grad_norm": 0.34460917115211487, "learning_rate": 1.9483109607939064e-05, "loss": 0.5578, "step": 6783 }, { "epoch": 0.2083986114951003, "grad_norm": 0.35045233368873596, "learning_rate": 1.9482956232381564e-05, "loss": 0.5472, "step": 6784 }, { "epoch": 0.20842933063004945, "grad_norm": 0.3484019935131073, "learning_rate": 1.9482802834675947e-05, "loss": 0.517, "step": 6785 }, { "epoch": 0.20846004976499863, "grad_norm": 0.32270124554634094, "learning_rate": 1.9482649414822588e-05, "loss": 0.6547, "step": 6786 }, { "epoch": 0.20849076889994778, "grad_norm": 0.3300982713699341, "learning_rate": 1.9482495972821838e-05, "loss": 0.6363, "step": 6787 }, { "epoch": 0.20852148803489692, "grad_norm": 0.36167359352111816, "learning_rate": 1.9482342508674056e-05, "loss": 0.6265, "step": 6788 }, { "epoch": 0.2085522071698461, "grad_norm": 0.6796222925186157, "learning_rate": 1.9482189022379598e-05, "loss": 0.6427, "step": 6789 }, { "epoch": 0.20858292630479525, "grad_norm": 0.35929855704307556, "learning_rate": 1.948203551393883e-05, "loss": 0.5661, "step": 6790 }, { "epoch": 0.20861364543974442, "grad_norm": 0.342508465051651, "learning_rate": 1.94818819833521e-05, "loss": 0.6576, "step": 6791 }, { "epoch": 0.20864436457469357, "grad_norm": 0.3828396499156952, "learning_rate": 1.948172843061978e-05, "loss": 0.6442, "step": 6792 }, { "epoch": 0.20867508370964274, "grad_norm": 0.30561819672584534, "learning_rate": 1.9481574855742217e-05, "loss": 0.4889, "step": 6793 }, { "epoch": 0.2087058028445919, "grad_norm": 0.31821557879447937, "learning_rate": 1.9481421258719773e-05, "loss": 0.5168, "step": 6794 }, { "epoch": 0.20873652197954107, "grad_norm": 0.35780394077301025, "learning_rate": 1.948126763955281e-05, "loss": 0.5778, "step": 6795 }, { "epoch": 0.20876724111449022, "grad_norm": 0.343059778213501, "learning_rate": 1.9481113998241682e-05, "loss": 0.5675, "step": 6796 }, { "epoch": 0.20879796024943936, "grad_norm": 0.36691999435424805, "learning_rate": 1.9480960334786753e-05, "loss": 0.6534, "step": 6797 }, { "epoch": 0.20882867938438854, "grad_norm": 0.33397820591926575, "learning_rate": 1.9480806649188377e-05, "loss": 0.6542, "step": 6798 }, { "epoch": 0.20885939851933769, "grad_norm": 0.3316566050052643, "learning_rate": 1.9480652941446912e-05, "loss": 0.5522, "step": 6799 }, { "epoch": 0.20889011765428686, "grad_norm": 0.3801775574684143, "learning_rate": 1.9480499211562723e-05, "loss": 0.6214, "step": 6800 }, { "epoch": 0.208920836789236, "grad_norm": 0.33417773246765137, "learning_rate": 1.9480345459536164e-05, "loss": 0.61, "step": 6801 }, { "epoch": 0.20895155592418518, "grad_norm": 0.3459598124027252, "learning_rate": 1.9480191685367598e-05, "loss": 0.5863, "step": 6802 }, { "epoch": 0.20898227505913433, "grad_norm": 0.33064717054367065, "learning_rate": 1.948003788905738e-05, "loss": 0.6296, "step": 6803 }, { "epoch": 0.2090129941940835, "grad_norm": 0.40053218603134155, "learning_rate": 1.9479884070605877e-05, "loss": 0.5637, "step": 6804 }, { "epoch": 0.20904371332903265, "grad_norm": 0.3567271828651428, "learning_rate": 1.9479730230013438e-05, "loss": 0.5237, "step": 6805 }, { "epoch": 0.2090744324639818, "grad_norm": 0.36614957451820374, "learning_rate": 1.9479576367280428e-05, "loss": 0.6784, "step": 6806 }, { "epoch": 0.20910515159893098, "grad_norm": 0.37432631850242615, "learning_rate": 1.9479422482407204e-05, "loss": 0.5682, "step": 6807 }, { "epoch": 0.20913587073388012, "grad_norm": 0.3388708233833313, "learning_rate": 1.947926857539413e-05, "loss": 0.5456, "step": 6808 }, { "epoch": 0.2091665898688293, "grad_norm": 0.3480341136455536, "learning_rate": 1.947911464624156e-05, "loss": 0.642, "step": 6809 }, { "epoch": 0.20919730900377845, "grad_norm": 0.3100312352180481, "learning_rate": 1.9478960694949857e-05, "loss": 0.5554, "step": 6810 }, { "epoch": 0.20922802813872762, "grad_norm": 0.3863712549209595, "learning_rate": 1.947880672151938e-05, "loss": 0.574, "step": 6811 }, { "epoch": 0.20925874727367677, "grad_norm": 0.3236182630062103, "learning_rate": 1.9478652725950484e-05, "loss": 0.6017, "step": 6812 }, { "epoch": 0.20928946640862595, "grad_norm": 0.3828861117362976, "learning_rate": 1.9478498708243534e-05, "loss": 0.5983, "step": 6813 }, { "epoch": 0.2093201855435751, "grad_norm": 0.304354190826416, "learning_rate": 1.947834466839889e-05, "loss": 0.5966, "step": 6814 }, { "epoch": 0.20935090467852424, "grad_norm": 0.37578994035720825, "learning_rate": 1.947819060641691e-05, "loss": 0.531, "step": 6815 }, { "epoch": 0.20938162381347342, "grad_norm": 0.33401918411254883, "learning_rate": 1.947803652229795e-05, "loss": 0.535, "step": 6816 }, { "epoch": 0.20941234294842256, "grad_norm": 0.34877946972846985, "learning_rate": 1.947788241604238e-05, "loss": 0.5208, "step": 6817 }, { "epoch": 0.20944306208337174, "grad_norm": 0.47943314909935, "learning_rate": 1.947772828765055e-05, "loss": 0.5951, "step": 6818 }, { "epoch": 0.2094737812183209, "grad_norm": 0.3514961898326874, "learning_rate": 1.9477574137122827e-05, "loss": 0.5694, "step": 6819 }, { "epoch": 0.20950450035327006, "grad_norm": 0.341605007648468, "learning_rate": 1.9477419964459564e-05, "loss": 0.5858, "step": 6820 }, { "epoch": 0.2095352194882192, "grad_norm": 0.32916802167892456, "learning_rate": 1.947726576966112e-05, "loss": 0.6015, "step": 6821 }, { "epoch": 0.20956593862316839, "grad_norm": 0.4181038439273834, "learning_rate": 1.947711155272787e-05, "loss": 0.5427, "step": 6822 }, { "epoch": 0.20959665775811753, "grad_norm": 0.3307131826877594, "learning_rate": 1.947695731366016e-05, "loss": 0.5673, "step": 6823 }, { "epoch": 0.20962737689306668, "grad_norm": 0.3298184275627136, "learning_rate": 1.9476803052458354e-05, "loss": 0.6506, "step": 6824 }, { "epoch": 0.20965809602801586, "grad_norm": 0.5908688306808472, "learning_rate": 1.9476648769122813e-05, "loss": 0.6687, "step": 6825 }, { "epoch": 0.209688815162965, "grad_norm": 0.40000155568122864, "learning_rate": 1.9476494463653895e-05, "loss": 0.6552, "step": 6826 }, { "epoch": 0.20971953429791418, "grad_norm": 0.3269423246383667, "learning_rate": 1.9476340136051965e-05, "loss": 0.6811, "step": 6827 }, { "epoch": 0.20975025343286333, "grad_norm": 0.3682125210762024, "learning_rate": 1.9476185786317382e-05, "loss": 0.5685, "step": 6828 }, { "epoch": 0.2097809725678125, "grad_norm": 0.3009730279445648, "learning_rate": 1.94760314144505e-05, "loss": 0.5235, "step": 6829 }, { "epoch": 0.20981169170276165, "grad_norm": 0.3236093521118164, "learning_rate": 1.9475877020451686e-05, "loss": 0.549, "step": 6830 }, { "epoch": 0.2098424108377108, "grad_norm": 0.3141080141067505, "learning_rate": 1.94757226043213e-05, "loss": 0.6041, "step": 6831 }, { "epoch": 0.20987312997265997, "grad_norm": 0.32735732197761536, "learning_rate": 1.9475568166059708e-05, "loss": 0.5844, "step": 6832 }, { "epoch": 0.20990384910760912, "grad_norm": 0.301844984292984, "learning_rate": 1.947541370566726e-05, "loss": 0.5797, "step": 6833 }, { "epoch": 0.2099345682425583, "grad_norm": 0.34587377309799194, "learning_rate": 1.9475259223144322e-05, "loss": 0.5753, "step": 6834 }, { "epoch": 0.20996528737750744, "grad_norm": 0.30843761563301086, "learning_rate": 1.9475104718491252e-05, "loss": 0.6491, "step": 6835 }, { "epoch": 0.20999600651245662, "grad_norm": 0.3184061348438263, "learning_rate": 1.9474950191708414e-05, "loss": 0.557, "step": 6836 }, { "epoch": 0.21002672564740577, "grad_norm": 0.3152425289154053, "learning_rate": 1.947479564279617e-05, "loss": 0.5766, "step": 6837 }, { "epoch": 0.21005744478235494, "grad_norm": 0.35139939188957214, "learning_rate": 1.9474641071754882e-05, "loss": 0.5715, "step": 6838 }, { "epoch": 0.2100881639173041, "grad_norm": 0.3102594017982483, "learning_rate": 1.94744864785849e-05, "loss": 0.4805, "step": 6839 }, { "epoch": 0.21011888305225324, "grad_norm": 0.36538225412368774, "learning_rate": 1.94743318632866e-05, "loss": 0.6501, "step": 6840 }, { "epoch": 0.2101496021872024, "grad_norm": 0.31919384002685547, "learning_rate": 1.9474177225860337e-05, "loss": 0.5879, "step": 6841 }, { "epoch": 0.21018032132215156, "grad_norm": 0.3606884181499481, "learning_rate": 1.9474022566306466e-05, "loss": 0.5623, "step": 6842 }, { "epoch": 0.21021104045710073, "grad_norm": 0.30678993463516235, "learning_rate": 1.9473867884625357e-05, "loss": 0.5764, "step": 6843 }, { "epoch": 0.21024175959204988, "grad_norm": 0.37434282898902893, "learning_rate": 1.9473713180817365e-05, "loss": 0.4925, "step": 6844 }, { "epoch": 0.21027247872699906, "grad_norm": 0.3545511066913605, "learning_rate": 1.9473558454882857e-05, "loss": 0.6795, "step": 6845 }, { "epoch": 0.2103031978619482, "grad_norm": 0.5128963589668274, "learning_rate": 1.9473403706822188e-05, "loss": 0.6035, "step": 6846 }, { "epoch": 0.21033391699689738, "grad_norm": 0.3279101252555847, "learning_rate": 1.9473248936635728e-05, "loss": 0.6013, "step": 6847 }, { "epoch": 0.21036463613184653, "grad_norm": 0.3404775857925415, "learning_rate": 1.9473094144323828e-05, "loss": 0.5915, "step": 6848 }, { "epoch": 0.21039535526679568, "grad_norm": 0.3517005443572998, "learning_rate": 1.9472939329886862e-05, "loss": 0.5668, "step": 6849 }, { "epoch": 0.21042607440174485, "grad_norm": 0.34140822291374207, "learning_rate": 1.9472784493325182e-05, "loss": 0.5614, "step": 6850 }, { "epoch": 0.210456793536694, "grad_norm": 0.37153002619743347, "learning_rate": 1.9472629634639148e-05, "loss": 0.6374, "step": 6851 }, { "epoch": 0.21048751267164317, "grad_norm": 0.38014742732048035, "learning_rate": 1.947247475382913e-05, "loss": 0.7005, "step": 6852 }, { "epoch": 0.21051823180659232, "grad_norm": 0.3855288326740265, "learning_rate": 1.9472319850895487e-05, "loss": 0.549, "step": 6853 }, { "epoch": 0.2105489509415415, "grad_norm": 0.3535619080066681, "learning_rate": 1.9472164925838576e-05, "loss": 0.5507, "step": 6854 }, { "epoch": 0.21057967007649064, "grad_norm": 0.3759792149066925, "learning_rate": 1.9472009978658764e-05, "loss": 0.6547, "step": 6855 }, { "epoch": 0.21061038921143982, "grad_norm": 0.3139233887195587, "learning_rate": 1.947185500935641e-05, "loss": 0.5621, "step": 6856 }, { "epoch": 0.21064110834638897, "grad_norm": 0.3505811393260956, "learning_rate": 1.947170001793188e-05, "loss": 0.6614, "step": 6857 }, { "epoch": 0.21067182748133811, "grad_norm": 0.321439266204834, "learning_rate": 1.9471545004385528e-05, "loss": 0.55, "step": 6858 }, { "epoch": 0.2107025466162873, "grad_norm": 0.3130649924278259, "learning_rate": 1.9471389968717725e-05, "loss": 0.5832, "step": 6859 }, { "epoch": 0.21073326575123644, "grad_norm": 0.3085334897041321, "learning_rate": 1.9471234910928827e-05, "loss": 0.556, "step": 6860 }, { "epoch": 0.2107639848861856, "grad_norm": 0.3236427903175354, "learning_rate": 1.94710798310192e-05, "loss": 0.6514, "step": 6861 }, { "epoch": 0.21079470402113476, "grad_norm": 0.3909730017185211, "learning_rate": 1.9470924728989205e-05, "loss": 0.5556, "step": 6862 }, { "epoch": 0.21082542315608394, "grad_norm": 0.3637202978134155, "learning_rate": 1.9470769604839202e-05, "loss": 0.5992, "step": 6863 }, { "epoch": 0.21085614229103308, "grad_norm": 0.3321051299571991, "learning_rate": 1.9470614458569558e-05, "loss": 0.5126, "step": 6864 }, { "epoch": 0.21088686142598223, "grad_norm": 0.3373319208621979, "learning_rate": 1.947045929018063e-05, "loss": 0.6416, "step": 6865 }, { "epoch": 0.2109175805609314, "grad_norm": 0.3937773108482361, "learning_rate": 1.9470304099672782e-05, "loss": 0.6015, "step": 6866 }, { "epoch": 0.21094829969588055, "grad_norm": 0.32138824462890625, "learning_rate": 1.947014888704638e-05, "loss": 0.536, "step": 6867 }, { "epoch": 0.21097901883082973, "grad_norm": 0.356560617685318, "learning_rate": 1.9469993652301783e-05, "loss": 0.5876, "step": 6868 }, { "epoch": 0.21100973796577888, "grad_norm": 0.3164021670818329, "learning_rate": 1.9469838395439353e-05, "loss": 0.5547, "step": 6869 }, { "epoch": 0.21104045710072805, "grad_norm": 0.3307015299797058, "learning_rate": 1.9469683116459456e-05, "loss": 0.6408, "step": 6870 }, { "epoch": 0.2110711762356772, "grad_norm": 0.35809844732284546, "learning_rate": 1.946952781536245e-05, "loss": 0.6241, "step": 6871 }, { "epoch": 0.21110189537062637, "grad_norm": 0.337424099445343, "learning_rate": 1.9469372492148704e-05, "loss": 0.5922, "step": 6872 }, { "epoch": 0.21113261450557552, "grad_norm": 0.3855310380458832, "learning_rate": 1.946921714681858e-05, "loss": 0.5714, "step": 6873 }, { "epoch": 0.21116333364052467, "grad_norm": 0.4581669569015503, "learning_rate": 1.9469061779372433e-05, "loss": 0.6413, "step": 6874 }, { "epoch": 0.21119405277547385, "grad_norm": 0.314171701669693, "learning_rate": 1.9468906389810633e-05, "loss": 0.5897, "step": 6875 }, { "epoch": 0.211224771910423, "grad_norm": 0.3317662477493286, "learning_rate": 1.9468750978133538e-05, "loss": 0.5819, "step": 6876 }, { "epoch": 0.21125549104537217, "grad_norm": 0.37145477533340454, "learning_rate": 1.9468595544341514e-05, "loss": 0.5652, "step": 6877 }, { "epoch": 0.21128621018032132, "grad_norm": 0.4229167103767395, "learning_rate": 1.9468440088434926e-05, "loss": 0.4991, "step": 6878 }, { "epoch": 0.2113169293152705, "grad_norm": 0.31672149896621704, "learning_rate": 1.9468284610414135e-05, "loss": 0.6318, "step": 6879 }, { "epoch": 0.21134764845021964, "grad_norm": 0.34982433915138245, "learning_rate": 1.9468129110279502e-05, "loss": 0.6155, "step": 6880 }, { "epoch": 0.2113783675851688, "grad_norm": 0.3493845462799072, "learning_rate": 1.9467973588031394e-05, "loss": 0.6421, "step": 6881 }, { "epoch": 0.21140908672011796, "grad_norm": 0.352337509393692, "learning_rate": 1.9467818043670172e-05, "loss": 0.6424, "step": 6882 }, { "epoch": 0.2114398058550671, "grad_norm": 0.32012486457824707, "learning_rate": 1.94676624771962e-05, "loss": 0.5868, "step": 6883 }, { "epoch": 0.21147052499001628, "grad_norm": 0.3392123579978943, "learning_rate": 1.946750688860984e-05, "loss": 0.6171, "step": 6884 }, { "epoch": 0.21150124412496543, "grad_norm": 0.3267022669315338, "learning_rate": 1.9467351277911458e-05, "loss": 0.6148, "step": 6885 }, { "epoch": 0.2115319632599146, "grad_norm": 0.4847431480884552, "learning_rate": 1.9467195645101413e-05, "loss": 0.6552, "step": 6886 }, { "epoch": 0.21156268239486375, "grad_norm": 0.42408645153045654, "learning_rate": 1.9467039990180074e-05, "loss": 0.6472, "step": 6887 }, { "epoch": 0.21159340152981293, "grad_norm": 0.3220108151435852, "learning_rate": 1.9466884313147804e-05, "loss": 0.6095, "step": 6888 }, { "epoch": 0.21162412066476208, "grad_norm": 0.4029952883720398, "learning_rate": 1.946672861400496e-05, "loss": 0.6031, "step": 6889 }, { "epoch": 0.21165483979971125, "grad_norm": 0.33666089177131653, "learning_rate": 1.946657289275191e-05, "loss": 0.546, "step": 6890 }, { "epoch": 0.2116855589346604, "grad_norm": 0.3103611171245575, "learning_rate": 1.946641714938902e-05, "loss": 0.5579, "step": 6891 }, { "epoch": 0.21171627806960955, "grad_norm": 0.34201323986053467, "learning_rate": 1.9466261383916654e-05, "loss": 0.6085, "step": 6892 }, { "epoch": 0.21174699720455872, "grad_norm": 0.3597601056098938, "learning_rate": 1.946610559633517e-05, "loss": 0.6078, "step": 6893 }, { "epoch": 0.21177771633950787, "grad_norm": 0.3278222382068634, "learning_rate": 1.9465949786644936e-05, "loss": 0.5362, "step": 6894 }, { "epoch": 0.21180843547445705, "grad_norm": 0.3123507797718048, "learning_rate": 1.9465793954846316e-05, "loss": 0.558, "step": 6895 }, { "epoch": 0.2118391546094062, "grad_norm": 0.34599238634109497, "learning_rate": 1.9465638100939672e-05, "loss": 0.7186, "step": 6896 }, { "epoch": 0.21186987374435537, "grad_norm": 0.3410007655620575, "learning_rate": 1.946548222492537e-05, "loss": 0.5725, "step": 6897 }, { "epoch": 0.21190059287930452, "grad_norm": 0.32351160049438477, "learning_rate": 1.9465326326803772e-05, "loss": 0.5914, "step": 6898 }, { "epoch": 0.2119313120142537, "grad_norm": 0.342926949262619, "learning_rate": 1.9465170406575248e-05, "loss": 0.547, "step": 6899 }, { "epoch": 0.21196203114920284, "grad_norm": 0.31965070962905884, "learning_rate": 1.946501446424015e-05, "loss": 0.6086, "step": 6900 }, { "epoch": 0.211992750284152, "grad_norm": 0.3552413880825043, "learning_rate": 1.9464858499798854e-05, "loss": 0.6007, "step": 6901 }, { "epoch": 0.21202346941910116, "grad_norm": 0.3169109523296356, "learning_rate": 1.9464702513251722e-05, "loss": 0.6141, "step": 6902 }, { "epoch": 0.2120541885540503, "grad_norm": 0.35254988074302673, "learning_rate": 1.946454650459911e-05, "loss": 0.4751, "step": 6903 }, { "epoch": 0.21208490768899949, "grad_norm": 0.37750425934791565, "learning_rate": 1.9464390473841393e-05, "loss": 0.6292, "step": 6904 }, { "epoch": 0.21211562682394863, "grad_norm": 0.33432063460350037, "learning_rate": 1.9464234420978928e-05, "loss": 0.5501, "step": 6905 }, { "epoch": 0.2121463459588978, "grad_norm": 0.3272215723991394, "learning_rate": 1.9464078346012087e-05, "loss": 0.5518, "step": 6906 }, { "epoch": 0.21217706509384696, "grad_norm": 0.3590032458305359, "learning_rate": 1.946392224894123e-05, "loss": 0.6285, "step": 6907 }, { "epoch": 0.2122077842287961, "grad_norm": 0.46296143531799316, "learning_rate": 1.9463766129766717e-05, "loss": 0.5426, "step": 6908 }, { "epoch": 0.21223850336374528, "grad_norm": 0.3289373815059662, "learning_rate": 1.9463609988488922e-05, "loss": 0.5747, "step": 6909 }, { "epoch": 0.21226922249869443, "grad_norm": 0.34269535541534424, "learning_rate": 1.94634538251082e-05, "loss": 0.6338, "step": 6910 }, { "epoch": 0.2122999416336436, "grad_norm": 0.3120807409286499, "learning_rate": 1.9463297639624925e-05, "loss": 0.5857, "step": 6911 }, { "epoch": 0.21233066076859275, "grad_norm": 0.38941940665245056, "learning_rate": 1.9463141432039457e-05, "loss": 0.5153, "step": 6912 }, { "epoch": 0.21236137990354192, "grad_norm": 0.3725374639034271, "learning_rate": 1.9462985202352162e-05, "loss": 0.612, "step": 6913 }, { "epoch": 0.21239209903849107, "grad_norm": 0.3353291153907776, "learning_rate": 1.94628289505634e-05, "loss": 0.6364, "step": 6914 }, { "epoch": 0.21242281817344025, "grad_norm": 0.3772139847278595, "learning_rate": 1.9462672676673543e-05, "loss": 0.6409, "step": 6915 }, { "epoch": 0.2124535373083894, "grad_norm": 0.3532460331916809, "learning_rate": 1.946251638068295e-05, "loss": 0.609, "step": 6916 }, { "epoch": 0.21248425644333854, "grad_norm": 0.5365630984306335, "learning_rate": 1.9462360062591993e-05, "loss": 0.5436, "step": 6917 }, { "epoch": 0.21251497557828772, "grad_norm": 0.32816532254219055, "learning_rate": 1.946220372240103e-05, "loss": 0.5793, "step": 6918 }, { "epoch": 0.21254569471323687, "grad_norm": 0.3348343074321747, "learning_rate": 1.9462047360110433e-05, "loss": 0.6198, "step": 6919 }, { "epoch": 0.21257641384818604, "grad_norm": 0.312858909368515, "learning_rate": 1.9461890975720562e-05, "loss": 0.5931, "step": 6920 }, { "epoch": 0.2126071329831352, "grad_norm": 0.36463090777397156, "learning_rate": 1.9461734569231785e-05, "loss": 0.5936, "step": 6921 }, { "epoch": 0.21263785211808436, "grad_norm": 0.34361931681632996, "learning_rate": 1.9461578140644464e-05, "loss": 0.6401, "step": 6922 }, { "epoch": 0.2126685712530335, "grad_norm": 0.3445605933666229, "learning_rate": 1.9461421689958965e-05, "loss": 0.6152, "step": 6923 }, { "epoch": 0.2126992903879827, "grad_norm": 0.3404785096645355, "learning_rate": 1.9461265217175657e-05, "loss": 0.5736, "step": 6924 }, { "epoch": 0.21273000952293183, "grad_norm": 0.3043605387210846, "learning_rate": 1.9461108722294906e-05, "loss": 0.5943, "step": 6925 }, { "epoch": 0.21276072865788098, "grad_norm": 0.3497447669506073, "learning_rate": 1.946095220531707e-05, "loss": 0.6234, "step": 6926 }, { "epoch": 0.21279144779283016, "grad_norm": 0.316416472196579, "learning_rate": 1.946079566624252e-05, "loss": 0.5272, "step": 6927 }, { "epoch": 0.2128221669277793, "grad_norm": 0.31701144576072693, "learning_rate": 1.9460639105071626e-05, "loss": 0.582, "step": 6928 }, { "epoch": 0.21285288606272848, "grad_norm": 0.3378000855445862, "learning_rate": 1.9460482521804746e-05, "loss": 0.6349, "step": 6929 }, { "epoch": 0.21288360519767763, "grad_norm": 0.371761292219162, "learning_rate": 1.9460325916442248e-05, "loss": 0.6684, "step": 6930 }, { "epoch": 0.2129143243326268, "grad_norm": 0.3238587975502014, "learning_rate": 1.9460169288984496e-05, "loss": 0.5465, "step": 6931 }, { "epoch": 0.21294504346757595, "grad_norm": 0.3284999430179596, "learning_rate": 1.946001263943186e-05, "loss": 0.5261, "step": 6932 }, { "epoch": 0.21297576260252513, "grad_norm": 0.356264591217041, "learning_rate": 1.94598559677847e-05, "loss": 0.5382, "step": 6933 }, { "epoch": 0.21300648173747427, "grad_norm": 0.31854012608528137, "learning_rate": 1.9459699274043392e-05, "loss": 0.5505, "step": 6934 }, { "epoch": 0.21303720087242342, "grad_norm": 0.3469434976577759, "learning_rate": 1.9459542558208292e-05, "loss": 0.5821, "step": 6935 }, { "epoch": 0.2130679200073726, "grad_norm": 0.3392375409603119, "learning_rate": 1.9459385820279772e-05, "loss": 0.5563, "step": 6936 }, { "epoch": 0.21309863914232174, "grad_norm": 0.3762151896953583, "learning_rate": 1.9459229060258193e-05, "loss": 0.6733, "step": 6937 }, { "epoch": 0.21312935827727092, "grad_norm": 0.34337496757507324, "learning_rate": 1.9459072278143927e-05, "loss": 0.6065, "step": 6938 }, { "epoch": 0.21316007741222007, "grad_norm": 0.8158531188964844, "learning_rate": 1.9458915473937335e-05, "loss": 0.5543, "step": 6939 }, { "epoch": 0.21319079654716924, "grad_norm": 0.3446367383003235, "learning_rate": 1.9458758647638785e-05, "loss": 0.5289, "step": 6940 }, { "epoch": 0.2132215156821184, "grad_norm": 0.34889522194862366, "learning_rate": 1.9458601799248644e-05, "loss": 0.5796, "step": 6941 }, { "epoch": 0.21325223481706757, "grad_norm": 0.367064893245697, "learning_rate": 1.945844492876728e-05, "loss": 0.554, "step": 6942 }, { "epoch": 0.2132829539520167, "grad_norm": 0.34040358662605286, "learning_rate": 1.9458288036195053e-05, "loss": 0.6423, "step": 6943 }, { "epoch": 0.21331367308696586, "grad_norm": 0.39948439598083496, "learning_rate": 1.945813112153234e-05, "loss": 0.6084, "step": 6944 }, { "epoch": 0.21334439222191504, "grad_norm": 0.3264942467212677, "learning_rate": 1.9457974184779497e-05, "loss": 0.4465, "step": 6945 }, { "epoch": 0.21337511135686418, "grad_norm": 0.3026781976222992, "learning_rate": 1.9457817225936893e-05, "loss": 0.5793, "step": 6946 }, { "epoch": 0.21340583049181336, "grad_norm": 0.35538795590400696, "learning_rate": 1.94576602450049e-05, "loss": 0.6143, "step": 6947 }, { "epoch": 0.2134365496267625, "grad_norm": 0.3453122675418854, "learning_rate": 1.945750324198388e-05, "loss": 0.6984, "step": 6948 }, { "epoch": 0.21346726876171168, "grad_norm": 0.3291662335395813, "learning_rate": 1.9457346216874197e-05, "loss": 0.6051, "step": 6949 }, { "epoch": 0.21349798789666083, "grad_norm": 0.4322100877761841, "learning_rate": 1.9457189169676228e-05, "loss": 0.5839, "step": 6950 }, { "epoch": 0.21352870703160998, "grad_norm": 0.33026760816574097, "learning_rate": 1.9457032100390326e-05, "loss": 0.5729, "step": 6951 }, { "epoch": 0.21355942616655915, "grad_norm": 0.32274481654167175, "learning_rate": 1.945687500901687e-05, "loss": 0.5627, "step": 6952 }, { "epoch": 0.2135901453015083, "grad_norm": 0.3137408494949341, "learning_rate": 1.9456717895556222e-05, "loss": 0.6242, "step": 6953 }, { "epoch": 0.21362086443645747, "grad_norm": 0.3570811152458191, "learning_rate": 1.945656076000875e-05, "loss": 0.6185, "step": 6954 }, { "epoch": 0.21365158357140662, "grad_norm": 0.3760155439376831, "learning_rate": 1.9456403602374813e-05, "loss": 0.6595, "step": 6955 }, { "epoch": 0.2136823027063558, "grad_norm": 0.33095690608024597, "learning_rate": 1.945624642265479e-05, "loss": 0.576, "step": 6956 }, { "epoch": 0.21371302184130495, "grad_norm": 0.2784162759780884, "learning_rate": 1.9456089220849043e-05, "loss": 0.5204, "step": 6957 }, { "epoch": 0.21374374097625412, "grad_norm": 0.3640543520450592, "learning_rate": 1.945593199695794e-05, "loss": 0.6112, "step": 6958 }, { "epoch": 0.21377446011120327, "grad_norm": 0.3521929979324341, "learning_rate": 1.9455774750981847e-05, "loss": 0.576, "step": 6959 }, { "epoch": 0.21380517924615242, "grad_norm": 0.35714420676231384, "learning_rate": 1.945561748292113e-05, "loss": 0.5723, "step": 6960 }, { "epoch": 0.2138358983811016, "grad_norm": 0.3550485372543335, "learning_rate": 1.9455460192776158e-05, "loss": 0.6195, "step": 6961 }, { "epoch": 0.21386661751605074, "grad_norm": 0.37049639225006104, "learning_rate": 1.94553028805473e-05, "loss": 0.6379, "step": 6962 }, { "epoch": 0.21389733665099991, "grad_norm": 0.3556708097457886, "learning_rate": 1.945514554623492e-05, "loss": 0.5748, "step": 6963 }, { "epoch": 0.21392805578594906, "grad_norm": 0.31783589720726013, "learning_rate": 1.945498818983939e-05, "loss": 0.5699, "step": 6964 }, { "epoch": 0.21395877492089824, "grad_norm": 0.45802897214889526, "learning_rate": 1.945483081136107e-05, "loss": 0.5513, "step": 6965 }, { "epoch": 0.21398949405584738, "grad_norm": 0.3443422317504883, "learning_rate": 1.945467341080034e-05, "loss": 0.5514, "step": 6966 }, { "epoch": 0.21402021319079656, "grad_norm": 0.3587382733821869, "learning_rate": 1.9454515988157553e-05, "loss": 0.5756, "step": 6967 }, { "epoch": 0.2140509323257457, "grad_norm": 0.321412593126297, "learning_rate": 1.9454358543433086e-05, "loss": 0.5468, "step": 6968 }, { "epoch": 0.21408165146069486, "grad_norm": 0.35887831449508667, "learning_rate": 1.9454201076627303e-05, "loss": 0.5715, "step": 6969 }, { "epoch": 0.21411237059564403, "grad_norm": 0.45508962869644165, "learning_rate": 1.9454043587740578e-05, "loss": 0.5171, "step": 6970 }, { "epoch": 0.21414308973059318, "grad_norm": 0.5978578925132751, "learning_rate": 1.9453886076773266e-05, "loss": 0.6083, "step": 6971 }, { "epoch": 0.21417380886554235, "grad_norm": 0.359482079744339, "learning_rate": 1.945372854372575e-05, "loss": 0.6475, "step": 6972 }, { "epoch": 0.2142045280004915, "grad_norm": 0.3641624450683594, "learning_rate": 1.9453570988598388e-05, "loss": 0.5393, "step": 6973 }, { "epoch": 0.21423524713544068, "grad_norm": 0.40729475021362305, "learning_rate": 1.945341341139155e-05, "loss": 0.6804, "step": 6974 }, { "epoch": 0.21426596627038982, "grad_norm": 0.4528740346431732, "learning_rate": 1.9453255812105607e-05, "loss": 0.6394, "step": 6975 }, { "epoch": 0.214296685405339, "grad_norm": 0.32852935791015625, "learning_rate": 1.9453098190740923e-05, "loss": 0.5805, "step": 6976 }, { "epoch": 0.21432740454028815, "grad_norm": 0.3235732913017273, "learning_rate": 1.9452940547297868e-05, "loss": 0.5847, "step": 6977 }, { "epoch": 0.2143581236752373, "grad_norm": 0.34987977147102356, "learning_rate": 1.945278288177681e-05, "loss": 0.5464, "step": 6978 }, { "epoch": 0.21438884281018647, "grad_norm": 0.36413583159446716, "learning_rate": 1.9452625194178118e-05, "loss": 0.5234, "step": 6979 }, { "epoch": 0.21441956194513562, "grad_norm": 0.4398280680179596, "learning_rate": 1.9452467484502158e-05, "loss": 0.5832, "step": 6980 }, { "epoch": 0.2144502810800848, "grad_norm": 0.32694211602211, "learning_rate": 1.9452309752749305e-05, "loss": 0.5408, "step": 6981 }, { "epoch": 0.21448100021503394, "grad_norm": 0.5400601625442505, "learning_rate": 1.9452151998919916e-05, "loss": 0.5704, "step": 6982 }, { "epoch": 0.21451171934998312, "grad_norm": 0.35813888907432556, "learning_rate": 1.945199422301437e-05, "loss": 0.5818, "step": 6983 }, { "epoch": 0.21454243848493226, "grad_norm": 0.324771523475647, "learning_rate": 1.945183642503303e-05, "loss": 0.5583, "step": 6984 }, { "epoch": 0.2145731576198814, "grad_norm": 0.3319883644580841, "learning_rate": 1.9451678604976266e-05, "loss": 0.5364, "step": 6985 }, { "epoch": 0.21460387675483059, "grad_norm": 0.3361334204673767, "learning_rate": 1.9451520762844445e-05, "loss": 0.6008, "step": 6986 }, { "epoch": 0.21463459588977973, "grad_norm": 0.32287830114364624, "learning_rate": 1.945136289863794e-05, "loss": 0.4855, "step": 6987 }, { "epoch": 0.2146653150247289, "grad_norm": 0.3402499556541443, "learning_rate": 1.9451205012357114e-05, "loss": 0.5975, "step": 6988 }, { "epoch": 0.21469603415967806, "grad_norm": 0.3466370701789856, "learning_rate": 1.945104710400234e-05, "loss": 0.6287, "step": 6989 }, { "epoch": 0.21472675329462723, "grad_norm": 0.35339292883872986, "learning_rate": 1.945088917357399e-05, "loss": 0.6624, "step": 6990 }, { "epoch": 0.21475747242957638, "grad_norm": 0.33826643228530884, "learning_rate": 1.945073122107242e-05, "loss": 0.5188, "step": 6991 }, { "epoch": 0.21478819156452555, "grad_norm": 0.37751156091690063, "learning_rate": 1.945057324649801e-05, "loss": 0.582, "step": 6992 }, { "epoch": 0.2148189106994747, "grad_norm": 0.3432838022708893, "learning_rate": 1.945041524985113e-05, "loss": 0.5948, "step": 6993 }, { "epoch": 0.21484962983442385, "grad_norm": 0.3533337116241455, "learning_rate": 1.9450257231132142e-05, "loss": 0.5801, "step": 6994 }, { "epoch": 0.21488034896937303, "grad_norm": 0.33773550391197205, "learning_rate": 1.945009919034142e-05, "loss": 0.5526, "step": 6995 }, { "epoch": 0.21491106810432217, "grad_norm": 0.3985190987586975, "learning_rate": 1.944994112747933e-05, "loss": 0.6666, "step": 6996 }, { "epoch": 0.21494178723927135, "grad_norm": 0.31864067912101746, "learning_rate": 1.9449783042546243e-05, "loss": 0.6031, "step": 6997 }, { "epoch": 0.2149725063742205, "grad_norm": 0.3137242794036865, "learning_rate": 1.9449624935542527e-05, "loss": 0.5976, "step": 6998 }, { "epoch": 0.21500322550916967, "grad_norm": 0.35759952664375305, "learning_rate": 1.9449466806468554e-05, "loss": 0.5727, "step": 6999 }, { "epoch": 0.21503394464411882, "grad_norm": 0.3251418471336365, "learning_rate": 1.9449308655324687e-05, "loss": 0.5846, "step": 7000 }, { "epoch": 0.215064663779068, "grad_norm": 0.3203136622905731, "learning_rate": 1.9449150482111302e-05, "loss": 0.5149, "step": 7001 }, { "epoch": 0.21509538291401714, "grad_norm": 0.35594484210014343, "learning_rate": 1.9448992286828766e-05, "loss": 0.5776, "step": 7002 }, { "epoch": 0.2151261020489663, "grad_norm": 0.36626413464546204, "learning_rate": 1.9448834069477453e-05, "loss": 0.5797, "step": 7003 }, { "epoch": 0.21515682118391546, "grad_norm": 0.3597395718097687, "learning_rate": 1.9448675830057723e-05, "loss": 0.5783, "step": 7004 }, { "epoch": 0.2151875403188646, "grad_norm": 0.4954650104045868, "learning_rate": 1.9448517568569953e-05, "loss": 0.6508, "step": 7005 }, { "epoch": 0.2152182594538138, "grad_norm": 0.3108378052711487, "learning_rate": 1.9448359285014508e-05, "loss": 0.5636, "step": 7006 }, { "epoch": 0.21524897858876293, "grad_norm": 0.3483054041862488, "learning_rate": 1.944820097939176e-05, "loss": 0.6315, "step": 7007 }, { "epoch": 0.2152796977237121, "grad_norm": 0.3371865451335907, "learning_rate": 1.944804265170208e-05, "loss": 0.5594, "step": 7008 }, { "epoch": 0.21531041685866126, "grad_norm": 0.3657490313053131, "learning_rate": 1.944788430194584e-05, "loss": 0.6252, "step": 7009 }, { "epoch": 0.21534113599361043, "grad_norm": 0.4209143817424774, "learning_rate": 1.94477259301234e-05, "loss": 0.5653, "step": 7010 }, { "epoch": 0.21537185512855958, "grad_norm": 0.35575082898139954, "learning_rate": 1.9447567536235142e-05, "loss": 0.5545, "step": 7011 }, { "epoch": 0.21540257426350873, "grad_norm": 0.38152584433555603, "learning_rate": 1.9447409120281427e-05, "loss": 0.5803, "step": 7012 }, { "epoch": 0.2154332933984579, "grad_norm": 0.3336385190486908, "learning_rate": 1.944725068226263e-05, "loss": 0.4925, "step": 7013 }, { "epoch": 0.21546401253340705, "grad_norm": 0.8241966366767883, "learning_rate": 1.9447092222179117e-05, "loss": 0.5981, "step": 7014 }, { "epoch": 0.21549473166835623, "grad_norm": 0.37771591544151306, "learning_rate": 1.9446933740031258e-05, "loss": 0.6146, "step": 7015 }, { "epoch": 0.21552545080330537, "grad_norm": 0.34676146507263184, "learning_rate": 1.9446775235819434e-05, "loss": 0.5896, "step": 7016 }, { "epoch": 0.21555616993825455, "grad_norm": 0.3655661642551422, "learning_rate": 1.9446616709544e-05, "loss": 0.6388, "step": 7017 }, { "epoch": 0.2155868890732037, "grad_norm": 0.3434339463710785, "learning_rate": 1.9446458161205335e-05, "loss": 0.6132, "step": 7018 }, { "epoch": 0.21561760820815287, "grad_norm": 0.34434202313423157, "learning_rate": 1.9446299590803805e-05, "loss": 0.608, "step": 7019 }, { "epoch": 0.21564832734310202, "grad_norm": 0.3559676706790924, "learning_rate": 1.9446140998339784e-05, "loss": 0.6188, "step": 7020 }, { "epoch": 0.21567904647805117, "grad_norm": 0.3530060350894928, "learning_rate": 1.944598238381364e-05, "loss": 0.5562, "step": 7021 }, { "epoch": 0.21570976561300034, "grad_norm": 0.3233635127544403, "learning_rate": 1.9445823747225746e-05, "loss": 0.6522, "step": 7022 }, { "epoch": 0.2157404847479495, "grad_norm": 0.34793621301651, "learning_rate": 1.944566508857647e-05, "loss": 0.6175, "step": 7023 }, { "epoch": 0.21577120388289867, "grad_norm": 0.34585627913475037, "learning_rate": 1.9445506407866187e-05, "loss": 0.5658, "step": 7024 }, { "epoch": 0.2158019230178478, "grad_norm": 0.3485947251319885, "learning_rate": 1.944534770509526e-05, "loss": 0.5964, "step": 7025 }, { "epoch": 0.215832642152797, "grad_norm": 0.3569158911705017, "learning_rate": 1.9445188980264066e-05, "loss": 0.5779, "step": 7026 }, { "epoch": 0.21586336128774614, "grad_norm": 0.36609789729118347, "learning_rate": 1.9445030233372972e-05, "loss": 0.5673, "step": 7027 }, { "epoch": 0.21589408042269528, "grad_norm": 0.36632686853408813, "learning_rate": 1.944487146442235e-05, "loss": 0.6028, "step": 7028 }, { "epoch": 0.21592479955764446, "grad_norm": 0.3538036346435547, "learning_rate": 1.9444712673412572e-05, "loss": 0.5898, "step": 7029 }, { "epoch": 0.2159555186925936, "grad_norm": 0.4421806335449219, "learning_rate": 1.9444553860344007e-05, "loss": 0.6369, "step": 7030 }, { "epoch": 0.21598623782754278, "grad_norm": 0.3854859173297882, "learning_rate": 1.9444395025217027e-05, "loss": 0.6452, "step": 7031 }, { "epoch": 0.21601695696249193, "grad_norm": 0.39243602752685547, "learning_rate": 1.944423616803201e-05, "loss": 0.5832, "step": 7032 }, { "epoch": 0.2160476760974411, "grad_norm": 0.3506511449813843, "learning_rate": 1.944407728878931e-05, "loss": 0.4819, "step": 7033 }, { "epoch": 0.21607839523239025, "grad_norm": 0.343405157327652, "learning_rate": 1.9443918387489307e-05, "loss": 0.6088, "step": 7034 }, { "epoch": 0.21610911436733943, "grad_norm": 0.4161800444126129, "learning_rate": 1.944375946413238e-05, "loss": 0.5872, "step": 7035 }, { "epoch": 0.21613983350228858, "grad_norm": 0.3719150722026825, "learning_rate": 1.9443600518718892e-05, "loss": 0.6866, "step": 7036 }, { "epoch": 0.21617055263723772, "grad_norm": 0.39957189559936523, "learning_rate": 1.9443441551249215e-05, "loss": 0.6121, "step": 7037 }, { "epoch": 0.2162012717721869, "grad_norm": 0.33701056241989136, "learning_rate": 1.9443282561723717e-05, "loss": 0.5845, "step": 7038 }, { "epoch": 0.21623199090713605, "grad_norm": 0.32902827858924866, "learning_rate": 1.9443123550142776e-05, "loss": 0.6187, "step": 7039 }, { "epoch": 0.21626271004208522, "grad_norm": 0.3454064428806305, "learning_rate": 1.944296451650676e-05, "loss": 0.5735, "step": 7040 }, { "epoch": 0.21629342917703437, "grad_norm": 0.3320229947566986, "learning_rate": 1.944280546081604e-05, "loss": 0.5281, "step": 7041 }, { "epoch": 0.21632414831198354, "grad_norm": 0.39070332050323486, "learning_rate": 1.944264638307099e-05, "loss": 0.6278, "step": 7042 }, { "epoch": 0.2163548674469327, "grad_norm": 0.556035578250885, "learning_rate": 1.9442487283271982e-05, "loss": 0.6213, "step": 7043 }, { "epoch": 0.21638558658188187, "grad_norm": 0.3283368945121765, "learning_rate": 1.9442328161419382e-05, "loss": 0.5789, "step": 7044 }, { "epoch": 0.21641630571683101, "grad_norm": 0.4934787452220917, "learning_rate": 1.9442169017513565e-05, "loss": 0.5832, "step": 7045 }, { "epoch": 0.21644702485178016, "grad_norm": 0.3123880922794342, "learning_rate": 1.9442009851554906e-05, "loss": 0.6019, "step": 7046 }, { "epoch": 0.21647774398672934, "grad_norm": 0.3268885910511017, "learning_rate": 1.9441850663543772e-05, "loss": 0.614, "step": 7047 }, { "epoch": 0.21650846312167848, "grad_norm": 0.34913399815559387, "learning_rate": 1.9441691453480534e-05, "loss": 0.5723, "step": 7048 }, { "epoch": 0.21653918225662766, "grad_norm": 0.4546506702899933, "learning_rate": 1.944153222136557e-05, "loss": 0.6813, "step": 7049 }, { "epoch": 0.2165699013915768, "grad_norm": 0.36651623249053955, "learning_rate": 1.9441372967199245e-05, "loss": 0.5354, "step": 7050 }, { "epoch": 0.21660062052652598, "grad_norm": 0.3774753510951996, "learning_rate": 1.9441213690981935e-05, "loss": 0.5903, "step": 7051 }, { "epoch": 0.21663133966147513, "grad_norm": 0.33642512559890747, "learning_rate": 1.9441054392714012e-05, "loss": 0.5794, "step": 7052 }, { "epoch": 0.2166620587964243, "grad_norm": 0.334236741065979, "learning_rate": 1.9440895072395843e-05, "loss": 0.5976, "step": 7053 }, { "epoch": 0.21669277793137345, "grad_norm": 0.31165754795074463, "learning_rate": 1.9440735730027808e-05, "loss": 0.5555, "step": 7054 }, { "epoch": 0.2167234970663226, "grad_norm": 0.3693506419658661, "learning_rate": 1.9440576365610275e-05, "loss": 0.6303, "step": 7055 }, { "epoch": 0.21675421620127178, "grad_norm": 0.3625418543815613, "learning_rate": 1.944041697914362e-05, "loss": 0.631, "step": 7056 }, { "epoch": 0.21678493533622092, "grad_norm": 0.4176451563835144, "learning_rate": 1.9440257570628204e-05, "loss": 0.5343, "step": 7057 }, { "epoch": 0.2168156544711701, "grad_norm": 0.33605360984802246, "learning_rate": 1.9440098140064414e-05, "loss": 0.6322, "step": 7058 }, { "epoch": 0.21684637360611925, "grad_norm": 0.3271574378013611, "learning_rate": 1.943993868745261e-05, "loss": 0.5604, "step": 7059 }, { "epoch": 0.21687709274106842, "grad_norm": 0.36626800894737244, "learning_rate": 1.9439779212793178e-05, "loss": 0.5443, "step": 7060 }, { "epoch": 0.21690781187601757, "grad_norm": 0.3378170132637024, "learning_rate": 1.9439619716086476e-05, "loss": 0.5492, "step": 7061 }, { "epoch": 0.21693853101096672, "grad_norm": 0.34727978706359863, "learning_rate": 1.9439460197332886e-05, "loss": 0.5632, "step": 7062 }, { "epoch": 0.2169692501459159, "grad_norm": 0.3413423001766205, "learning_rate": 1.9439300656532773e-05, "loss": 0.5619, "step": 7063 }, { "epoch": 0.21699996928086504, "grad_norm": 0.32166028022766113, "learning_rate": 1.9439141093686517e-05, "loss": 0.5915, "step": 7064 }, { "epoch": 0.21703068841581422, "grad_norm": 0.3361012935638428, "learning_rate": 1.9438981508794486e-05, "loss": 0.59, "step": 7065 }, { "epoch": 0.21706140755076336, "grad_norm": 0.34965944290161133, "learning_rate": 1.9438821901857058e-05, "loss": 0.6345, "step": 7066 }, { "epoch": 0.21709212668571254, "grad_norm": 0.3997224271297455, "learning_rate": 1.94386622728746e-05, "loss": 0.4985, "step": 7067 }, { "epoch": 0.2171228458206617, "grad_norm": 0.3304525315761566, "learning_rate": 1.9438502621847486e-05, "loss": 0.6308, "step": 7068 }, { "epoch": 0.21715356495561086, "grad_norm": 0.3641963601112366, "learning_rate": 1.9438342948776093e-05, "loss": 0.7052, "step": 7069 }, { "epoch": 0.21718428409056, "grad_norm": 0.3546707034111023, "learning_rate": 1.943818325366079e-05, "loss": 0.6893, "step": 7070 }, { "epoch": 0.21721500322550916, "grad_norm": 0.38702529668807983, "learning_rate": 1.943802353650195e-05, "loss": 0.6122, "step": 7071 }, { "epoch": 0.21724572236045833, "grad_norm": 0.3563117980957031, "learning_rate": 1.9437863797299944e-05, "loss": 0.5916, "step": 7072 }, { "epoch": 0.21727644149540748, "grad_norm": 0.3742269277572632, "learning_rate": 1.9437704036055153e-05, "loss": 0.6351, "step": 7073 }, { "epoch": 0.21730716063035665, "grad_norm": 0.3273765444755554, "learning_rate": 1.9437544252767942e-05, "loss": 0.5471, "step": 7074 }, { "epoch": 0.2173378797653058, "grad_norm": 0.36251184344291687, "learning_rate": 1.9437384447438687e-05, "loss": 0.6181, "step": 7075 }, { "epoch": 0.21736859890025498, "grad_norm": 0.36197036504745483, "learning_rate": 1.9437224620067762e-05, "loss": 0.6834, "step": 7076 }, { "epoch": 0.21739931803520413, "grad_norm": 0.31233644485473633, "learning_rate": 1.943706477065554e-05, "loss": 0.5389, "step": 7077 }, { "epoch": 0.2174300371701533, "grad_norm": 0.594025731086731, "learning_rate": 1.94369048992024e-05, "loss": 0.5865, "step": 7078 }, { "epoch": 0.21746075630510245, "grad_norm": 0.3725707232952118, "learning_rate": 1.94367450057087e-05, "loss": 0.5478, "step": 7079 }, { "epoch": 0.2174914754400516, "grad_norm": 0.3344149887561798, "learning_rate": 1.9436585090174828e-05, "loss": 0.5725, "step": 7080 }, { "epoch": 0.21752219457500077, "grad_norm": 0.3397082984447479, "learning_rate": 1.9436425152601152e-05, "loss": 0.5939, "step": 7081 }, { "epoch": 0.21755291370994992, "grad_norm": 0.33837518095970154, "learning_rate": 1.9436265192988048e-05, "loss": 0.6118, "step": 7082 }, { "epoch": 0.2175836328448991, "grad_norm": 0.333927720785141, "learning_rate": 1.943610521133588e-05, "loss": 0.6075, "step": 7083 }, { "epoch": 0.21761435197984824, "grad_norm": 0.35752636194229126, "learning_rate": 1.9435945207645036e-05, "loss": 0.5654, "step": 7084 }, { "epoch": 0.21764507111479742, "grad_norm": 0.3594207465648651, "learning_rate": 1.9435785181915884e-05, "loss": 0.6413, "step": 7085 }, { "epoch": 0.21767579024974656, "grad_norm": 0.4951466917991638, "learning_rate": 1.943562513414879e-05, "loss": 0.635, "step": 7086 }, { "epoch": 0.21770650938469574, "grad_norm": 0.39108651876449585, "learning_rate": 1.943546506434414e-05, "loss": 0.68, "step": 7087 }, { "epoch": 0.2177372285196449, "grad_norm": 0.38816037774086, "learning_rate": 1.9435304972502306e-05, "loss": 0.6243, "step": 7088 }, { "epoch": 0.21776794765459404, "grad_norm": 0.29980480670928955, "learning_rate": 1.943514485862365e-05, "loss": 0.6272, "step": 7089 }, { "epoch": 0.2177986667895432, "grad_norm": 0.35475099086761475, "learning_rate": 1.9434984722708557e-05, "loss": 0.6514, "step": 7090 }, { "epoch": 0.21782938592449236, "grad_norm": 0.3401775062084198, "learning_rate": 1.94348245647574e-05, "loss": 0.6082, "step": 7091 }, { "epoch": 0.21786010505944153, "grad_norm": 0.3646056652069092, "learning_rate": 1.943466438477055e-05, "loss": 0.6254, "step": 7092 }, { "epoch": 0.21789082419439068, "grad_norm": 0.3053387701511383, "learning_rate": 1.9434504182748384e-05, "loss": 0.548, "step": 7093 }, { "epoch": 0.21792154332933986, "grad_norm": 0.332308292388916, "learning_rate": 1.9434343958691272e-05, "loss": 0.5998, "step": 7094 }, { "epoch": 0.217952262464289, "grad_norm": 0.36136260628700256, "learning_rate": 1.9434183712599594e-05, "loss": 0.6501, "step": 7095 }, { "epoch": 0.21798298159923818, "grad_norm": 0.35034438967704773, "learning_rate": 1.943402344447372e-05, "loss": 0.5634, "step": 7096 }, { "epoch": 0.21801370073418733, "grad_norm": 0.34205761551856995, "learning_rate": 1.9433863154314023e-05, "loss": 0.6982, "step": 7097 }, { "epoch": 0.21804441986913647, "grad_norm": 0.3207978904247284, "learning_rate": 1.943370284212088e-05, "loss": 0.5575, "step": 7098 }, { "epoch": 0.21807513900408565, "grad_norm": 0.3075382709503174, "learning_rate": 1.9433542507894668e-05, "loss": 0.5849, "step": 7099 }, { "epoch": 0.2181058581390348, "grad_norm": 0.30569988489151, "learning_rate": 1.9433382151635757e-05, "loss": 0.5826, "step": 7100 }, { "epoch": 0.21813657727398397, "grad_norm": 0.36008334159851074, "learning_rate": 1.9433221773344523e-05, "loss": 0.6038, "step": 7101 }, { "epoch": 0.21816729640893312, "grad_norm": 0.3342386782169342, "learning_rate": 1.9433061373021342e-05, "loss": 0.5419, "step": 7102 }, { "epoch": 0.2181980155438823, "grad_norm": 0.32767513394355774, "learning_rate": 1.9432900950666583e-05, "loss": 0.5714, "step": 7103 }, { "epoch": 0.21822873467883144, "grad_norm": 0.34108367562294006, "learning_rate": 1.943274050628063e-05, "loss": 0.6162, "step": 7104 }, { "epoch": 0.2182594538137806, "grad_norm": 0.37323689460754395, "learning_rate": 1.9432580039863854e-05, "loss": 0.5639, "step": 7105 }, { "epoch": 0.21829017294872977, "grad_norm": 0.3200756907463074, "learning_rate": 1.9432419551416625e-05, "loss": 0.609, "step": 7106 }, { "epoch": 0.2183208920836789, "grad_norm": 0.3640434145927429, "learning_rate": 1.943225904093932e-05, "loss": 0.5655, "step": 7107 }, { "epoch": 0.2183516112186281, "grad_norm": 0.3428247570991516, "learning_rate": 1.9432098508432317e-05, "loss": 0.6397, "step": 7108 }, { "epoch": 0.21838233035357724, "grad_norm": 0.3588607907295227, "learning_rate": 1.9431937953895993e-05, "loss": 0.6421, "step": 7109 }, { "epoch": 0.2184130494885264, "grad_norm": 0.3374510109424591, "learning_rate": 1.943177737733071e-05, "loss": 0.6247, "step": 7110 }, { "epoch": 0.21844376862347556, "grad_norm": 0.39502817392349243, "learning_rate": 1.9431616778736863e-05, "loss": 0.6301, "step": 7111 }, { "epoch": 0.21847448775842473, "grad_norm": 0.3737110197544098, "learning_rate": 1.943145615811481e-05, "loss": 0.5209, "step": 7112 }, { "epoch": 0.21850520689337388, "grad_norm": 0.36680659651756287, "learning_rate": 1.9431295515464934e-05, "loss": 0.6578, "step": 7113 }, { "epoch": 0.21853592602832303, "grad_norm": 0.3465538024902344, "learning_rate": 1.9431134850787608e-05, "loss": 0.5992, "step": 7114 }, { "epoch": 0.2185666451632722, "grad_norm": 0.32986098527908325, "learning_rate": 1.9430974164083207e-05, "loss": 0.6117, "step": 7115 }, { "epoch": 0.21859736429822135, "grad_norm": 0.3635178804397583, "learning_rate": 1.943081345535211e-05, "loss": 0.5698, "step": 7116 }, { "epoch": 0.21862808343317053, "grad_norm": 0.3401992619037628, "learning_rate": 1.9430652724594686e-05, "loss": 0.5809, "step": 7117 }, { "epoch": 0.21865880256811968, "grad_norm": 0.3635712265968323, "learning_rate": 1.9430491971811317e-05, "loss": 0.6257, "step": 7118 }, { "epoch": 0.21868952170306885, "grad_norm": 0.3290877640247345, "learning_rate": 1.9430331197002375e-05, "loss": 0.6396, "step": 7119 }, { "epoch": 0.218720240838018, "grad_norm": 0.36079996824264526, "learning_rate": 1.9430170400168234e-05, "loss": 0.5824, "step": 7120 }, { "epoch": 0.21875095997296717, "grad_norm": 0.3184202313423157, "learning_rate": 1.9430009581309272e-05, "loss": 0.5777, "step": 7121 }, { "epoch": 0.21878167910791632, "grad_norm": 0.3563358187675476, "learning_rate": 1.9429848740425865e-05, "loss": 0.6189, "step": 7122 }, { "epoch": 0.21881239824286547, "grad_norm": 0.3160257637500763, "learning_rate": 1.9429687877518387e-05, "loss": 0.5865, "step": 7123 }, { "epoch": 0.21884311737781464, "grad_norm": 0.33754220604896545, "learning_rate": 1.9429526992587216e-05, "loss": 0.6129, "step": 7124 }, { "epoch": 0.2188738365127638, "grad_norm": 0.3504053056240082, "learning_rate": 1.9429366085632725e-05, "loss": 0.5329, "step": 7125 }, { "epoch": 0.21890455564771297, "grad_norm": 0.2995401918888092, "learning_rate": 1.9429205156655292e-05, "loss": 0.629, "step": 7126 }, { "epoch": 0.21893527478266211, "grad_norm": 0.3723814785480499, "learning_rate": 1.9429044205655293e-05, "loss": 0.6091, "step": 7127 }, { "epoch": 0.2189659939176113, "grad_norm": 0.3615216016769409, "learning_rate": 1.94288832326331e-05, "loss": 0.6404, "step": 7128 }, { "epoch": 0.21899671305256044, "grad_norm": 0.32463061809539795, "learning_rate": 1.9428722237589092e-05, "loss": 0.5277, "step": 7129 }, { "epoch": 0.2190274321875096, "grad_norm": 0.32243165373802185, "learning_rate": 1.9428561220523644e-05, "loss": 0.5582, "step": 7130 }, { "epoch": 0.21905815132245876, "grad_norm": 0.35075879096984863, "learning_rate": 1.9428400181437132e-05, "loss": 0.6402, "step": 7131 }, { "epoch": 0.2190888704574079, "grad_norm": 0.3256359100341797, "learning_rate": 1.9428239120329936e-05, "loss": 0.6862, "step": 7132 }, { "epoch": 0.21911958959235708, "grad_norm": 0.3349444568157196, "learning_rate": 1.9428078037202427e-05, "loss": 0.6368, "step": 7133 }, { "epoch": 0.21915030872730623, "grad_norm": 0.33468306064605713, "learning_rate": 1.9427916932054985e-05, "loss": 0.5385, "step": 7134 }, { "epoch": 0.2191810278622554, "grad_norm": 0.32043999433517456, "learning_rate": 1.9427755804887984e-05, "loss": 0.6236, "step": 7135 }, { "epoch": 0.21921174699720455, "grad_norm": 0.3287082314491272, "learning_rate": 1.94275946557018e-05, "loss": 0.5075, "step": 7136 }, { "epoch": 0.21924246613215373, "grad_norm": 0.34141165018081665, "learning_rate": 1.9427433484496808e-05, "loss": 0.5898, "step": 7137 }, { "epoch": 0.21927318526710288, "grad_norm": 0.3634958267211914, "learning_rate": 1.942727229127339e-05, "loss": 0.6476, "step": 7138 }, { "epoch": 0.21930390440205202, "grad_norm": 0.33529090881347656, "learning_rate": 1.942711107603192e-05, "loss": 0.6174, "step": 7139 }, { "epoch": 0.2193346235370012, "grad_norm": 0.33158114552497864, "learning_rate": 1.942694983877277e-05, "loss": 0.6182, "step": 7140 }, { "epoch": 0.21936534267195035, "grad_norm": 0.4355070888996124, "learning_rate": 1.9426788579496323e-05, "loss": 0.5697, "step": 7141 }, { "epoch": 0.21939606180689952, "grad_norm": 0.3543454706668854, "learning_rate": 1.9426627298202955e-05, "loss": 0.5054, "step": 7142 }, { "epoch": 0.21942678094184867, "grad_norm": 0.33288151025772095, "learning_rate": 1.9426465994893037e-05, "loss": 0.5994, "step": 7143 }, { "epoch": 0.21945750007679785, "grad_norm": 0.3313842713832855, "learning_rate": 1.942630466956695e-05, "loss": 0.4747, "step": 7144 }, { "epoch": 0.219488219211747, "grad_norm": 0.3558490574359894, "learning_rate": 1.942614332222507e-05, "loss": 0.5928, "step": 7145 }, { "epoch": 0.21951893834669617, "grad_norm": 0.3685089349746704, "learning_rate": 1.9425981952867774e-05, "loss": 0.5504, "step": 7146 }, { "epoch": 0.21954965748164532, "grad_norm": 0.3376852869987488, "learning_rate": 1.942582056149544e-05, "loss": 0.5733, "step": 7147 }, { "epoch": 0.21958037661659446, "grad_norm": 0.37337741255760193, "learning_rate": 1.942565914810844e-05, "loss": 0.6297, "step": 7148 }, { "epoch": 0.21961109575154364, "grad_norm": 0.3233986794948578, "learning_rate": 1.9425497712707163e-05, "loss": 0.4732, "step": 7149 }, { "epoch": 0.2196418148864928, "grad_norm": 0.32665523886680603, "learning_rate": 1.942533625529197e-05, "loss": 0.6151, "step": 7150 }, { "epoch": 0.21967253402144196, "grad_norm": 0.35440289974212646, "learning_rate": 1.942517477586325e-05, "loss": 0.6057, "step": 7151 }, { "epoch": 0.2197032531563911, "grad_norm": 0.3534461557865143, "learning_rate": 1.9425013274421375e-05, "loss": 0.5994, "step": 7152 }, { "epoch": 0.21973397229134028, "grad_norm": 0.3420439064502716, "learning_rate": 1.9424851750966723e-05, "loss": 0.5884, "step": 7153 }, { "epoch": 0.21976469142628943, "grad_norm": 0.3618740141391754, "learning_rate": 1.9424690205499673e-05, "loss": 0.6518, "step": 7154 }, { "epoch": 0.2197954105612386, "grad_norm": 0.35907652974128723, "learning_rate": 1.9424528638020598e-05, "loss": 0.5965, "step": 7155 }, { "epoch": 0.21982612969618776, "grad_norm": 0.3389209508895874, "learning_rate": 1.9424367048529878e-05, "loss": 0.6639, "step": 7156 }, { "epoch": 0.2198568488311369, "grad_norm": 0.34781283140182495, "learning_rate": 1.9424205437027896e-05, "loss": 0.5782, "step": 7157 }, { "epoch": 0.21988756796608608, "grad_norm": 0.3645617961883545, "learning_rate": 1.9424043803515017e-05, "loss": 0.5413, "step": 7158 }, { "epoch": 0.21991828710103523, "grad_norm": 0.3772139847278595, "learning_rate": 1.9423882147991628e-05, "loss": 0.6291, "step": 7159 }, { "epoch": 0.2199490062359844, "grad_norm": 0.35135146975517273, "learning_rate": 1.9423720470458104e-05, "loss": 0.6472, "step": 7160 }, { "epoch": 0.21997972537093355, "grad_norm": 0.39760822057724, "learning_rate": 1.9423558770914823e-05, "loss": 0.5996, "step": 7161 }, { "epoch": 0.22001044450588272, "grad_norm": 0.3335035443305969, "learning_rate": 1.9423397049362163e-05, "loss": 0.5843, "step": 7162 }, { "epoch": 0.22004116364083187, "grad_norm": 0.3445531129837036, "learning_rate": 1.9423235305800503e-05, "loss": 0.5666, "step": 7163 }, { "epoch": 0.22007188277578105, "grad_norm": 0.5594555735588074, "learning_rate": 1.9423073540230214e-05, "loss": 0.6114, "step": 7164 }, { "epoch": 0.2201026019107302, "grad_norm": 0.3134543001651764, "learning_rate": 1.9422911752651685e-05, "loss": 0.6084, "step": 7165 }, { "epoch": 0.22013332104567934, "grad_norm": 0.3298882246017456, "learning_rate": 1.942274994306528e-05, "loss": 0.6003, "step": 7166 }, { "epoch": 0.22016404018062852, "grad_norm": 0.3763889670372009, "learning_rate": 1.942258811147139e-05, "loss": 0.5665, "step": 7167 }, { "epoch": 0.22019475931557767, "grad_norm": 0.3242780864238739, "learning_rate": 1.9422426257870385e-05, "loss": 0.5714, "step": 7168 }, { "epoch": 0.22022547845052684, "grad_norm": 0.3673727512359619, "learning_rate": 1.9422264382262646e-05, "loss": 0.5177, "step": 7169 }, { "epoch": 0.220256197585476, "grad_norm": 0.3830263912677765, "learning_rate": 1.942210248464855e-05, "loss": 0.6409, "step": 7170 }, { "epoch": 0.22028691672042516, "grad_norm": 0.3231630325317383, "learning_rate": 1.9421940565028476e-05, "loss": 0.5445, "step": 7171 }, { "epoch": 0.2203176358553743, "grad_norm": 0.3969411253929138, "learning_rate": 1.9421778623402802e-05, "loss": 0.5669, "step": 7172 }, { "epoch": 0.2203483549903235, "grad_norm": 0.3272370994091034, "learning_rate": 1.9421616659771907e-05, "loss": 0.6428, "step": 7173 }, { "epoch": 0.22037907412527263, "grad_norm": 0.368362158536911, "learning_rate": 1.9421454674136166e-05, "loss": 0.5889, "step": 7174 }, { "epoch": 0.22040979326022178, "grad_norm": 0.3434748947620392, "learning_rate": 1.9421292666495963e-05, "loss": 0.6027, "step": 7175 }, { "epoch": 0.22044051239517096, "grad_norm": 0.3716348707675934, "learning_rate": 1.942113063685167e-05, "loss": 0.5897, "step": 7176 }, { "epoch": 0.2204712315301201, "grad_norm": 0.31760644912719727, "learning_rate": 1.9420968585203667e-05, "loss": 0.5359, "step": 7177 }, { "epoch": 0.22050195066506928, "grad_norm": 0.4329708218574524, "learning_rate": 1.9420806511552337e-05, "loss": 0.6072, "step": 7178 }, { "epoch": 0.22053266980001843, "grad_norm": 0.3030519187450409, "learning_rate": 1.9420644415898055e-05, "loss": 0.4949, "step": 7179 }, { "epoch": 0.2205633889349676, "grad_norm": 0.35866090655326843, "learning_rate": 1.94204822982412e-05, "loss": 0.5449, "step": 7180 }, { "epoch": 0.22059410806991675, "grad_norm": 0.31370922923088074, "learning_rate": 1.942032015858215e-05, "loss": 0.4882, "step": 7181 }, { "epoch": 0.2206248272048659, "grad_norm": 0.3625671863555908, "learning_rate": 1.9420157996921285e-05, "loss": 0.5585, "step": 7182 }, { "epoch": 0.22065554633981507, "grad_norm": 0.3427303433418274, "learning_rate": 1.9419995813258982e-05, "loss": 0.5864, "step": 7183 }, { "epoch": 0.22068626547476422, "grad_norm": 0.32191741466522217, "learning_rate": 1.9419833607595626e-05, "loss": 0.5781, "step": 7184 }, { "epoch": 0.2207169846097134, "grad_norm": 0.35500389337539673, "learning_rate": 1.9419671379931584e-05, "loss": 0.6185, "step": 7185 }, { "epoch": 0.22074770374466254, "grad_norm": 0.3530677258968353, "learning_rate": 1.9419509130267244e-05, "loss": 0.6257, "step": 7186 }, { "epoch": 0.22077842287961172, "grad_norm": 0.3489549458026886, "learning_rate": 1.941934685860298e-05, "loss": 0.6493, "step": 7187 }, { "epoch": 0.22080914201456087, "grad_norm": 0.34287285804748535, "learning_rate": 1.9419184564939176e-05, "loss": 0.6074, "step": 7188 }, { "epoch": 0.22083986114951004, "grad_norm": 0.35013750195503235, "learning_rate": 1.941902224927621e-05, "loss": 0.5663, "step": 7189 }, { "epoch": 0.2208705802844592, "grad_norm": 0.3187258541584015, "learning_rate": 1.9418859911614462e-05, "loss": 0.5749, "step": 7190 }, { "epoch": 0.22090129941940834, "grad_norm": 0.31968042254447937, "learning_rate": 1.9418697551954305e-05, "loss": 0.6362, "step": 7191 }, { "epoch": 0.2209320185543575, "grad_norm": 0.3210238218307495, "learning_rate": 1.9418535170296123e-05, "loss": 0.5818, "step": 7192 }, { "epoch": 0.22096273768930666, "grad_norm": 0.32481810450553894, "learning_rate": 1.941837276664029e-05, "loss": 0.5417, "step": 7193 }, { "epoch": 0.22099345682425584, "grad_norm": 0.3374452292919159, "learning_rate": 1.9418210340987195e-05, "loss": 0.5836, "step": 7194 }, { "epoch": 0.22102417595920498, "grad_norm": 0.3190440237522125, "learning_rate": 1.941804789333721e-05, "loss": 0.5843, "step": 7195 }, { "epoch": 0.22105489509415416, "grad_norm": 0.36927202343940735, "learning_rate": 1.941788542369072e-05, "loss": 0.5106, "step": 7196 }, { "epoch": 0.2210856142291033, "grad_norm": 0.2898714244365692, "learning_rate": 1.9417722932048095e-05, "loss": 0.4868, "step": 7197 }, { "epoch": 0.22111633336405248, "grad_norm": 0.3173111379146576, "learning_rate": 1.9417560418409725e-05, "loss": 0.5629, "step": 7198 }, { "epoch": 0.22114705249900163, "grad_norm": 0.35951098799705505, "learning_rate": 1.9417397882775982e-05, "loss": 0.6421, "step": 7199 }, { "epoch": 0.22117777163395078, "grad_norm": 0.34194108843803406, "learning_rate": 1.941723532514725e-05, "loss": 0.5919, "step": 7200 }, { "epoch": 0.22120849076889995, "grad_norm": 0.31767261028289795, "learning_rate": 1.9417072745523903e-05, "loss": 0.5718, "step": 7201 }, { "epoch": 0.2212392099038491, "grad_norm": 0.32560548186302185, "learning_rate": 1.941691014390633e-05, "loss": 0.5907, "step": 7202 }, { "epoch": 0.22126992903879827, "grad_norm": 0.3510071337223053, "learning_rate": 1.9416747520294906e-05, "loss": 0.6011, "step": 7203 }, { "epoch": 0.22130064817374742, "grad_norm": 0.36088430881500244, "learning_rate": 1.941658487469001e-05, "loss": 0.6563, "step": 7204 }, { "epoch": 0.2213313673086966, "grad_norm": 0.32846441864967346, "learning_rate": 1.941642220709202e-05, "loss": 0.6323, "step": 7205 }, { "epoch": 0.22136208644364574, "grad_norm": 0.3245893716812134, "learning_rate": 1.941625951750132e-05, "loss": 0.5742, "step": 7206 }, { "epoch": 0.22139280557859492, "grad_norm": 0.33086419105529785, "learning_rate": 1.9416096805918284e-05, "loss": 0.6249, "step": 7207 }, { "epoch": 0.22142352471354407, "grad_norm": 0.3194262385368347, "learning_rate": 1.94159340723433e-05, "loss": 0.5829, "step": 7208 }, { "epoch": 0.22145424384849322, "grad_norm": 0.3440783619880676, "learning_rate": 1.9415771316776748e-05, "loss": 0.5817, "step": 7209 }, { "epoch": 0.2214849629834424, "grad_norm": 0.3303492069244385, "learning_rate": 1.9415608539219e-05, "loss": 0.5575, "step": 7210 }, { "epoch": 0.22151568211839154, "grad_norm": 0.333050936460495, "learning_rate": 1.941544573967044e-05, "loss": 0.6176, "step": 7211 }, { "epoch": 0.2215464012533407, "grad_norm": 0.335805743932724, "learning_rate": 1.9415282918131454e-05, "loss": 0.5326, "step": 7212 }, { "epoch": 0.22157712038828986, "grad_norm": 0.41430655121803284, "learning_rate": 1.9415120074602415e-05, "loss": 0.631, "step": 7213 }, { "epoch": 0.22160783952323904, "grad_norm": 0.33804017305374146, "learning_rate": 1.9414957209083702e-05, "loss": 0.5758, "step": 7214 }, { "epoch": 0.22163855865818818, "grad_norm": 0.3440191447734833, "learning_rate": 1.9414794321575702e-05, "loss": 0.5132, "step": 7215 }, { "epoch": 0.22166927779313733, "grad_norm": 0.3468224108219147, "learning_rate": 1.941463141207879e-05, "loss": 0.5191, "step": 7216 }, { "epoch": 0.2216999969280865, "grad_norm": 0.3893570601940155, "learning_rate": 1.941446848059335e-05, "loss": 0.6779, "step": 7217 }, { "epoch": 0.22173071606303565, "grad_norm": 0.3548140823841095, "learning_rate": 1.9414305527119762e-05, "loss": 0.5778, "step": 7218 }, { "epoch": 0.22176143519798483, "grad_norm": 0.3504079580307007, "learning_rate": 1.9414142551658406e-05, "loss": 0.5968, "step": 7219 }, { "epoch": 0.22179215433293398, "grad_norm": 0.39162158966064453, "learning_rate": 1.9413979554209663e-05, "loss": 0.594, "step": 7220 }, { "epoch": 0.22182287346788315, "grad_norm": 0.4032825231552124, "learning_rate": 1.9413816534773913e-05, "loss": 0.6314, "step": 7221 }, { "epoch": 0.2218535926028323, "grad_norm": 0.31971004605293274, "learning_rate": 1.9413653493351536e-05, "loss": 0.5183, "step": 7222 }, { "epoch": 0.22188431173778148, "grad_norm": 0.316476047039032, "learning_rate": 1.9413490429942915e-05, "loss": 0.5556, "step": 7223 }, { "epoch": 0.22191503087273062, "grad_norm": 0.30760884284973145, "learning_rate": 1.941332734454843e-05, "loss": 0.5051, "step": 7224 }, { "epoch": 0.22194575000767977, "grad_norm": 0.3040778934955597, "learning_rate": 1.9413164237168457e-05, "loss": 0.4794, "step": 7225 }, { "epoch": 0.22197646914262895, "grad_norm": 0.3265877962112427, "learning_rate": 1.9413001107803386e-05, "loss": 0.4958, "step": 7226 }, { "epoch": 0.2220071882775781, "grad_norm": 0.44548872113227844, "learning_rate": 1.9412837956453595e-05, "loss": 0.6624, "step": 7227 }, { "epoch": 0.22203790741252727, "grad_norm": 0.3187565207481384, "learning_rate": 1.941267478311946e-05, "loss": 0.59, "step": 7228 }, { "epoch": 0.22206862654747642, "grad_norm": 0.35425472259521484, "learning_rate": 1.9412511587801368e-05, "loss": 0.6078, "step": 7229 }, { "epoch": 0.2220993456824256, "grad_norm": 0.37612730264663696, "learning_rate": 1.9412348370499697e-05, "loss": 0.6622, "step": 7230 }, { "epoch": 0.22213006481737474, "grad_norm": 0.3793788254261017, "learning_rate": 1.941218513121483e-05, "loss": 0.5876, "step": 7231 }, { "epoch": 0.22216078395232391, "grad_norm": 0.3921734392642975, "learning_rate": 1.941202186994714e-05, "loss": 0.5966, "step": 7232 }, { "epoch": 0.22219150308727306, "grad_norm": 0.3476913273334503, "learning_rate": 1.9411858586697027e-05, "loss": 0.5956, "step": 7233 }, { "epoch": 0.2222222222222222, "grad_norm": 0.3753911256790161, "learning_rate": 1.9411695281464853e-05, "loss": 0.5897, "step": 7234 }, { "epoch": 0.22225294135717139, "grad_norm": 0.3128328323364258, "learning_rate": 1.941153195425101e-05, "loss": 0.4938, "step": 7235 }, { "epoch": 0.22228366049212053, "grad_norm": 0.32388758659362793, "learning_rate": 1.9411368605055876e-05, "loss": 0.5559, "step": 7236 }, { "epoch": 0.2223143796270697, "grad_norm": 0.36194419860839844, "learning_rate": 1.9411205233879836e-05, "loss": 0.5585, "step": 7237 }, { "epoch": 0.22234509876201886, "grad_norm": 0.30722498893737793, "learning_rate": 1.9411041840723267e-05, "loss": 0.5528, "step": 7238 }, { "epoch": 0.22237581789696803, "grad_norm": 0.33886101841926575, "learning_rate": 1.941087842558655e-05, "loss": 0.6759, "step": 7239 }, { "epoch": 0.22240653703191718, "grad_norm": 0.3695860207080841, "learning_rate": 1.9410714988470074e-05, "loss": 0.6475, "step": 7240 }, { "epoch": 0.22243725616686635, "grad_norm": 0.33189883828163147, "learning_rate": 1.9410551529374212e-05, "loss": 0.5527, "step": 7241 }, { "epoch": 0.2224679753018155, "grad_norm": 0.3799692690372467, "learning_rate": 1.941038804829935e-05, "loss": 0.6182, "step": 7242 }, { "epoch": 0.22249869443676465, "grad_norm": 0.37830138206481934, "learning_rate": 1.9410224545245874e-05, "loss": 0.5604, "step": 7243 }, { "epoch": 0.22252941357171382, "grad_norm": 0.3199932873249054, "learning_rate": 1.9410061020214156e-05, "loss": 0.5799, "step": 7244 }, { "epoch": 0.22256013270666297, "grad_norm": 0.34440889954566956, "learning_rate": 1.9409897473204585e-05, "loss": 0.5232, "step": 7245 }, { "epoch": 0.22259085184161215, "grad_norm": 0.35764384269714355, "learning_rate": 1.9409733904217543e-05, "loss": 0.5849, "step": 7246 }, { "epoch": 0.2226215709765613, "grad_norm": 0.35114040970802307, "learning_rate": 1.940957031325341e-05, "loss": 0.6643, "step": 7247 }, { "epoch": 0.22265229011151047, "grad_norm": 0.3245311975479126, "learning_rate": 1.9409406700312567e-05, "loss": 0.5557, "step": 7248 }, { "epoch": 0.22268300924645962, "grad_norm": 0.33857250213623047, "learning_rate": 1.9409243065395397e-05, "loss": 0.5887, "step": 7249 }, { "epoch": 0.2227137283814088, "grad_norm": 0.31268182396888733, "learning_rate": 1.9409079408502286e-05, "loss": 0.4847, "step": 7250 }, { "epoch": 0.22274444751635794, "grad_norm": 0.35600247979164124, "learning_rate": 1.940891572963361e-05, "loss": 0.5507, "step": 7251 }, { "epoch": 0.2227751666513071, "grad_norm": 0.40057405829429626, "learning_rate": 1.9408752028789754e-05, "loss": 0.6404, "step": 7252 }, { "epoch": 0.22280588578625626, "grad_norm": 0.32984012365341187, "learning_rate": 1.9408588305971102e-05, "loss": 0.6211, "step": 7253 }, { "epoch": 0.2228366049212054, "grad_norm": 0.35753247141838074, "learning_rate": 1.9408424561178034e-05, "loss": 0.5843, "step": 7254 }, { "epoch": 0.2228673240561546, "grad_norm": 0.3563820421695709, "learning_rate": 1.9408260794410933e-05, "loss": 0.6002, "step": 7255 }, { "epoch": 0.22289804319110373, "grad_norm": 0.3608687222003937, "learning_rate": 1.940809700567018e-05, "loss": 0.5862, "step": 7256 }, { "epoch": 0.2229287623260529, "grad_norm": 0.446508526802063, "learning_rate": 1.9407933194956164e-05, "loss": 0.6057, "step": 7257 }, { "epoch": 0.22295948146100206, "grad_norm": 0.33558741211891174, "learning_rate": 1.940776936226926e-05, "loss": 0.6679, "step": 7258 }, { "epoch": 0.2229902005959512, "grad_norm": 0.4057655334472656, "learning_rate": 1.9407605507609854e-05, "loss": 0.5358, "step": 7259 }, { "epoch": 0.22302091973090038, "grad_norm": 0.37614864110946655, "learning_rate": 1.940744163097833e-05, "loss": 0.6136, "step": 7260 }, { "epoch": 0.22305163886584953, "grad_norm": 0.34239548444747925, "learning_rate": 1.940727773237507e-05, "loss": 0.6088, "step": 7261 }, { "epoch": 0.2230823580007987, "grad_norm": 0.32064956426620483, "learning_rate": 1.9407113811800454e-05, "loss": 0.5675, "step": 7262 }, { "epoch": 0.22311307713574785, "grad_norm": 0.33508574962615967, "learning_rate": 1.9406949869254866e-05, "loss": 0.5856, "step": 7263 }, { "epoch": 0.22314379627069703, "grad_norm": 0.34020787477493286, "learning_rate": 1.9406785904738687e-05, "loss": 0.5811, "step": 7264 }, { "epoch": 0.22317451540564617, "grad_norm": 0.3702728748321533, "learning_rate": 1.9406621918252306e-05, "loss": 0.6372, "step": 7265 }, { "epoch": 0.22320523454059535, "grad_norm": 0.3208499550819397, "learning_rate": 1.9406457909796104e-05, "loss": 0.659, "step": 7266 }, { "epoch": 0.2232359536755445, "grad_norm": 0.33966052532196045, "learning_rate": 1.9406293879370458e-05, "loss": 0.6218, "step": 7267 }, { "epoch": 0.22326667281049364, "grad_norm": 0.43061530590057373, "learning_rate": 1.9406129826975762e-05, "loss": 0.6374, "step": 7268 }, { "epoch": 0.22329739194544282, "grad_norm": 0.3767540156841278, "learning_rate": 1.9405965752612387e-05, "loss": 0.5988, "step": 7269 }, { "epoch": 0.22332811108039197, "grad_norm": 0.3326537013053894, "learning_rate": 1.9405801656280724e-05, "loss": 0.5384, "step": 7270 }, { "epoch": 0.22335883021534114, "grad_norm": 0.35221877694129944, "learning_rate": 1.9405637537981156e-05, "loss": 0.624, "step": 7271 }, { "epoch": 0.2233895493502903, "grad_norm": 0.31498655676841736, "learning_rate": 1.9405473397714063e-05, "loss": 0.5959, "step": 7272 }, { "epoch": 0.22342026848523946, "grad_norm": 0.9106410145759583, "learning_rate": 1.940530923547983e-05, "loss": 0.6769, "step": 7273 }, { "epoch": 0.2234509876201886, "grad_norm": 0.3679969906806946, "learning_rate": 1.940514505127884e-05, "loss": 0.6356, "step": 7274 }, { "epoch": 0.2234817067551378, "grad_norm": 0.353985995054245, "learning_rate": 1.940498084511148e-05, "loss": 0.5396, "step": 7275 }, { "epoch": 0.22351242589008694, "grad_norm": 0.37256819009780884, "learning_rate": 1.940481661697813e-05, "loss": 0.6255, "step": 7276 }, { "epoch": 0.22354314502503608, "grad_norm": 0.31664255261421204, "learning_rate": 1.9404652366879168e-05, "loss": 0.5754, "step": 7277 }, { "epoch": 0.22357386415998526, "grad_norm": 0.377149760723114, "learning_rate": 1.940448809481499e-05, "loss": 0.5047, "step": 7278 }, { "epoch": 0.2236045832949344, "grad_norm": 0.38497188687324524, "learning_rate": 1.9404323800785968e-05, "loss": 0.5563, "step": 7279 }, { "epoch": 0.22363530242988358, "grad_norm": 0.41140514612197876, "learning_rate": 1.9404159484792495e-05, "loss": 0.5729, "step": 7280 }, { "epoch": 0.22366602156483273, "grad_norm": 0.32804861664772034, "learning_rate": 1.940399514683495e-05, "loss": 0.5744, "step": 7281 }, { "epoch": 0.2236967406997819, "grad_norm": 0.33464422821998596, "learning_rate": 1.9403830786913714e-05, "loss": 0.5989, "step": 7282 }, { "epoch": 0.22372745983473105, "grad_norm": 0.3336753845214844, "learning_rate": 1.940366640502918e-05, "loss": 0.6108, "step": 7283 }, { "epoch": 0.22375817896968023, "grad_norm": 0.3749425709247589, "learning_rate": 1.9403502001181723e-05, "loss": 0.573, "step": 7284 }, { "epoch": 0.22378889810462937, "grad_norm": 0.3356497287750244, "learning_rate": 1.9403337575371733e-05, "loss": 0.5221, "step": 7285 }, { "epoch": 0.22381961723957852, "grad_norm": 0.36244598031044006, "learning_rate": 1.9403173127599588e-05, "loss": 0.5863, "step": 7286 }, { "epoch": 0.2238503363745277, "grad_norm": 0.34321773052215576, "learning_rate": 1.9403008657865678e-05, "loss": 0.5795, "step": 7287 }, { "epoch": 0.22388105550947685, "grad_norm": 0.36272549629211426, "learning_rate": 1.9402844166170385e-05, "loss": 0.6019, "step": 7288 }, { "epoch": 0.22391177464442602, "grad_norm": 0.3347555696964264, "learning_rate": 1.9402679652514093e-05, "loss": 0.6483, "step": 7289 }, { "epoch": 0.22394249377937517, "grad_norm": 0.33956190943717957, "learning_rate": 1.940251511689718e-05, "loss": 0.5373, "step": 7290 }, { "epoch": 0.22397321291432434, "grad_norm": 0.3172222971916199, "learning_rate": 1.9402350559320045e-05, "loss": 0.5624, "step": 7291 }, { "epoch": 0.2240039320492735, "grad_norm": 0.35386818647384644, "learning_rate": 1.9402185979783055e-05, "loss": 0.6154, "step": 7292 }, { "epoch": 0.22403465118422264, "grad_norm": 0.32279449701309204, "learning_rate": 1.9402021378286607e-05, "loss": 0.5727, "step": 7293 }, { "epoch": 0.2240653703191718, "grad_norm": 0.34865015745162964, "learning_rate": 1.9401856754831084e-05, "loss": 0.6071, "step": 7294 }, { "epoch": 0.22409608945412096, "grad_norm": 0.32429832220077515, "learning_rate": 1.9401692109416866e-05, "loss": 0.6232, "step": 7295 }, { "epoch": 0.22412680858907014, "grad_norm": 0.3141568601131439, "learning_rate": 1.9401527442044338e-05, "loss": 0.6303, "step": 7296 }, { "epoch": 0.22415752772401928, "grad_norm": 0.3448539078235626, "learning_rate": 1.9401362752713887e-05, "loss": 0.6042, "step": 7297 }, { "epoch": 0.22418824685896846, "grad_norm": 0.3808838427066803, "learning_rate": 1.94011980414259e-05, "loss": 0.5855, "step": 7298 }, { "epoch": 0.2242189659939176, "grad_norm": 0.40192991495132446, "learning_rate": 1.9401033308180754e-05, "loss": 0.5392, "step": 7299 }, { "epoch": 0.22424968512886678, "grad_norm": 0.3906198740005493, "learning_rate": 1.940086855297884e-05, "loss": 0.6582, "step": 7300 }, { "epoch": 0.22428040426381593, "grad_norm": 0.3786102533340454, "learning_rate": 1.9400703775820543e-05, "loss": 0.6042, "step": 7301 }, { "epoch": 0.22431112339876508, "grad_norm": 0.29326459765434265, "learning_rate": 1.940053897670624e-05, "loss": 0.4568, "step": 7302 }, { "epoch": 0.22434184253371425, "grad_norm": 0.3058309257030487, "learning_rate": 1.9400374155636324e-05, "loss": 0.5289, "step": 7303 }, { "epoch": 0.2243725616686634, "grad_norm": 0.31788378953933716, "learning_rate": 1.940020931261118e-05, "loss": 0.4702, "step": 7304 }, { "epoch": 0.22440328080361258, "grad_norm": 0.33539125323295593, "learning_rate": 1.940004444763119e-05, "loss": 0.4825, "step": 7305 }, { "epoch": 0.22443399993856172, "grad_norm": 0.3521583378314972, "learning_rate": 1.939987956069674e-05, "loss": 0.6201, "step": 7306 }, { "epoch": 0.2244647190735109, "grad_norm": 0.34196794033050537, "learning_rate": 1.9399714651808212e-05, "loss": 0.5503, "step": 7307 }, { "epoch": 0.22449543820846005, "grad_norm": 0.38174793124198914, "learning_rate": 1.9399549720965995e-05, "loss": 0.6083, "step": 7308 }, { "epoch": 0.22452615734340922, "grad_norm": 0.38010504841804504, "learning_rate": 1.9399384768170475e-05, "loss": 0.6116, "step": 7309 }, { "epoch": 0.22455687647835837, "grad_norm": 0.32622870802879333, "learning_rate": 1.9399219793422034e-05, "loss": 0.5329, "step": 7310 }, { "epoch": 0.22458759561330752, "grad_norm": 0.31432026624679565, "learning_rate": 1.939905479672106e-05, "loss": 0.5413, "step": 7311 }, { "epoch": 0.2246183147482567, "grad_norm": 0.3305554986000061, "learning_rate": 1.9398889778067936e-05, "loss": 0.5851, "step": 7312 }, { "epoch": 0.22464903388320584, "grad_norm": 0.41531622409820557, "learning_rate": 1.939872473746305e-05, "loss": 0.6308, "step": 7313 }, { "epoch": 0.22467975301815502, "grad_norm": 0.4783003330230713, "learning_rate": 1.9398559674906784e-05, "loss": 0.6013, "step": 7314 }, { "epoch": 0.22471047215310416, "grad_norm": 0.36595404148101807, "learning_rate": 1.9398394590399526e-05, "loss": 0.6299, "step": 7315 }, { "epoch": 0.22474119128805334, "grad_norm": 0.32650187611579895, "learning_rate": 1.939822948394166e-05, "loss": 0.6356, "step": 7316 }, { "epoch": 0.22477191042300249, "grad_norm": 0.3189988136291504, "learning_rate": 1.9398064355533573e-05, "loss": 0.5857, "step": 7317 }, { "epoch": 0.22480262955795166, "grad_norm": 0.4128827750682831, "learning_rate": 1.9397899205175652e-05, "loss": 0.5682, "step": 7318 }, { "epoch": 0.2248333486929008, "grad_norm": 0.3469971716403961, "learning_rate": 1.9397734032868282e-05, "loss": 0.584, "step": 7319 }, { "epoch": 0.22486406782784996, "grad_norm": 0.3722459673881531, "learning_rate": 1.9397568838611845e-05, "loss": 0.5925, "step": 7320 }, { "epoch": 0.22489478696279913, "grad_norm": 0.31458425521850586, "learning_rate": 1.939740362240673e-05, "loss": 0.5144, "step": 7321 }, { "epoch": 0.22492550609774828, "grad_norm": 0.37274929881095886, "learning_rate": 1.9397238384253324e-05, "loss": 0.6625, "step": 7322 }, { "epoch": 0.22495622523269745, "grad_norm": 0.33405041694641113, "learning_rate": 1.939707312415201e-05, "loss": 0.599, "step": 7323 }, { "epoch": 0.2249869443676466, "grad_norm": 0.35059091448783875, "learning_rate": 1.9396907842103178e-05, "loss": 0.6118, "step": 7324 }, { "epoch": 0.22501766350259578, "grad_norm": 0.3474101126194, "learning_rate": 1.9396742538107206e-05, "loss": 0.6079, "step": 7325 }, { "epoch": 0.22504838263754492, "grad_norm": 0.30668747425079346, "learning_rate": 1.939657721216449e-05, "loss": 0.5806, "step": 7326 }, { "epoch": 0.2250791017724941, "grad_norm": 0.30133169889450073, "learning_rate": 1.9396411864275414e-05, "loss": 0.5794, "step": 7327 }, { "epoch": 0.22510982090744325, "grad_norm": 0.3993760347366333, "learning_rate": 1.939624649444036e-05, "loss": 0.5233, "step": 7328 }, { "epoch": 0.2251405400423924, "grad_norm": 0.33116549253463745, "learning_rate": 1.9396081102659712e-05, "loss": 0.5674, "step": 7329 }, { "epoch": 0.22517125917734157, "grad_norm": 0.3810521960258484, "learning_rate": 1.9395915688933864e-05, "loss": 0.6219, "step": 7330 }, { "epoch": 0.22520197831229072, "grad_norm": 0.3808867335319519, "learning_rate": 1.9395750253263198e-05, "loss": 0.5369, "step": 7331 }, { "epoch": 0.2252326974472399, "grad_norm": 0.3798576593399048, "learning_rate": 1.93955847956481e-05, "loss": 0.5186, "step": 7332 }, { "epoch": 0.22526341658218904, "grad_norm": 0.3758757710456848, "learning_rate": 1.9395419316088958e-05, "loss": 0.5404, "step": 7333 }, { "epoch": 0.22529413571713822, "grad_norm": 0.36691203713417053, "learning_rate": 1.939525381458616e-05, "loss": 0.665, "step": 7334 }, { "epoch": 0.22532485485208736, "grad_norm": 0.32093968987464905, "learning_rate": 1.9395088291140088e-05, "loss": 0.5643, "step": 7335 }, { "epoch": 0.2253555739870365, "grad_norm": 0.378238320350647, "learning_rate": 1.9394922745751134e-05, "loss": 0.5369, "step": 7336 }, { "epoch": 0.2253862931219857, "grad_norm": 0.3562236726284027, "learning_rate": 1.939475717841968e-05, "loss": 0.6071, "step": 7337 }, { "epoch": 0.22541701225693483, "grad_norm": 0.3510250151157379, "learning_rate": 1.9394591589146114e-05, "loss": 0.6363, "step": 7338 }, { "epoch": 0.225447731391884, "grad_norm": 0.3472912609577179, "learning_rate": 1.9394425977930824e-05, "loss": 0.5423, "step": 7339 }, { "epoch": 0.22547845052683316, "grad_norm": 0.34918931126594543, "learning_rate": 1.9394260344774192e-05, "loss": 0.5361, "step": 7340 }, { "epoch": 0.22550916966178233, "grad_norm": 0.39391931891441345, "learning_rate": 1.9394094689676616e-05, "loss": 0.5172, "step": 7341 }, { "epoch": 0.22553988879673148, "grad_norm": 0.3395105004310608, "learning_rate": 1.939392901263847e-05, "loss": 0.5618, "step": 7342 }, { "epoch": 0.22557060793168066, "grad_norm": 0.39103609323501587, "learning_rate": 1.939376331366015e-05, "loss": 0.606, "step": 7343 }, { "epoch": 0.2256013270666298, "grad_norm": 0.33228006958961487, "learning_rate": 1.939359759274204e-05, "loss": 0.6044, "step": 7344 }, { "epoch": 0.22563204620157895, "grad_norm": 0.32702723145484924, "learning_rate": 1.9393431849884526e-05, "loss": 0.661, "step": 7345 }, { "epoch": 0.22566276533652813, "grad_norm": 0.36131811141967773, "learning_rate": 1.9393266085087993e-05, "loss": 0.6247, "step": 7346 }, { "epoch": 0.22569348447147727, "grad_norm": 0.36233004927635193, "learning_rate": 1.9393100298352837e-05, "loss": 0.5972, "step": 7347 }, { "epoch": 0.22572420360642645, "grad_norm": 0.33884862065315247, "learning_rate": 1.9392934489679435e-05, "loss": 0.639, "step": 7348 }, { "epoch": 0.2257549227413756, "grad_norm": 0.34336191415786743, "learning_rate": 1.939276865906818e-05, "loss": 0.6654, "step": 7349 }, { "epoch": 0.22578564187632477, "grad_norm": 0.33246946334838867, "learning_rate": 1.9392602806519456e-05, "loss": 0.5886, "step": 7350 }, { "epoch": 0.22581636101127392, "grad_norm": 0.2943590581417084, "learning_rate": 1.9392436932033655e-05, "loss": 0.5847, "step": 7351 }, { "epoch": 0.2258470801462231, "grad_norm": 0.32404372096061707, "learning_rate": 1.939227103561116e-05, "loss": 0.623, "step": 7352 }, { "epoch": 0.22587779928117224, "grad_norm": 0.3628520667552948, "learning_rate": 1.939210511725236e-05, "loss": 0.6296, "step": 7353 }, { "epoch": 0.2259085184161214, "grad_norm": 0.30108460783958435, "learning_rate": 1.939193917695764e-05, "loss": 0.569, "step": 7354 }, { "epoch": 0.22593923755107057, "grad_norm": 0.3308115005493164, "learning_rate": 1.9391773214727395e-05, "loss": 0.5547, "step": 7355 }, { "epoch": 0.2259699566860197, "grad_norm": 0.32858237624168396, "learning_rate": 1.9391607230562004e-05, "loss": 0.536, "step": 7356 }, { "epoch": 0.2260006758209689, "grad_norm": 0.3378011882305145, "learning_rate": 1.9391441224461858e-05, "loss": 0.6078, "step": 7357 }, { "epoch": 0.22603139495591804, "grad_norm": 0.3461975157260895, "learning_rate": 1.9391275196427347e-05, "loss": 0.5692, "step": 7358 }, { "epoch": 0.2260621140908672, "grad_norm": 0.33913785219192505, "learning_rate": 1.9391109146458857e-05, "loss": 0.6414, "step": 7359 }, { "epoch": 0.22609283322581636, "grad_norm": 0.41452690958976746, "learning_rate": 1.9390943074556777e-05, "loss": 0.627, "step": 7360 }, { "epoch": 0.22612355236076553, "grad_norm": 0.3057536482810974, "learning_rate": 1.9390776980721494e-05, "loss": 0.5056, "step": 7361 }, { "epoch": 0.22615427149571468, "grad_norm": 0.4267441928386688, "learning_rate": 1.939061086495339e-05, "loss": 0.5186, "step": 7362 }, { "epoch": 0.22618499063066383, "grad_norm": 0.3263033628463745, "learning_rate": 1.9390444727252863e-05, "loss": 0.528, "step": 7363 }, { "epoch": 0.226215709765613, "grad_norm": 0.3514125645160675, "learning_rate": 1.9390278567620298e-05, "loss": 0.7454, "step": 7364 }, { "epoch": 0.22624642890056215, "grad_norm": 0.35905420780181885, "learning_rate": 1.9390112386056078e-05, "loss": 0.5454, "step": 7365 }, { "epoch": 0.22627714803551133, "grad_norm": 0.38070571422576904, "learning_rate": 1.9389946182560596e-05, "loss": 0.6367, "step": 7366 }, { "epoch": 0.22630786717046047, "grad_norm": 0.3762733042240143, "learning_rate": 1.9389779957134238e-05, "loss": 0.5827, "step": 7367 }, { "epoch": 0.22633858630540965, "grad_norm": 0.35853418707847595, "learning_rate": 1.9389613709777394e-05, "loss": 0.5968, "step": 7368 }, { "epoch": 0.2263693054403588, "grad_norm": 0.3401113450527191, "learning_rate": 1.9389447440490454e-05, "loss": 0.5138, "step": 7369 }, { "epoch": 0.22640002457530797, "grad_norm": 0.35725516080856323, "learning_rate": 1.93892811492738e-05, "loss": 0.6156, "step": 7370 }, { "epoch": 0.22643074371025712, "grad_norm": 0.36085283756256104, "learning_rate": 1.9389114836127824e-05, "loss": 0.616, "step": 7371 }, { "epoch": 0.22646146284520627, "grad_norm": 0.32564806938171387, "learning_rate": 1.938894850105292e-05, "loss": 0.6164, "step": 7372 }, { "epoch": 0.22649218198015544, "grad_norm": 0.3233872652053833, "learning_rate": 1.9388782144049463e-05, "loss": 0.5803, "step": 7373 }, { "epoch": 0.2265229011151046, "grad_norm": 0.3873271346092224, "learning_rate": 1.9388615765117855e-05, "loss": 0.6188, "step": 7374 }, { "epoch": 0.22655362025005377, "grad_norm": 0.3377671241760254, "learning_rate": 1.938844936425848e-05, "loss": 0.5959, "step": 7375 }, { "epoch": 0.22658433938500291, "grad_norm": 0.40109509229660034, "learning_rate": 1.938828294147172e-05, "loss": 0.6394, "step": 7376 }, { "epoch": 0.2266150585199521, "grad_norm": 0.35152846574783325, "learning_rate": 1.9388116496757975e-05, "loss": 0.5199, "step": 7377 }, { "epoch": 0.22664577765490124, "grad_norm": 0.3442809283733368, "learning_rate": 1.9387950030117627e-05, "loss": 0.5702, "step": 7378 }, { "epoch": 0.22667649678985038, "grad_norm": 0.3279044032096863, "learning_rate": 1.9387783541551065e-05, "loss": 0.6007, "step": 7379 }, { "epoch": 0.22670721592479956, "grad_norm": 0.3125022053718567, "learning_rate": 1.938761703105868e-05, "loss": 0.4561, "step": 7380 }, { "epoch": 0.2267379350597487, "grad_norm": 0.3144909739494324, "learning_rate": 1.938745049864086e-05, "loss": 0.6357, "step": 7381 }, { "epoch": 0.22676865419469788, "grad_norm": 0.329017698764801, "learning_rate": 1.938728394429799e-05, "loss": 0.6125, "step": 7382 }, { "epoch": 0.22679937332964703, "grad_norm": 0.341309130191803, "learning_rate": 1.9387117368030465e-05, "loss": 0.5653, "step": 7383 }, { "epoch": 0.2268300924645962, "grad_norm": 0.3181845545768738, "learning_rate": 1.9386950769838677e-05, "loss": 0.5217, "step": 7384 }, { "epoch": 0.22686081159954535, "grad_norm": 0.34928297996520996, "learning_rate": 1.9386784149723002e-05, "loss": 0.6392, "step": 7385 }, { "epoch": 0.22689153073449453, "grad_norm": 0.3331514596939087, "learning_rate": 1.9386617507683842e-05, "loss": 0.5957, "step": 7386 }, { "epoch": 0.22692224986944368, "grad_norm": 0.328285813331604, "learning_rate": 1.938645084372158e-05, "loss": 0.5334, "step": 7387 }, { "epoch": 0.22695296900439282, "grad_norm": 0.37842315435409546, "learning_rate": 1.9386284157836605e-05, "loss": 0.6478, "step": 7388 }, { "epoch": 0.226983688139342, "grad_norm": 0.4938315451145172, "learning_rate": 1.938611745002931e-05, "loss": 0.6377, "step": 7389 }, { "epoch": 0.22701440727429115, "grad_norm": 0.35763365030288696, "learning_rate": 1.938595072030008e-05, "loss": 0.6625, "step": 7390 }, { "epoch": 0.22704512640924032, "grad_norm": 0.33605343103408813, "learning_rate": 1.9385783968649304e-05, "loss": 0.5848, "step": 7391 }, { "epoch": 0.22707584554418947, "grad_norm": 0.34544625878334045, "learning_rate": 1.938561719507738e-05, "loss": 0.5921, "step": 7392 }, { "epoch": 0.22710656467913864, "grad_norm": 0.3244796097278595, "learning_rate": 1.9385450399584692e-05, "loss": 0.5541, "step": 7393 }, { "epoch": 0.2271372838140878, "grad_norm": 0.39071816205978394, "learning_rate": 1.9385283582171625e-05, "loss": 0.521, "step": 7394 }, { "epoch": 0.22716800294903697, "grad_norm": 0.3313172161579132, "learning_rate": 1.9385116742838573e-05, "loss": 0.582, "step": 7395 }, { "epoch": 0.22719872208398612, "grad_norm": 0.38669919967651367, "learning_rate": 1.9384949881585927e-05, "loss": 0.5524, "step": 7396 }, { "epoch": 0.22722944121893526, "grad_norm": 0.3380057215690613, "learning_rate": 1.9384782998414074e-05, "loss": 0.649, "step": 7397 }, { "epoch": 0.22726016035388444, "grad_norm": 0.34008917212486267, "learning_rate": 1.9384616093323408e-05, "loss": 0.5982, "step": 7398 }, { "epoch": 0.22729087948883359, "grad_norm": 0.3785461485385895, "learning_rate": 1.938444916631431e-05, "loss": 0.5531, "step": 7399 }, { "epoch": 0.22732159862378276, "grad_norm": 0.343075156211853, "learning_rate": 1.938428221738718e-05, "loss": 0.5482, "step": 7400 }, { "epoch": 0.2273523177587319, "grad_norm": 0.4082321226596832, "learning_rate": 1.9384115246542404e-05, "loss": 0.6307, "step": 7401 }, { "epoch": 0.22738303689368108, "grad_norm": 0.5316211581230164, "learning_rate": 1.938394825378037e-05, "loss": 0.6285, "step": 7402 }, { "epoch": 0.22741375602863023, "grad_norm": 0.3573005497455597, "learning_rate": 1.938378123910147e-05, "loss": 0.6185, "step": 7403 }, { "epoch": 0.2274444751635794, "grad_norm": 0.3370492160320282, "learning_rate": 1.9383614202506093e-05, "loss": 0.5748, "step": 7404 }, { "epoch": 0.22747519429852855, "grad_norm": 0.31930306553840637, "learning_rate": 1.938344714399463e-05, "loss": 0.5606, "step": 7405 }, { "epoch": 0.2275059134334777, "grad_norm": 0.43836915493011475, "learning_rate": 1.9383280063567468e-05, "loss": 0.5703, "step": 7406 }, { "epoch": 0.22753663256842688, "grad_norm": 0.34689727425575256, "learning_rate": 1.9383112961225e-05, "loss": 0.5091, "step": 7407 }, { "epoch": 0.22756735170337603, "grad_norm": 0.3750269114971161, "learning_rate": 1.938294583696762e-05, "loss": 0.5709, "step": 7408 }, { "epoch": 0.2275980708383252, "grad_norm": 0.3299335241317749, "learning_rate": 1.9382778690795714e-05, "loss": 0.5943, "step": 7409 }, { "epoch": 0.22762878997327435, "grad_norm": 0.3183330297470093, "learning_rate": 1.9382611522709673e-05, "loss": 0.5798, "step": 7410 }, { "epoch": 0.22765950910822352, "grad_norm": 0.3359580338001251, "learning_rate": 1.9382444332709886e-05, "loss": 0.5986, "step": 7411 }, { "epoch": 0.22769022824317267, "grad_norm": 0.3416002094745636, "learning_rate": 1.9382277120796747e-05, "loss": 0.6705, "step": 7412 }, { "epoch": 0.22772094737812182, "grad_norm": 0.3403262197971344, "learning_rate": 1.9382109886970646e-05, "loss": 0.5748, "step": 7413 }, { "epoch": 0.227751666513071, "grad_norm": 0.38222330808639526, "learning_rate": 1.9381942631231968e-05, "loss": 0.5473, "step": 7414 }, { "epoch": 0.22778238564802014, "grad_norm": 0.33137020468711853, "learning_rate": 1.938177535358111e-05, "loss": 0.5488, "step": 7415 }, { "epoch": 0.22781310478296932, "grad_norm": 0.3531838655471802, "learning_rate": 1.9381608054018462e-05, "loss": 0.6129, "step": 7416 }, { "epoch": 0.22784382391791846, "grad_norm": 0.29354819655418396, "learning_rate": 1.938144073254441e-05, "loss": 0.5539, "step": 7417 }, { "epoch": 0.22787454305286764, "grad_norm": 0.36375948786735535, "learning_rate": 1.9381273389159347e-05, "loss": 0.5865, "step": 7418 }, { "epoch": 0.2279052621878168, "grad_norm": 0.33908823132514954, "learning_rate": 1.938110602386367e-05, "loss": 0.6223, "step": 7419 }, { "epoch": 0.22793598132276596, "grad_norm": 0.3436124920845032, "learning_rate": 1.938093863665776e-05, "loss": 0.5772, "step": 7420 }, { "epoch": 0.2279667004577151, "grad_norm": 0.4509897828102112, "learning_rate": 1.9380771227542015e-05, "loss": 0.5496, "step": 7421 }, { "epoch": 0.22799741959266426, "grad_norm": 0.35654711723327637, "learning_rate": 1.9380603796516824e-05, "loss": 0.6003, "step": 7422 }, { "epoch": 0.22802813872761343, "grad_norm": 0.348190039396286, "learning_rate": 1.9380436343582578e-05, "loss": 0.6368, "step": 7423 }, { "epoch": 0.22805885786256258, "grad_norm": 0.3682152330875397, "learning_rate": 1.9380268868739665e-05, "loss": 0.5505, "step": 7424 }, { "epoch": 0.22808957699751176, "grad_norm": 0.3381710648536682, "learning_rate": 1.938010137198848e-05, "loss": 0.6492, "step": 7425 }, { "epoch": 0.2281202961324609, "grad_norm": 0.42278847098350525, "learning_rate": 1.9379933853329415e-05, "loss": 0.5568, "step": 7426 }, { "epoch": 0.22815101526741008, "grad_norm": 0.6092449426651001, "learning_rate": 1.9379766312762854e-05, "loss": 0.5361, "step": 7427 }, { "epoch": 0.22818173440235923, "grad_norm": 0.3231405019760132, "learning_rate": 1.93795987502892e-05, "loss": 0.5929, "step": 7428 }, { "epoch": 0.2282124535373084, "grad_norm": 0.3255586624145508, "learning_rate": 1.9379431165908836e-05, "loss": 0.6008, "step": 7429 }, { "epoch": 0.22824317267225755, "grad_norm": 0.3356032967567444, "learning_rate": 1.9379263559622154e-05, "loss": 0.5606, "step": 7430 }, { "epoch": 0.2282738918072067, "grad_norm": 0.3156086504459381, "learning_rate": 1.9379095931429547e-05, "loss": 0.5398, "step": 7431 }, { "epoch": 0.22830461094215587, "grad_norm": 0.3535171449184418, "learning_rate": 1.937892828133141e-05, "loss": 0.6143, "step": 7432 }, { "epoch": 0.22833533007710502, "grad_norm": 0.3699648976325989, "learning_rate": 1.9378760609328128e-05, "loss": 0.5466, "step": 7433 }, { "epoch": 0.2283660492120542, "grad_norm": 0.3485315442085266, "learning_rate": 1.9378592915420096e-05, "loss": 0.604, "step": 7434 }, { "epoch": 0.22839676834700334, "grad_norm": 0.42829641699790955, "learning_rate": 1.9378425199607704e-05, "loss": 0.5821, "step": 7435 }, { "epoch": 0.22842748748195252, "grad_norm": 0.33152347803115845, "learning_rate": 1.9378257461891348e-05, "loss": 0.5514, "step": 7436 }, { "epoch": 0.22845820661690167, "grad_norm": 0.35419774055480957, "learning_rate": 1.9378089702271415e-05, "loss": 0.596, "step": 7437 }, { "epoch": 0.22848892575185084, "grad_norm": 0.3318338394165039, "learning_rate": 1.93779219207483e-05, "loss": 0.553, "step": 7438 }, { "epoch": 0.2285196448868, "grad_norm": 0.36317870020866394, "learning_rate": 1.9377754117322388e-05, "loss": 0.6069, "step": 7439 }, { "epoch": 0.22855036402174914, "grad_norm": 0.34206825494766235, "learning_rate": 1.9377586291994084e-05, "loss": 0.6825, "step": 7440 }, { "epoch": 0.2285810831566983, "grad_norm": 0.3275406062602997, "learning_rate": 1.9377418444763768e-05, "loss": 0.6105, "step": 7441 }, { "epoch": 0.22861180229164746, "grad_norm": 0.32147493958473206, "learning_rate": 1.9377250575631836e-05, "loss": 0.6127, "step": 7442 }, { "epoch": 0.22864252142659663, "grad_norm": 0.3426972031593323, "learning_rate": 1.937708268459868e-05, "loss": 0.5879, "step": 7443 }, { "epoch": 0.22867324056154578, "grad_norm": 0.31555846333503723, "learning_rate": 1.9376914771664697e-05, "loss": 0.5505, "step": 7444 }, { "epoch": 0.22870395969649496, "grad_norm": 0.3266202211380005, "learning_rate": 1.9376746836830272e-05, "loss": 0.5774, "step": 7445 }, { "epoch": 0.2287346788314441, "grad_norm": 0.3319772481918335, "learning_rate": 1.9376578880095797e-05, "loss": 0.6482, "step": 7446 }, { "epoch": 0.22876539796639328, "grad_norm": 0.3574504554271698, "learning_rate": 1.9376410901461672e-05, "loss": 0.6529, "step": 7447 }, { "epoch": 0.22879611710134243, "grad_norm": 0.35581085085868835, "learning_rate": 1.9376242900928282e-05, "loss": 0.6766, "step": 7448 }, { "epoch": 0.22882683623629158, "grad_norm": 0.41221562027931213, "learning_rate": 1.937607487849602e-05, "loss": 0.5491, "step": 7449 }, { "epoch": 0.22885755537124075, "grad_norm": 0.4038274884223938, "learning_rate": 1.9375906834165282e-05, "loss": 0.5617, "step": 7450 }, { "epoch": 0.2288882745061899, "grad_norm": 0.3290782570838928, "learning_rate": 1.9375738767936457e-05, "loss": 0.4453, "step": 7451 }, { "epoch": 0.22891899364113907, "grad_norm": 0.34250175952911377, "learning_rate": 1.9375570679809944e-05, "loss": 0.6221, "step": 7452 }, { "epoch": 0.22894971277608822, "grad_norm": 0.30701887607574463, "learning_rate": 1.9375402569786127e-05, "loss": 0.6456, "step": 7453 }, { "epoch": 0.2289804319110374, "grad_norm": 0.40501177310943604, "learning_rate": 1.937523443786541e-05, "loss": 0.5994, "step": 7454 }, { "epoch": 0.22901115104598654, "grad_norm": 0.35494571924209595, "learning_rate": 1.9375066284048166e-05, "loss": 0.4769, "step": 7455 }, { "epoch": 0.2290418701809357, "grad_norm": 0.3860504925251007, "learning_rate": 1.9374898108334807e-05, "loss": 0.6088, "step": 7456 }, { "epoch": 0.22907258931588487, "grad_norm": 0.357479453086853, "learning_rate": 1.9374729910725717e-05, "loss": 0.5899, "step": 7457 }, { "epoch": 0.22910330845083401, "grad_norm": 0.3647928535938263, "learning_rate": 1.9374561691221293e-05, "loss": 0.5858, "step": 7458 }, { "epoch": 0.2291340275857832, "grad_norm": 0.372729629278183, "learning_rate": 1.937439344982192e-05, "loss": 0.6123, "step": 7459 }, { "epoch": 0.22916474672073234, "grad_norm": 0.3453364670276642, "learning_rate": 1.9374225186528002e-05, "loss": 0.5541, "step": 7460 }, { "epoch": 0.2291954658556815, "grad_norm": 0.34414008259773254, "learning_rate": 1.9374056901339927e-05, "loss": 0.5296, "step": 7461 }, { "epoch": 0.22922618499063066, "grad_norm": 0.3587051331996918, "learning_rate": 1.9373888594258083e-05, "loss": 0.6517, "step": 7462 }, { "epoch": 0.22925690412557984, "grad_norm": 0.32185864448547363, "learning_rate": 1.937372026528287e-05, "loss": 0.6161, "step": 7463 }, { "epoch": 0.22928762326052898, "grad_norm": 0.44936516880989075, "learning_rate": 1.937355191441468e-05, "loss": 0.5494, "step": 7464 }, { "epoch": 0.22931834239547813, "grad_norm": 0.3466988205909729, "learning_rate": 1.93733835416539e-05, "loss": 0.6399, "step": 7465 }, { "epoch": 0.2293490615304273, "grad_norm": 0.3312217891216278, "learning_rate": 1.9373215147000932e-05, "loss": 0.5736, "step": 7466 }, { "epoch": 0.22937978066537645, "grad_norm": 0.431925505399704, "learning_rate": 1.9373046730456163e-05, "loss": 0.5775, "step": 7467 }, { "epoch": 0.22941049980032563, "grad_norm": 0.3234696388244629, "learning_rate": 1.937287829201999e-05, "loss": 0.6117, "step": 7468 }, { "epoch": 0.22944121893527478, "grad_norm": 0.3022303879261017, "learning_rate": 1.9372709831692808e-05, "loss": 0.608, "step": 7469 }, { "epoch": 0.22947193807022395, "grad_norm": 0.3508851230144501, "learning_rate": 1.9372541349475006e-05, "loss": 0.4912, "step": 7470 }, { "epoch": 0.2295026572051731, "grad_norm": 0.36326491832733154, "learning_rate": 1.9372372845366977e-05, "loss": 0.5748, "step": 7471 }, { "epoch": 0.22953337634012227, "grad_norm": 0.32916176319122314, "learning_rate": 1.9372204319369116e-05, "loss": 0.542, "step": 7472 }, { "epoch": 0.22956409547507142, "grad_norm": 0.3626655042171478, "learning_rate": 1.9372035771481822e-05, "loss": 0.572, "step": 7473 }, { "epoch": 0.22959481461002057, "grad_norm": 0.34336742758750916, "learning_rate": 1.937186720170548e-05, "loss": 0.6382, "step": 7474 }, { "epoch": 0.22962553374496975, "grad_norm": 0.34429338574409485, "learning_rate": 1.937169861004049e-05, "loss": 0.5911, "step": 7475 }, { "epoch": 0.2296562528799189, "grad_norm": 0.3594897389411926, "learning_rate": 1.937152999648724e-05, "loss": 0.5609, "step": 7476 }, { "epoch": 0.22968697201486807, "grad_norm": 0.3616867959499359, "learning_rate": 1.937136136104613e-05, "loss": 0.5621, "step": 7477 }, { "epoch": 0.22971769114981722, "grad_norm": 0.3607358932495117, "learning_rate": 1.937119270371755e-05, "loss": 0.5531, "step": 7478 }, { "epoch": 0.2297484102847664, "grad_norm": 0.3903348445892334, "learning_rate": 1.9371024024501895e-05, "loss": 0.5917, "step": 7479 }, { "epoch": 0.22977912941971554, "grad_norm": 0.3360470235347748, "learning_rate": 1.937085532339956e-05, "loss": 0.5709, "step": 7480 }, { "epoch": 0.22980984855466471, "grad_norm": 0.4198625087738037, "learning_rate": 1.9370686600410935e-05, "loss": 0.605, "step": 7481 }, { "epoch": 0.22984056768961386, "grad_norm": 0.35233667492866516, "learning_rate": 1.937051785553642e-05, "loss": 0.5872, "step": 7482 }, { "epoch": 0.229871286824563, "grad_norm": 0.350803017616272, "learning_rate": 1.9370349088776406e-05, "loss": 0.5588, "step": 7483 }, { "epoch": 0.22990200595951218, "grad_norm": 0.3376011550426483, "learning_rate": 1.9370180300131286e-05, "loss": 0.5901, "step": 7484 }, { "epoch": 0.22993272509446133, "grad_norm": 0.5358785390853882, "learning_rate": 1.9370011489601455e-05, "loss": 0.7617, "step": 7485 }, { "epoch": 0.2299634442294105, "grad_norm": 0.35246506333351135, "learning_rate": 1.936984265718731e-05, "loss": 0.5704, "step": 7486 }, { "epoch": 0.22999416336435966, "grad_norm": 0.3326294720172882, "learning_rate": 1.936967380288924e-05, "loss": 0.6105, "step": 7487 }, { "epoch": 0.23002488249930883, "grad_norm": 0.37752148509025574, "learning_rate": 1.9369504926707644e-05, "loss": 0.6859, "step": 7488 }, { "epoch": 0.23005560163425798, "grad_norm": 0.5301054120063782, "learning_rate": 1.9369336028642917e-05, "loss": 0.5168, "step": 7489 }, { "epoch": 0.23008632076920713, "grad_norm": 0.3549875020980835, "learning_rate": 1.936916710869545e-05, "loss": 0.6376, "step": 7490 }, { "epoch": 0.2301170399041563, "grad_norm": 0.33290788531303406, "learning_rate": 1.9368998166865638e-05, "loss": 0.5356, "step": 7491 }, { "epoch": 0.23014775903910545, "grad_norm": 0.3238588869571686, "learning_rate": 1.9368829203153876e-05, "loss": 0.4919, "step": 7492 }, { "epoch": 0.23017847817405462, "grad_norm": 0.36381638050079346, "learning_rate": 1.936866021756056e-05, "loss": 0.7044, "step": 7493 }, { "epoch": 0.23020919730900377, "grad_norm": 0.42452216148376465, "learning_rate": 1.9368491210086083e-05, "loss": 0.6389, "step": 7494 }, { "epoch": 0.23023991644395295, "grad_norm": 0.3306809961795807, "learning_rate": 1.936832218073084e-05, "loss": 0.5951, "step": 7495 }, { "epoch": 0.2302706355789021, "grad_norm": 0.34385159611701965, "learning_rate": 1.936815312949523e-05, "loss": 0.6244, "step": 7496 }, { "epoch": 0.23030135471385127, "grad_norm": 0.35200953483581543, "learning_rate": 1.936798405637964e-05, "loss": 0.5921, "step": 7497 }, { "epoch": 0.23033207384880042, "grad_norm": 0.33038002252578735, "learning_rate": 1.936781496138447e-05, "loss": 0.5732, "step": 7498 }, { "epoch": 0.23036279298374956, "grad_norm": 0.39050304889678955, "learning_rate": 1.9367645844510114e-05, "loss": 0.6665, "step": 7499 }, { "epoch": 0.23039351211869874, "grad_norm": 0.3414463400840759, "learning_rate": 1.936747670575697e-05, "loss": 0.5897, "step": 7500 }, { "epoch": 0.2304242312536479, "grad_norm": 0.33180636167526245, "learning_rate": 1.9367307545125427e-05, "loss": 0.5954, "step": 7501 }, { "epoch": 0.23045495038859706, "grad_norm": 0.3682723045349121, "learning_rate": 1.9367138362615882e-05, "loss": 0.6972, "step": 7502 }, { "epoch": 0.2304856695235462, "grad_norm": 0.3598076105117798, "learning_rate": 1.936696915822873e-05, "loss": 0.6324, "step": 7503 }, { "epoch": 0.23051638865849539, "grad_norm": 0.3836211860179901, "learning_rate": 1.936679993196437e-05, "loss": 0.5973, "step": 7504 }, { "epoch": 0.23054710779344453, "grad_norm": 0.33843305706977844, "learning_rate": 1.9366630683823197e-05, "loss": 0.5815, "step": 7505 }, { "epoch": 0.2305778269283937, "grad_norm": 0.33769190311431885, "learning_rate": 1.9366461413805602e-05, "loss": 0.6531, "step": 7506 }, { "epoch": 0.23060854606334286, "grad_norm": 0.31675535440444946, "learning_rate": 1.936629212191198e-05, "loss": 0.5979, "step": 7507 }, { "epoch": 0.230639265198292, "grad_norm": 0.3020946681499481, "learning_rate": 1.936612280814273e-05, "loss": 0.5299, "step": 7508 }, { "epoch": 0.23066998433324118, "grad_norm": 0.3477190434932709, "learning_rate": 1.9365953472498246e-05, "loss": 0.6018, "step": 7509 }, { "epoch": 0.23070070346819033, "grad_norm": 0.35950568318367004, "learning_rate": 1.936578411497893e-05, "loss": 0.6338, "step": 7510 }, { "epoch": 0.2307314226031395, "grad_norm": 0.3880220353603363, "learning_rate": 1.936561473558516e-05, "loss": 0.6188, "step": 7511 }, { "epoch": 0.23076214173808865, "grad_norm": 0.3593064248561859, "learning_rate": 1.936544533431735e-05, "loss": 0.5886, "step": 7512 }, { "epoch": 0.23079286087303783, "grad_norm": 0.32392406463623047, "learning_rate": 1.9365275911175883e-05, "loss": 0.5884, "step": 7513 }, { "epoch": 0.23082358000798697, "grad_norm": 0.37048155069351196, "learning_rate": 1.9365106466161166e-05, "loss": 0.6529, "step": 7514 }, { "epoch": 0.23085429914293615, "grad_norm": 0.3442489504814148, "learning_rate": 1.9364936999273584e-05, "loss": 0.6416, "step": 7515 }, { "epoch": 0.2308850182778853, "grad_norm": 0.3358599543571472, "learning_rate": 1.936476751051354e-05, "loss": 0.6786, "step": 7516 }, { "epoch": 0.23091573741283444, "grad_norm": 0.3464985191822052, "learning_rate": 1.9364597999881428e-05, "loss": 0.681, "step": 7517 }, { "epoch": 0.23094645654778362, "grad_norm": 0.3333529829978943, "learning_rate": 1.936442846737764e-05, "loss": 0.543, "step": 7518 }, { "epoch": 0.23097717568273277, "grad_norm": 0.3113676905632019, "learning_rate": 1.936425891300258e-05, "loss": 0.5853, "step": 7519 }, { "epoch": 0.23100789481768194, "grad_norm": 0.31625208258628845, "learning_rate": 1.9364089336756633e-05, "loss": 0.5414, "step": 7520 }, { "epoch": 0.2310386139526311, "grad_norm": 0.3336169421672821, "learning_rate": 1.9363919738640206e-05, "loss": 0.5925, "step": 7521 }, { "epoch": 0.23106933308758026, "grad_norm": 0.4663701057434082, "learning_rate": 1.936375011865369e-05, "loss": 0.5891, "step": 7522 }, { "epoch": 0.2311000522225294, "grad_norm": 0.42512816190719604, "learning_rate": 1.9363580476797483e-05, "loss": 0.7235, "step": 7523 }, { "epoch": 0.2311307713574786, "grad_norm": 0.5116162896156311, "learning_rate": 1.9363410813071977e-05, "loss": 0.5357, "step": 7524 }, { "epoch": 0.23116149049242773, "grad_norm": 0.36814916133880615, "learning_rate": 1.936324112747757e-05, "loss": 0.5336, "step": 7525 }, { "epoch": 0.23119220962737688, "grad_norm": 0.34030500054359436, "learning_rate": 1.9363071420014665e-05, "loss": 0.5912, "step": 7526 }, { "epoch": 0.23122292876232606, "grad_norm": 0.3159242868423462, "learning_rate": 1.936290169068365e-05, "loss": 0.5515, "step": 7527 }, { "epoch": 0.2312536478972752, "grad_norm": 0.3174722194671631, "learning_rate": 1.936273193948492e-05, "loss": 0.5301, "step": 7528 }, { "epoch": 0.23128436703222438, "grad_norm": 0.503537654876709, "learning_rate": 1.936256216641888e-05, "loss": 0.7637, "step": 7529 }, { "epoch": 0.23131508616717353, "grad_norm": 0.4433203935623169, "learning_rate": 1.9362392371485924e-05, "loss": 0.5432, "step": 7530 }, { "epoch": 0.2313458053021227, "grad_norm": 0.3555707633495331, "learning_rate": 1.9362222554686443e-05, "loss": 0.5761, "step": 7531 }, { "epoch": 0.23137652443707185, "grad_norm": 0.4291413724422455, "learning_rate": 1.9362052716020838e-05, "loss": 0.5903, "step": 7532 }, { "epoch": 0.231407243572021, "grad_norm": 0.5711989998817444, "learning_rate": 1.9361882855489507e-05, "loss": 0.5689, "step": 7533 }, { "epoch": 0.23143796270697017, "grad_norm": 0.34418943524360657, "learning_rate": 1.9361712973092843e-05, "loss": 0.6637, "step": 7534 }, { "epoch": 0.23146868184191932, "grad_norm": 0.3729333281517029, "learning_rate": 1.9361543068831244e-05, "loss": 0.5889, "step": 7535 }, { "epoch": 0.2314994009768685, "grad_norm": 0.31492385268211365, "learning_rate": 1.936137314270511e-05, "loss": 0.5236, "step": 7536 }, { "epoch": 0.23153012011181764, "grad_norm": 0.3353425860404968, "learning_rate": 1.936120319471483e-05, "loss": 0.6266, "step": 7537 }, { "epoch": 0.23156083924676682, "grad_norm": 0.3360268473625183, "learning_rate": 1.9361033224860813e-05, "loss": 0.5477, "step": 7538 }, { "epoch": 0.23159155838171597, "grad_norm": 0.3353384733200073, "learning_rate": 1.9360863233143445e-05, "loss": 0.5325, "step": 7539 }, { "epoch": 0.23162227751666514, "grad_norm": 0.34184566140174866, "learning_rate": 1.9360693219563127e-05, "loss": 0.6372, "step": 7540 }, { "epoch": 0.2316529966516143, "grad_norm": 0.3610056936740875, "learning_rate": 1.9360523184120258e-05, "loss": 0.6696, "step": 7541 }, { "epoch": 0.23168371578656344, "grad_norm": 0.32596707344055176, "learning_rate": 1.9360353126815235e-05, "loss": 0.5568, "step": 7542 }, { "epoch": 0.2317144349215126, "grad_norm": 0.3487396240234375, "learning_rate": 1.9360183047648448e-05, "loss": 0.6229, "step": 7543 }, { "epoch": 0.23174515405646176, "grad_norm": 0.33364149928092957, "learning_rate": 1.9360012946620305e-05, "loss": 0.6548, "step": 7544 }, { "epoch": 0.23177587319141094, "grad_norm": 0.38248854875564575, "learning_rate": 1.9359842823731195e-05, "loss": 0.5941, "step": 7545 }, { "epoch": 0.23180659232636008, "grad_norm": 0.3733510971069336, "learning_rate": 1.935967267898152e-05, "loss": 0.6322, "step": 7546 }, { "epoch": 0.23183731146130926, "grad_norm": 0.3500770032405853, "learning_rate": 1.9359502512371675e-05, "loss": 0.5152, "step": 7547 }, { "epoch": 0.2318680305962584, "grad_norm": 0.35562944412231445, "learning_rate": 1.935933232390206e-05, "loss": 0.5523, "step": 7548 }, { "epoch": 0.23189874973120758, "grad_norm": 0.37518608570098877, "learning_rate": 1.9359162113573066e-05, "loss": 0.656, "step": 7549 }, { "epoch": 0.23192946886615673, "grad_norm": 0.3618136942386627, "learning_rate": 1.93589918813851e-05, "loss": 0.5972, "step": 7550 }, { "epoch": 0.23196018800110588, "grad_norm": 0.35142409801483154, "learning_rate": 1.9358821627338553e-05, "loss": 0.6294, "step": 7551 }, { "epoch": 0.23199090713605505, "grad_norm": 0.34081152081489563, "learning_rate": 1.9358651351433825e-05, "loss": 0.5806, "step": 7552 }, { "epoch": 0.2320216262710042, "grad_norm": 0.364881694316864, "learning_rate": 1.9358481053671313e-05, "loss": 0.6021, "step": 7553 }, { "epoch": 0.23205234540595338, "grad_norm": 0.3448374569416046, "learning_rate": 1.9358310734051415e-05, "loss": 0.5321, "step": 7554 }, { "epoch": 0.23208306454090252, "grad_norm": 0.2964259684085846, "learning_rate": 1.935814039257453e-05, "loss": 0.5913, "step": 7555 }, { "epoch": 0.2321137836758517, "grad_norm": 0.35290467739105225, "learning_rate": 1.9357970029241052e-05, "loss": 0.6317, "step": 7556 }, { "epoch": 0.23214450281080085, "grad_norm": 0.32346874475479126, "learning_rate": 1.935779964405138e-05, "loss": 0.6305, "step": 7557 }, { "epoch": 0.23217522194575002, "grad_norm": 0.3327173590660095, "learning_rate": 1.9357629237005916e-05, "loss": 0.5734, "step": 7558 }, { "epoch": 0.23220594108069917, "grad_norm": 0.4587315618991852, "learning_rate": 1.935745880810506e-05, "loss": 0.54, "step": 7559 }, { "epoch": 0.23223666021564832, "grad_norm": 0.3273179233074188, "learning_rate": 1.9357288357349196e-05, "loss": 0.5696, "step": 7560 }, { "epoch": 0.2322673793505975, "grad_norm": 1.009809970855713, "learning_rate": 1.935711788473874e-05, "loss": 0.5902, "step": 7561 }, { "epoch": 0.23229809848554664, "grad_norm": 0.31050586700439453, "learning_rate": 1.9356947390274076e-05, "loss": 0.5864, "step": 7562 }, { "epoch": 0.23232881762049581, "grad_norm": 0.37686458230018616, "learning_rate": 1.935677687395561e-05, "loss": 0.5637, "step": 7563 }, { "epoch": 0.23235953675544496, "grad_norm": 0.33764204382896423, "learning_rate": 1.935660633578374e-05, "loss": 0.5646, "step": 7564 }, { "epoch": 0.23239025589039414, "grad_norm": 0.3393482267856598, "learning_rate": 1.935643577575886e-05, "loss": 0.5195, "step": 7565 }, { "epoch": 0.23242097502534328, "grad_norm": 0.3533332347869873, "learning_rate": 1.9356265193881373e-05, "loss": 0.5764, "step": 7566 }, { "epoch": 0.23245169416029243, "grad_norm": 0.3632010519504547, "learning_rate": 1.935609459015167e-05, "loss": 0.5968, "step": 7567 }, { "epoch": 0.2324824132952416, "grad_norm": 0.36706238985061646, "learning_rate": 1.935592396457016e-05, "loss": 0.606, "step": 7568 }, { "epoch": 0.23251313243019076, "grad_norm": 0.34559687972068787, "learning_rate": 1.9355753317137233e-05, "loss": 0.5591, "step": 7569 }, { "epoch": 0.23254385156513993, "grad_norm": 0.6416429877281189, "learning_rate": 1.9355582647853294e-05, "loss": 0.6624, "step": 7570 }, { "epoch": 0.23257457070008908, "grad_norm": 0.35556235909461975, "learning_rate": 1.9355411956718735e-05, "loss": 0.5388, "step": 7571 }, { "epoch": 0.23260528983503825, "grad_norm": 0.3527929186820984, "learning_rate": 1.9355241243733957e-05, "loss": 0.5279, "step": 7572 }, { "epoch": 0.2326360089699874, "grad_norm": 0.5075631737709045, "learning_rate": 1.935507050889936e-05, "loss": 0.5424, "step": 7573 }, { "epoch": 0.23266672810493658, "grad_norm": 0.34188297390937805, "learning_rate": 1.9354899752215348e-05, "loss": 0.5585, "step": 7574 }, { "epoch": 0.23269744723988572, "grad_norm": 0.37462538480758667, "learning_rate": 1.9354728973682312e-05, "loss": 0.62, "step": 7575 }, { "epoch": 0.23272816637483487, "grad_norm": 0.3452500104904175, "learning_rate": 1.9354558173300652e-05, "loss": 0.565, "step": 7576 }, { "epoch": 0.23275888550978405, "grad_norm": 0.391716331243515, "learning_rate": 1.9354387351070765e-05, "loss": 0.5831, "step": 7577 }, { "epoch": 0.2327896046447332, "grad_norm": 0.3293367028236389, "learning_rate": 1.935421650699306e-05, "loss": 0.4861, "step": 7578 }, { "epoch": 0.23282032377968237, "grad_norm": 0.34202179312705994, "learning_rate": 1.9354045641067922e-05, "loss": 0.6636, "step": 7579 }, { "epoch": 0.23285104291463152, "grad_norm": 0.3431051969528198, "learning_rate": 1.935387475329576e-05, "loss": 0.6538, "step": 7580 }, { "epoch": 0.2328817620495807, "grad_norm": 0.3449808955192566, "learning_rate": 1.935370384367697e-05, "loss": 0.5674, "step": 7581 }, { "epoch": 0.23291248118452984, "grad_norm": 0.41646552085876465, "learning_rate": 1.9353532912211954e-05, "loss": 0.6111, "step": 7582 }, { "epoch": 0.23294320031947902, "grad_norm": 0.46686476469039917, "learning_rate": 1.9353361958901107e-05, "loss": 0.5666, "step": 7583 }, { "epoch": 0.23297391945442816, "grad_norm": 0.3711892366409302, "learning_rate": 1.935319098374483e-05, "loss": 0.6081, "step": 7584 }, { "epoch": 0.2330046385893773, "grad_norm": 0.40066757798194885, "learning_rate": 1.935301998674352e-05, "loss": 0.6595, "step": 7585 }, { "epoch": 0.2330353577243265, "grad_norm": 0.3249231278896332, "learning_rate": 1.9352848967897583e-05, "loss": 0.5701, "step": 7586 }, { "epoch": 0.23306607685927563, "grad_norm": 0.363248348236084, "learning_rate": 1.935267792720741e-05, "loss": 0.6281, "step": 7587 }, { "epoch": 0.2330967959942248, "grad_norm": 0.3286391794681549, "learning_rate": 1.9352506864673406e-05, "loss": 0.5526, "step": 7588 }, { "epoch": 0.23312751512917396, "grad_norm": 0.3623926043510437, "learning_rate": 1.935233578029597e-05, "loss": 0.6452, "step": 7589 }, { "epoch": 0.23315823426412313, "grad_norm": 0.34294041991233826, "learning_rate": 1.9352164674075496e-05, "loss": 0.6021, "step": 7590 }, { "epoch": 0.23318895339907228, "grad_norm": 0.32404443621635437, "learning_rate": 1.9351993546012394e-05, "loss": 0.5451, "step": 7591 }, { "epoch": 0.23321967253402145, "grad_norm": 0.40511974692344666, "learning_rate": 1.9351822396107057e-05, "loss": 0.5898, "step": 7592 }, { "epoch": 0.2332503916689706, "grad_norm": 0.3760119080543518, "learning_rate": 1.935165122435988e-05, "loss": 0.6774, "step": 7593 }, { "epoch": 0.23328111080391975, "grad_norm": 0.34347936511039734, "learning_rate": 1.9351480030771275e-05, "loss": 0.6008, "step": 7594 }, { "epoch": 0.23331182993886893, "grad_norm": 0.3501785397529602, "learning_rate": 1.9351308815341634e-05, "loss": 0.6046, "step": 7595 }, { "epoch": 0.23334254907381807, "grad_norm": 0.3187989592552185, "learning_rate": 1.9351137578071356e-05, "loss": 0.5268, "step": 7596 }, { "epoch": 0.23337326820876725, "grad_norm": 0.3335762619972229, "learning_rate": 1.9350966318960846e-05, "loss": 0.5611, "step": 7597 }, { "epoch": 0.2334039873437164, "grad_norm": 0.35123714804649353, "learning_rate": 1.93507950380105e-05, "loss": 0.5676, "step": 7598 }, { "epoch": 0.23343470647866557, "grad_norm": 0.3891028165817261, "learning_rate": 1.935062373522072e-05, "loss": 0.5636, "step": 7599 }, { "epoch": 0.23346542561361472, "grad_norm": 0.33317187428474426, "learning_rate": 1.9350452410591902e-05, "loss": 0.5371, "step": 7600 }, { "epoch": 0.2334961447485639, "grad_norm": 0.3676287531852722, "learning_rate": 1.935028106412445e-05, "loss": 0.5781, "step": 7601 }, { "epoch": 0.23352686388351304, "grad_norm": 0.35073280334472656, "learning_rate": 1.9350109695818766e-05, "loss": 0.6133, "step": 7602 }, { "epoch": 0.2335575830184622, "grad_norm": 0.33085063099861145, "learning_rate": 1.9349938305675247e-05, "loss": 0.6263, "step": 7603 }, { "epoch": 0.23358830215341136, "grad_norm": 0.3627588450908661, "learning_rate": 1.9349766893694294e-05, "loss": 0.6011, "step": 7604 }, { "epoch": 0.2336190212883605, "grad_norm": 0.3363712430000305, "learning_rate": 1.9349595459876306e-05, "loss": 0.4844, "step": 7605 }, { "epoch": 0.2336497404233097, "grad_norm": 0.35848844051361084, "learning_rate": 1.9349424004221687e-05, "loss": 0.6954, "step": 7606 }, { "epoch": 0.23368045955825884, "grad_norm": 0.32867228984832764, "learning_rate": 1.9349252526730833e-05, "loss": 0.5518, "step": 7607 }, { "epoch": 0.233711178693208, "grad_norm": 0.31755903363227844, "learning_rate": 1.934908102740415e-05, "loss": 0.5543, "step": 7608 }, { "epoch": 0.23374189782815716, "grad_norm": 0.3831489384174347, "learning_rate": 1.9348909506242033e-05, "loss": 0.5645, "step": 7609 }, { "epoch": 0.2337726169631063, "grad_norm": 0.4669325649738312, "learning_rate": 1.9348737963244886e-05, "loss": 0.5257, "step": 7610 }, { "epoch": 0.23380333609805548, "grad_norm": 0.3295033872127533, "learning_rate": 1.9348566398413107e-05, "loss": 0.6141, "step": 7611 }, { "epoch": 0.23383405523300463, "grad_norm": 0.37491798400878906, "learning_rate": 1.93483948117471e-05, "loss": 0.5444, "step": 7612 }, { "epoch": 0.2338647743679538, "grad_norm": 0.3535802662372589, "learning_rate": 1.9348223203247262e-05, "loss": 0.5679, "step": 7613 }, { "epoch": 0.23389549350290295, "grad_norm": 0.3212297856807709, "learning_rate": 1.9348051572913998e-05, "loss": 0.4943, "step": 7614 }, { "epoch": 0.23392621263785213, "grad_norm": 0.32565924525260925, "learning_rate": 1.9347879920747705e-05, "loss": 0.523, "step": 7615 }, { "epoch": 0.23395693177280127, "grad_norm": 0.4027366638183594, "learning_rate": 1.9347708246748788e-05, "loss": 0.5688, "step": 7616 }, { "epoch": 0.23398765090775045, "grad_norm": 0.3453314006328583, "learning_rate": 1.934753655091764e-05, "loss": 0.6907, "step": 7617 }, { "epoch": 0.2340183700426996, "grad_norm": 0.31549596786499023, "learning_rate": 1.9347364833254674e-05, "loss": 0.5399, "step": 7618 }, { "epoch": 0.23404908917764874, "grad_norm": 0.3753878176212311, "learning_rate": 1.9347193093760283e-05, "loss": 0.6061, "step": 7619 }, { "epoch": 0.23407980831259792, "grad_norm": 0.42831939458847046, "learning_rate": 1.934702133243487e-05, "loss": 0.5215, "step": 7620 }, { "epoch": 0.23411052744754707, "grad_norm": 0.3093508780002594, "learning_rate": 1.9346849549278833e-05, "loss": 0.5845, "step": 7621 }, { "epoch": 0.23414124658249624, "grad_norm": 0.37070682644844055, "learning_rate": 1.934667774429258e-05, "loss": 0.6169, "step": 7622 }, { "epoch": 0.2341719657174454, "grad_norm": 0.35813337564468384, "learning_rate": 1.9346505917476502e-05, "loss": 0.6357, "step": 7623 }, { "epoch": 0.23420268485239457, "grad_norm": 0.3317505121231079, "learning_rate": 1.9346334068831013e-05, "loss": 0.5885, "step": 7624 }, { "epoch": 0.2342334039873437, "grad_norm": 0.37164127826690674, "learning_rate": 1.9346162198356502e-05, "loss": 0.6082, "step": 7625 }, { "epoch": 0.2342641231222929, "grad_norm": 0.4579634964466095, "learning_rate": 1.9345990306053384e-05, "loss": 0.6358, "step": 7626 }, { "epoch": 0.23429484225724204, "grad_norm": 0.323894202709198, "learning_rate": 1.934581839192205e-05, "loss": 0.5901, "step": 7627 }, { "epoch": 0.23432556139219118, "grad_norm": 0.38115257024765015, "learning_rate": 1.9345646455962903e-05, "loss": 0.6052, "step": 7628 }, { "epoch": 0.23435628052714036, "grad_norm": 0.3939846158027649, "learning_rate": 1.9345474498176348e-05, "loss": 0.6334, "step": 7629 }, { "epoch": 0.2343869996620895, "grad_norm": 0.3436211943626404, "learning_rate": 1.9345302518562784e-05, "loss": 0.6211, "step": 7630 }, { "epoch": 0.23441771879703868, "grad_norm": 0.8204747438430786, "learning_rate": 1.934513051712261e-05, "loss": 0.5984, "step": 7631 }, { "epoch": 0.23444843793198783, "grad_norm": 0.3392956554889679, "learning_rate": 1.9344958493856235e-05, "loss": 0.6543, "step": 7632 }, { "epoch": 0.234479157066937, "grad_norm": 0.3686206638813019, "learning_rate": 1.9344786448764056e-05, "loss": 0.6674, "step": 7633 }, { "epoch": 0.23450987620188615, "grad_norm": 0.3295072019100189, "learning_rate": 1.9344614381846474e-05, "loss": 0.6194, "step": 7634 }, { "epoch": 0.23454059533683533, "grad_norm": 0.3368518352508545, "learning_rate": 1.9344442293103892e-05, "loss": 0.6064, "step": 7635 }, { "epoch": 0.23457131447178448, "grad_norm": 0.3816315531730652, "learning_rate": 1.9344270182536717e-05, "loss": 0.5524, "step": 7636 }, { "epoch": 0.23460203360673362, "grad_norm": 0.3528873920440674, "learning_rate": 1.934409805014534e-05, "loss": 0.5909, "step": 7637 }, { "epoch": 0.2346327527416828, "grad_norm": 0.3190333843231201, "learning_rate": 1.9343925895930175e-05, "loss": 0.6406, "step": 7638 }, { "epoch": 0.23466347187663195, "grad_norm": 0.4158407151699066, "learning_rate": 1.9343753719891615e-05, "loss": 0.5115, "step": 7639 }, { "epoch": 0.23469419101158112, "grad_norm": 0.8014741539955139, "learning_rate": 1.9343581522030068e-05, "loss": 0.5739, "step": 7640 }, { "epoch": 0.23472491014653027, "grad_norm": 0.39527711272239685, "learning_rate": 1.9343409302345934e-05, "loss": 0.5955, "step": 7641 }, { "epoch": 0.23475562928147944, "grad_norm": 0.37352848052978516, "learning_rate": 1.934323706083961e-05, "loss": 0.6362, "step": 7642 }, { "epoch": 0.2347863484164286, "grad_norm": 0.38728877902030945, "learning_rate": 1.9343064797511508e-05, "loss": 0.6266, "step": 7643 }, { "epoch": 0.23481706755137774, "grad_norm": 0.34163758158683777, "learning_rate": 1.9342892512362026e-05, "loss": 0.4814, "step": 7644 }, { "epoch": 0.23484778668632691, "grad_norm": 0.3481205701828003, "learning_rate": 1.9342720205391566e-05, "loss": 0.6581, "step": 7645 }, { "epoch": 0.23487850582127606, "grad_norm": 0.4333837926387787, "learning_rate": 1.9342547876600528e-05, "loss": 0.5261, "step": 7646 }, { "epoch": 0.23490922495622524, "grad_norm": 0.356250524520874, "learning_rate": 1.9342375525989318e-05, "loss": 0.5447, "step": 7647 }, { "epoch": 0.23493994409117439, "grad_norm": 0.3050040602684021, "learning_rate": 1.934220315355834e-05, "loss": 0.5087, "step": 7648 }, { "epoch": 0.23497066322612356, "grad_norm": 0.35950228571891785, "learning_rate": 1.934203075930799e-05, "loss": 0.5332, "step": 7649 }, { "epoch": 0.2350013823610727, "grad_norm": 0.3559632897377014, "learning_rate": 1.934185834323868e-05, "loss": 0.6611, "step": 7650 }, { "epoch": 0.23503210149602188, "grad_norm": 0.3994937539100647, "learning_rate": 1.93416859053508e-05, "loss": 0.6053, "step": 7651 }, { "epoch": 0.23506282063097103, "grad_norm": 0.3113624155521393, "learning_rate": 1.9341513445644765e-05, "loss": 0.5671, "step": 7652 }, { "epoch": 0.23509353976592018, "grad_norm": 0.31709522008895874, "learning_rate": 1.934134096412097e-05, "loss": 0.5896, "step": 7653 }, { "epoch": 0.23512425890086935, "grad_norm": 0.3245203495025635, "learning_rate": 1.9341168460779824e-05, "loss": 0.4939, "step": 7654 }, { "epoch": 0.2351549780358185, "grad_norm": 0.3531922399997711, "learning_rate": 1.9340995935621727e-05, "loss": 0.6417, "step": 7655 }, { "epoch": 0.23518569717076768, "grad_norm": 0.3636716604232788, "learning_rate": 1.9340823388647077e-05, "loss": 0.5688, "step": 7656 }, { "epoch": 0.23521641630571682, "grad_norm": 0.36881935596466064, "learning_rate": 1.9340650819856287e-05, "loss": 0.4997, "step": 7657 }, { "epoch": 0.235247135440666, "grad_norm": 0.36146917939186096, "learning_rate": 1.934047822924975e-05, "loss": 0.5638, "step": 7658 }, { "epoch": 0.23527785457561515, "grad_norm": 0.3437976539134979, "learning_rate": 1.934030561682788e-05, "loss": 0.5981, "step": 7659 }, { "epoch": 0.23530857371056432, "grad_norm": 0.32935625314712524, "learning_rate": 1.9340132982591066e-05, "loss": 0.6301, "step": 7660 }, { "epoch": 0.23533929284551347, "grad_norm": 0.32763633131980896, "learning_rate": 1.9339960326539727e-05, "loss": 0.5616, "step": 7661 }, { "epoch": 0.23537001198046262, "grad_norm": 0.351895272731781, "learning_rate": 1.9339787648674252e-05, "loss": 0.5391, "step": 7662 }, { "epoch": 0.2354007311154118, "grad_norm": 0.36082878708839417, "learning_rate": 1.9339614948995055e-05, "loss": 0.587, "step": 7663 }, { "epoch": 0.23543145025036094, "grad_norm": 0.3583157956600189, "learning_rate": 1.933944222750253e-05, "loss": 0.5545, "step": 7664 }, { "epoch": 0.23546216938531012, "grad_norm": 0.38586872816085815, "learning_rate": 1.9339269484197092e-05, "loss": 0.6078, "step": 7665 }, { "epoch": 0.23549288852025926, "grad_norm": 0.3221229910850525, "learning_rate": 1.9339096719079132e-05, "loss": 0.515, "step": 7666 }, { "epoch": 0.23552360765520844, "grad_norm": 0.4187864363193512, "learning_rate": 1.9338923932149064e-05, "loss": 0.6033, "step": 7667 }, { "epoch": 0.2355543267901576, "grad_norm": 0.5127690434455872, "learning_rate": 1.9338751123407284e-05, "loss": 0.6238, "step": 7668 }, { "epoch": 0.23558504592510676, "grad_norm": 0.49704238772392273, "learning_rate": 1.93385782928542e-05, "loss": 0.4507, "step": 7669 }, { "epoch": 0.2356157650600559, "grad_norm": 0.4091121554374695, "learning_rate": 1.9338405440490212e-05, "loss": 0.6229, "step": 7670 }, { "epoch": 0.23564648419500506, "grad_norm": 0.36017656326293945, "learning_rate": 1.933823256631573e-05, "loss": 0.5882, "step": 7671 }, { "epoch": 0.23567720332995423, "grad_norm": 0.3415447175502777, "learning_rate": 1.933805967033115e-05, "loss": 0.5652, "step": 7672 }, { "epoch": 0.23570792246490338, "grad_norm": 0.34846335649490356, "learning_rate": 1.933788675253688e-05, "loss": 0.5619, "step": 7673 }, { "epoch": 0.23573864159985256, "grad_norm": 0.31977617740631104, "learning_rate": 1.9337713812933324e-05, "loss": 0.5518, "step": 7674 }, { "epoch": 0.2357693607348017, "grad_norm": 0.3410705029964447, "learning_rate": 1.9337540851520885e-05, "loss": 0.5571, "step": 7675 }, { "epoch": 0.23580007986975088, "grad_norm": 0.3209608197212219, "learning_rate": 1.9337367868299968e-05, "loss": 0.5788, "step": 7676 }, { "epoch": 0.23583079900470003, "grad_norm": 0.33878567814826965, "learning_rate": 1.9337194863270974e-05, "loss": 0.6127, "step": 7677 }, { "epoch": 0.2358615181396492, "grad_norm": 0.30212122201919556, "learning_rate": 1.933702183643431e-05, "loss": 0.5545, "step": 7678 }, { "epoch": 0.23589223727459835, "grad_norm": 0.3538045585155487, "learning_rate": 1.9336848787790384e-05, "loss": 0.5305, "step": 7679 }, { "epoch": 0.2359229564095475, "grad_norm": 0.39698582887649536, "learning_rate": 1.933667571733959e-05, "loss": 0.5815, "step": 7680 }, { "epoch": 0.23595367554449667, "grad_norm": 0.31713250279426575, "learning_rate": 1.9336502625082342e-05, "loss": 0.5977, "step": 7681 }, { "epoch": 0.23598439467944582, "grad_norm": 0.36416423320770264, "learning_rate": 1.9336329511019035e-05, "loss": 0.5352, "step": 7682 }, { "epoch": 0.236015113814395, "grad_norm": 0.3482380509376526, "learning_rate": 1.9336156375150082e-05, "loss": 0.6069, "step": 7683 }, { "epoch": 0.23604583294934414, "grad_norm": 0.3824409246444702, "learning_rate": 1.933598321747588e-05, "loss": 0.5911, "step": 7684 }, { "epoch": 0.23607655208429332, "grad_norm": 0.3841603994369507, "learning_rate": 1.9335810037996844e-05, "loss": 0.6086, "step": 7685 }, { "epoch": 0.23610727121924246, "grad_norm": 0.32101744413375854, "learning_rate": 1.933563683671337e-05, "loss": 0.5019, "step": 7686 }, { "epoch": 0.2361379903541916, "grad_norm": 0.3927426040172577, "learning_rate": 1.933546361362586e-05, "loss": 0.6444, "step": 7687 }, { "epoch": 0.2361687094891408, "grad_norm": 0.38245734572410583, "learning_rate": 1.9335290368734727e-05, "loss": 0.5753, "step": 7688 }, { "epoch": 0.23619942862408994, "grad_norm": 0.32920408248901367, "learning_rate": 1.9335117102040366e-05, "loss": 0.6061, "step": 7689 }, { "epoch": 0.2362301477590391, "grad_norm": 0.44558021426200867, "learning_rate": 1.933494381354319e-05, "loss": 0.5861, "step": 7690 }, { "epoch": 0.23626086689398826, "grad_norm": 0.4034399688243866, "learning_rate": 1.93347705032436e-05, "loss": 0.6005, "step": 7691 }, { "epoch": 0.23629158602893743, "grad_norm": 0.33713221549987793, "learning_rate": 1.9334597171142002e-05, "loss": 0.5933, "step": 7692 }, { "epoch": 0.23632230516388658, "grad_norm": 0.4244551360607147, "learning_rate": 1.9334423817238803e-05, "loss": 0.6989, "step": 7693 }, { "epoch": 0.23635302429883576, "grad_norm": 0.3447105586528778, "learning_rate": 1.9334250441534405e-05, "loss": 0.6015, "step": 7694 }, { "epoch": 0.2363837434337849, "grad_norm": 0.34805458784103394, "learning_rate": 1.933407704402921e-05, "loss": 0.5785, "step": 7695 }, { "epoch": 0.23641446256873405, "grad_norm": 0.32866689562797546, "learning_rate": 1.9333903624723627e-05, "loss": 0.5229, "step": 7696 }, { "epoch": 0.23644518170368323, "grad_norm": 0.32703688740730286, "learning_rate": 1.933373018361806e-05, "loss": 0.6624, "step": 7697 }, { "epoch": 0.23647590083863237, "grad_norm": 0.4280700087547302, "learning_rate": 1.9333556720712915e-05, "loss": 0.5515, "step": 7698 }, { "epoch": 0.23650661997358155, "grad_norm": 0.3575366139411926, "learning_rate": 1.9333383236008597e-05, "loss": 0.6291, "step": 7699 }, { "epoch": 0.2365373391085307, "grad_norm": 0.33593156933784485, "learning_rate": 1.933320972950551e-05, "loss": 0.5783, "step": 7700 }, { "epoch": 0.23656805824347987, "grad_norm": 0.31856995820999146, "learning_rate": 1.933303620120406e-05, "loss": 0.5816, "step": 7701 }, { "epoch": 0.23659877737842902, "grad_norm": 0.4161600172519684, "learning_rate": 1.933286265110465e-05, "loss": 0.7215, "step": 7702 }, { "epoch": 0.2366294965133782, "grad_norm": 0.3353427052497864, "learning_rate": 1.933268907920769e-05, "loss": 0.6098, "step": 7703 }, { "epoch": 0.23666021564832734, "grad_norm": 0.39298662543296814, "learning_rate": 1.9332515485513582e-05, "loss": 0.5791, "step": 7704 }, { "epoch": 0.2366909347832765, "grad_norm": 0.3554583787918091, "learning_rate": 1.9332341870022736e-05, "loss": 0.5708, "step": 7705 }, { "epoch": 0.23672165391822567, "grad_norm": 0.33231639862060547, "learning_rate": 1.933216823273555e-05, "loss": 0.6123, "step": 7706 }, { "epoch": 0.2367523730531748, "grad_norm": 0.32306018471717834, "learning_rate": 1.9331994573652434e-05, "loss": 0.5949, "step": 7707 }, { "epoch": 0.236783092188124, "grad_norm": 0.3217405378818512, "learning_rate": 1.933182089277379e-05, "loss": 0.5405, "step": 7708 }, { "epoch": 0.23681381132307314, "grad_norm": 0.36560437083244324, "learning_rate": 1.933164719010003e-05, "loss": 0.6765, "step": 7709 }, { "epoch": 0.2368445304580223, "grad_norm": 0.3603482246398926, "learning_rate": 1.9331473465631557e-05, "loss": 0.5717, "step": 7710 }, { "epoch": 0.23687524959297146, "grad_norm": 0.6808732151985168, "learning_rate": 1.9331299719368775e-05, "loss": 0.6658, "step": 7711 }, { "epoch": 0.23690596872792063, "grad_norm": 0.3710639774799347, "learning_rate": 1.933112595131209e-05, "loss": 0.6289, "step": 7712 }, { "epoch": 0.23693668786286978, "grad_norm": 0.30413395166397095, "learning_rate": 1.9330952161461905e-05, "loss": 0.5363, "step": 7713 }, { "epoch": 0.23696740699781893, "grad_norm": 0.3688679337501526, "learning_rate": 1.933077834981864e-05, "loss": 0.6192, "step": 7714 }, { "epoch": 0.2369981261327681, "grad_norm": 0.35575923323631287, "learning_rate": 1.933060451638268e-05, "loss": 0.5849, "step": 7715 }, { "epoch": 0.23702884526771725, "grad_norm": 0.33094725012779236, "learning_rate": 1.9330430661154446e-05, "loss": 0.556, "step": 7716 }, { "epoch": 0.23705956440266643, "grad_norm": 0.31887176632881165, "learning_rate": 1.933025678413434e-05, "loss": 0.5694, "step": 7717 }, { "epoch": 0.23709028353761558, "grad_norm": 0.3657797873020172, "learning_rate": 1.9330082885322764e-05, "loss": 0.6719, "step": 7718 }, { "epoch": 0.23712100267256475, "grad_norm": 0.3324965536594391, "learning_rate": 1.9329908964720133e-05, "loss": 0.6388, "step": 7719 }, { "epoch": 0.2371517218075139, "grad_norm": 0.33415529131889343, "learning_rate": 1.9329735022326845e-05, "loss": 0.581, "step": 7720 }, { "epoch": 0.23718244094246305, "grad_norm": 0.3483179807662964, "learning_rate": 1.932956105814331e-05, "loss": 0.5491, "step": 7721 }, { "epoch": 0.23721316007741222, "grad_norm": 0.3640226423740387, "learning_rate": 1.9329387072169937e-05, "loss": 0.5456, "step": 7722 }, { "epoch": 0.23724387921236137, "grad_norm": 0.36551204323768616, "learning_rate": 1.9329213064407125e-05, "loss": 0.624, "step": 7723 }, { "epoch": 0.23727459834731054, "grad_norm": 0.3591916263103485, "learning_rate": 1.9329039034855285e-05, "loss": 0.5748, "step": 7724 }, { "epoch": 0.2373053174822597, "grad_norm": 0.33495157957077026, "learning_rate": 1.9328864983514823e-05, "loss": 0.5929, "step": 7725 }, { "epoch": 0.23733603661720887, "grad_norm": 0.3485173285007477, "learning_rate": 1.9328690910386142e-05, "loss": 0.6303, "step": 7726 }, { "epoch": 0.23736675575215802, "grad_norm": 0.3341186046600342, "learning_rate": 1.9328516815469657e-05, "loss": 0.5982, "step": 7727 }, { "epoch": 0.2373974748871072, "grad_norm": 0.33924180269241333, "learning_rate": 1.9328342698765767e-05, "loss": 0.5341, "step": 7728 }, { "epoch": 0.23742819402205634, "grad_norm": 0.3323246240615845, "learning_rate": 1.932816856027488e-05, "loss": 0.5837, "step": 7729 }, { "epoch": 0.23745891315700549, "grad_norm": 0.36458349227905273, "learning_rate": 1.9327994399997407e-05, "loss": 0.6148, "step": 7730 }, { "epoch": 0.23748963229195466, "grad_norm": 0.34718066453933716, "learning_rate": 1.9327820217933747e-05, "loss": 0.6721, "step": 7731 }, { "epoch": 0.2375203514269038, "grad_norm": 0.3540247976779938, "learning_rate": 1.9327646014084315e-05, "loss": 0.6331, "step": 7732 }, { "epoch": 0.23755107056185298, "grad_norm": 0.3684503734111786, "learning_rate": 1.9327471788449514e-05, "loss": 0.5919, "step": 7733 }, { "epoch": 0.23758178969680213, "grad_norm": 0.46011537313461304, "learning_rate": 1.932729754102975e-05, "loss": 0.6184, "step": 7734 }, { "epoch": 0.2376125088317513, "grad_norm": 0.33986809849739075, "learning_rate": 1.9327123271825436e-05, "loss": 0.5796, "step": 7735 }, { "epoch": 0.23764322796670045, "grad_norm": 0.3374120891094208, "learning_rate": 1.9326948980836967e-05, "loss": 0.5851, "step": 7736 }, { "epoch": 0.23767394710164963, "grad_norm": 3.4150466918945312, "learning_rate": 1.9326774668064763e-05, "loss": 0.6212, "step": 7737 }, { "epoch": 0.23770466623659878, "grad_norm": 0.33239367604255676, "learning_rate": 1.932660033350922e-05, "loss": 0.5369, "step": 7738 }, { "epoch": 0.23773538537154792, "grad_norm": 0.3425189256668091, "learning_rate": 1.9326425977170754e-05, "loss": 0.5843, "step": 7739 }, { "epoch": 0.2377661045064971, "grad_norm": 0.3650931119918823, "learning_rate": 1.932625159904977e-05, "loss": 0.643, "step": 7740 }, { "epoch": 0.23779682364144625, "grad_norm": 0.33989810943603516, "learning_rate": 1.932607719914667e-05, "loss": 0.6032, "step": 7741 }, { "epoch": 0.23782754277639542, "grad_norm": 0.33067411184310913, "learning_rate": 1.932590277746187e-05, "loss": 0.5687, "step": 7742 }, { "epoch": 0.23785826191134457, "grad_norm": 0.40546363592147827, "learning_rate": 1.932572833399577e-05, "loss": 0.589, "step": 7743 }, { "epoch": 0.23788898104629375, "grad_norm": 0.36863774061203003, "learning_rate": 1.932555386874878e-05, "loss": 0.6062, "step": 7744 }, { "epoch": 0.2379197001812429, "grad_norm": 0.31500327587127686, "learning_rate": 1.932537938172131e-05, "loss": 0.5866, "step": 7745 }, { "epoch": 0.23795041931619207, "grad_norm": 0.38562583923339844, "learning_rate": 1.932520487291376e-05, "loss": 0.6745, "step": 7746 }, { "epoch": 0.23798113845114122, "grad_norm": 0.681908905506134, "learning_rate": 1.9325030342326547e-05, "loss": 0.5756, "step": 7747 }, { "epoch": 0.23801185758609036, "grad_norm": 0.35309386253356934, "learning_rate": 1.9324855789960076e-05, "loss": 0.5749, "step": 7748 }, { "epoch": 0.23804257672103954, "grad_norm": 0.3863277733325958, "learning_rate": 1.9324681215814748e-05, "loss": 0.6399, "step": 7749 }, { "epoch": 0.2380732958559887, "grad_norm": 0.30405014753341675, "learning_rate": 1.932450661989098e-05, "loss": 0.5104, "step": 7750 }, { "epoch": 0.23810401499093786, "grad_norm": 0.387319415807724, "learning_rate": 1.9324332002189175e-05, "loss": 0.6099, "step": 7751 }, { "epoch": 0.238134734125887, "grad_norm": 0.3496610224246979, "learning_rate": 1.932415736270974e-05, "loss": 0.5512, "step": 7752 }, { "epoch": 0.23816545326083619, "grad_norm": 0.3087685704231262, "learning_rate": 1.9323982701453087e-05, "loss": 0.665, "step": 7753 }, { "epoch": 0.23819617239578533, "grad_norm": 0.34265461564064026, "learning_rate": 1.9323808018419617e-05, "loss": 0.5932, "step": 7754 }, { "epoch": 0.2382268915307345, "grad_norm": 0.3184657096862793, "learning_rate": 1.9323633313609743e-05, "loss": 0.533, "step": 7755 }, { "epoch": 0.23825761066568366, "grad_norm": 0.4897792637348175, "learning_rate": 1.9323458587023872e-05, "loss": 0.5107, "step": 7756 }, { "epoch": 0.2382883298006328, "grad_norm": 0.35523632168769836, "learning_rate": 1.9323283838662417e-05, "loss": 0.579, "step": 7757 }, { "epoch": 0.23831904893558198, "grad_norm": 0.330981582403183, "learning_rate": 1.9323109068525778e-05, "loss": 0.5497, "step": 7758 }, { "epoch": 0.23834976807053113, "grad_norm": 0.33457767963409424, "learning_rate": 1.9322934276614368e-05, "loss": 0.5733, "step": 7759 }, { "epoch": 0.2383804872054803, "grad_norm": 0.3411354422569275, "learning_rate": 1.932275946292859e-05, "loss": 0.5051, "step": 7760 }, { "epoch": 0.23841120634042945, "grad_norm": 0.3682728111743927, "learning_rate": 1.9322584627468858e-05, "loss": 0.6866, "step": 7761 }, { "epoch": 0.23844192547537862, "grad_norm": 0.3637903928756714, "learning_rate": 1.9322409770235578e-05, "loss": 0.5805, "step": 7762 }, { "epoch": 0.23847264461032777, "grad_norm": 0.39284321665763855, "learning_rate": 1.9322234891229164e-05, "loss": 0.6027, "step": 7763 }, { "epoch": 0.23850336374527692, "grad_norm": 0.35097989439964294, "learning_rate": 1.9322059990450015e-05, "loss": 0.5623, "step": 7764 }, { "epoch": 0.2385340828802261, "grad_norm": 0.32582956552505493, "learning_rate": 1.932188506789854e-05, "loss": 0.543, "step": 7765 }, { "epoch": 0.23856480201517524, "grad_norm": 0.34813395142555237, "learning_rate": 1.932171012357516e-05, "loss": 0.6122, "step": 7766 }, { "epoch": 0.23859552115012442, "grad_norm": 0.3784501254558563, "learning_rate": 1.932153515748027e-05, "loss": 0.5643, "step": 7767 }, { "epoch": 0.23862624028507357, "grad_norm": 0.38413646817207336, "learning_rate": 1.9321360169614287e-05, "loss": 0.5535, "step": 7768 }, { "epoch": 0.23865695942002274, "grad_norm": 0.406183123588562, "learning_rate": 1.932118515997761e-05, "loss": 0.638, "step": 7769 }, { "epoch": 0.2386876785549719, "grad_norm": 0.3350162208080292, "learning_rate": 1.932101012857066e-05, "loss": 0.5746, "step": 7770 }, { "epoch": 0.23871839768992106, "grad_norm": 0.33829784393310547, "learning_rate": 1.932083507539384e-05, "loss": 0.5523, "step": 7771 }, { "epoch": 0.2387491168248702, "grad_norm": 0.31705373525619507, "learning_rate": 1.9320660000447555e-05, "loss": 0.6205, "step": 7772 }, { "epoch": 0.23877983595981936, "grad_norm": 0.3191987872123718, "learning_rate": 1.9320484903732217e-05, "loss": 0.5449, "step": 7773 }, { "epoch": 0.23881055509476853, "grad_norm": 0.36392101645469666, "learning_rate": 1.932030978524824e-05, "loss": 0.7129, "step": 7774 }, { "epoch": 0.23884127422971768, "grad_norm": 0.3997855484485626, "learning_rate": 1.9320134644996024e-05, "loss": 0.5482, "step": 7775 }, { "epoch": 0.23887199336466686, "grad_norm": 0.3979213535785675, "learning_rate": 1.9319959482975987e-05, "loss": 0.59, "step": 7776 }, { "epoch": 0.238902712499616, "grad_norm": 0.3063737452030182, "learning_rate": 1.931978429918853e-05, "loss": 0.6125, "step": 7777 }, { "epoch": 0.23893343163456518, "grad_norm": 0.33630600571632385, "learning_rate": 1.931960909363407e-05, "loss": 0.5853, "step": 7778 }, { "epoch": 0.23896415076951433, "grad_norm": 0.3408201038837433, "learning_rate": 1.931943386631301e-05, "loss": 0.5599, "step": 7779 }, { "epoch": 0.2389948699044635, "grad_norm": 0.3145720660686493, "learning_rate": 1.931925861722576e-05, "loss": 0.562, "step": 7780 }, { "epoch": 0.23902558903941265, "grad_norm": 0.32870301604270935, "learning_rate": 1.931908334637273e-05, "loss": 0.5254, "step": 7781 }, { "epoch": 0.2390563081743618, "grad_norm": 0.3488176763057709, "learning_rate": 1.931890805375433e-05, "loss": 0.5877, "step": 7782 }, { "epoch": 0.23908702730931097, "grad_norm": 0.3338836133480072, "learning_rate": 1.9318732739370976e-05, "loss": 0.522, "step": 7783 }, { "epoch": 0.23911774644426012, "grad_norm": 0.3120950162410736, "learning_rate": 1.9318557403223062e-05, "loss": 0.5908, "step": 7784 }, { "epoch": 0.2391484655792093, "grad_norm": 0.3061603307723999, "learning_rate": 1.9318382045311013e-05, "loss": 0.4664, "step": 7785 }, { "epoch": 0.23917918471415844, "grad_norm": 0.3127819299697876, "learning_rate": 1.9318206665635228e-05, "loss": 0.6252, "step": 7786 }, { "epoch": 0.23920990384910762, "grad_norm": 0.33447209000587463, "learning_rate": 1.931803126419612e-05, "loss": 0.5402, "step": 7787 }, { "epoch": 0.23924062298405677, "grad_norm": 0.3591729700565338, "learning_rate": 1.9317855840994104e-05, "loss": 0.6614, "step": 7788 }, { "epoch": 0.23927134211900594, "grad_norm": 0.3287372589111328, "learning_rate": 1.9317680396029582e-05, "loss": 0.5756, "step": 7789 }, { "epoch": 0.2393020612539551, "grad_norm": 0.32909464836120605, "learning_rate": 1.9317504929302967e-05, "loss": 0.6342, "step": 7790 }, { "epoch": 0.23933278038890424, "grad_norm": 0.6835966110229492, "learning_rate": 1.9317329440814667e-05, "loss": 0.5193, "step": 7791 }, { "epoch": 0.2393634995238534, "grad_norm": 0.36104729771614075, "learning_rate": 1.9317153930565094e-05, "loss": 0.5853, "step": 7792 }, { "epoch": 0.23939421865880256, "grad_norm": 0.3740997314453125, "learning_rate": 1.931697839855466e-05, "loss": 0.6074, "step": 7793 }, { "epoch": 0.23942493779375174, "grad_norm": 0.3470056653022766, "learning_rate": 1.931680284478377e-05, "loss": 0.5723, "step": 7794 }, { "epoch": 0.23945565692870088, "grad_norm": 0.35116130113601685, "learning_rate": 1.9316627269252836e-05, "loss": 0.6979, "step": 7795 }, { "epoch": 0.23948637606365006, "grad_norm": 0.37324821949005127, "learning_rate": 1.931645167196227e-05, "loss": 0.5444, "step": 7796 }, { "epoch": 0.2395170951985992, "grad_norm": 0.34813225269317627, "learning_rate": 1.9316276052912476e-05, "loss": 0.6328, "step": 7797 }, { "epoch": 0.23954781433354835, "grad_norm": 0.37935948371887207, "learning_rate": 1.9316100412103873e-05, "loss": 0.5927, "step": 7798 }, { "epoch": 0.23957853346849753, "grad_norm": 0.3527315855026245, "learning_rate": 1.9315924749536865e-05, "loss": 0.5933, "step": 7799 }, { "epoch": 0.23960925260344668, "grad_norm": 0.3751799762248993, "learning_rate": 1.9315749065211867e-05, "loss": 0.6186, "step": 7800 }, { "epoch": 0.23963997173839585, "grad_norm": 0.32956886291503906, "learning_rate": 1.9315573359129282e-05, "loss": 0.5644, "step": 7801 }, { "epoch": 0.239670690873345, "grad_norm": 0.35524246096611023, "learning_rate": 1.931539763128953e-05, "loss": 0.6097, "step": 7802 }, { "epoch": 0.23970141000829417, "grad_norm": 0.35321658849716187, "learning_rate": 1.9315221881693012e-05, "loss": 0.5968, "step": 7803 }, { "epoch": 0.23973212914324332, "grad_norm": 0.40220344066619873, "learning_rate": 1.9315046110340143e-05, "loss": 0.6235, "step": 7804 }, { "epoch": 0.2397628482781925, "grad_norm": 0.32326823472976685, "learning_rate": 1.9314870317231333e-05, "loss": 0.6687, "step": 7805 }, { "epoch": 0.23979356741314165, "grad_norm": 0.3570176362991333, "learning_rate": 1.9314694502366994e-05, "loss": 0.5888, "step": 7806 }, { "epoch": 0.2398242865480908, "grad_norm": 0.33186575770378113, "learning_rate": 1.9314518665747536e-05, "loss": 0.595, "step": 7807 }, { "epoch": 0.23985500568303997, "grad_norm": 0.3309374451637268, "learning_rate": 1.931434280737337e-05, "loss": 0.6048, "step": 7808 }, { "epoch": 0.23988572481798912, "grad_norm": 0.384705513715744, "learning_rate": 1.9314166927244903e-05, "loss": 0.6753, "step": 7809 }, { "epoch": 0.2399164439529383, "grad_norm": 0.33834895491600037, "learning_rate": 1.9313991025362552e-05, "loss": 0.617, "step": 7810 }, { "epoch": 0.23994716308788744, "grad_norm": 0.36451688408851624, "learning_rate": 1.931381510172672e-05, "loss": 0.563, "step": 7811 }, { "epoch": 0.2399778822228366, "grad_norm": 0.34867507219314575, "learning_rate": 1.9313639156337827e-05, "loss": 0.6688, "step": 7812 }, { "epoch": 0.24000860135778576, "grad_norm": 0.34765496850013733, "learning_rate": 1.9313463189196277e-05, "loss": 0.5385, "step": 7813 }, { "epoch": 0.24003932049273494, "grad_norm": 0.3552313446998596, "learning_rate": 1.9313287200302487e-05, "loss": 0.6122, "step": 7814 }, { "epoch": 0.24007003962768408, "grad_norm": 0.33733466267585754, "learning_rate": 1.931311118965686e-05, "loss": 0.5885, "step": 7815 }, { "epoch": 0.24010075876263323, "grad_norm": 0.33912891149520874, "learning_rate": 1.931293515725981e-05, "loss": 0.5918, "step": 7816 }, { "epoch": 0.2401314778975824, "grad_norm": 0.36407941579818726, "learning_rate": 1.9312759103111754e-05, "loss": 0.6185, "step": 7817 }, { "epoch": 0.24016219703253155, "grad_norm": 0.34853997826576233, "learning_rate": 1.9312583027213097e-05, "loss": 0.6475, "step": 7818 }, { "epoch": 0.24019291616748073, "grad_norm": 0.34397777915000916, "learning_rate": 1.931240692956425e-05, "loss": 0.6099, "step": 7819 }, { "epoch": 0.24022363530242988, "grad_norm": 0.36566784977912903, "learning_rate": 1.9312230810165628e-05, "loss": 0.5643, "step": 7820 }, { "epoch": 0.24025435443737905, "grad_norm": 0.6620187759399414, "learning_rate": 1.931205466901764e-05, "loss": 0.6397, "step": 7821 }, { "epoch": 0.2402850735723282, "grad_norm": 0.36219537258148193, "learning_rate": 1.93118785061207e-05, "loss": 0.5877, "step": 7822 }, { "epoch": 0.24031579270727738, "grad_norm": 0.31784185767173767, "learning_rate": 1.9311702321475216e-05, "loss": 0.595, "step": 7823 }, { "epoch": 0.24034651184222652, "grad_norm": 0.3466523587703705, "learning_rate": 1.9311526115081597e-05, "loss": 0.6164, "step": 7824 }, { "epoch": 0.24037723097717567, "grad_norm": 0.3371412456035614, "learning_rate": 1.9311349886940264e-05, "loss": 0.635, "step": 7825 }, { "epoch": 0.24040795011212485, "grad_norm": 0.3117747902870178, "learning_rate": 1.931117363705162e-05, "loss": 0.4992, "step": 7826 }, { "epoch": 0.240438669247074, "grad_norm": 0.37134701013565063, "learning_rate": 1.9310997365416085e-05, "loss": 0.6486, "step": 7827 }, { "epoch": 0.24046938838202317, "grad_norm": 0.3649406135082245, "learning_rate": 1.931082107203406e-05, "loss": 0.5385, "step": 7828 }, { "epoch": 0.24050010751697232, "grad_norm": 0.3606352210044861, "learning_rate": 1.9310644756905963e-05, "loss": 0.5644, "step": 7829 }, { "epoch": 0.2405308266519215, "grad_norm": 0.32484495639801025, "learning_rate": 1.9310468420032204e-05, "loss": 0.489, "step": 7830 }, { "epoch": 0.24056154578687064, "grad_norm": 0.3407883048057556, "learning_rate": 1.9310292061413197e-05, "loss": 0.5612, "step": 7831 }, { "epoch": 0.24059226492181982, "grad_norm": 0.35288557410240173, "learning_rate": 1.9310115681049355e-05, "loss": 0.6328, "step": 7832 }, { "epoch": 0.24062298405676896, "grad_norm": 0.3287113606929779, "learning_rate": 1.9309939278941085e-05, "loss": 0.5264, "step": 7833 }, { "epoch": 0.2406537031917181, "grad_norm": 0.402421236038208, "learning_rate": 1.9309762855088802e-05, "loss": 0.6029, "step": 7834 }, { "epoch": 0.24068442232666729, "grad_norm": 0.3442458212375641, "learning_rate": 1.9309586409492915e-05, "loss": 0.5546, "step": 7835 }, { "epoch": 0.24071514146161643, "grad_norm": 0.34782230854034424, "learning_rate": 1.9309409942153842e-05, "loss": 0.6357, "step": 7836 }, { "epoch": 0.2407458605965656, "grad_norm": 0.32574185729026794, "learning_rate": 1.9309233453071993e-05, "loss": 0.5366, "step": 7837 }, { "epoch": 0.24077657973151476, "grad_norm": 0.37897226214408875, "learning_rate": 1.9309056942247776e-05, "loss": 0.6164, "step": 7838 }, { "epoch": 0.24080729886646393, "grad_norm": 0.32586869597435, "learning_rate": 1.930888040968161e-05, "loss": 0.59, "step": 7839 }, { "epoch": 0.24083801800141308, "grad_norm": 0.32852333784103394, "learning_rate": 1.93087038553739e-05, "loss": 0.5764, "step": 7840 }, { "epoch": 0.24086873713636223, "grad_norm": 0.4305536150932312, "learning_rate": 1.9308527279325066e-05, "loss": 0.6033, "step": 7841 }, { "epoch": 0.2408994562713114, "grad_norm": 0.3355798125267029, "learning_rate": 1.930835068153551e-05, "loss": 0.6168, "step": 7842 }, { "epoch": 0.24093017540626055, "grad_norm": 0.37456241250038147, "learning_rate": 1.9308174062005656e-05, "loss": 0.5951, "step": 7843 }, { "epoch": 0.24096089454120972, "grad_norm": 0.35027360916137695, "learning_rate": 1.9307997420735912e-05, "loss": 0.6252, "step": 7844 }, { "epoch": 0.24099161367615887, "grad_norm": 0.3349452316761017, "learning_rate": 1.930782075772669e-05, "loss": 0.566, "step": 7845 }, { "epoch": 0.24102233281110805, "grad_norm": 1.350369930267334, "learning_rate": 1.93076440729784e-05, "loss": 0.5513, "step": 7846 }, { "epoch": 0.2410530519460572, "grad_norm": 0.3140746057033539, "learning_rate": 1.9307467366491455e-05, "loss": 0.5303, "step": 7847 }, { "epoch": 0.24108377108100637, "grad_norm": 0.3903653025627136, "learning_rate": 1.9307290638266275e-05, "loss": 0.5685, "step": 7848 }, { "epoch": 0.24111449021595552, "grad_norm": 0.3362683057785034, "learning_rate": 1.9307113888303265e-05, "loss": 0.6236, "step": 7849 }, { "epoch": 0.24114520935090467, "grad_norm": 0.35555949807167053, "learning_rate": 1.930693711660284e-05, "loss": 0.5864, "step": 7850 }, { "epoch": 0.24117592848585384, "grad_norm": 0.35139814019203186, "learning_rate": 1.9306760323165418e-05, "loss": 0.5894, "step": 7851 }, { "epoch": 0.241206647620803, "grad_norm": 0.3535762429237366, "learning_rate": 1.93065835079914e-05, "loss": 0.5963, "step": 7852 }, { "epoch": 0.24123736675575216, "grad_norm": 0.4044000208377838, "learning_rate": 1.9306406671081214e-05, "loss": 0.6477, "step": 7853 }, { "epoch": 0.2412680858907013, "grad_norm": 0.37683460116386414, "learning_rate": 1.9306229812435257e-05, "loss": 0.6218, "step": 7854 }, { "epoch": 0.2412988050256505, "grad_norm": 0.36774733662605286, "learning_rate": 1.9306052932053957e-05, "loss": 0.6126, "step": 7855 }, { "epoch": 0.24132952416059963, "grad_norm": 0.4392901062965393, "learning_rate": 1.9305876029937717e-05, "loss": 0.5969, "step": 7856 }, { "epoch": 0.2413602432955488, "grad_norm": 0.37224143743515015, "learning_rate": 1.9305699106086954e-05, "loss": 0.6484, "step": 7857 }, { "epoch": 0.24139096243049796, "grad_norm": 0.374381422996521, "learning_rate": 1.9305522160502085e-05, "loss": 0.6453, "step": 7858 }, { "epoch": 0.2414216815654471, "grad_norm": 0.31542420387268066, "learning_rate": 1.9305345193183512e-05, "loss": 0.6175, "step": 7859 }, { "epoch": 0.24145240070039628, "grad_norm": 0.357710063457489, "learning_rate": 1.9305168204131662e-05, "loss": 0.5365, "step": 7860 }, { "epoch": 0.24148311983534543, "grad_norm": 0.33481547236442566, "learning_rate": 1.9304991193346937e-05, "loss": 0.5413, "step": 7861 }, { "epoch": 0.2415138389702946, "grad_norm": 0.34574899077415466, "learning_rate": 1.9304814160829758e-05, "loss": 0.5855, "step": 7862 }, { "epoch": 0.24154455810524375, "grad_norm": 0.41088902950286865, "learning_rate": 1.9304637106580536e-05, "loss": 0.5543, "step": 7863 }, { "epoch": 0.24157527724019293, "grad_norm": 0.44712191820144653, "learning_rate": 1.9304460030599684e-05, "loss": 0.5845, "step": 7864 }, { "epoch": 0.24160599637514207, "grad_norm": 0.33812204003334045, "learning_rate": 1.9304282932887612e-05, "loss": 0.622, "step": 7865 }, { "epoch": 0.24163671551009125, "grad_norm": 0.3449346125125885, "learning_rate": 1.930410581344474e-05, "loss": 0.552, "step": 7866 }, { "epoch": 0.2416674346450404, "grad_norm": 0.40710118412971497, "learning_rate": 1.930392867227148e-05, "loss": 0.4625, "step": 7867 }, { "epoch": 0.24169815377998954, "grad_norm": 0.32261523604393005, "learning_rate": 1.9303751509368246e-05, "loss": 0.6027, "step": 7868 }, { "epoch": 0.24172887291493872, "grad_norm": 0.3518761098384857, "learning_rate": 1.9303574324735445e-05, "loss": 0.5961, "step": 7869 }, { "epoch": 0.24175959204988787, "grad_norm": 0.34725886583328247, "learning_rate": 1.9303397118373502e-05, "loss": 0.5574, "step": 7870 }, { "epoch": 0.24179031118483704, "grad_norm": 0.43086719512939453, "learning_rate": 1.9303219890282823e-05, "loss": 0.5696, "step": 7871 }, { "epoch": 0.2418210303197862, "grad_norm": 0.336518794298172, "learning_rate": 1.9303042640463827e-05, "loss": 0.6191, "step": 7872 }, { "epoch": 0.24185174945473537, "grad_norm": 0.33859723806381226, "learning_rate": 1.930286536891692e-05, "loss": 0.5905, "step": 7873 }, { "epoch": 0.2418824685896845, "grad_norm": 0.35092803835868835, "learning_rate": 1.9302688075642524e-05, "loss": 0.6355, "step": 7874 }, { "epoch": 0.2419131877246337, "grad_norm": 0.37284156680107117, "learning_rate": 1.9302510760641053e-05, "loss": 0.5607, "step": 7875 }, { "epoch": 0.24194390685958284, "grad_norm": 0.3246413469314575, "learning_rate": 1.9302333423912912e-05, "loss": 0.61, "step": 7876 }, { "epoch": 0.24197462599453198, "grad_norm": 0.3398270010948181, "learning_rate": 1.930215606545853e-05, "loss": 0.6212, "step": 7877 }, { "epoch": 0.24200534512948116, "grad_norm": 0.3218773305416107, "learning_rate": 1.9301978685278307e-05, "loss": 0.5827, "step": 7878 }, { "epoch": 0.2420360642644303, "grad_norm": 0.3600700795650482, "learning_rate": 1.930180128337266e-05, "loss": 0.6457, "step": 7879 }, { "epoch": 0.24206678339937948, "grad_norm": 0.32336053252220154, "learning_rate": 1.9301623859742014e-05, "loss": 0.5659, "step": 7880 }, { "epoch": 0.24209750253432863, "grad_norm": 0.3215004801750183, "learning_rate": 1.9301446414386773e-05, "loss": 0.5518, "step": 7881 }, { "epoch": 0.2421282216692778, "grad_norm": 0.3726419508457184, "learning_rate": 1.9301268947307354e-05, "loss": 0.5332, "step": 7882 }, { "epoch": 0.24215894080422695, "grad_norm": 0.3332030773162842, "learning_rate": 1.930109145850417e-05, "loss": 0.6095, "step": 7883 }, { "epoch": 0.2421896599391761, "grad_norm": 0.3352378010749817, "learning_rate": 1.930091394797764e-05, "loss": 0.5775, "step": 7884 }, { "epoch": 0.24222037907412527, "grad_norm": 0.34070029854774475, "learning_rate": 1.9300736415728177e-05, "loss": 0.6141, "step": 7885 }, { "epoch": 0.24225109820907442, "grad_norm": 0.3302450478076935, "learning_rate": 1.930055886175619e-05, "loss": 0.5351, "step": 7886 }, { "epoch": 0.2422818173440236, "grad_norm": 0.33124175667762756, "learning_rate": 1.9300381286062105e-05, "loss": 0.5292, "step": 7887 }, { "epoch": 0.24231253647897275, "grad_norm": 0.33741042017936707, "learning_rate": 1.930020368864632e-05, "loss": 0.6031, "step": 7888 }, { "epoch": 0.24234325561392192, "grad_norm": 0.3304395079612732, "learning_rate": 1.930002606950927e-05, "loss": 0.6152, "step": 7889 }, { "epoch": 0.24237397474887107, "grad_norm": 0.37930959463119507, "learning_rate": 1.9299848428651352e-05, "loss": 0.6714, "step": 7890 }, { "epoch": 0.24240469388382024, "grad_norm": 0.5670892596244812, "learning_rate": 1.9299670766072995e-05, "loss": 0.6485, "step": 7891 }, { "epoch": 0.2424354130187694, "grad_norm": 0.3453288972377777, "learning_rate": 1.9299493081774602e-05, "loss": 0.5958, "step": 7892 }, { "epoch": 0.24246613215371854, "grad_norm": 0.3202432096004486, "learning_rate": 1.9299315375756597e-05, "loss": 0.5116, "step": 7893 }, { "epoch": 0.24249685128866771, "grad_norm": 0.3570336103439331, "learning_rate": 1.929913764801939e-05, "loss": 0.6175, "step": 7894 }, { "epoch": 0.24252757042361686, "grad_norm": 0.44624701142311096, "learning_rate": 1.92989598985634e-05, "loss": 0.6628, "step": 7895 }, { "epoch": 0.24255828955856604, "grad_norm": 0.34719568490982056, "learning_rate": 1.9298782127389035e-05, "loss": 0.5573, "step": 7896 }, { "epoch": 0.24258900869351518, "grad_norm": 0.33762839436531067, "learning_rate": 1.9298604334496715e-05, "loss": 0.5828, "step": 7897 }, { "epoch": 0.24261972782846436, "grad_norm": 0.3434600532054901, "learning_rate": 1.929842651988686e-05, "loss": 0.6557, "step": 7898 }, { "epoch": 0.2426504469634135, "grad_norm": 0.3997599184513092, "learning_rate": 1.9298248683559877e-05, "loss": 0.5558, "step": 7899 }, { "epoch": 0.24268116609836268, "grad_norm": 0.5556279420852661, "learning_rate": 1.9298070825516185e-05, "loss": 0.6035, "step": 7900 }, { "epoch": 0.24271188523331183, "grad_norm": 0.35131895542144775, "learning_rate": 1.92978929457562e-05, "loss": 0.6256, "step": 7901 }, { "epoch": 0.24274260436826098, "grad_norm": 0.36762747168540955, "learning_rate": 1.929771504428034e-05, "loss": 0.6083, "step": 7902 }, { "epoch": 0.24277332350321015, "grad_norm": 0.3219127058982849, "learning_rate": 1.9297537121089012e-05, "loss": 0.5874, "step": 7903 }, { "epoch": 0.2428040426381593, "grad_norm": 0.3187677562236786, "learning_rate": 1.9297359176182637e-05, "loss": 0.5491, "step": 7904 }, { "epoch": 0.24283476177310848, "grad_norm": 0.32508912682533264, "learning_rate": 1.9297181209561633e-05, "loss": 0.5487, "step": 7905 }, { "epoch": 0.24286548090805762, "grad_norm": 0.3439057767391205, "learning_rate": 1.929700322122641e-05, "loss": 0.5803, "step": 7906 }, { "epoch": 0.2428962000430068, "grad_norm": 0.3246544599533081, "learning_rate": 1.929682521117739e-05, "loss": 0.5605, "step": 7907 }, { "epoch": 0.24292691917795595, "grad_norm": 0.3815654516220093, "learning_rate": 1.929664717941498e-05, "loss": 0.6008, "step": 7908 }, { "epoch": 0.24295763831290512, "grad_norm": 0.4243093729019165, "learning_rate": 1.929646912593961e-05, "loss": 0.6676, "step": 7909 }, { "epoch": 0.24298835744785427, "grad_norm": 0.3636230528354645, "learning_rate": 1.9296291050751682e-05, "loss": 0.6081, "step": 7910 }, { "epoch": 0.24301907658280342, "grad_norm": 0.3483080565929413, "learning_rate": 1.9296112953851618e-05, "loss": 0.6118, "step": 7911 }, { "epoch": 0.2430497957177526, "grad_norm": 0.42559152841567993, "learning_rate": 1.929593483523983e-05, "loss": 0.6014, "step": 7912 }, { "epoch": 0.24308051485270174, "grad_norm": 0.37733063101768494, "learning_rate": 1.9295756694916742e-05, "loss": 0.6318, "step": 7913 }, { "epoch": 0.24311123398765092, "grad_norm": 0.3372984230518341, "learning_rate": 1.929557853288276e-05, "loss": 0.6271, "step": 7914 }, { "epoch": 0.24314195312260006, "grad_norm": 0.35291334986686707, "learning_rate": 1.9295400349138305e-05, "loss": 0.6337, "step": 7915 }, { "epoch": 0.24317267225754924, "grad_norm": 0.3430301547050476, "learning_rate": 1.92952221436838e-05, "loss": 0.6119, "step": 7916 }, { "epoch": 0.24320339139249839, "grad_norm": 0.34510475397109985, "learning_rate": 1.929504391651965e-05, "loss": 0.5352, "step": 7917 }, { "epoch": 0.24323411052744753, "grad_norm": 0.3225047290325165, "learning_rate": 1.9294865667646275e-05, "loss": 0.544, "step": 7918 }, { "epoch": 0.2432648296623967, "grad_norm": 0.3178761601448059, "learning_rate": 1.929468739706409e-05, "loss": 0.5255, "step": 7919 }, { "epoch": 0.24329554879734586, "grad_norm": 0.3678840696811676, "learning_rate": 1.929450910477352e-05, "loss": 0.7903, "step": 7920 }, { "epoch": 0.24332626793229503, "grad_norm": 0.3986068367958069, "learning_rate": 1.9294330790774972e-05, "loss": 0.6313, "step": 7921 }, { "epoch": 0.24335698706724418, "grad_norm": 0.38289502263069153, "learning_rate": 1.9294152455068862e-05, "loss": 0.6053, "step": 7922 }, { "epoch": 0.24338770620219335, "grad_norm": 0.361075222492218, "learning_rate": 1.9293974097655615e-05, "loss": 0.606, "step": 7923 }, { "epoch": 0.2434184253371425, "grad_norm": 0.31814900040626526, "learning_rate": 1.9293795718535638e-05, "loss": 0.5505, "step": 7924 }, { "epoch": 0.24344914447209168, "grad_norm": 0.33222752809524536, "learning_rate": 1.9293617317709354e-05, "loss": 0.61, "step": 7925 }, { "epoch": 0.24347986360704083, "grad_norm": 0.344445139169693, "learning_rate": 1.9293438895177176e-05, "loss": 0.6176, "step": 7926 }, { "epoch": 0.24351058274198997, "grad_norm": 0.35552841424942017, "learning_rate": 1.9293260450939525e-05, "loss": 0.6091, "step": 7927 }, { "epoch": 0.24354130187693915, "grad_norm": 0.33430352807044983, "learning_rate": 1.9293081984996814e-05, "loss": 0.5695, "step": 7928 }, { "epoch": 0.2435720210118883, "grad_norm": 0.3331231474876404, "learning_rate": 1.9292903497349464e-05, "loss": 0.6192, "step": 7929 }, { "epoch": 0.24360274014683747, "grad_norm": 0.34933149814605713, "learning_rate": 1.9292724987997883e-05, "loss": 0.6531, "step": 7930 }, { "epoch": 0.24363345928178662, "grad_norm": 0.340734601020813, "learning_rate": 1.92925464569425e-05, "loss": 0.6657, "step": 7931 }, { "epoch": 0.2436641784167358, "grad_norm": 0.32582753896713257, "learning_rate": 1.9292367904183722e-05, "loss": 0.5462, "step": 7932 }, { "epoch": 0.24369489755168494, "grad_norm": 0.33016127347946167, "learning_rate": 1.9292189329721972e-05, "loss": 0.6427, "step": 7933 }, { "epoch": 0.24372561668663412, "grad_norm": 0.3651644289493561, "learning_rate": 1.929201073355766e-05, "loss": 0.6554, "step": 7934 }, { "epoch": 0.24375633582158326, "grad_norm": 0.34452199935913086, "learning_rate": 1.929183211569121e-05, "loss": 0.5431, "step": 7935 }, { "epoch": 0.2437870549565324, "grad_norm": 0.3737070858478546, "learning_rate": 1.9291653476123042e-05, "loss": 0.5984, "step": 7936 }, { "epoch": 0.2438177740914816, "grad_norm": 0.3509664833545685, "learning_rate": 1.9291474814853565e-05, "loss": 0.6695, "step": 7937 }, { "epoch": 0.24384849322643073, "grad_norm": 0.3614847660064697, "learning_rate": 1.9291296131883198e-05, "loss": 0.5999, "step": 7938 }, { "epoch": 0.2438792123613799, "grad_norm": 0.3268956243991852, "learning_rate": 1.9291117427212362e-05, "loss": 0.5559, "step": 7939 }, { "epoch": 0.24390993149632906, "grad_norm": 0.3502824604511261, "learning_rate": 1.929093870084147e-05, "loss": 0.6513, "step": 7940 }, { "epoch": 0.24394065063127823, "grad_norm": 0.34977295994758606, "learning_rate": 1.9290759952770946e-05, "loss": 0.6179, "step": 7941 }, { "epoch": 0.24397136976622738, "grad_norm": 0.3297165036201477, "learning_rate": 1.92905811830012e-05, "loss": 0.606, "step": 7942 }, { "epoch": 0.24400208890117656, "grad_norm": 0.32554730772972107, "learning_rate": 1.9290402391532655e-05, "loss": 0.5818, "step": 7943 }, { "epoch": 0.2440328080361257, "grad_norm": 0.3472613990306854, "learning_rate": 1.9290223578365724e-05, "loss": 0.6106, "step": 7944 }, { "epoch": 0.24406352717107485, "grad_norm": 0.3511382043361664, "learning_rate": 1.929004474350083e-05, "loss": 0.6763, "step": 7945 }, { "epoch": 0.24409424630602403, "grad_norm": 0.38003870844841003, "learning_rate": 1.9289865886938384e-05, "loss": 0.5579, "step": 7946 }, { "epoch": 0.24412496544097317, "grad_norm": 0.4087676405906677, "learning_rate": 1.9289687008678812e-05, "loss": 0.6161, "step": 7947 }, { "epoch": 0.24415568457592235, "grad_norm": 0.3422110974788666, "learning_rate": 1.928950810872252e-05, "loss": 0.5713, "step": 7948 }, { "epoch": 0.2441864037108715, "grad_norm": 0.33817535638809204, "learning_rate": 1.9289329187069938e-05, "loss": 0.5698, "step": 7949 }, { "epoch": 0.24421712284582067, "grad_norm": 0.36954671144485474, "learning_rate": 1.928915024372148e-05, "loss": 0.6395, "step": 7950 }, { "epoch": 0.24424784198076982, "grad_norm": 0.3292945623397827, "learning_rate": 1.928897127867756e-05, "loss": 0.5282, "step": 7951 }, { "epoch": 0.244278561115719, "grad_norm": 0.42206960916519165, "learning_rate": 1.92887922919386e-05, "loss": 0.6059, "step": 7952 }, { "epoch": 0.24430928025066814, "grad_norm": 0.34441885352134705, "learning_rate": 1.9288613283505017e-05, "loss": 0.5799, "step": 7953 }, { "epoch": 0.2443399993856173, "grad_norm": 0.32794514298439026, "learning_rate": 1.928843425337723e-05, "loss": 0.6667, "step": 7954 }, { "epoch": 0.24437071852056647, "grad_norm": 0.3815188705921173, "learning_rate": 1.9288255201555657e-05, "loss": 0.5647, "step": 7955 }, { "epoch": 0.2444014376555156, "grad_norm": 0.36957207322120667, "learning_rate": 1.9288076128040712e-05, "loss": 0.5309, "step": 7956 }, { "epoch": 0.2444321567904648, "grad_norm": 0.3602535128593445, "learning_rate": 1.9287897032832817e-05, "loss": 0.5893, "step": 7957 }, { "epoch": 0.24446287592541394, "grad_norm": 0.33680954575538635, "learning_rate": 1.928771791593239e-05, "loss": 0.5443, "step": 7958 }, { "epoch": 0.2444935950603631, "grad_norm": 0.34416428208351135, "learning_rate": 1.928753877733985e-05, "loss": 0.5461, "step": 7959 }, { "epoch": 0.24452431419531226, "grad_norm": 0.34001973271369934, "learning_rate": 1.9287359617055617e-05, "loss": 0.6127, "step": 7960 }, { "epoch": 0.2445550333302614, "grad_norm": 0.3661879301071167, "learning_rate": 1.9287180435080102e-05, "loss": 0.5246, "step": 7961 }, { "epoch": 0.24458575246521058, "grad_norm": 0.3917309641838074, "learning_rate": 1.9287001231413734e-05, "loss": 0.552, "step": 7962 }, { "epoch": 0.24461647160015973, "grad_norm": 0.3918893039226532, "learning_rate": 1.928682200605692e-05, "loss": 0.5524, "step": 7963 }, { "epoch": 0.2446471907351089, "grad_norm": 0.3432528078556061, "learning_rate": 1.9286642759010087e-05, "loss": 0.6223, "step": 7964 }, { "epoch": 0.24467790987005805, "grad_norm": 0.3371836543083191, "learning_rate": 1.9286463490273655e-05, "loss": 0.5581, "step": 7965 }, { "epoch": 0.24470862900500723, "grad_norm": 0.3771596848964691, "learning_rate": 1.9286284199848034e-05, "loss": 0.6633, "step": 7966 }, { "epoch": 0.24473934813995638, "grad_norm": 0.31140148639678955, "learning_rate": 1.928610488773365e-05, "loss": 0.4395, "step": 7967 }, { "epoch": 0.24477006727490555, "grad_norm": 0.36741334199905396, "learning_rate": 1.9285925553930917e-05, "loss": 0.6061, "step": 7968 }, { "epoch": 0.2448007864098547, "grad_norm": 0.4739916920661926, "learning_rate": 1.928574619844026e-05, "loss": 0.5334, "step": 7969 }, { "epoch": 0.24483150554480385, "grad_norm": 0.4568001925945282, "learning_rate": 1.9285566821262093e-05, "loss": 0.4924, "step": 7970 }, { "epoch": 0.24486222467975302, "grad_norm": 0.33018478751182556, "learning_rate": 1.9285387422396836e-05, "loss": 0.5593, "step": 7971 }, { "epoch": 0.24489294381470217, "grad_norm": 0.4059334099292755, "learning_rate": 1.9285208001844907e-05, "loss": 0.5732, "step": 7972 }, { "epoch": 0.24492366294965134, "grad_norm": 0.3718136250972748, "learning_rate": 1.9285028559606725e-05, "loss": 0.5941, "step": 7973 }, { "epoch": 0.2449543820846005, "grad_norm": 0.44362252950668335, "learning_rate": 1.9284849095682715e-05, "loss": 0.6314, "step": 7974 }, { "epoch": 0.24498510121954967, "grad_norm": 0.3230489194393158, "learning_rate": 1.928466961007329e-05, "loss": 0.5835, "step": 7975 }, { "epoch": 0.24501582035449881, "grad_norm": 0.45805832743644714, "learning_rate": 1.9284490102778867e-05, "loss": 0.5668, "step": 7976 }, { "epoch": 0.245046539489448, "grad_norm": 0.7389804124832153, "learning_rate": 1.9284310573799873e-05, "loss": 0.591, "step": 7977 }, { "epoch": 0.24507725862439714, "grad_norm": 0.36947330832481384, "learning_rate": 1.9284131023136724e-05, "loss": 0.6585, "step": 7978 }, { "epoch": 0.24510797775934628, "grad_norm": 0.3818102478981018, "learning_rate": 1.9283951450789836e-05, "loss": 0.594, "step": 7979 }, { "epoch": 0.24513869689429546, "grad_norm": 0.3317219316959381, "learning_rate": 1.928377185675963e-05, "loss": 0.5618, "step": 7980 }, { "epoch": 0.2451694160292446, "grad_norm": 0.3449431359767914, "learning_rate": 1.9283592241046526e-05, "loss": 0.5362, "step": 7981 }, { "epoch": 0.24520013516419378, "grad_norm": 0.3423006534576416, "learning_rate": 1.9283412603650947e-05, "loss": 0.6337, "step": 7982 }, { "epoch": 0.24523085429914293, "grad_norm": 0.3555092513561249, "learning_rate": 1.928323294457331e-05, "loss": 0.5494, "step": 7983 }, { "epoch": 0.2452615734340921, "grad_norm": 0.33041542768478394, "learning_rate": 1.9283053263814032e-05, "loss": 0.5987, "step": 7984 }, { "epoch": 0.24529229256904125, "grad_norm": 0.3063499629497528, "learning_rate": 1.9282873561373536e-05, "loss": 0.6007, "step": 7985 }, { "epoch": 0.24532301170399043, "grad_norm": 0.35881417989730835, "learning_rate": 1.928269383725224e-05, "loss": 0.5529, "step": 7986 }, { "epoch": 0.24535373083893958, "grad_norm": 0.39319562911987305, "learning_rate": 1.9282514091450565e-05, "loss": 0.6086, "step": 7987 }, { "epoch": 0.24538444997388872, "grad_norm": 0.34462374448776245, "learning_rate": 1.9282334323968926e-05, "loss": 0.5773, "step": 7988 }, { "epoch": 0.2454151691088379, "grad_norm": 1.8583154678344727, "learning_rate": 1.928215453480775e-05, "loss": 0.5709, "step": 7989 }, { "epoch": 0.24544588824378705, "grad_norm": 0.3400976359844208, "learning_rate": 1.9281974723967457e-05, "loss": 0.6145, "step": 7990 }, { "epoch": 0.24547660737873622, "grad_norm": 0.35774537920951843, "learning_rate": 1.928179489144846e-05, "loss": 0.5936, "step": 7991 }, { "epoch": 0.24550732651368537, "grad_norm": 0.33753687143325806, "learning_rate": 1.9281615037251184e-05, "loss": 0.5449, "step": 7992 }, { "epoch": 0.24553804564863455, "grad_norm": 0.3430008292198181, "learning_rate": 1.9281435161376047e-05, "loss": 0.6033, "step": 7993 }, { "epoch": 0.2455687647835837, "grad_norm": 0.3242656886577606, "learning_rate": 1.928125526382347e-05, "loss": 0.5093, "step": 7994 }, { "epoch": 0.24559948391853284, "grad_norm": 0.3271774649620056, "learning_rate": 1.9281075344593873e-05, "loss": 0.5723, "step": 7995 }, { "epoch": 0.24563020305348202, "grad_norm": 0.3533971309661865, "learning_rate": 1.9280895403687676e-05, "loss": 0.5809, "step": 7996 }, { "epoch": 0.24566092218843116, "grad_norm": 0.3151695132255554, "learning_rate": 1.9280715441105303e-05, "loss": 0.5115, "step": 7997 }, { "epoch": 0.24569164132338034, "grad_norm": 0.3271646499633789, "learning_rate": 1.9280535456847168e-05, "loss": 0.6286, "step": 7998 }, { "epoch": 0.2457223604583295, "grad_norm": 0.4105626344680786, "learning_rate": 1.9280355450913696e-05, "loss": 0.5976, "step": 7999 }, { "epoch": 0.24575307959327866, "grad_norm": 0.36736223101615906, "learning_rate": 1.9280175423305302e-05, "loss": 0.5329, "step": 8000 }, { "epoch": 0.2457837987282278, "grad_norm": 0.35020485520362854, "learning_rate": 1.9279995374022412e-05, "loss": 0.593, "step": 8001 }, { "epoch": 0.24581451786317698, "grad_norm": 0.5364945530891418, "learning_rate": 1.9279815303065447e-05, "loss": 0.5986, "step": 8002 }, { "epoch": 0.24584523699812613, "grad_norm": 0.40308621525764465, "learning_rate": 1.9279635210434822e-05, "loss": 0.612, "step": 8003 }, { "epoch": 0.24587595613307528, "grad_norm": 0.32339322566986084, "learning_rate": 1.9279455096130963e-05, "loss": 0.5461, "step": 8004 }, { "epoch": 0.24590667526802445, "grad_norm": 0.3269807696342468, "learning_rate": 1.9279274960154288e-05, "loss": 0.6302, "step": 8005 }, { "epoch": 0.2459373944029736, "grad_norm": 0.38802364468574524, "learning_rate": 1.9279094802505218e-05, "loss": 0.6274, "step": 8006 }, { "epoch": 0.24596811353792278, "grad_norm": 0.334692120552063, "learning_rate": 1.9278914623184173e-05, "loss": 0.5446, "step": 8007 }, { "epoch": 0.24599883267287193, "grad_norm": 0.40027111768722534, "learning_rate": 1.9278734422191577e-05, "loss": 0.6228, "step": 8008 }, { "epoch": 0.2460295518078211, "grad_norm": 0.37230971455574036, "learning_rate": 1.927855419952785e-05, "loss": 0.5942, "step": 8009 }, { "epoch": 0.24606027094277025, "grad_norm": 0.3404752314090729, "learning_rate": 1.9278373955193408e-05, "loss": 0.5685, "step": 8010 }, { "epoch": 0.24609099007771942, "grad_norm": 0.3528372347354889, "learning_rate": 1.9278193689188675e-05, "loss": 0.5985, "step": 8011 }, { "epoch": 0.24612170921266857, "grad_norm": 0.3204597234725952, "learning_rate": 1.9278013401514078e-05, "loss": 0.5896, "step": 8012 }, { "epoch": 0.24615242834761772, "grad_norm": 0.3132797181606293, "learning_rate": 1.9277833092170026e-05, "loss": 0.5549, "step": 8013 }, { "epoch": 0.2461831474825669, "grad_norm": 0.3895164132118225, "learning_rate": 1.9277652761156955e-05, "loss": 0.5902, "step": 8014 }, { "epoch": 0.24621386661751604, "grad_norm": 2.027799367904663, "learning_rate": 1.9277472408475272e-05, "loss": 0.6096, "step": 8015 }, { "epoch": 0.24624458575246522, "grad_norm": 0.36613667011260986, "learning_rate": 1.9277292034125406e-05, "loss": 0.6269, "step": 8016 }, { "epoch": 0.24627530488741436, "grad_norm": 0.3338070511817932, "learning_rate": 1.9277111638107774e-05, "loss": 0.5991, "step": 8017 }, { "epoch": 0.24630602402236354, "grad_norm": 0.3337520360946655, "learning_rate": 1.92769312204228e-05, "loss": 0.5579, "step": 8018 }, { "epoch": 0.2463367431573127, "grad_norm": 0.35714322328567505, "learning_rate": 1.927675078107091e-05, "loss": 0.581, "step": 8019 }, { "epoch": 0.24636746229226186, "grad_norm": 0.4503204822540283, "learning_rate": 1.927657032005252e-05, "loss": 0.6275, "step": 8020 }, { "epoch": 0.246398181427211, "grad_norm": 0.3484928607940674, "learning_rate": 1.9276389837368048e-05, "loss": 0.5655, "step": 8021 }, { "epoch": 0.24642890056216016, "grad_norm": 0.33261099457740784, "learning_rate": 1.927620933301792e-05, "loss": 0.6292, "step": 8022 }, { "epoch": 0.24645961969710933, "grad_norm": 0.33663511276245117, "learning_rate": 1.9276028807002563e-05, "loss": 0.669, "step": 8023 }, { "epoch": 0.24649033883205848, "grad_norm": 0.3373728096485138, "learning_rate": 1.9275848259322387e-05, "loss": 0.5407, "step": 8024 }, { "epoch": 0.24652105796700766, "grad_norm": 0.3655604422092438, "learning_rate": 1.927566768997782e-05, "loss": 0.5922, "step": 8025 }, { "epoch": 0.2465517771019568, "grad_norm": 0.34678196907043457, "learning_rate": 1.9275487098969288e-05, "loss": 0.6197, "step": 8026 }, { "epoch": 0.24658249623690598, "grad_norm": 0.43887415528297424, "learning_rate": 1.9275306486297202e-05, "loss": 0.7002, "step": 8027 }, { "epoch": 0.24661321537185513, "grad_norm": 0.348991334438324, "learning_rate": 1.9275125851961994e-05, "loss": 0.5828, "step": 8028 }, { "epoch": 0.2466439345068043, "grad_norm": 0.32087478041648865, "learning_rate": 1.927494519596408e-05, "loss": 0.565, "step": 8029 }, { "epoch": 0.24667465364175345, "grad_norm": 0.3426953852176666, "learning_rate": 1.9274764518303884e-05, "loss": 0.582, "step": 8030 }, { "epoch": 0.2467053727767026, "grad_norm": 0.4904937446117401, "learning_rate": 1.927458381898183e-05, "loss": 0.5104, "step": 8031 }, { "epoch": 0.24673609191165177, "grad_norm": 0.3689292073249817, "learning_rate": 1.9274403097998335e-05, "loss": 0.5955, "step": 8032 }, { "epoch": 0.24676681104660092, "grad_norm": 0.38509637117385864, "learning_rate": 1.9274222355353824e-05, "loss": 0.5392, "step": 8033 }, { "epoch": 0.2467975301815501, "grad_norm": 0.41921335458755493, "learning_rate": 1.9274041591048722e-05, "loss": 0.6243, "step": 8034 }, { "epoch": 0.24682824931649924, "grad_norm": 0.3535846173763275, "learning_rate": 1.9273860805083443e-05, "loss": 0.6042, "step": 8035 }, { "epoch": 0.24685896845144842, "grad_norm": 0.3520556390285492, "learning_rate": 1.927367999745842e-05, "loss": 0.5623, "step": 8036 }, { "epoch": 0.24688968758639757, "grad_norm": 0.31558355689048767, "learning_rate": 1.9273499168174067e-05, "loss": 0.653, "step": 8037 }, { "epoch": 0.2469204067213467, "grad_norm": 0.44058355689048767, "learning_rate": 1.927331831723081e-05, "loss": 0.5983, "step": 8038 }, { "epoch": 0.2469511258562959, "grad_norm": 0.5181899666786194, "learning_rate": 1.927313744462907e-05, "loss": 0.6306, "step": 8039 }, { "epoch": 0.24698184499124504, "grad_norm": 0.3423907160758972, "learning_rate": 1.9272956550369266e-05, "loss": 0.5485, "step": 8040 }, { "epoch": 0.2470125641261942, "grad_norm": 0.36887413263320923, "learning_rate": 1.927277563445183e-05, "loss": 0.5834, "step": 8041 }, { "epoch": 0.24704328326114336, "grad_norm": 0.35967332124710083, "learning_rate": 1.9272594696877176e-05, "loss": 0.5823, "step": 8042 }, { "epoch": 0.24707400239609253, "grad_norm": 0.4522888958454132, "learning_rate": 1.9272413737645728e-05, "loss": 0.6946, "step": 8043 }, { "epoch": 0.24710472153104168, "grad_norm": 0.3292714059352875, "learning_rate": 1.9272232756757913e-05, "loss": 0.5188, "step": 8044 }, { "epoch": 0.24713544066599086, "grad_norm": 0.34779655933380127, "learning_rate": 1.9272051754214153e-05, "loss": 0.5836, "step": 8045 }, { "epoch": 0.24716615980094, "grad_norm": 0.3765181601047516, "learning_rate": 1.9271870730014863e-05, "loss": 0.5761, "step": 8046 }, { "epoch": 0.24719687893588915, "grad_norm": 0.34081602096557617, "learning_rate": 1.9271689684160475e-05, "loss": 0.5028, "step": 8047 }, { "epoch": 0.24722759807083833, "grad_norm": 0.34838491678237915, "learning_rate": 1.9271508616651406e-05, "loss": 0.5435, "step": 8048 }, { "epoch": 0.24725831720578748, "grad_norm": 0.5036017894744873, "learning_rate": 1.927132752748808e-05, "loss": 0.5967, "step": 8049 }, { "epoch": 0.24728903634073665, "grad_norm": 0.3305007219314575, "learning_rate": 1.9271146416670924e-05, "loss": 0.5538, "step": 8050 }, { "epoch": 0.2473197554756858, "grad_norm": 0.34508565068244934, "learning_rate": 1.927096528420036e-05, "loss": 0.5912, "step": 8051 }, { "epoch": 0.24735047461063497, "grad_norm": 0.3657954931259155, "learning_rate": 1.9270784130076805e-05, "loss": 0.5858, "step": 8052 }, { "epoch": 0.24738119374558412, "grad_norm": 0.34513232111930847, "learning_rate": 1.9270602954300692e-05, "loss": 0.6318, "step": 8053 }, { "epoch": 0.2474119128805333, "grad_norm": 0.3303806185722351, "learning_rate": 1.927042175687243e-05, "loss": 0.5491, "step": 8054 }, { "epoch": 0.24744263201548244, "grad_norm": 0.3178413510322571, "learning_rate": 1.9270240537792457e-05, "loss": 0.5779, "step": 8055 }, { "epoch": 0.2474733511504316, "grad_norm": 0.3403319716453552, "learning_rate": 1.927005929706119e-05, "loss": 0.5677, "step": 8056 }, { "epoch": 0.24750407028538077, "grad_norm": 0.3283713757991791, "learning_rate": 1.9269878034679046e-05, "loss": 0.6268, "step": 8057 }, { "epoch": 0.24753478942032991, "grad_norm": 0.37047335505485535, "learning_rate": 1.9269696750646458e-05, "loss": 0.5428, "step": 8058 }, { "epoch": 0.2475655085552791, "grad_norm": 0.4081445634365082, "learning_rate": 1.9269515444963846e-05, "loss": 0.5897, "step": 8059 }, { "epoch": 0.24759622769022824, "grad_norm": 0.2964821457862854, "learning_rate": 1.9269334117631634e-05, "loss": 0.5265, "step": 8060 }, { "epoch": 0.2476269468251774, "grad_norm": 0.39377403259277344, "learning_rate": 1.926915276865024e-05, "loss": 0.6053, "step": 8061 }, { "epoch": 0.24765766596012656, "grad_norm": 0.34116706252098083, "learning_rate": 1.92689713980201e-05, "loss": 0.5968, "step": 8062 }, { "epoch": 0.24768838509507574, "grad_norm": 0.3253757655620575, "learning_rate": 1.9268790005741627e-05, "loss": 0.6742, "step": 8063 }, { "epoch": 0.24771910423002488, "grad_norm": 0.3373892605304718, "learning_rate": 1.9268608591815245e-05, "loss": 0.6066, "step": 8064 }, { "epoch": 0.24774982336497403, "grad_norm": 0.35877639055252075, "learning_rate": 1.9268427156241384e-05, "loss": 0.5821, "step": 8065 }, { "epoch": 0.2477805424999232, "grad_norm": 0.3671904504299164, "learning_rate": 1.926824569902046e-05, "loss": 0.7189, "step": 8066 }, { "epoch": 0.24781126163487235, "grad_norm": 0.6446322202682495, "learning_rate": 1.9268064220152903e-05, "loss": 0.5582, "step": 8067 }, { "epoch": 0.24784198076982153, "grad_norm": 0.35551613569259644, "learning_rate": 1.9267882719639134e-05, "loss": 0.5682, "step": 8068 }, { "epoch": 0.24787269990477068, "grad_norm": 0.3259781002998352, "learning_rate": 1.926770119747958e-05, "loss": 0.589, "step": 8069 }, { "epoch": 0.24790341903971985, "grad_norm": 0.3458731770515442, "learning_rate": 1.926751965367466e-05, "loss": 0.6205, "step": 8070 }, { "epoch": 0.247934138174669, "grad_norm": 0.331540584564209, "learning_rate": 1.9267338088224802e-05, "loss": 0.6153, "step": 8071 }, { "epoch": 0.24796485730961815, "grad_norm": 0.3853614628314972, "learning_rate": 1.9267156501130428e-05, "loss": 0.5625, "step": 8072 }, { "epoch": 0.24799557644456732, "grad_norm": 0.3228750228881836, "learning_rate": 1.9266974892391964e-05, "loss": 0.6221, "step": 8073 }, { "epoch": 0.24802629557951647, "grad_norm": 0.32806092500686646, "learning_rate": 1.9266793262009833e-05, "loss": 0.5397, "step": 8074 }, { "epoch": 0.24805701471446565, "grad_norm": 0.4109097421169281, "learning_rate": 1.9266611609984458e-05, "loss": 0.5849, "step": 8075 }, { "epoch": 0.2480877338494148, "grad_norm": 0.3318442106246948, "learning_rate": 1.9266429936316266e-05, "loss": 0.6451, "step": 8076 }, { "epoch": 0.24811845298436397, "grad_norm": 0.3394380807876587, "learning_rate": 1.9266248241005678e-05, "loss": 0.5933, "step": 8077 }, { "epoch": 0.24814917211931312, "grad_norm": 0.3607509732246399, "learning_rate": 1.9266066524053124e-05, "loss": 0.5998, "step": 8078 }, { "epoch": 0.2481798912542623, "grad_norm": 0.3203928768634796, "learning_rate": 1.9265884785459016e-05, "loss": 0.5327, "step": 8079 }, { "epoch": 0.24821061038921144, "grad_norm": 0.3639335036277771, "learning_rate": 1.9265703025223793e-05, "loss": 0.5511, "step": 8080 }, { "epoch": 0.2482413295241606, "grad_norm": 0.3222848176956177, "learning_rate": 1.926552124334787e-05, "loss": 0.6357, "step": 8081 }, { "epoch": 0.24827204865910976, "grad_norm": 0.41348326206207275, "learning_rate": 1.926533943983168e-05, "loss": 0.5995, "step": 8082 }, { "epoch": 0.2483027677940589, "grad_norm": 0.36074647307395935, "learning_rate": 1.926515761467564e-05, "loss": 0.5604, "step": 8083 }, { "epoch": 0.24833348692900808, "grad_norm": 0.36920756101608276, "learning_rate": 1.926497576788018e-05, "loss": 0.6111, "step": 8084 }, { "epoch": 0.24836420606395723, "grad_norm": 0.3723614811897278, "learning_rate": 1.9264793899445716e-05, "loss": 0.5061, "step": 8085 }, { "epoch": 0.2483949251989064, "grad_norm": 0.3587384819984436, "learning_rate": 1.9264612009372685e-05, "loss": 0.5869, "step": 8086 }, { "epoch": 0.24842564433385556, "grad_norm": 0.3501192331314087, "learning_rate": 1.9264430097661503e-05, "loss": 0.6778, "step": 8087 }, { "epoch": 0.24845636346880473, "grad_norm": 0.3487778604030609, "learning_rate": 1.9264248164312597e-05, "loss": 0.5646, "step": 8088 }, { "epoch": 0.24848708260375388, "grad_norm": 0.33425372838974, "learning_rate": 1.9264066209326393e-05, "loss": 0.5742, "step": 8089 }, { "epoch": 0.24851780173870303, "grad_norm": 0.34251436591148376, "learning_rate": 1.9263884232703315e-05, "loss": 0.5847, "step": 8090 }, { "epoch": 0.2485485208736522, "grad_norm": 0.38921821117401123, "learning_rate": 1.9263702234443787e-05, "loss": 0.5603, "step": 8091 }, { "epoch": 0.24857924000860135, "grad_norm": 0.3532021641731262, "learning_rate": 1.926352021454824e-05, "loss": 0.559, "step": 8092 }, { "epoch": 0.24860995914355052, "grad_norm": 0.337065726518631, "learning_rate": 1.926333817301709e-05, "loss": 0.6507, "step": 8093 }, { "epoch": 0.24864067827849967, "grad_norm": 0.35540589690208435, "learning_rate": 1.9263156109850766e-05, "loss": 0.6026, "step": 8094 }, { "epoch": 0.24867139741344885, "grad_norm": 0.3336319029331207, "learning_rate": 1.92629740250497e-05, "loss": 0.5069, "step": 8095 }, { "epoch": 0.248702116548398, "grad_norm": 0.3272562623023987, "learning_rate": 1.9262791918614306e-05, "loss": 0.6157, "step": 8096 }, { "epoch": 0.24873283568334717, "grad_norm": 0.36984437704086304, "learning_rate": 1.9262609790545017e-05, "loss": 0.5245, "step": 8097 }, { "epoch": 0.24876355481829632, "grad_norm": 0.3190847933292389, "learning_rate": 1.9262427640842256e-05, "loss": 0.6199, "step": 8098 }, { "epoch": 0.24879427395324547, "grad_norm": 0.3381030261516571, "learning_rate": 1.9262245469506446e-05, "loss": 0.5898, "step": 8099 }, { "epoch": 0.24882499308819464, "grad_norm": 0.33470797538757324, "learning_rate": 1.9262063276538018e-05, "loss": 0.5338, "step": 8100 }, { "epoch": 0.2488557122231438, "grad_norm": 0.40966424345970154, "learning_rate": 1.926188106193739e-05, "loss": 0.6545, "step": 8101 }, { "epoch": 0.24888643135809296, "grad_norm": 0.3876827657222748, "learning_rate": 1.9261698825704997e-05, "loss": 0.58, "step": 8102 }, { "epoch": 0.2489171504930421, "grad_norm": 0.3821130692958832, "learning_rate": 1.9261516567841254e-05, "loss": 0.6242, "step": 8103 }, { "epoch": 0.2489478696279913, "grad_norm": 0.3381488025188446, "learning_rate": 1.9261334288346594e-05, "loss": 0.6122, "step": 8104 }, { "epoch": 0.24897858876294043, "grad_norm": 0.35091865062713623, "learning_rate": 1.9261151987221444e-05, "loss": 0.6119, "step": 8105 }, { "epoch": 0.2490093078978896, "grad_norm": 0.3124684691429138, "learning_rate": 1.9260969664466226e-05, "loss": 0.571, "step": 8106 }, { "epoch": 0.24904002703283876, "grad_norm": 0.3466022312641144, "learning_rate": 1.9260787320081365e-05, "loss": 0.5008, "step": 8107 }, { "epoch": 0.2490707461677879, "grad_norm": 0.41703495383262634, "learning_rate": 1.926060495406729e-05, "loss": 0.6287, "step": 8108 }, { "epoch": 0.24910146530273708, "grad_norm": 0.31358665227890015, "learning_rate": 1.9260422566424423e-05, "loss": 0.6127, "step": 8109 }, { "epoch": 0.24913218443768623, "grad_norm": 0.3193607032299042, "learning_rate": 1.9260240157153196e-05, "loss": 0.5134, "step": 8110 }, { "epoch": 0.2491629035726354, "grad_norm": 0.2903992533683777, "learning_rate": 1.926005772625403e-05, "loss": 0.5167, "step": 8111 }, { "epoch": 0.24919362270758455, "grad_norm": 0.435953289270401, "learning_rate": 1.925987527372735e-05, "loss": 0.5766, "step": 8112 }, { "epoch": 0.24922434184253373, "grad_norm": 0.32323580980300903, "learning_rate": 1.9259692799573586e-05, "loss": 0.5281, "step": 8113 }, { "epoch": 0.24925506097748287, "grad_norm": 0.36793777346611023, "learning_rate": 1.9259510303793166e-05, "loss": 0.5419, "step": 8114 }, { "epoch": 0.24928578011243202, "grad_norm": 0.36749446392059326, "learning_rate": 1.9259327786386507e-05, "loss": 0.5959, "step": 8115 }, { "epoch": 0.2493164992473812, "grad_norm": 0.3570019006729126, "learning_rate": 1.9259145247354047e-05, "loss": 0.5079, "step": 8116 }, { "epoch": 0.24934721838233034, "grad_norm": 0.37172096967697144, "learning_rate": 1.9258962686696203e-05, "loss": 0.5902, "step": 8117 }, { "epoch": 0.24937793751727952, "grad_norm": 0.3364390432834625, "learning_rate": 1.9258780104413407e-05, "loss": 0.6258, "step": 8118 }, { "epoch": 0.24940865665222867, "grad_norm": 0.4926721155643463, "learning_rate": 1.925859750050608e-05, "loss": 0.5912, "step": 8119 }, { "epoch": 0.24943937578717784, "grad_norm": 0.5522615313529968, "learning_rate": 1.9258414874974657e-05, "loss": 0.5302, "step": 8120 }, { "epoch": 0.249470094922127, "grad_norm": 0.36573806405067444, "learning_rate": 1.9258232227819556e-05, "loss": 0.5822, "step": 8121 }, { "epoch": 0.24950081405707616, "grad_norm": 0.29107245802879333, "learning_rate": 1.9258049559041208e-05, "loss": 0.5452, "step": 8122 }, { "epoch": 0.2495315331920253, "grad_norm": 0.32735809683799744, "learning_rate": 1.9257866868640037e-05, "loss": 0.6313, "step": 8123 }, { "epoch": 0.24956225232697446, "grad_norm": 0.36087432503700256, "learning_rate": 1.9257684156616473e-05, "loss": 0.6364, "step": 8124 }, { "epoch": 0.24959297146192364, "grad_norm": 0.4113445281982422, "learning_rate": 1.925750142297094e-05, "loss": 0.6035, "step": 8125 }, { "epoch": 0.24962369059687278, "grad_norm": 0.3515910506248474, "learning_rate": 1.925731866770387e-05, "loss": 0.5464, "step": 8126 }, { "epoch": 0.24965440973182196, "grad_norm": 0.34351256489753723, "learning_rate": 1.9257135890815682e-05, "loss": 0.5334, "step": 8127 }, { "epoch": 0.2496851288667711, "grad_norm": 0.3360336422920227, "learning_rate": 1.9256953092306808e-05, "loss": 0.5442, "step": 8128 }, { "epoch": 0.24971584800172028, "grad_norm": 0.34417352080345154, "learning_rate": 1.9256770272177673e-05, "loss": 0.6422, "step": 8129 }, { "epoch": 0.24974656713666943, "grad_norm": 0.35187265276908875, "learning_rate": 1.9256587430428705e-05, "loss": 0.5975, "step": 8130 }, { "epoch": 0.2497772862716186, "grad_norm": 0.3715724050998688, "learning_rate": 1.9256404567060327e-05, "loss": 0.5331, "step": 8131 }, { "epoch": 0.24980800540656775, "grad_norm": 0.3819791376590729, "learning_rate": 1.9256221682072974e-05, "loss": 0.5738, "step": 8132 }, { "epoch": 0.2498387245415169, "grad_norm": 0.34458380937576294, "learning_rate": 1.9256038775467066e-05, "loss": 0.5858, "step": 8133 }, { "epoch": 0.24986944367646607, "grad_norm": 0.35639020800590515, "learning_rate": 1.9255855847243035e-05, "loss": 0.5681, "step": 8134 }, { "epoch": 0.24990016281141522, "grad_norm": 0.32901155948638916, "learning_rate": 1.9255672897401307e-05, "loss": 0.5625, "step": 8135 }, { "epoch": 0.2499308819463644, "grad_norm": 0.3390445113182068, "learning_rate": 1.925548992594231e-05, "loss": 0.6273, "step": 8136 }, { "epoch": 0.24996160108131354, "grad_norm": 0.3519945442676544, "learning_rate": 1.9255306932866463e-05, "loss": 0.6017, "step": 8137 }, { "epoch": 0.24999232021626272, "grad_norm": 0.36910974979400635, "learning_rate": 1.9255123918174205e-05, "loss": 0.6381, "step": 8138 }, { "epoch": 0.2500230393512119, "grad_norm": 0.3440738022327423, "learning_rate": 1.925494088186596e-05, "loss": 0.6243, "step": 8139 }, { "epoch": 0.250053758486161, "grad_norm": 0.4196765720844269, "learning_rate": 1.925475782394215e-05, "loss": 0.6034, "step": 8140 }, { "epoch": 0.2500844776211102, "grad_norm": 0.32861560583114624, "learning_rate": 1.9254574744403212e-05, "loss": 0.5149, "step": 8141 }, { "epoch": 0.25011519675605937, "grad_norm": 0.5508907437324524, "learning_rate": 1.9254391643249565e-05, "loss": 0.5646, "step": 8142 }, { "epoch": 0.2501459158910085, "grad_norm": 0.36771056056022644, "learning_rate": 1.925420852048164e-05, "loss": 0.4936, "step": 8143 }, { "epoch": 0.25017663502595766, "grad_norm": 0.3262122869491577, "learning_rate": 1.9254025376099865e-05, "loss": 0.5458, "step": 8144 }, { "epoch": 0.25020735416090684, "grad_norm": 0.32306981086730957, "learning_rate": 1.9253842210104666e-05, "loss": 0.562, "step": 8145 }, { "epoch": 0.250238073295856, "grad_norm": 0.39342984557151794, "learning_rate": 1.9253659022496473e-05, "loss": 0.5247, "step": 8146 }, { "epoch": 0.25026879243080513, "grad_norm": 0.49063554406166077, "learning_rate": 1.9253475813275716e-05, "loss": 0.6629, "step": 8147 }, { "epoch": 0.2502995115657543, "grad_norm": 0.34504005312919617, "learning_rate": 1.9253292582442818e-05, "loss": 0.6076, "step": 8148 }, { "epoch": 0.2503302307007035, "grad_norm": 0.3373686373233795, "learning_rate": 1.9253109329998207e-05, "loss": 0.5541, "step": 8149 }, { "epoch": 0.2503609498356526, "grad_norm": 0.34113234281539917, "learning_rate": 1.9252926055942316e-05, "loss": 0.5859, "step": 8150 }, { "epoch": 0.2503916689706018, "grad_norm": 0.413992315530777, "learning_rate": 1.9252742760275568e-05, "loss": 0.6391, "step": 8151 }, { "epoch": 0.25042238810555095, "grad_norm": 0.37430351972579956, "learning_rate": 1.9252559442998395e-05, "loss": 0.6406, "step": 8152 }, { "epoch": 0.25045310724050013, "grad_norm": 0.398443341255188, "learning_rate": 1.9252376104111225e-05, "loss": 0.6579, "step": 8153 }, { "epoch": 0.25048382637544925, "grad_norm": 0.3596709966659546, "learning_rate": 1.9252192743614476e-05, "loss": 0.521, "step": 8154 }, { "epoch": 0.2505145455103984, "grad_norm": 0.3297251760959625, "learning_rate": 1.9252009361508593e-05, "loss": 0.5414, "step": 8155 }, { "epoch": 0.2505452646453476, "grad_norm": 0.32663118839263916, "learning_rate": 1.9251825957793994e-05, "loss": 0.6162, "step": 8156 }, { "epoch": 0.2505759837802968, "grad_norm": 0.34362348914146423, "learning_rate": 1.9251642532471106e-05, "loss": 0.5415, "step": 8157 }, { "epoch": 0.2506067029152459, "grad_norm": 0.3262234032154083, "learning_rate": 1.9251459085540365e-05, "loss": 0.5697, "step": 8158 }, { "epoch": 0.25063742205019507, "grad_norm": 0.3727857768535614, "learning_rate": 1.9251275617002192e-05, "loss": 0.538, "step": 8159 }, { "epoch": 0.25066814118514424, "grad_norm": 0.34423235058784485, "learning_rate": 1.9251092126857022e-05, "loss": 0.5658, "step": 8160 }, { "epoch": 0.25069886032009336, "grad_norm": 0.36102432012557983, "learning_rate": 1.9250908615105278e-05, "loss": 0.5984, "step": 8161 }, { "epoch": 0.25072957945504254, "grad_norm": 0.3938758671283722, "learning_rate": 1.925072508174739e-05, "loss": 0.6721, "step": 8162 }, { "epoch": 0.2507602985899917, "grad_norm": 0.37073448300361633, "learning_rate": 1.9250541526783787e-05, "loss": 0.6046, "step": 8163 }, { "epoch": 0.2507910177249409, "grad_norm": 0.3660185635089874, "learning_rate": 1.9250357950214903e-05, "loss": 0.6225, "step": 8164 }, { "epoch": 0.25082173685989, "grad_norm": 0.4896341860294342, "learning_rate": 1.9250174352041157e-05, "loss": 0.5518, "step": 8165 }, { "epoch": 0.2508524559948392, "grad_norm": 0.34070372581481934, "learning_rate": 1.9249990732262988e-05, "loss": 0.609, "step": 8166 }, { "epoch": 0.25088317512978836, "grad_norm": 0.8762781620025635, "learning_rate": 1.9249807090880813e-05, "loss": 0.6292, "step": 8167 }, { "epoch": 0.2509138942647375, "grad_norm": 0.34724852442741394, "learning_rate": 1.924962342789507e-05, "loss": 0.5424, "step": 8168 }, { "epoch": 0.25094461339968666, "grad_norm": 0.4010174572467804, "learning_rate": 1.924943974330619e-05, "loss": 0.6579, "step": 8169 }, { "epoch": 0.25097533253463583, "grad_norm": 0.338174045085907, "learning_rate": 1.9249256037114594e-05, "loss": 0.5181, "step": 8170 }, { "epoch": 0.251006051669585, "grad_norm": 0.4042709171772003, "learning_rate": 1.924907230932071e-05, "loss": 0.5493, "step": 8171 }, { "epoch": 0.2510367708045341, "grad_norm": 0.3951626121997833, "learning_rate": 1.924888855992498e-05, "loss": 0.596, "step": 8172 }, { "epoch": 0.2510674899394833, "grad_norm": 0.32995960116386414, "learning_rate": 1.924870478892782e-05, "loss": 0.5485, "step": 8173 }, { "epoch": 0.2510982090744325, "grad_norm": 0.3660335838794708, "learning_rate": 1.9248520996329665e-05, "loss": 0.5911, "step": 8174 }, { "epoch": 0.2511289282093816, "grad_norm": 0.3794623613357544, "learning_rate": 1.9248337182130943e-05, "loss": 0.5955, "step": 8175 }, { "epoch": 0.25115964734433077, "grad_norm": 0.36354729533195496, "learning_rate": 1.9248153346332085e-05, "loss": 0.6718, "step": 8176 }, { "epoch": 0.25119036647927995, "grad_norm": 0.3120976984500885, "learning_rate": 1.9247969488933517e-05, "loss": 0.5982, "step": 8177 }, { "epoch": 0.2512210856142291, "grad_norm": 0.38308849930763245, "learning_rate": 1.924778560993567e-05, "loss": 0.5373, "step": 8178 }, { "epoch": 0.25125180474917824, "grad_norm": 0.3188348710536957, "learning_rate": 1.9247601709338974e-05, "loss": 0.6313, "step": 8179 }, { "epoch": 0.2512825238841274, "grad_norm": 0.4560845196247101, "learning_rate": 1.924741778714386e-05, "loss": 0.6519, "step": 8180 }, { "epoch": 0.2513132430190766, "grad_norm": 0.34158268570899963, "learning_rate": 1.9247233843350757e-05, "loss": 0.5554, "step": 8181 }, { "epoch": 0.25134396215402577, "grad_norm": 0.351362407207489, "learning_rate": 1.924704987796009e-05, "loss": 0.6034, "step": 8182 }, { "epoch": 0.2513746812889749, "grad_norm": 0.35004445910453796, "learning_rate": 1.9246865890972298e-05, "loss": 0.5859, "step": 8183 }, { "epoch": 0.25140540042392406, "grad_norm": 0.33663198351860046, "learning_rate": 1.92466818823878e-05, "loss": 0.6107, "step": 8184 }, { "epoch": 0.25143611955887324, "grad_norm": 0.3563354015350342, "learning_rate": 1.924649785220703e-05, "loss": 0.5915, "step": 8185 }, { "epoch": 0.25146683869382236, "grad_norm": 0.3741229772567749, "learning_rate": 1.9246313800430422e-05, "loss": 0.5685, "step": 8186 }, { "epoch": 0.25149755782877153, "grad_norm": 0.34285444021224976, "learning_rate": 1.92461297270584e-05, "loss": 0.5993, "step": 8187 }, { "epoch": 0.2515282769637207, "grad_norm": 0.33537644147872925, "learning_rate": 1.9245945632091396e-05, "loss": 0.58, "step": 8188 }, { "epoch": 0.2515589960986699, "grad_norm": 0.3187806308269501, "learning_rate": 1.924576151552984e-05, "loss": 0.6219, "step": 8189 }, { "epoch": 0.251589715233619, "grad_norm": 0.3143298625946045, "learning_rate": 1.9245577377374165e-05, "loss": 0.5398, "step": 8190 }, { "epoch": 0.2516204343685682, "grad_norm": 0.33968761563301086, "learning_rate": 1.92453932176248e-05, "loss": 0.673, "step": 8191 }, { "epoch": 0.25165115350351736, "grad_norm": 0.3368196487426758, "learning_rate": 1.9245209036282166e-05, "loss": 0.5897, "step": 8192 }, { "epoch": 0.2516818726384665, "grad_norm": 0.33812272548675537, "learning_rate": 1.9245024833346706e-05, "loss": 0.6024, "step": 8193 }, { "epoch": 0.25171259177341565, "grad_norm": 0.3766481280326843, "learning_rate": 1.924484060881884e-05, "loss": 0.6476, "step": 8194 }, { "epoch": 0.2517433109083648, "grad_norm": 0.36876025795936584, "learning_rate": 1.924465636269901e-05, "loss": 0.6164, "step": 8195 }, { "epoch": 0.251774030043314, "grad_norm": 0.3389958441257477, "learning_rate": 1.9244472094987635e-05, "loss": 0.5464, "step": 8196 }, { "epoch": 0.2518047491782631, "grad_norm": 0.41937723755836487, "learning_rate": 1.924428780568515e-05, "loss": 0.6162, "step": 8197 }, { "epoch": 0.2518354683132123, "grad_norm": 0.33633318543434143, "learning_rate": 1.9244103494791988e-05, "loss": 0.5877, "step": 8198 }, { "epoch": 0.25186618744816147, "grad_norm": 0.3836783468723297, "learning_rate": 1.9243919162308574e-05, "loss": 0.58, "step": 8199 }, { "epoch": 0.25189690658311065, "grad_norm": 0.33981412649154663, "learning_rate": 1.924373480823534e-05, "loss": 0.6595, "step": 8200 }, { "epoch": 0.25192762571805977, "grad_norm": 0.363895982503891, "learning_rate": 1.9243550432572723e-05, "loss": 0.6425, "step": 8201 }, { "epoch": 0.25195834485300894, "grad_norm": 0.36750778555870056, "learning_rate": 1.9243366035321145e-05, "loss": 0.5451, "step": 8202 }, { "epoch": 0.2519890639879581, "grad_norm": 0.3500846028327942, "learning_rate": 1.924318161648104e-05, "loss": 0.6127, "step": 8203 }, { "epoch": 0.25201978312290724, "grad_norm": 1.7660325765609741, "learning_rate": 1.924299717605284e-05, "loss": 0.5728, "step": 8204 }, { "epoch": 0.2520505022578564, "grad_norm": 0.42121273279190063, "learning_rate": 1.9242812714036975e-05, "loss": 0.6251, "step": 8205 }, { "epoch": 0.2520812213928056, "grad_norm": 0.3199688792228699, "learning_rate": 1.9242628230433874e-05, "loss": 0.5838, "step": 8206 }, { "epoch": 0.25211194052775476, "grad_norm": 0.3284425437450409, "learning_rate": 1.9242443725243967e-05, "loss": 0.5443, "step": 8207 }, { "epoch": 0.2521426596627039, "grad_norm": 0.4871874153614044, "learning_rate": 1.924225919846769e-05, "loss": 0.7046, "step": 8208 }, { "epoch": 0.25217337879765306, "grad_norm": 0.40103310346603394, "learning_rate": 1.924207465010547e-05, "loss": 0.6261, "step": 8209 }, { "epoch": 0.25220409793260223, "grad_norm": 0.3049905300140381, "learning_rate": 1.9241890080157742e-05, "loss": 0.6631, "step": 8210 }, { "epoch": 0.25223481706755135, "grad_norm": 0.33946526050567627, "learning_rate": 1.924170548862493e-05, "loss": 0.5361, "step": 8211 }, { "epoch": 0.25226553620250053, "grad_norm": 0.354472815990448, "learning_rate": 1.924152087550747e-05, "loss": 0.5802, "step": 8212 }, { "epoch": 0.2522962553374497, "grad_norm": 0.34480923414230347, "learning_rate": 1.9241336240805795e-05, "loss": 0.5878, "step": 8213 }, { "epoch": 0.2523269744723989, "grad_norm": 0.4066990315914154, "learning_rate": 1.9241151584520334e-05, "loss": 0.5705, "step": 8214 }, { "epoch": 0.252357693607348, "grad_norm": 0.332962304353714, "learning_rate": 1.924096690665152e-05, "loss": 0.5644, "step": 8215 }, { "epoch": 0.2523884127422972, "grad_norm": 0.29440435767173767, "learning_rate": 1.9240782207199778e-05, "loss": 0.5991, "step": 8216 }, { "epoch": 0.25241913187724635, "grad_norm": 0.3963111340999603, "learning_rate": 1.9240597486165543e-05, "loss": 0.6093, "step": 8217 }, { "epoch": 0.25244985101219547, "grad_norm": 0.4008532166481018, "learning_rate": 1.924041274354925e-05, "loss": 0.5303, "step": 8218 }, { "epoch": 0.25248057014714465, "grad_norm": 0.35854944586753845, "learning_rate": 1.9240227979351328e-05, "loss": 0.5988, "step": 8219 }, { "epoch": 0.2525112892820938, "grad_norm": 0.378581702709198, "learning_rate": 1.924004319357221e-05, "loss": 0.5909, "step": 8220 }, { "epoch": 0.252542008417043, "grad_norm": 0.3735653758049011, "learning_rate": 1.923985838621232e-05, "loss": 0.4872, "step": 8221 }, { "epoch": 0.2525727275519921, "grad_norm": 0.4352099895477295, "learning_rate": 1.9239673557272098e-05, "loss": 0.5242, "step": 8222 }, { "epoch": 0.2526034466869413, "grad_norm": 0.3476884961128235, "learning_rate": 1.9239488706751977e-05, "loss": 0.6236, "step": 8223 }, { "epoch": 0.25263416582189047, "grad_norm": 0.3135400712490082, "learning_rate": 1.9239303834652377e-05, "loss": 0.531, "step": 8224 }, { "epoch": 0.25266488495683964, "grad_norm": 0.3418579697608948, "learning_rate": 1.9239118940973746e-05, "loss": 0.567, "step": 8225 }, { "epoch": 0.25269560409178876, "grad_norm": 0.305403470993042, "learning_rate": 1.9238934025716502e-05, "loss": 0.6091, "step": 8226 }, { "epoch": 0.25272632322673794, "grad_norm": 0.3342288136482239, "learning_rate": 1.9238749088881085e-05, "loss": 0.6021, "step": 8227 }, { "epoch": 0.2527570423616871, "grad_norm": 0.32613804936408997, "learning_rate": 1.9238564130467923e-05, "loss": 0.5693, "step": 8228 }, { "epoch": 0.25278776149663623, "grad_norm": 0.3626268208026886, "learning_rate": 1.9238379150477447e-05, "loss": 0.5343, "step": 8229 }, { "epoch": 0.2528184806315854, "grad_norm": 0.33876654505729675, "learning_rate": 1.9238194148910098e-05, "loss": 0.5205, "step": 8230 }, { "epoch": 0.2528491997665346, "grad_norm": 0.3993907570838928, "learning_rate": 1.9238009125766296e-05, "loss": 0.5771, "step": 8231 }, { "epoch": 0.25287991890148376, "grad_norm": 0.3726364076137543, "learning_rate": 1.923782408104648e-05, "loss": 0.6069, "step": 8232 }, { "epoch": 0.2529106380364329, "grad_norm": 0.3255564570426941, "learning_rate": 1.923763901475108e-05, "loss": 0.5806, "step": 8233 }, { "epoch": 0.25294135717138205, "grad_norm": 0.3490222096443176, "learning_rate": 1.9237453926880525e-05, "loss": 0.5741, "step": 8234 }, { "epoch": 0.25297207630633123, "grad_norm": 0.3594990670681, "learning_rate": 1.923726881743526e-05, "loss": 0.5474, "step": 8235 }, { "epoch": 0.25300279544128035, "grad_norm": 0.3388264775276184, "learning_rate": 1.9237083686415703e-05, "loss": 0.5375, "step": 8236 }, { "epoch": 0.2530335145762295, "grad_norm": 0.35153865814208984, "learning_rate": 1.9236898533822295e-05, "loss": 0.5719, "step": 8237 }, { "epoch": 0.2530642337111787, "grad_norm": 0.34760981798171997, "learning_rate": 1.9236713359655462e-05, "loss": 0.5934, "step": 8238 }, { "epoch": 0.2530949528461279, "grad_norm": 0.3594171106815338, "learning_rate": 1.923652816391564e-05, "loss": 0.5926, "step": 8239 }, { "epoch": 0.253125671981077, "grad_norm": 0.3508000075817108, "learning_rate": 1.9236342946603265e-05, "loss": 0.6361, "step": 8240 }, { "epoch": 0.25315639111602617, "grad_norm": 0.3415468633174896, "learning_rate": 1.923615770771876e-05, "loss": 0.5698, "step": 8241 }, { "epoch": 0.25318711025097534, "grad_norm": 0.3664723038673401, "learning_rate": 1.9235972447262568e-05, "loss": 0.6303, "step": 8242 }, { "epoch": 0.2532178293859245, "grad_norm": 0.4330686926841736, "learning_rate": 1.923578716523512e-05, "loss": 0.6413, "step": 8243 }, { "epoch": 0.25324854852087364, "grad_norm": 0.3995680510997772, "learning_rate": 1.923560186163684e-05, "loss": 0.6848, "step": 8244 }, { "epoch": 0.2532792676558228, "grad_norm": 0.3636017441749573, "learning_rate": 1.923541653646817e-05, "loss": 0.5363, "step": 8245 }, { "epoch": 0.253309986790772, "grad_norm": 0.35383370518684387, "learning_rate": 1.923523118972954e-05, "loss": 0.5979, "step": 8246 }, { "epoch": 0.2533407059257211, "grad_norm": 0.33574268221855164, "learning_rate": 1.9235045821421383e-05, "loss": 0.5797, "step": 8247 }, { "epoch": 0.2533714250606703, "grad_norm": 0.35580554604530334, "learning_rate": 1.923486043154413e-05, "loss": 0.5617, "step": 8248 }, { "epoch": 0.25340214419561946, "grad_norm": 0.3412446081638336, "learning_rate": 1.9234675020098214e-05, "loss": 0.625, "step": 8249 }, { "epoch": 0.25343286333056864, "grad_norm": 0.3826681673526764, "learning_rate": 1.9234489587084072e-05, "loss": 0.5847, "step": 8250 }, { "epoch": 0.25346358246551776, "grad_norm": 0.32245388627052307, "learning_rate": 1.9234304132502134e-05, "loss": 0.5939, "step": 8251 }, { "epoch": 0.25349430160046693, "grad_norm": 0.3382636308670044, "learning_rate": 1.9234118656352833e-05, "loss": 0.5572, "step": 8252 }, { "epoch": 0.2535250207354161, "grad_norm": 0.3398440182209015, "learning_rate": 1.9233933158636605e-05, "loss": 0.5894, "step": 8253 }, { "epoch": 0.2535557398703652, "grad_norm": 0.3638000190258026, "learning_rate": 1.923374763935388e-05, "loss": 0.6477, "step": 8254 }, { "epoch": 0.2535864590053144, "grad_norm": 0.36895161867141724, "learning_rate": 1.923356209850509e-05, "loss": 0.5859, "step": 8255 }, { "epoch": 0.2536171781402636, "grad_norm": 0.30955296754837036, "learning_rate": 1.9233376536090675e-05, "loss": 0.5542, "step": 8256 }, { "epoch": 0.25364789727521275, "grad_norm": 0.36906352639198303, "learning_rate": 1.9233190952111064e-05, "loss": 0.6081, "step": 8257 }, { "epoch": 0.2536786164101619, "grad_norm": 0.3356150686740875, "learning_rate": 1.923300534656669e-05, "loss": 0.6935, "step": 8258 }, { "epoch": 0.25370933554511105, "grad_norm": 0.3574443459510803, "learning_rate": 1.9232819719457983e-05, "loss": 0.6335, "step": 8259 }, { "epoch": 0.2537400546800602, "grad_norm": 0.32649660110473633, "learning_rate": 1.9232634070785384e-05, "loss": 0.5708, "step": 8260 }, { "epoch": 0.25377077381500934, "grad_norm": 0.3734080493450165, "learning_rate": 1.9232448400549325e-05, "loss": 0.5671, "step": 8261 }, { "epoch": 0.2538014929499585, "grad_norm": 0.325977623462677, "learning_rate": 1.9232262708750236e-05, "loss": 0.6072, "step": 8262 }, { "epoch": 0.2538322120849077, "grad_norm": 0.3491586744785309, "learning_rate": 1.9232076995388555e-05, "loss": 0.6572, "step": 8263 }, { "epoch": 0.25386293121985687, "grad_norm": 0.3682660758495331, "learning_rate": 1.923189126046471e-05, "loss": 0.5596, "step": 8264 }, { "epoch": 0.253893650354806, "grad_norm": 0.3288169205188751, "learning_rate": 1.923170550397914e-05, "loss": 0.5981, "step": 8265 }, { "epoch": 0.25392436948975516, "grad_norm": 0.3317849636077881, "learning_rate": 1.9231519725932277e-05, "loss": 0.5698, "step": 8266 }, { "epoch": 0.25395508862470434, "grad_norm": 0.3438563942909241, "learning_rate": 1.9231333926324556e-05, "loss": 0.5793, "step": 8267 }, { "epoch": 0.2539858077596535, "grad_norm": 0.37212660908699036, "learning_rate": 1.923114810515641e-05, "loss": 0.5829, "step": 8268 }, { "epoch": 0.25401652689460263, "grad_norm": 0.3788805305957794, "learning_rate": 1.923096226242827e-05, "loss": 0.5616, "step": 8269 }, { "epoch": 0.2540472460295518, "grad_norm": 0.3562413156032562, "learning_rate": 1.9230776398140576e-05, "loss": 0.6902, "step": 8270 }, { "epoch": 0.254077965164501, "grad_norm": 0.3653242588043213, "learning_rate": 1.9230590512293757e-05, "loss": 0.616, "step": 8271 }, { "epoch": 0.2541086842994501, "grad_norm": 0.3443053364753723, "learning_rate": 1.923040460488825e-05, "loss": 0.6005, "step": 8272 }, { "epoch": 0.2541394034343993, "grad_norm": 0.3591475188732147, "learning_rate": 1.9230218675924486e-05, "loss": 0.5895, "step": 8273 }, { "epoch": 0.25417012256934846, "grad_norm": 0.549377977848053, "learning_rate": 1.9230032725402904e-05, "loss": 0.5725, "step": 8274 }, { "epoch": 0.25420084170429763, "grad_norm": 0.3990809917449951, "learning_rate": 1.9229846753323936e-05, "loss": 0.4525, "step": 8275 }, { "epoch": 0.25423156083924675, "grad_norm": 0.3375081419944763, "learning_rate": 1.9229660759688018e-05, "loss": 0.575, "step": 8276 }, { "epoch": 0.2542622799741959, "grad_norm": 0.32362768054008484, "learning_rate": 1.922947474449558e-05, "loss": 0.5281, "step": 8277 }, { "epoch": 0.2542929991091451, "grad_norm": 0.3783341348171234, "learning_rate": 1.922928870774706e-05, "loss": 0.5236, "step": 8278 }, { "epoch": 0.2543237182440942, "grad_norm": 0.33719080686569214, "learning_rate": 1.9229102649442892e-05, "loss": 0.6079, "step": 8279 }, { "epoch": 0.2543544373790434, "grad_norm": 0.35688069462776184, "learning_rate": 1.922891656958351e-05, "loss": 0.6576, "step": 8280 }, { "epoch": 0.25438515651399257, "grad_norm": 0.3592953383922577, "learning_rate": 1.922873046816935e-05, "loss": 0.5821, "step": 8281 }, { "epoch": 0.25441587564894175, "grad_norm": 0.39769431948661804, "learning_rate": 1.9228544345200842e-05, "loss": 0.5557, "step": 8282 }, { "epoch": 0.25444659478389087, "grad_norm": 0.36735329031944275, "learning_rate": 1.9228358200678428e-05, "loss": 0.6222, "step": 8283 }, { "epoch": 0.25447731391884004, "grad_norm": 0.3429044485092163, "learning_rate": 1.9228172034602536e-05, "loss": 0.5856, "step": 8284 }, { "epoch": 0.2545080330537892, "grad_norm": 0.36852896213531494, "learning_rate": 1.9227985846973606e-05, "loss": 0.5987, "step": 8285 }, { "epoch": 0.25453875218873834, "grad_norm": 0.34844517707824707, "learning_rate": 1.922779963779207e-05, "loss": 0.5977, "step": 8286 }, { "epoch": 0.2545694713236875, "grad_norm": 0.33246520161628723, "learning_rate": 1.922761340705836e-05, "loss": 0.5393, "step": 8287 }, { "epoch": 0.2546001904586367, "grad_norm": 0.38701027631759644, "learning_rate": 1.922742715477292e-05, "loss": 0.5962, "step": 8288 }, { "epoch": 0.25463090959358586, "grad_norm": 0.5734824538230896, "learning_rate": 1.9227240880936177e-05, "loss": 0.6256, "step": 8289 }, { "epoch": 0.254661628728535, "grad_norm": 0.3410373628139496, "learning_rate": 1.9227054585548566e-05, "loss": 0.5992, "step": 8290 }, { "epoch": 0.25469234786348416, "grad_norm": 0.4268675744533539, "learning_rate": 1.922686826861053e-05, "loss": 0.621, "step": 8291 }, { "epoch": 0.25472306699843333, "grad_norm": 0.4205310344696045, "learning_rate": 1.9226681930122493e-05, "loss": 0.55, "step": 8292 }, { "epoch": 0.2547537861333825, "grad_norm": 0.33097419142723083, "learning_rate": 1.92264955700849e-05, "loss": 0.633, "step": 8293 }, { "epoch": 0.25478450526833163, "grad_norm": 0.33111780881881714, "learning_rate": 1.922630918849818e-05, "loss": 0.5962, "step": 8294 }, { "epoch": 0.2548152244032808, "grad_norm": 0.3373839855194092, "learning_rate": 1.9226122785362774e-05, "loss": 0.6012, "step": 8295 }, { "epoch": 0.25484594353823, "grad_norm": 0.7855027318000793, "learning_rate": 1.922593636067911e-05, "loss": 0.6494, "step": 8296 }, { "epoch": 0.2548766626731791, "grad_norm": 0.32734623551368713, "learning_rate": 1.9225749914447627e-05, "loss": 0.6067, "step": 8297 }, { "epoch": 0.2549073818081283, "grad_norm": 0.33113759756088257, "learning_rate": 1.9225563446668764e-05, "loss": 0.5908, "step": 8298 }, { "epoch": 0.25493810094307745, "grad_norm": 0.35075080394744873, "learning_rate": 1.922537695734295e-05, "loss": 0.5158, "step": 8299 }, { "epoch": 0.2549688200780266, "grad_norm": 0.3536144495010376, "learning_rate": 1.9225190446470626e-05, "loss": 0.5716, "step": 8300 }, { "epoch": 0.25499953921297575, "grad_norm": 0.31192684173583984, "learning_rate": 1.9225003914052225e-05, "loss": 0.5235, "step": 8301 }, { "epoch": 0.2550302583479249, "grad_norm": 0.33229339122772217, "learning_rate": 1.9224817360088182e-05, "loss": 0.6497, "step": 8302 }, { "epoch": 0.2550609774828741, "grad_norm": 0.32523420453071594, "learning_rate": 1.9224630784578936e-05, "loss": 0.5882, "step": 8303 }, { "epoch": 0.2550916966178232, "grad_norm": 0.3026721477508545, "learning_rate": 1.922444418752492e-05, "loss": 0.5084, "step": 8304 }, { "epoch": 0.2551224157527724, "grad_norm": 0.6374772191047668, "learning_rate": 1.922425756892657e-05, "loss": 0.6336, "step": 8305 }, { "epoch": 0.25515313488772157, "grad_norm": 0.37942519783973694, "learning_rate": 1.922407092878432e-05, "loss": 0.555, "step": 8306 }, { "epoch": 0.25518385402267074, "grad_norm": 0.4033357501029968, "learning_rate": 1.9223884267098613e-05, "loss": 0.6241, "step": 8307 }, { "epoch": 0.25521457315761986, "grad_norm": 0.34557223320007324, "learning_rate": 1.922369758386988e-05, "loss": 0.5888, "step": 8308 }, { "epoch": 0.25524529229256904, "grad_norm": 0.3422186076641083, "learning_rate": 1.922351087909855e-05, "loss": 0.5857, "step": 8309 }, { "epoch": 0.2552760114275182, "grad_norm": 0.33873534202575684, "learning_rate": 1.922332415278507e-05, "loss": 0.5518, "step": 8310 }, { "epoch": 0.2553067305624674, "grad_norm": 0.3332017660140991, "learning_rate": 1.9223137404929874e-05, "loss": 0.5898, "step": 8311 }, { "epoch": 0.2553374496974165, "grad_norm": 0.4061068296432495, "learning_rate": 1.9222950635533396e-05, "loss": 0.6671, "step": 8312 }, { "epoch": 0.2553681688323657, "grad_norm": 0.3732234537601471, "learning_rate": 1.922276384459607e-05, "loss": 0.5539, "step": 8313 }, { "epoch": 0.25539888796731486, "grad_norm": 0.4348270893096924, "learning_rate": 1.922257703211834e-05, "loss": 0.6074, "step": 8314 }, { "epoch": 0.255429607102264, "grad_norm": 0.3958265781402588, "learning_rate": 1.9222390198100634e-05, "loss": 0.5926, "step": 8315 }, { "epoch": 0.25546032623721315, "grad_norm": 0.3367861807346344, "learning_rate": 1.9222203342543392e-05, "loss": 0.5369, "step": 8316 }, { "epoch": 0.25549104537216233, "grad_norm": 0.35535067319869995, "learning_rate": 1.9222016465447048e-05, "loss": 0.6254, "step": 8317 }, { "epoch": 0.2555217645071115, "grad_norm": 0.31930115818977356, "learning_rate": 1.922182956681204e-05, "loss": 0.597, "step": 8318 }, { "epoch": 0.2555524836420606, "grad_norm": 0.3558056354522705, "learning_rate": 1.922164264663881e-05, "loss": 0.5817, "step": 8319 }, { "epoch": 0.2555832027770098, "grad_norm": 0.5783113241195679, "learning_rate": 1.9221455704927784e-05, "loss": 0.5993, "step": 8320 }, { "epoch": 0.255613921911959, "grad_norm": 0.3064529597759247, "learning_rate": 1.9221268741679407e-05, "loss": 0.5168, "step": 8321 }, { "epoch": 0.2556446410469081, "grad_norm": 0.39963752031326294, "learning_rate": 1.9221081756894116e-05, "loss": 0.5941, "step": 8322 }, { "epoch": 0.25567536018185727, "grad_norm": 0.3655494451522827, "learning_rate": 1.9220894750572342e-05, "loss": 0.5688, "step": 8323 }, { "epoch": 0.25570607931680644, "grad_norm": 0.3439558744430542, "learning_rate": 1.9220707722714524e-05, "loss": 0.5671, "step": 8324 }, { "epoch": 0.2557367984517556, "grad_norm": 0.32365167140960693, "learning_rate": 1.9220520673321097e-05, "loss": 0.5572, "step": 8325 }, { "epoch": 0.25576751758670474, "grad_norm": 0.3465307354927063, "learning_rate": 1.92203336023925e-05, "loss": 0.6031, "step": 8326 }, { "epoch": 0.2557982367216539, "grad_norm": 0.3484705090522766, "learning_rate": 1.9220146509929174e-05, "loss": 0.5351, "step": 8327 }, { "epoch": 0.2558289558566031, "grad_norm": 0.3507540225982666, "learning_rate": 1.9219959395931548e-05, "loss": 0.5553, "step": 8328 }, { "epoch": 0.2558596749915522, "grad_norm": 0.3864234983921051, "learning_rate": 1.9219772260400068e-05, "loss": 0.6467, "step": 8329 }, { "epoch": 0.2558903941265014, "grad_norm": 0.35561054944992065, "learning_rate": 1.921958510333516e-05, "loss": 0.5891, "step": 8330 }, { "epoch": 0.25592111326145056, "grad_norm": 0.3526415228843689, "learning_rate": 1.921939792473727e-05, "loss": 0.4987, "step": 8331 }, { "epoch": 0.25595183239639974, "grad_norm": 0.38084927201271057, "learning_rate": 1.9219210724606835e-05, "loss": 0.5843, "step": 8332 }, { "epoch": 0.25598255153134886, "grad_norm": 0.3239787220954895, "learning_rate": 1.9219023502944284e-05, "loss": 0.5454, "step": 8333 }, { "epoch": 0.25601327066629803, "grad_norm": 0.35937100648880005, "learning_rate": 1.9218836259750065e-05, "loss": 0.5933, "step": 8334 }, { "epoch": 0.2560439898012472, "grad_norm": 0.7028899788856506, "learning_rate": 1.9218648995024606e-05, "loss": 0.6053, "step": 8335 }, { "epoch": 0.2560747089361964, "grad_norm": 0.541114330291748, "learning_rate": 1.921846170876835e-05, "loss": 0.5372, "step": 8336 }, { "epoch": 0.2561054280711455, "grad_norm": 0.332061231136322, "learning_rate": 1.9218274400981732e-05, "loss": 0.5424, "step": 8337 }, { "epoch": 0.2561361472060947, "grad_norm": 0.3522573411464691, "learning_rate": 1.9218087071665194e-05, "loss": 0.6242, "step": 8338 }, { "epoch": 0.25616686634104385, "grad_norm": 0.3371701240539551, "learning_rate": 1.9217899720819165e-05, "loss": 0.5265, "step": 8339 }, { "epoch": 0.256197585475993, "grad_norm": 0.3522646129131317, "learning_rate": 1.921771234844409e-05, "loss": 0.5393, "step": 8340 }, { "epoch": 0.25622830461094215, "grad_norm": 0.37950047850608826, "learning_rate": 1.9217524954540404e-05, "loss": 0.6014, "step": 8341 }, { "epoch": 0.2562590237458913, "grad_norm": 0.37030550837516785, "learning_rate": 1.921733753910854e-05, "loss": 0.593, "step": 8342 }, { "epoch": 0.2562897428808405, "grad_norm": 0.44248276948928833, "learning_rate": 1.9217150102148947e-05, "loss": 0.5534, "step": 8343 }, { "epoch": 0.2563204620157896, "grad_norm": 0.3368872404098511, "learning_rate": 1.9216962643662053e-05, "loss": 0.6871, "step": 8344 }, { "epoch": 0.2563511811507388, "grad_norm": 0.38319915533065796, "learning_rate": 1.92167751636483e-05, "loss": 0.5227, "step": 8345 }, { "epoch": 0.25638190028568797, "grad_norm": 0.3318663239479065, "learning_rate": 1.9216587662108127e-05, "loss": 0.5699, "step": 8346 }, { "epoch": 0.2564126194206371, "grad_norm": 0.3618440330028534, "learning_rate": 1.9216400139041967e-05, "loss": 0.5567, "step": 8347 }, { "epoch": 0.25644333855558626, "grad_norm": 0.37384894490242004, "learning_rate": 1.921621259445026e-05, "loss": 0.6126, "step": 8348 }, { "epoch": 0.25647405769053544, "grad_norm": 1.0549741983413696, "learning_rate": 1.9216025028333444e-05, "loss": 0.6185, "step": 8349 }, { "epoch": 0.2565047768254846, "grad_norm": 0.3201773166656494, "learning_rate": 1.9215837440691962e-05, "loss": 0.5132, "step": 8350 }, { "epoch": 0.25653549596043373, "grad_norm": 0.3349142372608185, "learning_rate": 1.9215649831526247e-05, "loss": 0.5751, "step": 8351 }, { "epoch": 0.2565662150953829, "grad_norm": 0.39289531111717224, "learning_rate": 1.9215462200836735e-05, "loss": 0.6047, "step": 8352 }, { "epoch": 0.2565969342303321, "grad_norm": 0.4867169260978699, "learning_rate": 1.921527454862387e-05, "loss": 0.5975, "step": 8353 }, { "epoch": 0.25662765336528126, "grad_norm": 0.3449751138687134, "learning_rate": 1.921508687488809e-05, "loss": 0.5648, "step": 8354 }, { "epoch": 0.2566583725002304, "grad_norm": 0.35124698281288147, "learning_rate": 1.921489917962983e-05, "loss": 0.5339, "step": 8355 }, { "epoch": 0.25668909163517956, "grad_norm": 0.4545419216156006, "learning_rate": 1.9214711462849524e-05, "loss": 0.5635, "step": 8356 }, { "epoch": 0.25671981077012873, "grad_norm": 0.35993334650993347, "learning_rate": 1.921452372454762e-05, "loss": 0.5789, "step": 8357 }, { "epoch": 0.25675052990507785, "grad_norm": 0.3508540987968445, "learning_rate": 1.9214335964724552e-05, "loss": 0.523, "step": 8358 }, { "epoch": 0.256781249040027, "grad_norm": 0.35137107968330383, "learning_rate": 1.921414818338076e-05, "loss": 0.5821, "step": 8359 }, { "epoch": 0.2568119681749762, "grad_norm": 0.4426778554916382, "learning_rate": 1.921396038051668e-05, "loss": 0.5709, "step": 8360 }, { "epoch": 0.2568426873099254, "grad_norm": 0.3565235137939453, "learning_rate": 1.9213772556132752e-05, "loss": 0.5895, "step": 8361 }, { "epoch": 0.2568734064448745, "grad_norm": 0.31514570116996765, "learning_rate": 1.9213584710229413e-05, "loss": 0.6123, "step": 8362 }, { "epoch": 0.2569041255798237, "grad_norm": 0.3502335250377655, "learning_rate": 1.9213396842807107e-05, "loss": 0.5678, "step": 8363 }, { "epoch": 0.25693484471477285, "grad_norm": 0.36160996556282043, "learning_rate": 1.9213208953866268e-05, "loss": 0.6148, "step": 8364 }, { "epoch": 0.25696556384972197, "grad_norm": 0.33181634545326233, "learning_rate": 1.9213021043407336e-05, "loss": 0.5723, "step": 8365 }, { "epoch": 0.25699628298467114, "grad_norm": 0.6849623322486877, "learning_rate": 1.921283311143075e-05, "loss": 0.5778, "step": 8366 }, { "epoch": 0.2570270021196203, "grad_norm": 0.3445869982242584, "learning_rate": 1.921264515793695e-05, "loss": 0.6034, "step": 8367 }, { "epoch": 0.2570577212545695, "grad_norm": 0.35210371017456055, "learning_rate": 1.921245718292637e-05, "loss": 0.5991, "step": 8368 }, { "epoch": 0.2570884403895186, "grad_norm": 0.33658111095428467, "learning_rate": 1.9212269186399454e-05, "loss": 0.6108, "step": 8369 }, { "epoch": 0.2571191595244678, "grad_norm": 0.4272942543029785, "learning_rate": 1.921208116835664e-05, "loss": 0.631, "step": 8370 }, { "epoch": 0.25714987865941696, "grad_norm": 0.35596874356269836, "learning_rate": 1.9211893128798372e-05, "loss": 0.6175, "step": 8371 }, { "epoch": 0.2571805977943661, "grad_norm": 0.37229210138320923, "learning_rate": 1.921170506772508e-05, "loss": 0.635, "step": 8372 }, { "epoch": 0.25721131692931526, "grad_norm": 0.36065563559532166, "learning_rate": 1.9211516985137208e-05, "loss": 0.6317, "step": 8373 }, { "epoch": 0.25724203606426443, "grad_norm": 0.3491560220718384, "learning_rate": 1.9211328881035194e-05, "loss": 0.6058, "step": 8374 }, { "epoch": 0.2572727551992136, "grad_norm": 0.316683828830719, "learning_rate": 1.921114075541948e-05, "loss": 0.593, "step": 8375 }, { "epoch": 0.25730347433416273, "grad_norm": 0.3620360195636749, "learning_rate": 1.9210952608290502e-05, "loss": 0.5651, "step": 8376 }, { "epoch": 0.2573341934691119, "grad_norm": 0.36430102586746216, "learning_rate": 1.92107644396487e-05, "loss": 0.6605, "step": 8377 }, { "epoch": 0.2573649126040611, "grad_norm": 0.3287462294101715, "learning_rate": 1.9210576249494517e-05, "loss": 0.5726, "step": 8378 }, { "epoch": 0.25739563173901026, "grad_norm": 0.35305526852607727, "learning_rate": 1.921038803782839e-05, "loss": 0.5336, "step": 8379 }, { "epoch": 0.2574263508739594, "grad_norm": 0.34467318654060364, "learning_rate": 1.9210199804650754e-05, "loss": 0.5833, "step": 8380 }, { "epoch": 0.25745707000890855, "grad_norm": 0.3426707983016968, "learning_rate": 1.9210011549962054e-05, "loss": 0.7091, "step": 8381 }, { "epoch": 0.2574877891438577, "grad_norm": 0.3380512595176697, "learning_rate": 1.9209823273762732e-05, "loss": 0.5127, "step": 8382 }, { "epoch": 0.25751850827880685, "grad_norm": 0.3261917531490326, "learning_rate": 1.9209634976053226e-05, "loss": 0.5126, "step": 8383 }, { "epoch": 0.257549227413756, "grad_norm": 0.3352435529232025, "learning_rate": 1.920944665683397e-05, "loss": 0.4357, "step": 8384 }, { "epoch": 0.2575799465487052, "grad_norm": 0.3758123517036438, "learning_rate": 1.920925831610541e-05, "loss": 0.5615, "step": 8385 }, { "epoch": 0.25761066568365437, "grad_norm": 0.3901548981666565, "learning_rate": 1.9209069953867984e-05, "loss": 0.5832, "step": 8386 }, { "epoch": 0.2576413848186035, "grad_norm": 0.39872175455093384, "learning_rate": 1.9208881570122132e-05, "loss": 0.6112, "step": 8387 }, { "epoch": 0.25767210395355267, "grad_norm": 0.36001625657081604, "learning_rate": 1.9208693164868294e-05, "loss": 0.6213, "step": 8388 }, { "epoch": 0.25770282308850184, "grad_norm": 0.32904139161109924, "learning_rate": 1.920850473810691e-05, "loss": 0.4984, "step": 8389 }, { "epoch": 0.25773354222345096, "grad_norm": 0.40487271547317505, "learning_rate": 1.920831628983842e-05, "loss": 0.6284, "step": 8390 }, { "epoch": 0.25776426135840014, "grad_norm": 0.3538924753665924, "learning_rate": 1.9208127820063263e-05, "loss": 0.602, "step": 8391 }, { "epoch": 0.2577949804933493, "grad_norm": 0.3537263870239258, "learning_rate": 1.920793932878188e-05, "loss": 0.5879, "step": 8392 }, { "epoch": 0.2578256996282985, "grad_norm": 0.3580808639526367, "learning_rate": 1.9207750815994715e-05, "loss": 0.6041, "step": 8393 }, { "epoch": 0.2578564187632476, "grad_norm": 0.33015739917755127, "learning_rate": 1.92075622817022e-05, "loss": 0.5588, "step": 8394 }, { "epoch": 0.2578871378981968, "grad_norm": 0.3673814833164215, "learning_rate": 1.9207373725904784e-05, "loss": 0.6002, "step": 8395 }, { "epoch": 0.25791785703314596, "grad_norm": 0.38785138726234436, "learning_rate": 1.9207185148602905e-05, "loss": 0.5237, "step": 8396 }, { "epoch": 0.25794857616809513, "grad_norm": 0.3921777307987213, "learning_rate": 1.9206996549797e-05, "loss": 0.5704, "step": 8397 }, { "epoch": 0.25797929530304425, "grad_norm": 0.6662288308143616, "learning_rate": 1.920680792948751e-05, "loss": 0.5971, "step": 8398 }, { "epoch": 0.25801001443799343, "grad_norm": 0.32414183020591736, "learning_rate": 1.9206619287674878e-05, "loss": 0.4789, "step": 8399 }, { "epoch": 0.2580407335729426, "grad_norm": 0.3465017080307007, "learning_rate": 1.9206430624359544e-05, "loss": 0.5691, "step": 8400 }, { "epoch": 0.2580714527078917, "grad_norm": 0.32809552550315857, "learning_rate": 1.920624193954195e-05, "loss": 0.6133, "step": 8401 }, { "epoch": 0.2581021718428409, "grad_norm": 0.3987266719341278, "learning_rate": 1.9206053233222533e-05, "loss": 0.6697, "step": 8402 }, { "epoch": 0.2581328909777901, "grad_norm": 0.3876710832118988, "learning_rate": 1.9205864505401737e-05, "loss": 0.6622, "step": 8403 }, { "epoch": 0.25816361011273925, "grad_norm": 0.38597238063812256, "learning_rate": 1.920567575608e-05, "loss": 0.6238, "step": 8404 }, { "epoch": 0.25819432924768837, "grad_norm": 0.32934948801994324, "learning_rate": 1.9205486985257766e-05, "loss": 0.5347, "step": 8405 }, { "epoch": 0.25822504838263755, "grad_norm": 0.3313177824020386, "learning_rate": 1.9205298192935472e-05, "loss": 0.6195, "step": 8406 }, { "epoch": 0.2582557675175867, "grad_norm": 0.33913710713386536, "learning_rate": 1.9205109379113565e-05, "loss": 0.5697, "step": 8407 }, { "epoch": 0.25828648665253584, "grad_norm": 0.40206462144851685, "learning_rate": 1.9204920543792477e-05, "loss": 0.5712, "step": 8408 }, { "epoch": 0.258317205787485, "grad_norm": 0.3342077136039734, "learning_rate": 1.920473168697266e-05, "loss": 0.5336, "step": 8409 }, { "epoch": 0.2583479249224342, "grad_norm": 0.33197325468063354, "learning_rate": 1.9204542808654546e-05, "loss": 0.5565, "step": 8410 }, { "epoch": 0.25837864405738337, "grad_norm": 0.43398189544677734, "learning_rate": 1.9204353908838577e-05, "loss": 0.5336, "step": 8411 }, { "epoch": 0.2584093631923325, "grad_norm": 0.36541929841041565, "learning_rate": 1.92041649875252e-05, "loss": 0.6895, "step": 8412 }, { "epoch": 0.25844008232728166, "grad_norm": 0.36581993103027344, "learning_rate": 1.9203976044714855e-05, "loss": 0.6654, "step": 8413 }, { "epoch": 0.25847080146223084, "grad_norm": 0.3536546528339386, "learning_rate": 1.9203787080407976e-05, "loss": 0.5899, "step": 8414 }, { "epoch": 0.25850152059717996, "grad_norm": 0.5512863993644714, "learning_rate": 1.9203598094605013e-05, "loss": 0.6269, "step": 8415 }, { "epoch": 0.25853223973212913, "grad_norm": 0.3745993673801422, "learning_rate": 1.92034090873064e-05, "loss": 0.537, "step": 8416 }, { "epoch": 0.2585629588670783, "grad_norm": 0.3713419735431671, "learning_rate": 1.9203220058512586e-05, "loss": 0.5239, "step": 8417 }, { "epoch": 0.2585936780020275, "grad_norm": 0.371114045381546, "learning_rate": 1.920303100822401e-05, "loss": 0.5597, "step": 8418 }, { "epoch": 0.2586243971369766, "grad_norm": 0.5399904847145081, "learning_rate": 1.9202841936441107e-05, "loss": 0.7273, "step": 8419 }, { "epoch": 0.2586551162719258, "grad_norm": 0.30921491980552673, "learning_rate": 1.9202652843164328e-05, "loss": 0.525, "step": 8420 }, { "epoch": 0.25868583540687495, "grad_norm": 0.37394216656684875, "learning_rate": 1.920246372839411e-05, "loss": 0.5345, "step": 8421 }, { "epoch": 0.25871655454182413, "grad_norm": 0.3397185802459717, "learning_rate": 1.9202274592130894e-05, "loss": 0.535, "step": 8422 }, { "epoch": 0.25874727367677325, "grad_norm": 0.3415963351726532, "learning_rate": 1.9202085434375124e-05, "loss": 0.5212, "step": 8423 }, { "epoch": 0.2587779928117224, "grad_norm": 0.3364271819591522, "learning_rate": 1.920189625512724e-05, "loss": 0.5842, "step": 8424 }, { "epoch": 0.2588087119466716, "grad_norm": 0.34548842906951904, "learning_rate": 1.9201707054387686e-05, "loss": 0.6016, "step": 8425 }, { "epoch": 0.2588394310816207, "grad_norm": 0.43948519229888916, "learning_rate": 1.9201517832156903e-05, "loss": 0.6443, "step": 8426 }, { "epoch": 0.2588701502165699, "grad_norm": 0.3031803071498871, "learning_rate": 1.9201328588435332e-05, "loss": 0.4955, "step": 8427 }, { "epoch": 0.25890086935151907, "grad_norm": 0.4351523518562317, "learning_rate": 1.9201139323223414e-05, "loss": 0.5875, "step": 8428 }, { "epoch": 0.25893158848646824, "grad_norm": 0.32699647545814514, "learning_rate": 1.9200950036521592e-05, "loss": 0.5691, "step": 8429 }, { "epoch": 0.25896230762141736, "grad_norm": 0.31974121928215027, "learning_rate": 1.920076072833031e-05, "loss": 0.6011, "step": 8430 }, { "epoch": 0.25899302675636654, "grad_norm": 0.436801016330719, "learning_rate": 1.920057139865001e-05, "loss": 0.6076, "step": 8431 }, { "epoch": 0.2590237458913157, "grad_norm": 0.39863649010658264, "learning_rate": 1.920038204748113e-05, "loss": 0.4878, "step": 8432 }, { "epoch": 0.25905446502626484, "grad_norm": 0.3513449430465698, "learning_rate": 1.920019267482412e-05, "loss": 0.651, "step": 8433 }, { "epoch": 0.259085184161214, "grad_norm": 0.354182630777359, "learning_rate": 1.9200003280679414e-05, "loss": 0.6561, "step": 8434 }, { "epoch": 0.2591159032961632, "grad_norm": 0.3393734097480774, "learning_rate": 1.9199813865047455e-05, "loss": 0.5759, "step": 8435 }, { "epoch": 0.25914662243111236, "grad_norm": 0.5928172469139099, "learning_rate": 1.9199624427928693e-05, "loss": 0.5985, "step": 8436 }, { "epoch": 0.2591773415660615, "grad_norm": 0.32440996170043945, "learning_rate": 1.9199434969323564e-05, "loss": 0.6021, "step": 8437 }, { "epoch": 0.25920806070101066, "grad_norm": 0.341040700674057, "learning_rate": 1.919924548923251e-05, "loss": 0.5577, "step": 8438 }, { "epoch": 0.25923877983595983, "grad_norm": 0.3697914481163025, "learning_rate": 1.919905598765598e-05, "loss": 0.6025, "step": 8439 }, { "epoch": 0.25926949897090895, "grad_norm": 0.3551967442035675, "learning_rate": 1.9198866464594407e-05, "loss": 0.6382, "step": 8440 }, { "epoch": 0.2593002181058581, "grad_norm": 0.36061891913414, "learning_rate": 1.9198676920048242e-05, "loss": 0.5761, "step": 8441 }, { "epoch": 0.2593309372408073, "grad_norm": 0.4187569320201874, "learning_rate": 1.919848735401793e-05, "loss": 0.7101, "step": 8442 }, { "epoch": 0.2593616563757565, "grad_norm": 0.32394054532051086, "learning_rate": 1.91982977665039e-05, "loss": 0.6386, "step": 8443 }, { "epoch": 0.2593923755107056, "grad_norm": 0.3568401634693146, "learning_rate": 1.9198108157506607e-05, "loss": 0.6404, "step": 8444 }, { "epoch": 0.2594230946456548, "grad_norm": 0.36053451895713806, "learning_rate": 1.919791852702649e-05, "loss": 0.5762, "step": 8445 }, { "epoch": 0.25945381378060395, "grad_norm": 0.38450247049331665, "learning_rate": 1.919772887506399e-05, "loss": 0.5193, "step": 8446 }, { "epoch": 0.2594845329155531, "grad_norm": 0.35586583614349365, "learning_rate": 1.9197539201619554e-05, "loss": 0.5696, "step": 8447 }, { "epoch": 0.25951525205050224, "grad_norm": 0.3618321120738983, "learning_rate": 1.9197349506693622e-05, "loss": 0.5242, "step": 8448 }, { "epoch": 0.2595459711854514, "grad_norm": 0.5853043794631958, "learning_rate": 1.9197159790286637e-05, "loss": 0.5782, "step": 8449 }, { "epoch": 0.2595766903204006, "grad_norm": 0.3509127199649811, "learning_rate": 1.919697005239904e-05, "loss": 0.6589, "step": 8450 }, { "epoch": 0.2596074094553497, "grad_norm": 0.39387160539627075, "learning_rate": 1.9196780293031283e-05, "loss": 0.6226, "step": 8451 }, { "epoch": 0.2596381285902989, "grad_norm": 0.37881380319595337, "learning_rate": 1.9196590512183802e-05, "loss": 0.5628, "step": 8452 }, { "epoch": 0.25966884772524806, "grad_norm": 0.3365399241447449, "learning_rate": 1.919640070985704e-05, "loss": 0.6114, "step": 8453 }, { "epoch": 0.25969956686019724, "grad_norm": 0.36410364508628845, "learning_rate": 1.9196210886051444e-05, "loss": 0.5829, "step": 8454 }, { "epoch": 0.25973028599514636, "grad_norm": 0.3657205402851105, "learning_rate": 1.9196021040767453e-05, "loss": 0.5824, "step": 8455 }, { "epoch": 0.25976100513009553, "grad_norm": 0.4126923084259033, "learning_rate": 1.9195831174005512e-05, "loss": 0.567, "step": 8456 }, { "epoch": 0.2597917242650447, "grad_norm": 0.35808369517326355, "learning_rate": 1.9195641285766065e-05, "loss": 0.5876, "step": 8457 }, { "epoch": 0.25982244339999383, "grad_norm": 0.333223819732666, "learning_rate": 1.919545137604956e-05, "loss": 0.6199, "step": 8458 }, { "epoch": 0.259853162534943, "grad_norm": 0.7553547024726868, "learning_rate": 1.919526144485643e-05, "loss": 0.5869, "step": 8459 }, { "epoch": 0.2598838816698922, "grad_norm": 0.3350219428539276, "learning_rate": 1.9195071492187128e-05, "loss": 0.6632, "step": 8460 }, { "epoch": 0.25991460080484136, "grad_norm": 0.34280940890312195, "learning_rate": 1.9194881518042097e-05, "loss": 0.5472, "step": 8461 }, { "epoch": 0.2599453199397905, "grad_norm": 0.46908918023109436, "learning_rate": 1.9194691522421776e-05, "loss": 0.7192, "step": 8462 }, { "epoch": 0.25997603907473965, "grad_norm": 0.3715907633304596, "learning_rate": 1.919450150532661e-05, "loss": 0.5881, "step": 8463 }, { "epoch": 0.2600067582096888, "grad_norm": 0.3595026135444641, "learning_rate": 1.919431146675704e-05, "loss": 0.5394, "step": 8464 }, { "epoch": 0.260037477344638, "grad_norm": 0.3287903666496277, "learning_rate": 1.919412140671352e-05, "loss": 0.6512, "step": 8465 }, { "epoch": 0.2600681964795871, "grad_norm": 0.39930716156959534, "learning_rate": 1.9193931325196485e-05, "loss": 0.5712, "step": 8466 }, { "epoch": 0.2600989156145363, "grad_norm": 0.34330013394355774, "learning_rate": 1.9193741222206383e-05, "loss": 0.6557, "step": 8467 }, { "epoch": 0.26012963474948547, "grad_norm": 0.6558748483657837, "learning_rate": 1.9193551097743653e-05, "loss": 0.5702, "step": 8468 }, { "epoch": 0.2601603538844346, "grad_norm": 0.31744787096977234, "learning_rate": 1.9193360951808743e-05, "loss": 0.5222, "step": 8469 }, { "epoch": 0.26019107301938377, "grad_norm": 0.3556458055973053, "learning_rate": 1.9193170784402097e-05, "loss": 0.6129, "step": 8470 }, { "epoch": 0.26022179215433294, "grad_norm": 0.33022600412368774, "learning_rate": 1.9192980595524155e-05, "loss": 0.5911, "step": 8471 }, { "epoch": 0.2602525112892821, "grad_norm": 0.45150846242904663, "learning_rate": 1.9192790385175368e-05, "loss": 0.6732, "step": 8472 }, { "epoch": 0.26028323042423124, "grad_norm": 0.33907797932624817, "learning_rate": 1.919260015335618e-05, "loss": 0.6321, "step": 8473 }, { "epoch": 0.2603139495591804, "grad_norm": 0.339253693819046, "learning_rate": 1.919240990006703e-05, "loss": 0.5115, "step": 8474 }, { "epoch": 0.2603446686941296, "grad_norm": 0.3328070342540741, "learning_rate": 1.9192219625308363e-05, "loss": 0.6354, "step": 8475 }, { "epoch": 0.2603753878290787, "grad_norm": 0.3555017113685608, "learning_rate": 1.9192029329080626e-05, "loss": 0.6521, "step": 8476 }, { "epoch": 0.2604061069640279, "grad_norm": 0.41972824931144714, "learning_rate": 1.9191839011384263e-05, "loss": 0.5299, "step": 8477 }, { "epoch": 0.26043682609897706, "grad_norm": 0.33733484148979187, "learning_rate": 1.919164867221972e-05, "loss": 0.5753, "step": 8478 }, { "epoch": 0.26046754523392623, "grad_norm": 0.3684072196483612, "learning_rate": 1.9191458311587435e-05, "loss": 0.5379, "step": 8479 }, { "epoch": 0.26049826436887535, "grad_norm": 0.3225468397140503, "learning_rate": 1.919126792948786e-05, "loss": 0.5347, "step": 8480 }, { "epoch": 0.26052898350382453, "grad_norm": 0.4204122722148895, "learning_rate": 1.919107752592144e-05, "loss": 0.6103, "step": 8481 }, { "epoch": 0.2605597026387737, "grad_norm": 0.3356093764305115, "learning_rate": 1.919088710088861e-05, "loss": 0.6144, "step": 8482 }, { "epoch": 0.2605904217737228, "grad_norm": 0.34645208716392517, "learning_rate": 1.9190696654389823e-05, "loss": 0.5108, "step": 8483 }, { "epoch": 0.260621140908672, "grad_norm": 0.37335851788520813, "learning_rate": 1.9190506186425523e-05, "loss": 0.6211, "step": 8484 }, { "epoch": 0.2606518600436212, "grad_norm": 0.3374592959880829, "learning_rate": 1.9190315696996154e-05, "loss": 0.6119, "step": 8485 }, { "epoch": 0.26068257917857035, "grad_norm": 0.4412243366241455, "learning_rate": 1.919012518610216e-05, "loss": 0.6166, "step": 8486 }, { "epoch": 0.26071329831351947, "grad_norm": 0.31606999039649963, "learning_rate": 1.918993465374399e-05, "loss": 0.5528, "step": 8487 }, { "epoch": 0.26074401744846865, "grad_norm": 0.33653852343559265, "learning_rate": 1.9189744099922085e-05, "loss": 0.6352, "step": 8488 }, { "epoch": 0.2607747365834178, "grad_norm": 0.344862163066864, "learning_rate": 1.918955352463689e-05, "loss": 0.5644, "step": 8489 }, { "epoch": 0.260805455718367, "grad_norm": 0.34384918212890625, "learning_rate": 1.918936292788885e-05, "loss": 0.5499, "step": 8490 }, { "epoch": 0.2608361748533161, "grad_norm": 0.33562541007995605, "learning_rate": 1.9189172309678413e-05, "loss": 0.5977, "step": 8491 }, { "epoch": 0.2608668939882653, "grad_norm": 0.33057430386543274, "learning_rate": 1.918898167000602e-05, "loss": 0.6165, "step": 8492 }, { "epoch": 0.26089761312321447, "grad_norm": 0.39845001697540283, "learning_rate": 1.918879100887212e-05, "loss": 0.6413, "step": 8493 }, { "epoch": 0.2609283322581636, "grad_norm": 0.3605203330516815, "learning_rate": 1.9188600326277157e-05, "loss": 0.5744, "step": 8494 }, { "epoch": 0.26095905139311276, "grad_norm": 0.3813885748386383, "learning_rate": 1.9188409622221576e-05, "loss": 0.6388, "step": 8495 }, { "epoch": 0.26098977052806194, "grad_norm": 0.29842594265937805, "learning_rate": 1.9188218896705822e-05, "loss": 0.5652, "step": 8496 }, { "epoch": 0.2610204896630111, "grad_norm": 0.33796894550323486, "learning_rate": 1.918802814973034e-05, "loss": 0.5524, "step": 8497 }, { "epoch": 0.26105120879796023, "grad_norm": 0.36626482009887695, "learning_rate": 1.918783738129558e-05, "loss": 0.5549, "step": 8498 }, { "epoch": 0.2610819279329094, "grad_norm": 0.4122002124786377, "learning_rate": 1.918764659140198e-05, "loss": 0.7224, "step": 8499 }, { "epoch": 0.2611126470678586, "grad_norm": 0.29269230365753174, "learning_rate": 1.9187455780049994e-05, "loss": 0.5024, "step": 8500 }, { "epoch": 0.2611433662028077, "grad_norm": 0.3680095076560974, "learning_rate": 1.918726494724006e-05, "loss": 0.6163, "step": 8501 }, { "epoch": 0.2611740853377569, "grad_norm": 0.37187477946281433, "learning_rate": 1.9187074092972627e-05, "loss": 0.5487, "step": 8502 }, { "epoch": 0.26120480447270605, "grad_norm": 0.3449639081954956, "learning_rate": 1.9186883217248143e-05, "loss": 0.5546, "step": 8503 }, { "epoch": 0.26123552360765523, "grad_norm": 0.3270503878593445, "learning_rate": 1.918669232006705e-05, "loss": 0.6006, "step": 8504 }, { "epoch": 0.26126624274260435, "grad_norm": 0.3097042441368103, "learning_rate": 1.9186501401429794e-05, "loss": 0.541, "step": 8505 }, { "epoch": 0.2612969618775535, "grad_norm": 0.3719574213027954, "learning_rate": 1.9186310461336827e-05, "loss": 0.5077, "step": 8506 }, { "epoch": 0.2613276810125027, "grad_norm": 0.3337031602859497, "learning_rate": 1.918611949978859e-05, "loss": 0.6245, "step": 8507 }, { "epoch": 0.2613584001474519, "grad_norm": 0.32008373737335205, "learning_rate": 1.9185928516785527e-05, "loss": 0.5037, "step": 8508 }, { "epoch": 0.261389119282401, "grad_norm": 0.35328608751296997, "learning_rate": 1.9185737512328088e-05, "loss": 0.5417, "step": 8509 }, { "epoch": 0.26141983841735017, "grad_norm": 0.4180358350276947, "learning_rate": 1.9185546486416714e-05, "loss": 0.5479, "step": 8510 }, { "epoch": 0.26145055755229935, "grad_norm": 0.3193531632423401, "learning_rate": 1.918535543905186e-05, "loss": 0.5988, "step": 8511 }, { "epoch": 0.26148127668724847, "grad_norm": 0.41306576132774353, "learning_rate": 1.9185164370233963e-05, "loss": 0.6255, "step": 8512 }, { "epoch": 0.26151199582219764, "grad_norm": 0.46524518728256226, "learning_rate": 1.918497327996347e-05, "loss": 0.6345, "step": 8513 }, { "epoch": 0.2615427149571468, "grad_norm": 0.3421257436275482, "learning_rate": 1.9184782168240835e-05, "loss": 0.5594, "step": 8514 }, { "epoch": 0.261573434092096, "grad_norm": 0.3875979483127594, "learning_rate": 1.91845910350665e-05, "loss": 0.642, "step": 8515 }, { "epoch": 0.2616041532270451, "grad_norm": 0.3152855634689331, "learning_rate": 1.918439988044091e-05, "loss": 0.6351, "step": 8516 }, { "epoch": 0.2616348723619943, "grad_norm": 0.3307898938655853, "learning_rate": 1.918420870436451e-05, "loss": 0.5395, "step": 8517 }, { "epoch": 0.26166559149694346, "grad_norm": 0.3206230401992798, "learning_rate": 1.9184017506837754e-05, "loss": 0.633, "step": 8518 }, { "epoch": 0.2616963106318926, "grad_norm": 0.346110999584198, "learning_rate": 1.9183826287861086e-05, "loss": 0.5919, "step": 8519 }, { "epoch": 0.26172702976684176, "grad_norm": 0.35199153423309326, "learning_rate": 1.9183635047434946e-05, "loss": 0.5941, "step": 8520 }, { "epoch": 0.26175774890179093, "grad_norm": 0.3227297365665436, "learning_rate": 1.9183443785559783e-05, "loss": 0.5597, "step": 8521 }, { "epoch": 0.2617884680367401, "grad_norm": 0.32325902581214905, "learning_rate": 1.9183252502236048e-05, "loss": 0.5739, "step": 8522 }, { "epoch": 0.2618191871716892, "grad_norm": 0.3115094006061554, "learning_rate": 1.9183061197464186e-05, "loss": 0.5163, "step": 8523 }, { "epoch": 0.2618499063066384, "grad_norm": 0.32263317704200745, "learning_rate": 1.9182869871244643e-05, "loss": 0.5201, "step": 8524 }, { "epoch": 0.2618806254415876, "grad_norm": 0.33352795243263245, "learning_rate": 1.9182678523577863e-05, "loss": 0.6587, "step": 8525 }, { "epoch": 0.2619113445765367, "grad_norm": 0.34361153841018677, "learning_rate": 1.91824871544643e-05, "loss": 0.6149, "step": 8526 }, { "epoch": 0.2619420637114859, "grad_norm": 0.31019067764282227, "learning_rate": 1.9182295763904396e-05, "loss": 0.5957, "step": 8527 }, { "epoch": 0.26197278284643505, "grad_norm": 0.32717934250831604, "learning_rate": 1.9182104351898596e-05, "loss": 0.5765, "step": 8528 }, { "epoch": 0.2620035019813842, "grad_norm": 0.37994396686553955, "learning_rate": 1.9181912918447355e-05, "loss": 0.6061, "step": 8529 }, { "epoch": 0.26203422111633334, "grad_norm": 0.3438194692134857, "learning_rate": 1.918172146355111e-05, "loss": 0.5919, "step": 8530 }, { "epoch": 0.2620649402512825, "grad_norm": 0.3439701199531555, "learning_rate": 1.918152998721032e-05, "loss": 0.5989, "step": 8531 }, { "epoch": 0.2620956593862317, "grad_norm": 0.3134026825428009, "learning_rate": 1.9181338489425418e-05, "loss": 0.5513, "step": 8532 }, { "epoch": 0.26212637852118087, "grad_norm": 0.3630007803440094, "learning_rate": 1.918114697019686e-05, "loss": 0.5367, "step": 8533 }, { "epoch": 0.26215709765613, "grad_norm": 2.1824119091033936, "learning_rate": 1.91809554295251e-05, "loss": 0.7148, "step": 8534 }, { "epoch": 0.26218781679107916, "grad_norm": 0.345398485660553, "learning_rate": 1.918076386741057e-05, "loss": 0.5927, "step": 8535 }, { "epoch": 0.26221853592602834, "grad_norm": 0.3576037585735321, "learning_rate": 1.9180572283853728e-05, "loss": 0.5814, "step": 8536 }, { "epoch": 0.26224925506097746, "grad_norm": 0.3604942262172699, "learning_rate": 1.9180380678855015e-05, "loss": 0.6848, "step": 8537 }, { "epoch": 0.26227997419592664, "grad_norm": 0.34088876843452454, "learning_rate": 1.9180189052414883e-05, "loss": 0.5965, "step": 8538 }, { "epoch": 0.2623106933308758, "grad_norm": 0.3199382424354553, "learning_rate": 1.917999740453378e-05, "loss": 0.6103, "step": 8539 }, { "epoch": 0.262341412465825, "grad_norm": 0.3564507067203522, "learning_rate": 1.917980573521215e-05, "loss": 0.6536, "step": 8540 }, { "epoch": 0.2623721316007741, "grad_norm": 0.32049381732940674, "learning_rate": 1.9179614044450443e-05, "loss": 0.5994, "step": 8541 }, { "epoch": 0.2624028507357233, "grad_norm": 0.3843544125556946, "learning_rate": 1.9179422332249105e-05, "loss": 0.6521, "step": 8542 }, { "epoch": 0.26243356987067246, "grad_norm": 0.2994089424610138, "learning_rate": 1.917923059860859e-05, "loss": 0.5538, "step": 8543 }, { "epoch": 0.2624642890056216, "grad_norm": 0.41258490085601807, "learning_rate": 1.9179038843529336e-05, "loss": 0.5667, "step": 8544 }, { "epoch": 0.26249500814057075, "grad_norm": 0.36281338334083557, "learning_rate": 1.9178847067011797e-05, "loss": 0.6144, "step": 8545 }, { "epoch": 0.2625257272755199, "grad_norm": 0.35347986221313477, "learning_rate": 1.9178655269056418e-05, "loss": 0.6147, "step": 8546 }, { "epoch": 0.2625564464104691, "grad_norm": 0.4175623655319214, "learning_rate": 1.9178463449663654e-05, "loss": 0.6343, "step": 8547 }, { "epoch": 0.2625871655454182, "grad_norm": 0.4029988944530487, "learning_rate": 1.9178271608833943e-05, "loss": 0.6117, "step": 8548 }, { "epoch": 0.2626178846803674, "grad_norm": 0.33154869079589844, "learning_rate": 1.917807974656774e-05, "loss": 0.5365, "step": 8549 }, { "epoch": 0.2626486038153166, "grad_norm": 0.34640833735466003, "learning_rate": 1.9177887862865486e-05, "loss": 0.6115, "step": 8550 }, { "epoch": 0.26267932295026575, "grad_norm": 0.31729528307914734, "learning_rate": 1.917769595772764e-05, "loss": 0.5378, "step": 8551 }, { "epoch": 0.26271004208521487, "grad_norm": 0.46107879281044006, "learning_rate": 1.9177504031154642e-05, "loss": 1.1178, "step": 8552 }, { "epoch": 0.26274076122016404, "grad_norm": 0.3560522198677063, "learning_rate": 1.917731208314694e-05, "loss": 0.5717, "step": 8553 }, { "epoch": 0.2627714803551132, "grad_norm": 0.3685148358345032, "learning_rate": 1.917712011370499e-05, "loss": 0.5499, "step": 8554 }, { "epoch": 0.26280219949006234, "grad_norm": 0.3241267502307892, "learning_rate": 1.917692812282923e-05, "loss": 0.5362, "step": 8555 }, { "epoch": 0.2628329186250115, "grad_norm": 0.3417528569698334, "learning_rate": 1.9176736110520114e-05, "loss": 0.6444, "step": 8556 }, { "epoch": 0.2628636377599607, "grad_norm": 0.3417874872684479, "learning_rate": 1.917654407677809e-05, "loss": 0.5511, "step": 8557 }, { "epoch": 0.26289435689490986, "grad_norm": 0.3350205421447754, "learning_rate": 1.917635202160361e-05, "loss": 0.6038, "step": 8558 }, { "epoch": 0.262925076029859, "grad_norm": 0.3464849591255188, "learning_rate": 1.9176159944997114e-05, "loss": 0.5525, "step": 8559 }, { "epoch": 0.26295579516480816, "grad_norm": 0.4327222406864166, "learning_rate": 1.917596784695906e-05, "loss": 0.6415, "step": 8560 }, { "epoch": 0.26298651429975733, "grad_norm": 0.32474657893180847, "learning_rate": 1.9175775727489888e-05, "loss": 0.5749, "step": 8561 }, { "epoch": 0.26301723343470645, "grad_norm": 0.33323007822036743, "learning_rate": 1.9175583586590055e-05, "loss": 0.5284, "step": 8562 }, { "epoch": 0.26304795256965563, "grad_norm": 0.36922502517700195, "learning_rate": 1.9175391424260004e-05, "loss": 0.6248, "step": 8563 }, { "epoch": 0.2630786717046048, "grad_norm": 0.39141130447387695, "learning_rate": 1.9175199240500185e-05, "loss": 0.6111, "step": 8564 }, { "epoch": 0.263109390839554, "grad_norm": 0.541361391544342, "learning_rate": 1.9175007035311046e-05, "loss": 0.5266, "step": 8565 }, { "epoch": 0.2631401099745031, "grad_norm": 0.7977179288864136, "learning_rate": 1.917481480869304e-05, "loss": 0.549, "step": 8566 }, { "epoch": 0.2631708291094523, "grad_norm": 0.33126649260520935, "learning_rate": 1.9174622560646613e-05, "loss": 0.6278, "step": 8567 }, { "epoch": 0.26320154824440145, "grad_norm": 0.3607638478279114, "learning_rate": 1.9174430291172213e-05, "loss": 0.5985, "step": 8568 }, { "epoch": 0.26323226737935057, "grad_norm": 0.3725287616252899, "learning_rate": 1.9174238000270292e-05, "loss": 0.6004, "step": 8569 }, { "epoch": 0.26326298651429975, "grad_norm": 0.3333872854709625, "learning_rate": 1.9174045687941298e-05, "loss": 0.5596, "step": 8570 }, { "epoch": 0.2632937056492489, "grad_norm": 0.34317559003829956, "learning_rate": 1.9173853354185676e-05, "loss": 0.6085, "step": 8571 }, { "epoch": 0.2633244247841981, "grad_norm": 0.35292309522628784, "learning_rate": 1.917366099900388e-05, "loss": 0.6209, "step": 8572 }, { "epoch": 0.2633551439191472, "grad_norm": 0.4825374484062195, "learning_rate": 1.917346862239636e-05, "loss": 0.6039, "step": 8573 }, { "epoch": 0.2633858630540964, "grad_norm": 0.3660709261894226, "learning_rate": 1.917327622436356e-05, "loss": 0.5935, "step": 8574 }, { "epoch": 0.26341658218904557, "grad_norm": 0.41419005393981934, "learning_rate": 1.9173083804905938e-05, "loss": 0.6097, "step": 8575 }, { "epoch": 0.26344730132399474, "grad_norm": 0.33252617716789246, "learning_rate": 1.9172891364023935e-05, "loss": 0.4906, "step": 8576 }, { "epoch": 0.26347802045894386, "grad_norm": 0.4028460383415222, "learning_rate": 1.9172698901718002e-05, "loss": 0.523, "step": 8577 }, { "epoch": 0.26350873959389304, "grad_norm": 0.34851452708244324, "learning_rate": 1.9172506417988594e-05, "loss": 0.6729, "step": 8578 }, { "epoch": 0.2635394587288422, "grad_norm": 0.3942740559577942, "learning_rate": 1.9172313912836154e-05, "loss": 0.594, "step": 8579 }, { "epoch": 0.26357017786379133, "grad_norm": 0.3468930125236511, "learning_rate": 1.9172121386261135e-05, "loss": 0.596, "step": 8580 }, { "epoch": 0.2636008969987405, "grad_norm": 0.3775671720504761, "learning_rate": 1.9171928838263983e-05, "loss": 0.6325, "step": 8581 }, { "epoch": 0.2636316161336897, "grad_norm": 0.4092210829257965, "learning_rate": 1.9171736268845157e-05, "loss": 0.5782, "step": 8582 }, { "epoch": 0.26366233526863886, "grad_norm": 0.3275635540485382, "learning_rate": 1.9171543678005092e-05, "loss": 0.5954, "step": 8583 }, { "epoch": 0.263693054403588, "grad_norm": 0.4057236611843109, "learning_rate": 1.9171351065744255e-05, "loss": 0.5596, "step": 8584 }, { "epoch": 0.26372377353853715, "grad_norm": 0.3731808066368103, "learning_rate": 1.9171158432063082e-05, "loss": 0.5731, "step": 8585 }, { "epoch": 0.26375449267348633, "grad_norm": 0.31362029910087585, "learning_rate": 1.917096577696203e-05, "loss": 0.584, "step": 8586 }, { "epoch": 0.26378521180843545, "grad_norm": 0.3233252465724945, "learning_rate": 1.9170773100441545e-05, "loss": 0.519, "step": 8587 }, { "epoch": 0.2638159309433846, "grad_norm": 0.3287304639816284, "learning_rate": 1.917058040250208e-05, "loss": 0.5354, "step": 8588 }, { "epoch": 0.2638466500783338, "grad_norm": 0.31928664445877075, "learning_rate": 1.9170387683144086e-05, "loss": 0.6139, "step": 8589 }, { "epoch": 0.263877369213283, "grad_norm": 0.32602551579475403, "learning_rate": 1.9170194942368008e-05, "loss": 0.6434, "step": 8590 }, { "epoch": 0.2639080883482321, "grad_norm": 0.33402854204177856, "learning_rate": 1.91700021801743e-05, "loss": 0.6487, "step": 8591 }, { "epoch": 0.26393880748318127, "grad_norm": 0.3243404030799866, "learning_rate": 1.916980939656341e-05, "loss": 0.5668, "step": 8592 }, { "epoch": 0.26396952661813045, "grad_norm": 0.3539455831050873, "learning_rate": 1.916961659153579e-05, "loss": 0.6563, "step": 8593 }, { "epoch": 0.2640002457530796, "grad_norm": 0.3879533112049103, "learning_rate": 1.9169423765091894e-05, "loss": 0.6501, "step": 8594 }, { "epoch": 0.26403096488802874, "grad_norm": 0.33644822239875793, "learning_rate": 1.9169230917232163e-05, "loss": 0.6304, "step": 8595 }, { "epoch": 0.2640616840229779, "grad_norm": 0.33295971155166626, "learning_rate": 1.9169038047957057e-05, "loss": 0.5925, "step": 8596 }, { "epoch": 0.2640924031579271, "grad_norm": 0.34748509526252747, "learning_rate": 1.9168845157267023e-05, "loss": 0.6151, "step": 8597 }, { "epoch": 0.2641231222928762, "grad_norm": 0.3289951682090759, "learning_rate": 1.9168652245162506e-05, "loss": 0.5674, "step": 8598 }, { "epoch": 0.2641538414278254, "grad_norm": 0.3522871732711792, "learning_rate": 1.9168459311643966e-05, "loss": 0.5664, "step": 8599 }, { "epoch": 0.26418456056277456, "grad_norm": 0.36649706959724426, "learning_rate": 1.9168266356711847e-05, "loss": 0.6039, "step": 8600 }, { "epoch": 0.26421527969772374, "grad_norm": 0.3710519075393677, "learning_rate": 1.91680733803666e-05, "loss": 0.5879, "step": 8601 }, { "epoch": 0.26424599883267286, "grad_norm": 0.4659448266029358, "learning_rate": 1.916788038260868e-05, "loss": 0.6114, "step": 8602 }, { "epoch": 0.26427671796762203, "grad_norm": 0.3291952311992645, "learning_rate": 1.9167687363438535e-05, "loss": 0.6451, "step": 8603 }, { "epoch": 0.2643074371025712, "grad_norm": 0.3181070387363434, "learning_rate": 1.9167494322856616e-05, "loss": 0.6196, "step": 8604 }, { "epoch": 0.2643381562375203, "grad_norm": 0.3475816547870636, "learning_rate": 1.916730126086337e-05, "loss": 0.5272, "step": 8605 }, { "epoch": 0.2643688753724695, "grad_norm": 0.3757767677307129, "learning_rate": 1.9167108177459254e-05, "loss": 0.618, "step": 8606 }, { "epoch": 0.2643995945074187, "grad_norm": 0.31085532903671265, "learning_rate": 1.916691507264472e-05, "loss": 0.5276, "step": 8607 }, { "epoch": 0.26443031364236785, "grad_norm": 0.36124253273010254, "learning_rate": 1.916672194642021e-05, "loss": 0.6385, "step": 8608 }, { "epoch": 0.264461032777317, "grad_norm": 13.547745704650879, "learning_rate": 1.9166528798786184e-05, "loss": 1.0145, "step": 8609 }, { "epoch": 0.26449175191226615, "grad_norm": 0.3517603874206543, "learning_rate": 1.9166335629743093e-05, "loss": 0.6897, "step": 8610 }, { "epoch": 0.2645224710472153, "grad_norm": 0.3518737256526947, "learning_rate": 1.916614243929138e-05, "loss": 0.5395, "step": 8611 }, { "epoch": 0.26455319018216444, "grad_norm": 0.36003583669662476, "learning_rate": 1.91659492274315e-05, "loss": 0.5464, "step": 8612 }, { "epoch": 0.2645839093171136, "grad_norm": 0.34680846333503723, "learning_rate": 1.9165755994163907e-05, "loss": 0.5929, "step": 8613 }, { "epoch": 0.2646146284520628, "grad_norm": 0.3636135458946228, "learning_rate": 1.9165562739489054e-05, "loss": 0.5978, "step": 8614 }, { "epoch": 0.26464534758701197, "grad_norm": 0.35425472259521484, "learning_rate": 1.9165369463407385e-05, "loss": 0.5357, "step": 8615 }, { "epoch": 0.2646760667219611, "grad_norm": 0.3702721893787384, "learning_rate": 1.9165176165919357e-05, "loss": 0.5563, "step": 8616 }, { "epoch": 0.26470678585691026, "grad_norm": 0.3871748745441437, "learning_rate": 1.9164982847025423e-05, "loss": 0.6642, "step": 8617 }, { "epoch": 0.26473750499185944, "grad_norm": 0.8477364778518677, "learning_rate": 1.9164789506726026e-05, "loss": 0.6267, "step": 8618 }, { "epoch": 0.2647682241268086, "grad_norm": 0.5241139531135559, "learning_rate": 1.9164596145021627e-05, "loss": 0.6858, "step": 8619 }, { "epoch": 0.26479894326175774, "grad_norm": 0.34346094727516174, "learning_rate": 1.9164402761912673e-05, "loss": 0.5613, "step": 8620 }, { "epoch": 0.2648296623967069, "grad_norm": 0.3765224516391754, "learning_rate": 1.9164209357399616e-05, "loss": 0.6356, "step": 8621 }, { "epoch": 0.2648603815316561, "grad_norm": 0.3233392536640167, "learning_rate": 1.9164015931482907e-05, "loss": 0.5384, "step": 8622 }, { "epoch": 0.2648911006666052, "grad_norm": 0.3689977824687958, "learning_rate": 1.9163822484163e-05, "loss": 0.5265, "step": 8623 }, { "epoch": 0.2649218198015544, "grad_norm": 0.33127060532569885, "learning_rate": 1.9163629015440347e-05, "loss": 0.5729, "step": 8624 }, { "epoch": 0.26495253893650356, "grad_norm": 0.34674641489982605, "learning_rate": 1.9163435525315393e-05, "loss": 0.586, "step": 8625 }, { "epoch": 0.26498325807145273, "grad_norm": 0.3240914046764374, "learning_rate": 1.91632420137886e-05, "loss": 0.6215, "step": 8626 }, { "epoch": 0.26501397720640185, "grad_norm": 0.33640554547309875, "learning_rate": 1.916304848086041e-05, "loss": 0.638, "step": 8627 }, { "epoch": 0.265044696341351, "grad_norm": 0.33202335238456726, "learning_rate": 1.916285492653129e-05, "loss": 0.5963, "step": 8628 }, { "epoch": 0.2650754154763002, "grad_norm": 0.4308584928512573, "learning_rate": 1.9162661350801675e-05, "loss": 0.5473, "step": 8629 }, { "epoch": 0.2651061346112493, "grad_norm": 0.3613283634185791, "learning_rate": 1.916246775367203e-05, "loss": 0.6451, "step": 8630 }, { "epoch": 0.2651368537461985, "grad_norm": 0.4001268148422241, "learning_rate": 1.9162274135142796e-05, "loss": 0.5917, "step": 8631 }, { "epoch": 0.2651675728811477, "grad_norm": 0.34868407249450684, "learning_rate": 1.9162080495214434e-05, "loss": 0.6391, "step": 8632 }, { "epoch": 0.26519829201609685, "grad_norm": 0.36719056963920593, "learning_rate": 1.916188683388739e-05, "loss": 0.6264, "step": 8633 }, { "epoch": 0.26522901115104597, "grad_norm": 0.43466565012931824, "learning_rate": 1.916169315116212e-05, "loss": 0.6162, "step": 8634 }, { "epoch": 0.26525973028599514, "grad_norm": 0.34804072976112366, "learning_rate": 1.916149944703908e-05, "loss": 0.6357, "step": 8635 }, { "epoch": 0.2652904494209443, "grad_norm": 0.36143893003463745, "learning_rate": 1.9161305721518713e-05, "loss": 0.6028, "step": 8636 }, { "epoch": 0.26532116855589344, "grad_norm": 0.31955575942993164, "learning_rate": 1.916111197460148e-05, "loss": 0.5466, "step": 8637 }, { "epoch": 0.2653518876908426, "grad_norm": 0.40474921464920044, "learning_rate": 1.916091820628783e-05, "loss": 0.5703, "step": 8638 }, { "epoch": 0.2653826068257918, "grad_norm": 0.31477952003479004, "learning_rate": 1.9160724416578213e-05, "loss": 0.6447, "step": 8639 }, { "epoch": 0.26541332596074096, "grad_norm": 0.4715936779975891, "learning_rate": 1.916053060547309e-05, "loss": 0.5215, "step": 8640 }, { "epoch": 0.2654440450956901, "grad_norm": 0.33720293641090393, "learning_rate": 1.91603367729729e-05, "loss": 0.6249, "step": 8641 }, { "epoch": 0.26547476423063926, "grad_norm": 0.34082359075546265, "learning_rate": 1.9160142919078112e-05, "loss": 0.5642, "step": 8642 }, { "epoch": 0.26550548336558843, "grad_norm": 0.36827898025512695, "learning_rate": 1.9159949043789167e-05, "loss": 0.6115, "step": 8643 }, { "epoch": 0.2655362025005376, "grad_norm": 0.3330440819263458, "learning_rate": 1.915975514710652e-05, "loss": 0.5753, "step": 8644 }, { "epoch": 0.26556692163548673, "grad_norm": 0.320085734128952, "learning_rate": 1.9159561229030626e-05, "loss": 0.6472, "step": 8645 }, { "epoch": 0.2655976407704359, "grad_norm": 0.3217692971229553, "learning_rate": 1.9159367289561938e-05, "loss": 0.4955, "step": 8646 }, { "epoch": 0.2656283599053851, "grad_norm": 0.3983650207519531, "learning_rate": 1.9159173328700907e-05, "loss": 0.5973, "step": 8647 }, { "epoch": 0.2656590790403342, "grad_norm": 0.3807363510131836, "learning_rate": 1.9158979346447988e-05, "loss": 0.6441, "step": 8648 }, { "epoch": 0.2656897981752834, "grad_norm": 0.3113970458507538, "learning_rate": 1.9158785342803628e-05, "loss": 0.5598, "step": 8649 }, { "epoch": 0.26572051731023255, "grad_norm": 0.35806983709335327, "learning_rate": 1.9158591317768295e-05, "loss": 0.6413, "step": 8650 }, { "epoch": 0.2657512364451817, "grad_norm": 0.36145317554473877, "learning_rate": 1.9158397271342425e-05, "loss": 0.6124, "step": 8651 }, { "epoch": 0.26578195558013085, "grad_norm": 0.35813474655151367, "learning_rate": 1.915820320352648e-05, "loss": 0.5748, "step": 8652 }, { "epoch": 0.26581267471508, "grad_norm": 0.33496809005737305, "learning_rate": 1.9158009114320913e-05, "loss": 0.5329, "step": 8653 }, { "epoch": 0.2658433938500292, "grad_norm": 0.31693416833877563, "learning_rate": 1.9157815003726175e-05, "loss": 0.5249, "step": 8654 }, { "epoch": 0.2658741129849783, "grad_norm": 0.32396790385246277, "learning_rate": 1.915762087174272e-05, "loss": 0.5328, "step": 8655 }, { "epoch": 0.2659048321199275, "grad_norm": 0.34105533361434937, "learning_rate": 1.9157426718371002e-05, "loss": 0.5669, "step": 8656 }, { "epoch": 0.26593555125487667, "grad_norm": 0.33394908905029297, "learning_rate": 1.9157232543611476e-05, "loss": 0.6016, "step": 8657 }, { "epoch": 0.26596627038982584, "grad_norm": 0.45857465267181396, "learning_rate": 1.9157038347464594e-05, "loss": 0.6617, "step": 8658 }, { "epoch": 0.26599698952477496, "grad_norm": 0.3963107168674469, "learning_rate": 1.915684412993081e-05, "loss": 0.6011, "step": 8659 }, { "epoch": 0.26602770865972414, "grad_norm": 0.352446973323822, "learning_rate": 1.9156649891010572e-05, "loss": 0.5923, "step": 8660 }, { "epoch": 0.2660584277946733, "grad_norm": 0.3275332450866699, "learning_rate": 1.9156455630704343e-05, "loss": 0.539, "step": 8661 }, { "epoch": 0.2660891469296225, "grad_norm": 0.3456118702888489, "learning_rate": 1.9156261349012572e-05, "loss": 0.5791, "step": 8662 }, { "epoch": 0.2661198660645716, "grad_norm": 0.3373566269874573, "learning_rate": 1.9156067045935712e-05, "loss": 0.5302, "step": 8663 }, { "epoch": 0.2661505851995208, "grad_norm": 0.36883556842803955, "learning_rate": 1.915587272147422e-05, "loss": 0.6227, "step": 8664 }, { "epoch": 0.26618130433446996, "grad_norm": 0.3588220477104187, "learning_rate": 1.9155678375628544e-05, "loss": 0.6248, "step": 8665 }, { "epoch": 0.2662120234694191, "grad_norm": 0.3369339406490326, "learning_rate": 1.9155484008399144e-05, "loss": 0.6689, "step": 8666 }, { "epoch": 0.26624274260436825, "grad_norm": 0.3390048146247864, "learning_rate": 1.9155289619786472e-05, "loss": 0.5686, "step": 8667 }, { "epoch": 0.26627346173931743, "grad_norm": 0.34848764538764954, "learning_rate": 1.915509520979098e-05, "loss": 0.6851, "step": 8668 }, { "epoch": 0.2663041808742666, "grad_norm": 0.3103259205818176, "learning_rate": 1.915490077841313e-05, "loss": 0.6477, "step": 8669 }, { "epoch": 0.2663349000092157, "grad_norm": 0.3364870548248291, "learning_rate": 1.915470632565336e-05, "loss": 0.5323, "step": 8670 }, { "epoch": 0.2663656191441649, "grad_norm": 0.34060412645339966, "learning_rate": 1.915451185151214e-05, "loss": 0.6242, "step": 8671 }, { "epoch": 0.2663963382791141, "grad_norm": 0.374696284532547, "learning_rate": 1.915431735598992e-05, "loss": 0.6195, "step": 8672 }, { "epoch": 0.2664270574140632, "grad_norm": 0.389600932598114, "learning_rate": 1.915412283908715e-05, "loss": 0.6145, "step": 8673 }, { "epoch": 0.26645777654901237, "grad_norm": 0.32126542925834656, "learning_rate": 1.9153928300804286e-05, "loss": 0.6145, "step": 8674 }, { "epoch": 0.26648849568396155, "grad_norm": 0.33721697330474854, "learning_rate": 1.9153733741141784e-05, "loss": 0.6453, "step": 8675 }, { "epoch": 0.2665192148189107, "grad_norm": 0.3183304965496063, "learning_rate": 1.9153539160100095e-05, "loss": 0.5655, "step": 8676 }, { "epoch": 0.26654993395385984, "grad_norm": 0.3261716961860657, "learning_rate": 1.915334455767968e-05, "loss": 0.5648, "step": 8677 }, { "epoch": 0.266580653088809, "grad_norm": 0.3437630236148834, "learning_rate": 1.915314993388099e-05, "loss": 0.6488, "step": 8678 }, { "epoch": 0.2666113722237582, "grad_norm": 0.3338691294193268, "learning_rate": 1.9152955288704476e-05, "loss": 0.5529, "step": 8679 }, { "epoch": 0.2666420913587073, "grad_norm": 0.3944566249847412, "learning_rate": 1.9152760622150593e-05, "loss": 0.6572, "step": 8680 }, { "epoch": 0.2666728104936565, "grad_norm": 0.31331443786621094, "learning_rate": 1.9152565934219805e-05, "loss": 0.5249, "step": 8681 }, { "epoch": 0.26670352962860566, "grad_norm": 0.34602057933807373, "learning_rate": 1.9152371224912557e-05, "loss": 0.5719, "step": 8682 }, { "epoch": 0.26673424876355484, "grad_norm": 0.3866533637046814, "learning_rate": 1.9152176494229305e-05, "loss": 0.6278, "step": 8683 }, { "epoch": 0.26676496789850396, "grad_norm": 0.35636788606643677, "learning_rate": 1.915198174217051e-05, "loss": 0.5574, "step": 8684 }, { "epoch": 0.26679568703345313, "grad_norm": 0.3229437470436096, "learning_rate": 1.915178696873662e-05, "loss": 0.5262, "step": 8685 }, { "epoch": 0.2668264061684023, "grad_norm": 0.43372148275375366, "learning_rate": 1.915159217392809e-05, "loss": 0.6515, "step": 8686 }, { "epoch": 0.2668571253033515, "grad_norm": 0.3932594954967499, "learning_rate": 1.915139735774538e-05, "loss": 0.6472, "step": 8687 }, { "epoch": 0.2668878444383006, "grad_norm": 0.3896244466304779, "learning_rate": 1.9151202520188943e-05, "loss": 0.5888, "step": 8688 }, { "epoch": 0.2669185635732498, "grad_norm": 0.31793564558029175, "learning_rate": 1.915100766125923e-05, "loss": 0.5495, "step": 8689 }, { "epoch": 0.26694928270819895, "grad_norm": 0.3580148220062256, "learning_rate": 1.9150812780956706e-05, "loss": 0.5469, "step": 8690 }, { "epoch": 0.2669800018431481, "grad_norm": 0.7980449199676514, "learning_rate": 1.915061787928181e-05, "loss": 0.7187, "step": 8691 }, { "epoch": 0.26701072097809725, "grad_norm": 0.393425315618515, "learning_rate": 1.9150422956235016e-05, "loss": 0.5972, "step": 8692 }, { "epoch": 0.2670414401130464, "grad_norm": 0.2895285189151764, "learning_rate": 1.915022801181677e-05, "loss": 0.5436, "step": 8693 }, { "epoch": 0.2670721592479956, "grad_norm": 0.35438692569732666, "learning_rate": 1.915003304602752e-05, "loss": 0.5916, "step": 8694 }, { "epoch": 0.2671028783829447, "grad_norm": 0.32918208837509155, "learning_rate": 1.9149838058867734e-05, "loss": 0.6358, "step": 8695 }, { "epoch": 0.2671335975178939, "grad_norm": 0.3524302840232849, "learning_rate": 1.9149643050337858e-05, "loss": 0.6119, "step": 8696 }, { "epoch": 0.26716431665284307, "grad_norm": 0.3242788314819336, "learning_rate": 1.9149448020438357e-05, "loss": 0.5947, "step": 8697 }, { "epoch": 0.2671950357877922, "grad_norm": 0.3229874074459076, "learning_rate": 1.914925296916968e-05, "loss": 0.6013, "step": 8698 }, { "epoch": 0.26722575492274137, "grad_norm": 0.354824423789978, "learning_rate": 1.914905789653228e-05, "loss": 0.6298, "step": 8699 }, { "epoch": 0.26725647405769054, "grad_norm": 0.36988961696624756, "learning_rate": 1.914886280252662e-05, "loss": 0.5598, "step": 8700 }, { "epoch": 0.2672871931926397, "grad_norm": 0.34439322352409363, "learning_rate": 1.9148667687153152e-05, "loss": 0.5418, "step": 8701 }, { "epoch": 0.26731791232758884, "grad_norm": 0.3510277569293976, "learning_rate": 1.914847255041233e-05, "loss": 0.6518, "step": 8702 }, { "epoch": 0.267348631462538, "grad_norm": 0.34336963295936584, "learning_rate": 1.9148277392304612e-05, "loss": 0.5847, "step": 8703 }, { "epoch": 0.2673793505974872, "grad_norm": 0.32249927520751953, "learning_rate": 1.9148082212830453e-05, "loss": 0.5416, "step": 8704 }, { "epoch": 0.26741006973243636, "grad_norm": 0.41251808404922485, "learning_rate": 1.9147887011990308e-05, "loss": 0.5336, "step": 8705 }, { "epoch": 0.2674407888673855, "grad_norm": 0.33706411719322205, "learning_rate": 1.9147691789784635e-05, "loss": 0.5467, "step": 8706 }, { "epoch": 0.26747150800233466, "grad_norm": 0.3492804169654846, "learning_rate": 1.914749654621389e-05, "loss": 0.6012, "step": 8707 }, { "epoch": 0.26750222713728383, "grad_norm": 0.3020542860031128, "learning_rate": 1.914730128127853e-05, "loss": 0.4564, "step": 8708 }, { "epoch": 0.26753294627223295, "grad_norm": 0.3618064522743225, "learning_rate": 1.9147105994979004e-05, "loss": 0.5884, "step": 8709 }, { "epoch": 0.2675636654071821, "grad_norm": 0.38659313321113586, "learning_rate": 1.9146910687315773e-05, "loss": 0.5719, "step": 8710 }, { "epoch": 0.2675943845421313, "grad_norm": 0.33900532126426697, "learning_rate": 1.9146715358289297e-05, "loss": 0.5532, "step": 8711 }, { "epoch": 0.2676251036770805, "grad_norm": 0.3218209743499756, "learning_rate": 1.9146520007900027e-05, "loss": 0.6291, "step": 8712 }, { "epoch": 0.2676558228120296, "grad_norm": 0.36449605226516724, "learning_rate": 1.914632463614842e-05, "loss": 0.5569, "step": 8713 }, { "epoch": 0.2676865419469788, "grad_norm": 0.376830518245697, "learning_rate": 1.9146129243034934e-05, "loss": 0.6303, "step": 8714 }, { "epoch": 0.26771726108192795, "grad_norm": 0.33445143699645996, "learning_rate": 1.9145933828560025e-05, "loss": 0.5689, "step": 8715 }, { "epoch": 0.26774798021687707, "grad_norm": 0.30119338631629944, "learning_rate": 1.914573839272415e-05, "loss": 0.5377, "step": 8716 }, { "epoch": 0.26777869935182624, "grad_norm": 0.3272824287414551, "learning_rate": 1.914554293552776e-05, "loss": 0.5278, "step": 8717 }, { "epoch": 0.2678094184867754, "grad_norm": 0.36979183554649353, "learning_rate": 1.914534745697132e-05, "loss": 0.6396, "step": 8718 }, { "epoch": 0.2678401376217246, "grad_norm": 0.44983625411987305, "learning_rate": 1.914515195705528e-05, "loss": 0.4552, "step": 8719 }, { "epoch": 0.2678708567566737, "grad_norm": 0.35280346870422363, "learning_rate": 1.9144956435780098e-05, "loss": 0.5026, "step": 8720 }, { "epoch": 0.2679015758916229, "grad_norm": 0.3629494309425354, "learning_rate": 1.9144760893146235e-05, "loss": 0.6105, "step": 8721 }, { "epoch": 0.26793229502657206, "grad_norm": 0.3300478756427765, "learning_rate": 1.914456532915414e-05, "loss": 0.5622, "step": 8722 }, { "epoch": 0.2679630141615212, "grad_norm": 0.36566174030303955, "learning_rate": 1.9144369743804277e-05, "loss": 0.5614, "step": 8723 }, { "epoch": 0.26799373329647036, "grad_norm": 0.4073520600795746, "learning_rate": 1.91441741370971e-05, "loss": 0.5545, "step": 8724 }, { "epoch": 0.26802445243141954, "grad_norm": 0.33625128865242004, "learning_rate": 1.9143978509033064e-05, "loss": 0.5344, "step": 8725 }, { "epoch": 0.2680551715663687, "grad_norm": 0.37191861867904663, "learning_rate": 1.914378285961263e-05, "loss": 0.6352, "step": 8726 }, { "epoch": 0.26808589070131783, "grad_norm": 0.38028866052627563, "learning_rate": 1.9143587188836246e-05, "loss": 0.6224, "step": 8727 }, { "epoch": 0.268116609836267, "grad_norm": 0.3240302801132202, "learning_rate": 1.9143391496704385e-05, "loss": 0.5062, "step": 8728 }, { "epoch": 0.2681473289712162, "grad_norm": 0.41265347599983215, "learning_rate": 1.9143195783217488e-05, "loss": 0.6219, "step": 8729 }, { "epoch": 0.26817804810616536, "grad_norm": 0.40836480259895325, "learning_rate": 1.914300004837602e-05, "loss": 0.5122, "step": 8730 }, { "epoch": 0.2682087672411145, "grad_norm": 0.3246540129184723, "learning_rate": 1.9142804292180437e-05, "loss": 0.5901, "step": 8731 }, { "epoch": 0.26823948637606365, "grad_norm": 0.3361242711544037, "learning_rate": 1.91426085146312e-05, "loss": 0.5755, "step": 8732 }, { "epoch": 0.2682702055110128, "grad_norm": 0.354328453540802, "learning_rate": 1.9142412715728757e-05, "loss": 0.5808, "step": 8733 }, { "epoch": 0.26830092464596195, "grad_norm": 0.42316415905952454, "learning_rate": 1.914221689547357e-05, "loss": 0.6059, "step": 8734 }, { "epoch": 0.2683316437809111, "grad_norm": 0.5421087145805359, "learning_rate": 1.9142021053866103e-05, "loss": 0.6387, "step": 8735 }, { "epoch": 0.2683623629158603, "grad_norm": 0.34757357835769653, "learning_rate": 1.91418251909068e-05, "loss": 0.5693, "step": 8736 }, { "epoch": 0.2683930820508095, "grad_norm": 0.34967756271362305, "learning_rate": 1.914162930659613e-05, "loss": 0.6068, "step": 8737 }, { "epoch": 0.2684238011857586, "grad_norm": 0.3463193476200104, "learning_rate": 1.9141433400934544e-05, "loss": 0.6253, "step": 8738 }, { "epoch": 0.26845452032070777, "grad_norm": 0.32746225595474243, "learning_rate": 1.9141237473922507e-05, "loss": 0.5195, "step": 8739 }, { "epoch": 0.26848523945565694, "grad_norm": 0.33240628242492676, "learning_rate": 1.9141041525560468e-05, "loss": 0.6585, "step": 8740 }, { "epoch": 0.26851595859060606, "grad_norm": 0.3499237298965454, "learning_rate": 1.914084555584889e-05, "loss": 0.6428, "step": 8741 }, { "epoch": 0.26854667772555524, "grad_norm": 0.39372339844703674, "learning_rate": 1.9140649564788225e-05, "loss": 0.5839, "step": 8742 }, { "epoch": 0.2685773968605044, "grad_norm": 0.5834788680076599, "learning_rate": 1.9140453552378935e-05, "loss": 0.6261, "step": 8743 }, { "epoch": 0.2686081159954536, "grad_norm": 0.3498360216617584, "learning_rate": 1.914025751862148e-05, "loss": 0.5573, "step": 8744 }, { "epoch": 0.2686388351304027, "grad_norm": 0.3862430453300476, "learning_rate": 1.9140061463516314e-05, "loss": 0.5714, "step": 8745 }, { "epoch": 0.2686695542653519, "grad_norm": 0.34388431906700134, "learning_rate": 1.91398653870639e-05, "loss": 0.6122, "step": 8746 }, { "epoch": 0.26870027340030106, "grad_norm": 0.3377215564250946, "learning_rate": 1.9139669289264687e-05, "loss": 0.5307, "step": 8747 }, { "epoch": 0.26873099253525023, "grad_norm": 0.33628585934638977, "learning_rate": 1.913947317011914e-05, "loss": 0.5866, "step": 8748 }, { "epoch": 0.26876171167019935, "grad_norm": 0.5191262364387512, "learning_rate": 1.9139277029627714e-05, "loss": 0.5521, "step": 8749 }, { "epoch": 0.26879243080514853, "grad_norm": 0.3837521970272064, "learning_rate": 1.913908086779087e-05, "loss": 0.5304, "step": 8750 }, { "epoch": 0.2688231499400977, "grad_norm": 0.3514169156551361, "learning_rate": 1.9138884684609064e-05, "loss": 0.6399, "step": 8751 }, { "epoch": 0.2688538690750468, "grad_norm": 0.3408939838409424, "learning_rate": 1.913868848008275e-05, "loss": 0.6509, "step": 8752 }, { "epoch": 0.268884588209996, "grad_norm": 0.3469868004322052, "learning_rate": 1.9138492254212397e-05, "loss": 0.6015, "step": 8753 }, { "epoch": 0.2689153073449452, "grad_norm": 0.3380759358406067, "learning_rate": 1.9138296006998452e-05, "loss": 0.55, "step": 8754 }, { "epoch": 0.26894602647989435, "grad_norm": 0.5519193410873413, "learning_rate": 1.913809973844138e-05, "loss": 0.6821, "step": 8755 }, { "epoch": 0.26897674561484347, "grad_norm": 0.34753289818763733, "learning_rate": 1.9137903448541642e-05, "loss": 0.5425, "step": 8756 }, { "epoch": 0.26900746474979265, "grad_norm": 0.35978060960769653, "learning_rate": 1.913770713729969e-05, "loss": 0.5108, "step": 8757 }, { "epoch": 0.2690381838847418, "grad_norm": 0.33711108565330505, "learning_rate": 1.9137510804715983e-05, "loss": 0.5946, "step": 8758 }, { "epoch": 0.26906890301969094, "grad_norm": 0.31975722312927246, "learning_rate": 1.9137314450790984e-05, "loss": 0.5946, "step": 8759 }, { "epoch": 0.2690996221546401, "grad_norm": 0.32161158323287964, "learning_rate": 1.9137118075525147e-05, "loss": 0.5021, "step": 8760 }, { "epoch": 0.2691303412895893, "grad_norm": 0.3427479863166809, "learning_rate": 1.913692167891893e-05, "loss": 0.5933, "step": 8761 }, { "epoch": 0.26916106042453847, "grad_norm": 0.34702685475349426, "learning_rate": 1.91367252609728e-05, "loss": 0.5635, "step": 8762 }, { "epoch": 0.2691917795594876, "grad_norm": 0.37181591987609863, "learning_rate": 1.913652882168721e-05, "loss": 0.5859, "step": 8763 }, { "epoch": 0.26922249869443676, "grad_norm": 0.4100138545036316, "learning_rate": 1.9136332361062613e-05, "loss": 0.5317, "step": 8764 }, { "epoch": 0.26925321782938594, "grad_norm": 0.32772231101989746, "learning_rate": 1.9136135879099477e-05, "loss": 0.5491, "step": 8765 }, { "epoch": 0.26928393696433506, "grad_norm": 0.304598331451416, "learning_rate": 1.9135939375798257e-05, "loss": 0.5681, "step": 8766 }, { "epoch": 0.26931465609928423, "grad_norm": 0.34933724999427795, "learning_rate": 1.9135742851159413e-05, "loss": 0.5842, "step": 8767 }, { "epoch": 0.2693453752342334, "grad_norm": 0.34307432174682617, "learning_rate": 1.9135546305183402e-05, "loss": 0.5691, "step": 8768 }, { "epoch": 0.2693760943691826, "grad_norm": 0.3506833612918854, "learning_rate": 1.913534973787069e-05, "loss": 0.6673, "step": 8769 }, { "epoch": 0.2694068135041317, "grad_norm": 0.3275294005870819, "learning_rate": 1.9135153149221727e-05, "loss": 0.618, "step": 8770 }, { "epoch": 0.2694375326390809, "grad_norm": 0.36960718035697937, "learning_rate": 1.913495653923697e-05, "loss": 0.5089, "step": 8771 }, { "epoch": 0.26946825177403005, "grad_norm": 0.35343295335769653, "learning_rate": 1.9134759907916893e-05, "loss": 0.6565, "step": 8772 }, { "epoch": 0.26949897090897923, "grad_norm": 0.3468801975250244, "learning_rate": 1.9134563255261944e-05, "loss": 0.5472, "step": 8773 }, { "epoch": 0.26952969004392835, "grad_norm": 0.332366943359375, "learning_rate": 1.9134366581272587e-05, "loss": 0.5642, "step": 8774 }, { "epoch": 0.2695604091788775, "grad_norm": 0.3387770652770996, "learning_rate": 1.9134169885949272e-05, "loss": 0.4935, "step": 8775 }, { "epoch": 0.2695911283138267, "grad_norm": 0.32291948795318604, "learning_rate": 1.9133973169292467e-05, "loss": 0.5737, "step": 8776 }, { "epoch": 0.2696218474487758, "grad_norm": 0.3799710273742676, "learning_rate": 1.9133776431302635e-05, "loss": 0.6235, "step": 8777 }, { "epoch": 0.269652566583725, "grad_norm": 0.3026837706565857, "learning_rate": 1.9133579671980228e-05, "loss": 0.5733, "step": 8778 }, { "epoch": 0.26968328571867417, "grad_norm": 0.36235255002975464, "learning_rate": 1.9133382891325706e-05, "loss": 0.6167, "step": 8779 }, { "epoch": 0.26971400485362335, "grad_norm": 0.33729901909828186, "learning_rate": 1.913318608933953e-05, "loss": 0.6028, "step": 8780 }, { "epoch": 0.26974472398857247, "grad_norm": 0.3713393807411194, "learning_rate": 1.913298926602216e-05, "loss": 0.6001, "step": 8781 }, { "epoch": 0.26977544312352164, "grad_norm": 0.3227938413619995, "learning_rate": 1.913279242137406e-05, "loss": 0.6855, "step": 8782 }, { "epoch": 0.2698061622584708, "grad_norm": 0.3470472991466522, "learning_rate": 1.913259555539568e-05, "loss": 0.622, "step": 8783 }, { "epoch": 0.26983688139341994, "grad_norm": 0.35053467750549316, "learning_rate": 1.9132398668087487e-05, "loss": 0.5284, "step": 8784 }, { "epoch": 0.2698676005283691, "grad_norm": 0.3765382468700409, "learning_rate": 1.913220175944994e-05, "loss": 0.6632, "step": 8785 }, { "epoch": 0.2698983196633183, "grad_norm": 0.3320727050304413, "learning_rate": 1.91320048294835e-05, "loss": 0.6291, "step": 8786 }, { "epoch": 0.26992903879826746, "grad_norm": 0.34642383456230164, "learning_rate": 1.913180787818862e-05, "loss": 0.6721, "step": 8787 }, { "epoch": 0.2699597579332166, "grad_norm": 0.31732189655303955, "learning_rate": 1.913161090556577e-05, "loss": 0.5667, "step": 8788 }, { "epoch": 0.26999047706816576, "grad_norm": 0.3027111291885376, "learning_rate": 1.91314139116154e-05, "loss": 0.6047, "step": 8789 }, { "epoch": 0.27002119620311493, "grad_norm": 0.38515254855155945, "learning_rate": 1.9131216896337977e-05, "loss": 0.4883, "step": 8790 }, { "epoch": 0.27005191533806405, "grad_norm": 0.3503424823284149, "learning_rate": 1.913101985973396e-05, "loss": 0.5738, "step": 8791 }, { "epoch": 0.27008263447301323, "grad_norm": 0.4063030481338501, "learning_rate": 1.913082280180381e-05, "loss": 0.5803, "step": 8792 }, { "epoch": 0.2701133536079624, "grad_norm": 0.3719574511051178, "learning_rate": 1.9130625722547982e-05, "loss": 0.5778, "step": 8793 }, { "epoch": 0.2701440727429116, "grad_norm": 0.31902459263801575, "learning_rate": 1.913042862196694e-05, "loss": 0.573, "step": 8794 }, { "epoch": 0.2701747918778607, "grad_norm": 0.36431774497032166, "learning_rate": 1.9130231500061147e-05, "loss": 0.5616, "step": 8795 }, { "epoch": 0.2702055110128099, "grad_norm": 0.35929057002067566, "learning_rate": 1.9130034356831058e-05, "loss": 0.5473, "step": 8796 }, { "epoch": 0.27023623014775905, "grad_norm": 0.3804222345352173, "learning_rate": 1.9129837192277142e-05, "loss": 0.5391, "step": 8797 }, { "epoch": 0.2702669492827082, "grad_norm": 0.3318871259689331, "learning_rate": 1.912964000639985e-05, "loss": 0.5405, "step": 8798 }, { "epoch": 0.27029766841765734, "grad_norm": 0.36665672063827515, "learning_rate": 1.9129442799199643e-05, "loss": 0.6162, "step": 8799 }, { "epoch": 0.2703283875526065, "grad_norm": 1.0458301305770874, "learning_rate": 1.912924557067699e-05, "loss": 0.5588, "step": 8800 }, { "epoch": 0.2703591066875557, "grad_norm": 0.36511552333831787, "learning_rate": 1.912904832083234e-05, "loss": 0.6937, "step": 8801 }, { "epoch": 0.2703898258225048, "grad_norm": 0.36752644181251526, "learning_rate": 1.9128851049666165e-05, "loss": 0.5755, "step": 8802 }, { "epoch": 0.270420544957454, "grad_norm": 0.3430335819721222, "learning_rate": 1.912865375717892e-05, "loss": 0.5566, "step": 8803 }, { "epoch": 0.27045126409240317, "grad_norm": 0.36485645174980164, "learning_rate": 1.912845644337107e-05, "loss": 0.5913, "step": 8804 }, { "epoch": 0.27048198322735234, "grad_norm": 0.3651985824108124, "learning_rate": 1.9128259108243064e-05, "loss": 0.6088, "step": 8805 }, { "epoch": 0.27051270236230146, "grad_norm": 0.3461216390132904, "learning_rate": 1.912806175179538e-05, "loss": 0.5424, "step": 8806 }, { "epoch": 0.27054342149725064, "grad_norm": 0.3235522210597992, "learning_rate": 1.9127864374028467e-05, "loss": 0.5912, "step": 8807 }, { "epoch": 0.2705741406321998, "grad_norm": 0.6234167814254761, "learning_rate": 1.9127666974942786e-05, "loss": 0.5318, "step": 8808 }, { "epoch": 0.27060485976714893, "grad_norm": 0.3207589089870453, "learning_rate": 1.9127469554538804e-05, "loss": 0.4937, "step": 8809 }, { "epoch": 0.2706355789020981, "grad_norm": 0.35057052969932556, "learning_rate": 1.9127272112816978e-05, "loss": 0.613, "step": 8810 }, { "epoch": 0.2706662980370473, "grad_norm": 0.3662264943122864, "learning_rate": 1.9127074649777775e-05, "loss": 0.6168, "step": 8811 }, { "epoch": 0.27069701717199646, "grad_norm": 0.3365170657634735, "learning_rate": 1.912687716542165e-05, "loss": 0.6207, "step": 8812 }, { "epoch": 0.2707277363069456, "grad_norm": 0.35136595368385315, "learning_rate": 1.9126679659749064e-05, "loss": 0.5171, "step": 8813 }, { "epoch": 0.27075845544189475, "grad_norm": 0.5461302995681763, "learning_rate": 1.912648213276048e-05, "loss": 0.6367, "step": 8814 }, { "epoch": 0.2707891745768439, "grad_norm": 0.33905112743377686, "learning_rate": 1.9126284584456362e-05, "loss": 0.5995, "step": 8815 }, { "epoch": 0.2708198937117931, "grad_norm": 0.3529479503631592, "learning_rate": 1.9126087014837165e-05, "loss": 0.5837, "step": 8816 }, { "epoch": 0.2708506128467422, "grad_norm": 0.3347271978855133, "learning_rate": 1.9125889423903357e-05, "loss": 0.5695, "step": 8817 }, { "epoch": 0.2708813319816914, "grad_norm": 0.4054874777793884, "learning_rate": 1.9125691811655397e-05, "loss": 0.6749, "step": 8818 }, { "epoch": 0.2709120511166406, "grad_norm": 0.3560183048248291, "learning_rate": 1.9125494178093748e-05, "loss": 0.5195, "step": 8819 }, { "epoch": 0.2709427702515897, "grad_norm": 0.33188876509666443, "learning_rate": 1.9125296523218866e-05, "loss": 0.6446, "step": 8820 }, { "epoch": 0.27097348938653887, "grad_norm": 0.34257251024246216, "learning_rate": 1.912509884703122e-05, "loss": 0.6093, "step": 8821 }, { "epoch": 0.27100420852148804, "grad_norm": 0.3774929344654083, "learning_rate": 1.9124901149531266e-05, "loss": 0.6387, "step": 8822 }, { "epoch": 0.2710349276564372, "grad_norm": 0.3429766595363617, "learning_rate": 1.912470343071947e-05, "loss": 0.616, "step": 8823 }, { "epoch": 0.27106564679138634, "grad_norm": 0.4031590223312378, "learning_rate": 1.9124505690596292e-05, "loss": 0.65, "step": 8824 }, { "epoch": 0.2710963659263355, "grad_norm": 0.3609292805194855, "learning_rate": 1.9124307929162192e-05, "loss": 0.5913, "step": 8825 }, { "epoch": 0.2711270850612847, "grad_norm": 0.3417905867099762, "learning_rate": 1.9124110146417637e-05, "loss": 0.6182, "step": 8826 }, { "epoch": 0.2711578041962338, "grad_norm": 0.3306379020214081, "learning_rate": 1.912391234236308e-05, "loss": 0.5472, "step": 8827 }, { "epoch": 0.271188523331183, "grad_norm": 0.35126590728759766, "learning_rate": 1.9123714516998993e-05, "loss": 0.5891, "step": 8828 }, { "epoch": 0.27121924246613216, "grad_norm": 0.35027673840522766, "learning_rate": 1.9123516670325833e-05, "loss": 0.5746, "step": 8829 }, { "epoch": 0.27124996160108134, "grad_norm": 0.3246749937534332, "learning_rate": 1.912331880234406e-05, "loss": 0.6189, "step": 8830 }, { "epoch": 0.27128068073603046, "grad_norm": 0.354961633682251, "learning_rate": 1.9123120913054142e-05, "loss": 0.6643, "step": 8831 }, { "epoch": 0.27131139987097963, "grad_norm": 0.3105543851852417, "learning_rate": 1.9122923002456537e-05, "loss": 0.496, "step": 8832 }, { "epoch": 0.2713421190059288, "grad_norm": 0.3143341839313507, "learning_rate": 1.9122725070551706e-05, "loss": 0.5558, "step": 8833 }, { "epoch": 0.2713728381408779, "grad_norm": 0.47411176562309265, "learning_rate": 1.9122527117340114e-05, "loss": 0.5882, "step": 8834 }, { "epoch": 0.2714035572758271, "grad_norm": 0.35616979002952576, "learning_rate": 1.9122329142822224e-05, "loss": 0.5367, "step": 8835 }, { "epoch": 0.2714342764107763, "grad_norm": 0.5545275807380676, "learning_rate": 1.9122131146998498e-05, "loss": 0.5933, "step": 8836 }, { "epoch": 0.27146499554572545, "grad_norm": 0.5083135962486267, "learning_rate": 1.91219331298694e-05, "loss": 0.6129, "step": 8837 }, { "epoch": 0.27149571468067457, "grad_norm": 0.33757463097572327, "learning_rate": 1.9121735091435387e-05, "loss": 0.5408, "step": 8838 }, { "epoch": 0.27152643381562375, "grad_norm": 0.41185396909713745, "learning_rate": 1.9121537031696923e-05, "loss": 0.6395, "step": 8839 }, { "epoch": 0.2715571529505729, "grad_norm": 0.32927656173706055, "learning_rate": 1.9121338950654472e-05, "loss": 0.5897, "step": 8840 }, { "epoch": 0.2715878720855221, "grad_norm": 0.3563174605369568, "learning_rate": 1.91211408483085e-05, "loss": 0.599, "step": 8841 }, { "epoch": 0.2716185912204712, "grad_norm": 0.3249050974845886, "learning_rate": 1.9120942724659465e-05, "loss": 0.5756, "step": 8842 }, { "epoch": 0.2716493103554204, "grad_norm": 0.34877511858940125, "learning_rate": 1.9120744579707833e-05, "loss": 0.7053, "step": 8843 }, { "epoch": 0.27168002949036957, "grad_norm": 0.40192946791648865, "learning_rate": 1.912054641345406e-05, "loss": 0.6363, "step": 8844 }, { "epoch": 0.2717107486253187, "grad_norm": 0.33977794647216797, "learning_rate": 1.912034822589862e-05, "loss": 0.6081, "step": 8845 }, { "epoch": 0.27174146776026786, "grad_norm": 0.34973058104515076, "learning_rate": 1.9120150017041968e-05, "loss": 0.4995, "step": 8846 }, { "epoch": 0.27177218689521704, "grad_norm": 0.3722167909145355, "learning_rate": 1.9119951786884568e-05, "loss": 0.6355, "step": 8847 }, { "epoch": 0.2718029060301662, "grad_norm": 0.3560855984687805, "learning_rate": 1.911975353542688e-05, "loss": 0.6365, "step": 8848 }, { "epoch": 0.27183362516511533, "grad_norm": 0.31516897678375244, "learning_rate": 1.9119555262669375e-05, "loss": 0.5073, "step": 8849 }, { "epoch": 0.2718643443000645, "grad_norm": 0.3414231538772583, "learning_rate": 1.9119356968612514e-05, "loss": 0.5917, "step": 8850 }, { "epoch": 0.2718950634350137, "grad_norm": 0.3458062708377838, "learning_rate": 1.9119158653256752e-05, "loss": 0.5908, "step": 8851 }, { "epoch": 0.2719257825699628, "grad_norm": 0.348469614982605, "learning_rate": 1.9118960316602564e-05, "loss": 0.6038, "step": 8852 }, { "epoch": 0.271956501704912, "grad_norm": 0.3256475627422333, "learning_rate": 1.9118761958650403e-05, "loss": 0.5809, "step": 8853 }, { "epoch": 0.27198722083986115, "grad_norm": 0.31291434168815613, "learning_rate": 1.9118563579400737e-05, "loss": 0.5687, "step": 8854 }, { "epoch": 0.27201793997481033, "grad_norm": 0.34046798944473267, "learning_rate": 1.911836517885403e-05, "loss": 0.6351, "step": 8855 }, { "epoch": 0.27204865910975945, "grad_norm": 0.4025737941265106, "learning_rate": 1.9118166757010743e-05, "loss": 0.5983, "step": 8856 }, { "epoch": 0.2720793782447086, "grad_norm": 0.3714946508407593, "learning_rate": 1.9117968313871343e-05, "loss": 0.5832, "step": 8857 }, { "epoch": 0.2721100973796578, "grad_norm": 0.39165204763412476, "learning_rate": 1.9117769849436292e-05, "loss": 0.6649, "step": 8858 }, { "epoch": 0.272140816514607, "grad_norm": 0.3350888788700104, "learning_rate": 1.911757136370605e-05, "loss": 0.6307, "step": 8859 }, { "epoch": 0.2721715356495561, "grad_norm": 0.3730376362800598, "learning_rate": 1.9117372856681084e-05, "loss": 0.6646, "step": 8860 }, { "epoch": 0.27220225478450527, "grad_norm": 0.3719291090965271, "learning_rate": 1.9117174328361856e-05, "loss": 0.5729, "step": 8861 }, { "epoch": 0.27223297391945445, "grad_norm": 0.3241090178489685, "learning_rate": 1.9116975778748832e-05, "loss": 0.5873, "step": 8862 }, { "epoch": 0.27226369305440357, "grad_norm": 0.3794568181037903, "learning_rate": 1.9116777207842475e-05, "loss": 0.6015, "step": 8863 }, { "epoch": 0.27229441218935274, "grad_norm": 0.3454555571079254, "learning_rate": 1.9116578615643247e-05, "loss": 0.5825, "step": 8864 }, { "epoch": 0.2723251313243019, "grad_norm": 0.32911139726638794, "learning_rate": 1.9116380002151614e-05, "loss": 0.5721, "step": 8865 }, { "epoch": 0.2723558504592511, "grad_norm": 0.31994473934173584, "learning_rate": 1.9116181367368035e-05, "loss": 0.5695, "step": 8866 }, { "epoch": 0.2723865695942002, "grad_norm": 0.3712998330593109, "learning_rate": 1.9115982711292982e-05, "loss": 0.5074, "step": 8867 }, { "epoch": 0.2724172887291494, "grad_norm": 0.379839152097702, "learning_rate": 1.9115784033926912e-05, "loss": 0.5021, "step": 8868 }, { "epoch": 0.27244800786409856, "grad_norm": 0.3399556875228882, "learning_rate": 1.9115585335270294e-05, "loss": 0.5864, "step": 8869 }, { "epoch": 0.2724787269990477, "grad_norm": 0.3710068464279175, "learning_rate": 1.911538661532359e-05, "loss": 0.5488, "step": 8870 }, { "epoch": 0.27250944613399686, "grad_norm": 0.3313765525817871, "learning_rate": 1.9115187874087265e-05, "loss": 0.6427, "step": 8871 }, { "epoch": 0.27254016526894603, "grad_norm": 0.34828439354896545, "learning_rate": 1.9114989111561777e-05, "loss": 0.6077, "step": 8872 }, { "epoch": 0.2725708844038952, "grad_norm": 0.41461580991744995, "learning_rate": 1.91147903277476e-05, "loss": 0.5607, "step": 8873 }, { "epoch": 0.27260160353884433, "grad_norm": 0.3667692542076111, "learning_rate": 1.911459152264519e-05, "loss": 0.615, "step": 8874 }, { "epoch": 0.2726323226737935, "grad_norm": 0.3691164255142212, "learning_rate": 1.9114392696255017e-05, "loss": 0.603, "step": 8875 }, { "epoch": 0.2726630418087427, "grad_norm": 0.42669859528541565, "learning_rate": 1.9114193848577544e-05, "loss": 0.6272, "step": 8876 }, { "epoch": 0.2726937609436918, "grad_norm": 0.35573604702949524, "learning_rate": 1.9113994979613235e-05, "loss": 0.5425, "step": 8877 }, { "epoch": 0.272724480078641, "grad_norm": 0.3659326136112213, "learning_rate": 1.9113796089362554e-05, "loss": 0.6159, "step": 8878 }, { "epoch": 0.27275519921359015, "grad_norm": 0.32208549976348877, "learning_rate": 1.9113597177825966e-05, "loss": 0.5808, "step": 8879 }, { "epoch": 0.2727859183485393, "grad_norm": 0.3187258541584015, "learning_rate": 1.9113398245003935e-05, "loss": 0.4718, "step": 8880 }, { "epoch": 0.27281663748348844, "grad_norm": 0.3943973481655121, "learning_rate": 1.911319929089692e-05, "loss": 0.5462, "step": 8881 }, { "epoch": 0.2728473566184376, "grad_norm": 0.36336538195610046, "learning_rate": 1.91130003155054e-05, "loss": 0.5962, "step": 8882 }, { "epoch": 0.2728780757533868, "grad_norm": 0.35019150376319885, "learning_rate": 1.911280131882983e-05, "loss": 0.6469, "step": 8883 }, { "epoch": 0.27290879488833597, "grad_norm": 0.34080177545547485, "learning_rate": 1.9112602300870673e-05, "loss": 0.5473, "step": 8884 }, { "epoch": 0.2729395140232851, "grad_norm": 0.33876243233680725, "learning_rate": 1.9112403261628398e-05, "loss": 0.604, "step": 8885 }, { "epoch": 0.27297023315823427, "grad_norm": 0.3503236472606659, "learning_rate": 1.9112204201103468e-05, "loss": 0.5638, "step": 8886 }, { "epoch": 0.27300095229318344, "grad_norm": 0.33791080117225647, "learning_rate": 1.9112005119296347e-05, "loss": 0.7053, "step": 8887 }, { "epoch": 0.27303167142813256, "grad_norm": 0.3821645677089691, "learning_rate": 1.9111806016207506e-05, "loss": 0.6307, "step": 8888 }, { "epoch": 0.27306239056308174, "grad_norm": 0.3523755669593811, "learning_rate": 1.9111606891837404e-05, "loss": 0.5899, "step": 8889 }, { "epoch": 0.2730931096980309, "grad_norm": 0.39738741517066956, "learning_rate": 1.9111407746186506e-05, "loss": 0.5879, "step": 8890 }, { "epoch": 0.2731238288329801, "grad_norm": 0.33360692858695984, "learning_rate": 1.911120857925528e-05, "loss": 0.6026, "step": 8891 }, { "epoch": 0.2731545479679292, "grad_norm": 0.38408562541007996, "learning_rate": 1.9111009391044197e-05, "loss": 0.5351, "step": 8892 }, { "epoch": 0.2731852671028784, "grad_norm": 0.34869739413261414, "learning_rate": 1.9110810181553703e-05, "loss": 0.6113, "step": 8893 }, { "epoch": 0.27321598623782756, "grad_norm": 0.956933319568634, "learning_rate": 1.9110610950784285e-05, "loss": 0.4808, "step": 8894 }, { "epoch": 0.2732467053727767, "grad_norm": 0.3162194490432739, "learning_rate": 1.9110411698736393e-05, "loss": 0.575, "step": 8895 }, { "epoch": 0.27327742450772585, "grad_norm": 0.33804410696029663, "learning_rate": 1.9110212425410502e-05, "loss": 0.4935, "step": 8896 }, { "epoch": 0.273308143642675, "grad_norm": 0.35966956615448, "learning_rate": 1.911001313080707e-05, "loss": 0.6304, "step": 8897 }, { "epoch": 0.2733388627776242, "grad_norm": 0.31171077489852905, "learning_rate": 1.910981381492657e-05, "loss": 0.6165, "step": 8898 }, { "epoch": 0.2733695819125733, "grad_norm": 0.3354741036891937, "learning_rate": 1.910961447776946e-05, "loss": 0.5143, "step": 8899 }, { "epoch": 0.2734003010475225, "grad_norm": 0.34670934081077576, "learning_rate": 1.9109415119336213e-05, "loss": 0.542, "step": 8900 }, { "epoch": 0.2734310201824717, "grad_norm": 0.36840805411338806, "learning_rate": 1.9109215739627287e-05, "loss": 0.5795, "step": 8901 }, { "epoch": 0.27346173931742085, "grad_norm": 0.40598446130752563, "learning_rate": 1.9109016338643152e-05, "loss": 0.5658, "step": 8902 }, { "epoch": 0.27349245845236997, "grad_norm": 0.3738972544670105, "learning_rate": 1.9108816916384278e-05, "loss": 0.605, "step": 8903 }, { "epoch": 0.27352317758731914, "grad_norm": 0.39003321528434753, "learning_rate": 1.910861747285112e-05, "loss": 0.6879, "step": 8904 }, { "epoch": 0.2735538967222683, "grad_norm": 0.34234437346458435, "learning_rate": 1.9108418008044154e-05, "loss": 0.5735, "step": 8905 }, { "epoch": 0.27358461585721744, "grad_norm": 0.3420734107494354, "learning_rate": 1.910821852196384e-05, "loss": 0.6142, "step": 8906 }, { "epoch": 0.2736153349921666, "grad_norm": 0.32506605982780457, "learning_rate": 1.9108019014610643e-05, "loss": 0.5248, "step": 8907 }, { "epoch": 0.2736460541271158, "grad_norm": 0.3793134093284607, "learning_rate": 1.9107819485985034e-05, "loss": 0.5833, "step": 8908 }, { "epoch": 0.27367677326206497, "grad_norm": 0.4484535753726959, "learning_rate": 1.910761993608748e-05, "loss": 0.5195, "step": 8909 }, { "epoch": 0.2737074923970141, "grad_norm": 0.3436616063117981, "learning_rate": 1.910742036491844e-05, "loss": 0.5764, "step": 8910 }, { "epoch": 0.27373821153196326, "grad_norm": 0.38640061020851135, "learning_rate": 1.910722077247838e-05, "loss": 0.6554, "step": 8911 }, { "epoch": 0.27376893066691244, "grad_norm": 0.35989174246788025, "learning_rate": 1.9107021158767774e-05, "loss": 0.6063, "step": 8912 }, { "epoch": 0.27379964980186156, "grad_norm": 0.37530404329299927, "learning_rate": 1.9106821523787084e-05, "loss": 0.5986, "step": 8913 }, { "epoch": 0.27383036893681073, "grad_norm": 0.30335140228271484, "learning_rate": 1.9106621867536774e-05, "loss": 0.5418, "step": 8914 }, { "epoch": 0.2738610880717599, "grad_norm": 0.34579136967658997, "learning_rate": 1.9106422190017318e-05, "loss": 0.5699, "step": 8915 }, { "epoch": 0.2738918072067091, "grad_norm": 0.3723529279232025, "learning_rate": 1.9106222491229172e-05, "loss": 0.5195, "step": 8916 }, { "epoch": 0.2739225263416582, "grad_norm": 0.39239197969436646, "learning_rate": 1.910602277117281e-05, "loss": 0.6141, "step": 8917 }, { "epoch": 0.2739532454766074, "grad_norm": 0.3132525682449341, "learning_rate": 1.910582302984869e-05, "loss": 0.6001, "step": 8918 }, { "epoch": 0.27398396461155655, "grad_norm": 0.36027947068214417, "learning_rate": 1.910562326725729e-05, "loss": 0.5879, "step": 8919 }, { "epoch": 0.27401468374650567, "grad_norm": 0.3734198808670044, "learning_rate": 1.9105423483399072e-05, "loss": 0.5794, "step": 8920 }, { "epoch": 0.27404540288145485, "grad_norm": 0.3293069005012512, "learning_rate": 1.9105223678274498e-05, "loss": 0.6303, "step": 8921 }, { "epoch": 0.274076122016404, "grad_norm": 0.3279346227645874, "learning_rate": 1.910502385188404e-05, "loss": 0.5793, "step": 8922 }, { "epoch": 0.2741068411513532, "grad_norm": 0.3261664807796478, "learning_rate": 1.9104824004228164e-05, "loss": 0.5587, "step": 8923 }, { "epoch": 0.2741375602863023, "grad_norm": 0.3696810007095337, "learning_rate": 1.9104624135307332e-05, "loss": 0.5782, "step": 8924 }, { "epoch": 0.2741682794212515, "grad_norm": 0.3383024036884308, "learning_rate": 1.9104424245122016e-05, "loss": 0.5871, "step": 8925 }, { "epoch": 0.27419899855620067, "grad_norm": 0.35828855633735657, "learning_rate": 1.9104224333672687e-05, "loss": 0.5553, "step": 8926 }, { "epoch": 0.27422971769114984, "grad_norm": 0.388014554977417, "learning_rate": 1.9104024400959798e-05, "loss": 0.6547, "step": 8927 }, { "epoch": 0.27426043682609896, "grad_norm": 0.355530709028244, "learning_rate": 1.910382444698383e-05, "loss": 0.5429, "step": 8928 }, { "epoch": 0.27429115596104814, "grad_norm": 0.3389267325401306, "learning_rate": 1.910362447174524e-05, "loss": 0.5981, "step": 8929 }, { "epoch": 0.2743218750959973, "grad_norm": 0.3831421732902527, "learning_rate": 1.91034244752445e-05, "loss": 0.6034, "step": 8930 }, { "epoch": 0.27435259423094643, "grad_norm": 0.4831177294254303, "learning_rate": 1.9103224457482077e-05, "loss": 0.6503, "step": 8931 }, { "epoch": 0.2743833133658956, "grad_norm": 0.36687353253364563, "learning_rate": 1.910302441845844e-05, "loss": 0.5822, "step": 8932 }, { "epoch": 0.2744140325008448, "grad_norm": 0.34491580724716187, "learning_rate": 1.910282435817405e-05, "loss": 0.6385, "step": 8933 }, { "epoch": 0.27444475163579396, "grad_norm": 0.3449207544326782, "learning_rate": 1.910262427662938e-05, "loss": 0.6804, "step": 8934 }, { "epoch": 0.2744754707707431, "grad_norm": 0.5267018675804138, "learning_rate": 1.9102424173824894e-05, "loss": 0.6248, "step": 8935 }, { "epoch": 0.27450618990569225, "grad_norm": 0.3400757312774658, "learning_rate": 1.9102224049761062e-05, "loss": 0.6319, "step": 8936 }, { "epoch": 0.27453690904064143, "grad_norm": 0.3299473524093628, "learning_rate": 1.910202390443835e-05, "loss": 0.577, "step": 8937 }, { "epoch": 0.27456762817559055, "grad_norm": 0.3763038218021393, "learning_rate": 1.9101823737857224e-05, "loss": 0.6289, "step": 8938 }, { "epoch": 0.2745983473105397, "grad_norm": 0.3190712630748749, "learning_rate": 1.9101623550018154e-05, "loss": 0.5398, "step": 8939 }, { "epoch": 0.2746290664454889, "grad_norm": 0.3233368992805481, "learning_rate": 1.910142334092161e-05, "loss": 0.5982, "step": 8940 }, { "epoch": 0.2746597855804381, "grad_norm": 0.36336877942085266, "learning_rate": 1.910122311056805e-05, "loss": 0.4736, "step": 8941 }, { "epoch": 0.2746905047153872, "grad_norm": 0.3668202757835388, "learning_rate": 1.910102285895795e-05, "loss": 0.6218, "step": 8942 }, { "epoch": 0.27472122385033637, "grad_norm": 0.3294389247894287, "learning_rate": 1.9100822586091775e-05, "loss": 0.5677, "step": 8943 }, { "epoch": 0.27475194298528555, "grad_norm": 0.43302157521247864, "learning_rate": 1.9100622291969995e-05, "loss": 0.5536, "step": 8944 }, { "epoch": 0.2747826621202347, "grad_norm": 0.35770484805107117, "learning_rate": 1.9100421976593076e-05, "loss": 0.5653, "step": 8945 }, { "epoch": 0.27481338125518384, "grad_norm": 0.32942259311676025, "learning_rate": 1.9100221639961484e-05, "loss": 0.5625, "step": 8946 }, { "epoch": 0.274844100390133, "grad_norm": 0.3709118068218231, "learning_rate": 1.910002128207569e-05, "loss": 0.5742, "step": 8947 }, { "epoch": 0.2748748195250822, "grad_norm": 0.3394337594509125, "learning_rate": 1.909982090293616e-05, "loss": 0.6551, "step": 8948 }, { "epoch": 0.2749055386600313, "grad_norm": 0.3434814214706421, "learning_rate": 1.9099620502543364e-05, "loss": 0.5231, "step": 8949 }, { "epoch": 0.2749362577949805, "grad_norm": 0.3574450612068176, "learning_rate": 1.909942008089777e-05, "loss": 0.5682, "step": 8950 }, { "epoch": 0.27496697692992966, "grad_norm": 0.33513689041137695, "learning_rate": 1.9099219637999843e-05, "loss": 0.5638, "step": 8951 }, { "epoch": 0.27499769606487884, "grad_norm": 0.3504103422164917, "learning_rate": 1.9099019173850053e-05, "loss": 0.5926, "step": 8952 }, { "epoch": 0.27502841519982796, "grad_norm": 0.3579115569591522, "learning_rate": 1.9098818688448867e-05, "loss": 0.5915, "step": 8953 }, { "epoch": 0.27505913433477713, "grad_norm": 0.39586082100868225, "learning_rate": 1.9098618181796757e-05, "loss": 0.5457, "step": 8954 }, { "epoch": 0.2750898534697263, "grad_norm": 0.3422037363052368, "learning_rate": 1.909841765389419e-05, "loss": 0.5574, "step": 8955 }, { "epoch": 0.27512057260467543, "grad_norm": 0.5863453149795532, "learning_rate": 1.909821710474163e-05, "loss": 0.6255, "step": 8956 }, { "epoch": 0.2751512917396246, "grad_norm": 0.37905779480934143, "learning_rate": 1.909801653433955e-05, "loss": 0.5849, "step": 8957 }, { "epoch": 0.2751820108745738, "grad_norm": 0.3327501714229584, "learning_rate": 1.9097815942688415e-05, "loss": 0.5834, "step": 8958 }, { "epoch": 0.27521273000952295, "grad_norm": 0.3520163595676422, "learning_rate": 1.9097615329788696e-05, "loss": 0.568, "step": 8959 }, { "epoch": 0.2752434491444721, "grad_norm": 0.3394906222820282, "learning_rate": 1.9097414695640864e-05, "loss": 0.545, "step": 8960 }, { "epoch": 0.27527416827942125, "grad_norm": 0.334539532661438, "learning_rate": 1.9097214040245384e-05, "loss": 0.5752, "step": 8961 }, { "epoch": 0.2753048874143704, "grad_norm": 0.3546842932701111, "learning_rate": 1.9097013363602722e-05, "loss": 0.6924, "step": 8962 }, { "epoch": 0.27533560654931954, "grad_norm": 0.32422614097595215, "learning_rate": 1.909681266571335e-05, "loss": 0.6054, "step": 8963 }, { "epoch": 0.2753663256842687, "grad_norm": 0.4164823591709137, "learning_rate": 1.909661194657774e-05, "loss": 0.5769, "step": 8964 }, { "epoch": 0.2753970448192179, "grad_norm": 0.3924878239631653, "learning_rate": 1.9096411206196357e-05, "loss": 0.6011, "step": 8965 }, { "epoch": 0.27542776395416707, "grad_norm": 0.3801572918891907, "learning_rate": 1.9096210444569666e-05, "loss": 0.6116, "step": 8966 }, { "epoch": 0.2754584830891162, "grad_norm": 0.37226173281669617, "learning_rate": 1.9096009661698147e-05, "loss": 0.6168, "step": 8967 }, { "epoch": 0.27548920222406537, "grad_norm": 0.34011054039001465, "learning_rate": 1.9095808857582258e-05, "loss": 0.5994, "step": 8968 }, { "epoch": 0.27551992135901454, "grad_norm": 0.3680865466594696, "learning_rate": 1.9095608032222474e-05, "loss": 0.6313, "step": 8969 }, { "epoch": 0.2755506404939637, "grad_norm": 0.3725404441356659, "learning_rate": 1.909540718561926e-05, "loss": 0.5479, "step": 8970 }, { "epoch": 0.27558135962891284, "grad_norm": 0.5812805891036987, "learning_rate": 1.909520631777309e-05, "loss": 0.5515, "step": 8971 }, { "epoch": 0.275612078763862, "grad_norm": 0.3492903411388397, "learning_rate": 1.9095005428684428e-05, "loss": 0.5824, "step": 8972 }, { "epoch": 0.2756427978988112, "grad_norm": 0.3500000536441803, "learning_rate": 1.9094804518353748e-05, "loss": 0.589, "step": 8973 }, { "epoch": 0.2756735170337603, "grad_norm": 0.3428811728954315, "learning_rate": 1.9094603586781515e-05, "loss": 0.5063, "step": 8974 }, { "epoch": 0.2757042361687095, "grad_norm": 0.4418536126613617, "learning_rate": 1.90944026339682e-05, "loss": 0.6844, "step": 8975 }, { "epoch": 0.27573495530365866, "grad_norm": 0.3432382345199585, "learning_rate": 1.9094201659914272e-05, "loss": 0.5618, "step": 8976 }, { "epoch": 0.27576567443860783, "grad_norm": 0.3306511640548706, "learning_rate": 1.90940006646202e-05, "loss": 0.519, "step": 8977 }, { "epoch": 0.27579639357355695, "grad_norm": 0.3258973956108093, "learning_rate": 1.909379964808646e-05, "loss": 0.6326, "step": 8978 }, { "epoch": 0.27582711270850613, "grad_norm": 0.3203446567058563, "learning_rate": 1.909359861031351e-05, "loss": 0.5378, "step": 8979 }, { "epoch": 0.2758578318434553, "grad_norm": 0.4642522931098938, "learning_rate": 1.9093397551301827e-05, "loss": 0.5933, "step": 8980 }, { "epoch": 0.2758885509784044, "grad_norm": 0.3560873568058014, "learning_rate": 1.909319647105188e-05, "loss": 0.6887, "step": 8981 }, { "epoch": 0.2759192701133536, "grad_norm": 0.47500696778297424, "learning_rate": 1.909299536956414e-05, "loss": 0.6705, "step": 8982 }, { "epoch": 0.2759499892483028, "grad_norm": 0.3600247800350189, "learning_rate": 1.909279424683907e-05, "loss": 0.5295, "step": 8983 }, { "epoch": 0.27598070838325195, "grad_norm": 0.3570362627506256, "learning_rate": 1.9092593102877144e-05, "loss": 0.5187, "step": 8984 }, { "epoch": 0.27601142751820107, "grad_norm": 0.31439149379730225, "learning_rate": 1.9092391937678834e-05, "loss": 0.6392, "step": 8985 }, { "epoch": 0.27604214665315024, "grad_norm": 0.36509600281715393, "learning_rate": 1.9092190751244606e-05, "loss": 0.6676, "step": 8986 }, { "epoch": 0.2760728657880994, "grad_norm": 0.3099333643913269, "learning_rate": 1.909198954357493e-05, "loss": 0.4955, "step": 8987 }, { "epoch": 0.27610358492304854, "grad_norm": 0.35008206963539124, "learning_rate": 1.909178831467028e-05, "loss": 0.5912, "step": 8988 }, { "epoch": 0.2761343040579977, "grad_norm": 0.34117862582206726, "learning_rate": 1.9091587064531127e-05, "loss": 0.6502, "step": 8989 }, { "epoch": 0.2761650231929469, "grad_norm": 0.33481520414352417, "learning_rate": 1.9091385793157932e-05, "loss": 0.5873, "step": 8990 }, { "epoch": 0.27619574232789607, "grad_norm": 0.3501184284687042, "learning_rate": 1.9091184500551173e-05, "loss": 0.5376, "step": 8991 }, { "epoch": 0.2762264614628452, "grad_norm": 0.32594314217567444, "learning_rate": 1.9090983186711318e-05, "loss": 0.6628, "step": 8992 }, { "epoch": 0.27625718059779436, "grad_norm": 0.36819323897361755, "learning_rate": 1.9090781851638833e-05, "loss": 0.6096, "step": 8993 }, { "epoch": 0.27628789973274354, "grad_norm": 0.33233359456062317, "learning_rate": 1.9090580495334193e-05, "loss": 0.6071, "step": 8994 }, { "epoch": 0.2763186188676927, "grad_norm": 0.3059450387954712, "learning_rate": 1.909037911779787e-05, "loss": 0.6026, "step": 8995 }, { "epoch": 0.27634933800264183, "grad_norm": 0.33230817317962646, "learning_rate": 1.909017771903033e-05, "loss": 0.5778, "step": 8996 }, { "epoch": 0.276380057137591, "grad_norm": 0.3581162989139557, "learning_rate": 1.9089976299032046e-05, "loss": 0.5676, "step": 8997 }, { "epoch": 0.2764107762725402, "grad_norm": 0.31263190507888794, "learning_rate": 1.908977485780349e-05, "loss": 0.6123, "step": 8998 }, { "epoch": 0.2764414954074893, "grad_norm": 0.34813234210014343, "learning_rate": 1.9089573395345127e-05, "loss": 0.5412, "step": 8999 }, { "epoch": 0.2764722145424385, "grad_norm": 0.46781453490257263, "learning_rate": 1.908937191165743e-05, "loss": 0.5505, "step": 9000 }, { "epoch": 0.27650293367738765, "grad_norm": 0.3854745328426361, "learning_rate": 1.9089170406740872e-05, "loss": 0.5897, "step": 9001 }, { "epoch": 0.2765336528123368, "grad_norm": 0.36361077427864075, "learning_rate": 1.9088968880595922e-05, "loss": 0.628, "step": 9002 }, { "epoch": 0.27656437194728595, "grad_norm": 0.3584422469139099, "learning_rate": 1.9088767333223045e-05, "loss": 0.5979, "step": 9003 }, { "epoch": 0.2765950910822351, "grad_norm": 0.3585317134857178, "learning_rate": 1.9088565764622723e-05, "loss": 0.6294, "step": 9004 }, { "epoch": 0.2766258102171843, "grad_norm": 0.41040074825286865, "learning_rate": 1.908836417479542e-05, "loss": 0.6446, "step": 9005 }, { "epoch": 0.2766565293521334, "grad_norm": 0.34213459491729736, "learning_rate": 1.9088162563741606e-05, "loss": 0.6356, "step": 9006 }, { "epoch": 0.2766872484870826, "grad_norm": 0.31852757930755615, "learning_rate": 1.9087960931461754e-05, "loss": 0.5922, "step": 9007 }, { "epoch": 0.27671796762203177, "grad_norm": 0.33009183406829834, "learning_rate": 1.9087759277956337e-05, "loss": 0.5775, "step": 9008 }, { "epoch": 0.27674868675698094, "grad_norm": 0.3288157880306244, "learning_rate": 1.9087557603225822e-05, "loss": 0.5656, "step": 9009 }, { "epoch": 0.27677940589193006, "grad_norm": 0.35060247778892517, "learning_rate": 1.9087355907270678e-05, "loss": 0.6007, "step": 9010 }, { "epoch": 0.27681012502687924, "grad_norm": 0.6472494602203369, "learning_rate": 1.9087154190091385e-05, "loss": 0.6286, "step": 9011 }, { "epoch": 0.2768408441618284, "grad_norm": 0.39090588688850403, "learning_rate": 1.9086952451688407e-05, "loss": 0.626, "step": 9012 }, { "epoch": 0.2768715632967776, "grad_norm": 0.3241627812385559, "learning_rate": 1.9086750692062216e-05, "loss": 0.5668, "step": 9013 }, { "epoch": 0.2769022824317267, "grad_norm": 0.3629131019115448, "learning_rate": 1.9086548911213286e-05, "loss": 0.5002, "step": 9014 }, { "epoch": 0.2769330015666759, "grad_norm": 0.3145436644554138, "learning_rate": 1.908634710914208e-05, "loss": 0.6012, "step": 9015 }, { "epoch": 0.27696372070162506, "grad_norm": 0.3382406532764435, "learning_rate": 1.9086145285849086e-05, "loss": 0.5573, "step": 9016 }, { "epoch": 0.2769944398365742, "grad_norm": 0.4025759696960449, "learning_rate": 1.908594344133476e-05, "loss": 0.5118, "step": 9017 }, { "epoch": 0.27702515897152336, "grad_norm": 0.3920857608318329, "learning_rate": 1.908574157559958e-05, "loss": 0.5965, "step": 9018 }, { "epoch": 0.27705587810647253, "grad_norm": 0.33328357338905334, "learning_rate": 1.9085539688644012e-05, "loss": 0.5735, "step": 9019 }, { "epoch": 0.2770865972414217, "grad_norm": 0.3722972869873047, "learning_rate": 1.9085337780468537e-05, "loss": 0.6733, "step": 9020 }, { "epoch": 0.2771173163763708, "grad_norm": 0.35463979840278625, "learning_rate": 1.9085135851073617e-05, "loss": 0.5368, "step": 9021 }, { "epoch": 0.27714803551132, "grad_norm": 0.7503504753112793, "learning_rate": 1.9084933900459724e-05, "loss": 0.4803, "step": 9022 }, { "epoch": 0.2771787546462692, "grad_norm": 0.35250139236450195, "learning_rate": 1.908473192862734e-05, "loss": 0.6552, "step": 9023 }, { "epoch": 0.2772094737812183, "grad_norm": 0.3554668128490448, "learning_rate": 1.908452993557693e-05, "loss": 0.5629, "step": 9024 }, { "epoch": 0.27724019291616747, "grad_norm": 0.35414043068885803, "learning_rate": 1.9084327921308965e-05, "loss": 0.5526, "step": 9025 }, { "epoch": 0.27727091205111665, "grad_norm": 0.33430102467536926, "learning_rate": 1.9084125885823916e-05, "loss": 0.5873, "step": 9026 }, { "epoch": 0.2773016311860658, "grad_norm": 0.3474169969558716, "learning_rate": 1.9083923829122258e-05, "loss": 0.5442, "step": 9027 }, { "epoch": 0.27733235032101494, "grad_norm": 0.32017382979393005, "learning_rate": 1.908372175120446e-05, "loss": 0.5905, "step": 9028 }, { "epoch": 0.2773630694559641, "grad_norm": 0.4072002172470093, "learning_rate": 1.9083519652070997e-05, "loss": 0.5705, "step": 9029 }, { "epoch": 0.2773937885909133, "grad_norm": 0.34388968348503113, "learning_rate": 1.9083317531722338e-05, "loss": 0.6537, "step": 9030 }, { "epoch": 0.2774245077258624, "grad_norm": 0.3410652279853821, "learning_rate": 1.908311539015896e-05, "loss": 0.5391, "step": 9031 }, { "epoch": 0.2774552268608116, "grad_norm": 0.3265860676765442, "learning_rate": 1.9082913227381325e-05, "loss": 0.6763, "step": 9032 }, { "epoch": 0.27748594599576076, "grad_norm": 0.32773032784461975, "learning_rate": 1.908271104338992e-05, "loss": 0.5321, "step": 9033 }, { "epoch": 0.27751666513070994, "grad_norm": 0.36354589462280273, "learning_rate": 1.9082508838185205e-05, "loss": 0.5701, "step": 9034 }, { "epoch": 0.27754738426565906, "grad_norm": 0.3671620190143585, "learning_rate": 1.9082306611767657e-05, "loss": 0.5136, "step": 9035 }, { "epoch": 0.27757810340060823, "grad_norm": 0.35858261585235596, "learning_rate": 1.9082104364137745e-05, "loss": 0.5851, "step": 9036 }, { "epoch": 0.2776088225355574, "grad_norm": 0.3182668685913086, "learning_rate": 1.9081902095295946e-05, "loss": 0.6237, "step": 9037 }, { "epoch": 0.2776395416705066, "grad_norm": 0.3546692132949829, "learning_rate": 1.908169980524273e-05, "loss": 0.5636, "step": 9038 }, { "epoch": 0.2776702608054557, "grad_norm": 0.2975437045097351, "learning_rate": 1.9081497493978568e-05, "loss": 0.4691, "step": 9039 }, { "epoch": 0.2777009799404049, "grad_norm": 0.33980146050453186, "learning_rate": 1.9081295161503937e-05, "loss": 0.585, "step": 9040 }, { "epoch": 0.27773169907535405, "grad_norm": 0.3226301670074463, "learning_rate": 1.9081092807819307e-05, "loss": 0.6345, "step": 9041 }, { "epoch": 0.2777624182103032, "grad_norm": 0.3364366292953491, "learning_rate": 1.908089043292515e-05, "loss": 0.6068, "step": 9042 }, { "epoch": 0.27779313734525235, "grad_norm": 0.4512809216976166, "learning_rate": 1.908068803682194e-05, "loss": 0.5981, "step": 9043 }, { "epoch": 0.2778238564802015, "grad_norm": 0.35016995668411255, "learning_rate": 1.9080485619510147e-05, "loss": 0.5707, "step": 9044 }, { "epoch": 0.2778545756151507, "grad_norm": 0.34459540247917175, "learning_rate": 1.9080283180990247e-05, "loss": 0.559, "step": 9045 }, { "epoch": 0.2778852947500998, "grad_norm": 0.3512503504753113, "learning_rate": 1.908008072126271e-05, "loss": 0.5704, "step": 9046 }, { "epoch": 0.277916013885049, "grad_norm": 0.35759225487709045, "learning_rate": 1.907987824032801e-05, "loss": 0.5564, "step": 9047 }, { "epoch": 0.27794673301999817, "grad_norm": 0.4252570867538452, "learning_rate": 1.9079675738186623e-05, "loss": 0.5988, "step": 9048 }, { "epoch": 0.2779774521549473, "grad_norm": 0.33365559577941895, "learning_rate": 1.907947321483902e-05, "loss": 0.6829, "step": 9049 }, { "epoch": 0.27800817128989647, "grad_norm": 0.4178057312965393, "learning_rate": 1.907927067028567e-05, "loss": 0.5362, "step": 9050 }, { "epoch": 0.27803889042484564, "grad_norm": 0.3512328863143921, "learning_rate": 1.907906810452705e-05, "loss": 0.5708, "step": 9051 }, { "epoch": 0.2780696095597948, "grad_norm": 0.37863969802856445, "learning_rate": 1.907886551756363e-05, "loss": 0.5807, "step": 9052 }, { "epoch": 0.27810032869474394, "grad_norm": 0.3397274911403656, "learning_rate": 1.907866290939589e-05, "loss": 0.6099, "step": 9053 }, { "epoch": 0.2781310478296931, "grad_norm": 0.4564332365989685, "learning_rate": 1.9078460280024296e-05, "loss": 0.5756, "step": 9054 }, { "epoch": 0.2781617669646423, "grad_norm": 0.358132004737854, "learning_rate": 1.9078257629449325e-05, "loss": 0.5948, "step": 9055 }, { "epoch": 0.27819248609959146, "grad_norm": 0.3633950650691986, "learning_rate": 1.907805495767145e-05, "loss": 0.5391, "step": 9056 }, { "epoch": 0.2782232052345406, "grad_norm": 0.32616159319877625, "learning_rate": 1.9077852264691143e-05, "loss": 0.5877, "step": 9057 }, { "epoch": 0.27825392436948976, "grad_norm": 0.49297791719436646, "learning_rate": 1.9077649550508877e-05, "loss": 0.5723, "step": 9058 }, { "epoch": 0.27828464350443893, "grad_norm": 0.32946455478668213, "learning_rate": 1.907744681512513e-05, "loss": 0.6174, "step": 9059 }, { "epoch": 0.27831536263938805, "grad_norm": 0.39921310544013977, "learning_rate": 1.9077244058540366e-05, "loss": 0.5907, "step": 9060 }, { "epoch": 0.27834608177433723, "grad_norm": 0.3172428607940674, "learning_rate": 1.907704128075507e-05, "loss": 0.5873, "step": 9061 }, { "epoch": 0.2783768009092864, "grad_norm": 0.34898245334625244, "learning_rate": 1.907683848176971e-05, "loss": 0.5857, "step": 9062 }, { "epoch": 0.2784075200442356, "grad_norm": 0.398285448551178, "learning_rate": 1.907663566158476e-05, "loss": 0.5958, "step": 9063 }, { "epoch": 0.2784382391791847, "grad_norm": 0.3771199584007263, "learning_rate": 1.907643282020069e-05, "loss": 0.5921, "step": 9064 }, { "epoch": 0.2784689583141339, "grad_norm": 0.337539404630661, "learning_rate": 1.9076229957617975e-05, "loss": 0.6756, "step": 9065 }, { "epoch": 0.27849967744908305, "grad_norm": 0.3611900806427002, "learning_rate": 1.9076027073837094e-05, "loss": 0.5611, "step": 9066 }, { "epoch": 0.27853039658403217, "grad_norm": 0.31272998452186584, "learning_rate": 1.907582416885852e-05, "loss": 0.6608, "step": 9067 }, { "epoch": 0.27856111571898134, "grad_norm": 0.4112411439418793, "learning_rate": 1.9075621242682724e-05, "loss": 0.6188, "step": 9068 }, { "epoch": 0.2785918348539305, "grad_norm": 0.40089571475982666, "learning_rate": 1.907541829531018e-05, "loss": 0.5852, "step": 9069 }, { "epoch": 0.2786225539888797, "grad_norm": 0.3860073983669281, "learning_rate": 1.9075215326741365e-05, "loss": 0.6317, "step": 9070 }, { "epoch": 0.2786532731238288, "grad_norm": 0.3245793879032135, "learning_rate": 1.9075012336976746e-05, "loss": 0.5854, "step": 9071 }, { "epoch": 0.278683992258778, "grad_norm": 0.36706921458244324, "learning_rate": 1.9074809326016805e-05, "loss": 0.6567, "step": 9072 }, { "epoch": 0.27871471139372717, "grad_norm": 0.36660969257354736, "learning_rate": 1.9074606293862013e-05, "loss": 0.5101, "step": 9073 }, { "epoch": 0.2787454305286763, "grad_norm": 0.4670531451702118, "learning_rate": 1.9074403240512843e-05, "loss": 0.6072, "step": 9074 }, { "epoch": 0.27877614966362546, "grad_norm": 0.3610023558139801, "learning_rate": 1.907420016596977e-05, "loss": 0.5562, "step": 9075 }, { "epoch": 0.27880686879857464, "grad_norm": 0.3565497398376465, "learning_rate": 1.907399707023327e-05, "loss": 0.5818, "step": 9076 }, { "epoch": 0.2788375879335238, "grad_norm": 0.34848126769065857, "learning_rate": 1.907379395330382e-05, "loss": 0.5453, "step": 9077 }, { "epoch": 0.27886830706847293, "grad_norm": 0.3282102644443512, "learning_rate": 1.9073590815181885e-05, "loss": 0.5333, "step": 9078 }, { "epoch": 0.2788990262034221, "grad_norm": 0.3239019513130188, "learning_rate": 1.907338765586795e-05, "loss": 0.5996, "step": 9079 }, { "epoch": 0.2789297453383713, "grad_norm": 0.33595916628837585, "learning_rate": 1.9073184475362478e-05, "loss": 0.6391, "step": 9080 }, { "epoch": 0.27896046447332046, "grad_norm": 0.3324783444404602, "learning_rate": 1.9072981273665954e-05, "loss": 0.6015, "step": 9081 }, { "epoch": 0.2789911836082696, "grad_norm": 0.31574615836143494, "learning_rate": 1.9072778050778847e-05, "loss": 0.5889, "step": 9082 }, { "epoch": 0.27902190274321875, "grad_norm": 0.32440051436424255, "learning_rate": 1.9072574806701633e-05, "loss": 0.6434, "step": 9083 }, { "epoch": 0.27905262187816793, "grad_norm": 0.4100591838359833, "learning_rate": 1.9072371541434788e-05, "loss": 0.6162, "step": 9084 }, { "epoch": 0.27908334101311705, "grad_norm": 0.3961588144302368, "learning_rate": 1.9072168254978787e-05, "loss": 0.5467, "step": 9085 }, { "epoch": 0.2791140601480662, "grad_norm": 0.3792514503002167, "learning_rate": 1.9071964947334104e-05, "loss": 0.5919, "step": 9086 }, { "epoch": 0.2791447792830154, "grad_norm": 0.362362265586853, "learning_rate": 1.9071761618501208e-05, "loss": 0.5418, "step": 9087 }, { "epoch": 0.2791754984179646, "grad_norm": 0.38976141810417175, "learning_rate": 1.9071558268480585e-05, "loss": 0.6041, "step": 9088 }, { "epoch": 0.2792062175529137, "grad_norm": 0.36411556601524353, "learning_rate": 1.90713548972727e-05, "loss": 0.6148, "step": 9089 }, { "epoch": 0.27923693668786287, "grad_norm": 0.39533731341362, "learning_rate": 1.9071151504878034e-05, "loss": 0.5414, "step": 9090 }, { "epoch": 0.27926765582281204, "grad_norm": 0.3917321264743805, "learning_rate": 1.907094809129706e-05, "loss": 0.5736, "step": 9091 }, { "epoch": 0.27929837495776116, "grad_norm": 0.3691883385181427, "learning_rate": 1.9070744656530254e-05, "loss": 0.5602, "step": 9092 }, { "epoch": 0.27932909409271034, "grad_norm": 0.3628910481929779, "learning_rate": 1.9070541200578088e-05, "loss": 0.5404, "step": 9093 }, { "epoch": 0.2793598132276595, "grad_norm": 0.5084805488586426, "learning_rate": 1.9070337723441042e-05, "loss": 0.6108, "step": 9094 }, { "epoch": 0.2793905323626087, "grad_norm": 0.4648604393005371, "learning_rate": 1.9070134225119584e-05, "loss": 0.5258, "step": 9095 }, { "epoch": 0.2794212514975578, "grad_norm": 0.3662586510181427, "learning_rate": 1.90699307056142e-05, "loss": 0.5614, "step": 9096 }, { "epoch": 0.279451970632507, "grad_norm": 0.32525962591171265, "learning_rate": 1.9069727164925357e-05, "loss": 0.5242, "step": 9097 }, { "epoch": 0.27948268976745616, "grad_norm": 0.3515898585319519, "learning_rate": 1.9069523603053534e-05, "loss": 0.5638, "step": 9098 }, { "epoch": 0.27951340890240534, "grad_norm": 0.34382545948028564, "learning_rate": 1.9069320019999205e-05, "loss": 0.6133, "step": 9099 }, { "epoch": 0.27954412803735446, "grad_norm": 0.35474342107772827, "learning_rate": 1.9069116415762843e-05, "loss": 0.5615, "step": 9100 }, { "epoch": 0.27957484717230363, "grad_norm": 0.3720458149909973, "learning_rate": 1.9068912790344928e-05, "loss": 0.5355, "step": 9101 }, { "epoch": 0.2796055663072528, "grad_norm": 0.3140740394592285, "learning_rate": 1.9068709143745937e-05, "loss": 0.5605, "step": 9102 }, { "epoch": 0.2796362854422019, "grad_norm": 0.32804426550865173, "learning_rate": 1.906850547596634e-05, "loss": 0.6037, "step": 9103 }, { "epoch": 0.2796670045771511, "grad_norm": 0.34563273191452026, "learning_rate": 1.9068301787006613e-05, "loss": 0.5979, "step": 9104 }, { "epoch": 0.2796977237121003, "grad_norm": 0.6360843777656555, "learning_rate": 1.9068098076867233e-05, "loss": 0.5531, "step": 9105 }, { "epoch": 0.27972844284704945, "grad_norm": 0.3664611876010895, "learning_rate": 1.906789434554868e-05, "loss": 0.5393, "step": 9106 }, { "epoch": 0.27975916198199857, "grad_norm": 0.33465975522994995, "learning_rate": 1.9067690593051426e-05, "loss": 0.6122, "step": 9107 }, { "epoch": 0.27978988111694775, "grad_norm": 0.3548470139503479, "learning_rate": 1.9067486819375948e-05, "loss": 0.6282, "step": 9108 }, { "epoch": 0.2798206002518969, "grad_norm": 0.33826085925102234, "learning_rate": 1.906728302452272e-05, "loss": 0.6089, "step": 9109 }, { "epoch": 0.27985131938684604, "grad_norm": 0.36325445771217346, "learning_rate": 1.906707920849222e-05, "loss": 0.6067, "step": 9110 }, { "epoch": 0.2798820385217952, "grad_norm": 0.3527682423591614, "learning_rate": 1.9066875371284922e-05, "loss": 0.5502, "step": 9111 }, { "epoch": 0.2799127576567444, "grad_norm": 0.5144418478012085, "learning_rate": 1.90666715129013e-05, "loss": 0.6132, "step": 9112 }, { "epoch": 0.27994347679169357, "grad_norm": 0.4337041676044464, "learning_rate": 1.906646763334184e-05, "loss": 0.5602, "step": 9113 }, { "epoch": 0.2799741959266427, "grad_norm": 0.41205838322639465, "learning_rate": 1.906626373260701e-05, "loss": 0.6153, "step": 9114 }, { "epoch": 0.28000491506159186, "grad_norm": 0.3950354754924774, "learning_rate": 1.9066059810697285e-05, "loss": 0.6055, "step": 9115 }, { "epoch": 0.28003563419654104, "grad_norm": 0.37157151103019714, "learning_rate": 1.9065855867613148e-05, "loss": 0.6045, "step": 9116 }, { "epoch": 0.28006635333149016, "grad_norm": 0.5323193073272705, "learning_rate": 1.906565190335507e-05, "loss": 0.5784, "step": 9117 }, { "epoch": 0.28009707246643933, "grad_norm": 0.3454533517360687, "learning_rate": 1.9065447917923527e-05, "loss": 0.5878, "step": 9118 }, { "epoch": 0.2801277916013885, "grad_norm": 0.38236916065216064, "learning_rate": 1.9065243911319e-05, "loss": 0.5991, "step": 9119 }, { "epoch": 0.2801585107363377, "grad_norm": 0.3339017629623413, "learning_rate": 1.9065039883541963e-05, "loss": 0.6436, "step": 9120 }, { "epoch": 0.2801892298712868, "grad_norm": 0.3213183581829071, "learning_rate": 1.9064835834592886e-05, "loss": 0.4852, "step": 9121 }, { "epoch": 0.280219949006236, "grad_norm": 0.3528851866722107, "learning_rate": 1.9064631764472258e-05, "loss": 0.5822, "step": 9122 }, { "epoch": 0.28025066814118516, "grad_norm": 0.37559354305267334, "learning_rate": 1.9064427673180547e-05, "loss": 0.5836, "step": 9123 }, { "epoch": 0.28028138727613433, "grad_norm": 0.3790963292121887, "learning_rate": 1.9064223560718233e-05, "loss": 0.5956, "step": 9124 }, { "epoch": 0.28031210641108345, "grad_norm": 0.36210086941719055, "learning_rate": 1.9064019427085794e-05, "loss": 0.5997, "step": 9125 }, { "epoch": 0.2803428255460326, "grad_norm": 0.37444207072257996, "learning_rate": 1.90638152722837e-05, "loss": 0.6471, "step": 9126 }, { "epoch": 0.2803735446809818, "grad_norm": 0.36174386739730835, "learning_rate": 1.906361109631244e-05, "loss": 0.5862, "step": 9127 }, { "epoch": 0.2804042638159309, "grad_norm": 0.38481345772743225, "learning_rate": 1.9063406899172478e-05, "loss": 0.663, "step": 9128 }, { "epoch": 0.2804349829508801, "grad_norm": 0.32650884985923767, "learning_rate": 1.9063202680864293e-05, "loss": 0.5601, "step": 9129 }, { "epoch": 0.28046570208582927, "grad_norm": 0.31207475066185, "learning_rate": 1.906299844138837e-05, "loss": 0.571, "step": 9130 }, { "epoch": 0.28049642122077845, "grad_norm": 0.3309776782989502, "learning_rate": 1.906279418074518e-05, "loss": 0.5732, "step": 9131 }, { "epoch": 0.28052714035572757, "grad_norm": 0.3705063462257385, "learning_rate": 1.90625898989352e-05, "loss": 0.5928, "step": 9132 }, { "epoch": 0.28055785949067674, "grad_norm": 0.31914445757865906, "learning_rate": 1.906238559595891e-05, "loss": 0.5602, "step": 9133 }, { "epoch": 0.2805885786256259, "grad_norm": 0.3247717022895813, "learning_rate": 1.9062181271816783e-05, "loss": 0.488, "step": 9134 }, { "epoch": 0.28061929776057504, "grad_norm": 0.4837476909160614, "learning_rate": 1.9061976926509302e-05, "loss": 0.5393, "step": 9135 }, { "epoch": 0.2806500168955242, "grad_norm": 0.36529341340065, "learning_rate": 1.906177256003694e-05, "loss": 0.5759, "step": 9136 }, { "epoch": 0.2806807360304734, "grad_norm": 0.3611502945423126, "learning_rate": 1.9061568172400176e-05, "loss": 0.5475, "step": 9137 }, { "epoch": 0.28071145516542256, "grad_norm": 0.308287113904953, "learning_rate": 1.9061363763599486e-05, "loss": 0.5794, "step": 9138 }, { "epoch": 0.2807421743003717, "grad_norm": 0.33967965841293335, "learning_rate": 1.9061159333635345e-05, "loss": 0.5654, "step": 9139 }, { "epoch": 0.28077289343532086, "grad_norm": 0.32252803444862366, "learning_rate": 1.906095488250824e-05, "loss": 0.638, "step": 9140 }, { "epoch": 0.28080361257027003, "grad_norm": 0.3256196975708008, "learning_rate": 1.9060750410218634e-05, "loss": 0.6142, "step": 9141 }, { "epoch": 0.28083433170521915, "grad_norm": 0.36493703722953796, "learning_rate": 1.906054591676702e-05, "loss": 0.6018, "step": 9142 }, { "epoch": 0.28086505084016833, "grad_norm": 0.39330241084098816, "learning_rate": 1.9060341402153866e-05, "loss": 0.6642, "step": 9143 }, { "epoch": 0.2808957699751175, "grad_norm": 0.3678169250488281, "learning_rate": 1.906013686637965e-05, "loss": 0.5573, "step": 9144 }, { "epoch": 0.2809264891100667, "grad_norm": 0.37946817278862, "learning_rate": 1.905993230944485e-05, "loss": 0.5951, "step": 9145 }, { "epoch": 0.2809572082450158, "grad_norm": 0.40081921219825745, "learning_rate": 1.905972773134995e-05, "loss": 0.5473, "step": 9146 }, { "epoch": 0.280987927379965, "grad_norm": 1.0911986827850342, "learning_rate": 1.905952313209542e-05, "loss": 0.4654, "step": 9147 }, { "epoch": 0.28101864651491415, "grad_norm": 0.3236619830131531, "learning_rate": 1.9059318511681743e-05, "loss": 0.5285, "step": 9148 }, { "epoch": 0.2810493656498633, "grad_norm": 0.3519318997859955, "learning_rate": 1.9059113870109395e-05, "loss": 0.5001, "step": 9149 }, { "epoch": 0.28108008478481245, "grad_norm": 0.3407547175884247, "learning_rate": 1.9058909207378852e-05, "loss": 0.5813, "step": 9150 }, { "epoch": 0.2811108039197616, "grad_norm": 0.5294622778892517, "learning_rate": 1.9058704523490595e-05, "loss": 0.5969, "step": 9151 }, { "epoch": 0.2811415230547108, "grad_norm": 0.3187311887741089, "learning_rate": 1.9058499818445102e-05, "loss": 0.5426, "step": 9152 }, { "epoch": 0.2811722421896599, "grad_norm": 0.3913319408893585, "learning_rate": 1.905829509224285e-05, "loss": 0.5602, "step": 9153 }, { "epoch": 0.2812029613246091, "grad_norm": 0.3923867642879486, "learning_rate": 1.9058090344884314e-05, "loss": 0.6527, "step": 9154 }, { "epoch": 0.28123368045955827, "grad_norm": 0.307500958442688, "learning_rate": 1.9057885576369978e-05, "loss": 0.5325, "step": 9155 }, { "epoch": 0.28126439959450744, "grad_norm": 0.37111952900886536, "learning_rate": 1.9057680786700316e-05, "loss": 0.5729, "step": 9156 }, { "epoch": 0.28129511872945656, "grad_norm": 0.3090234398841858, "learning_rate": 1.905747597587581e-05, "loss": 0.5922, "step": 9157 }, { "epoch": 0.28132583786440574, "grad_norm": 0.35042232275009155, "learning_rate": 1.9057271143896937e-05, "loss": 0.5552, "step": 9158 }, { "epoch": 0.2813565569993549, "grad_norm": 0.38807666301727295, "learning_rate": 1.9057066290764172e-05, "loss": 0.5498, "step": 9159 }, { "epoch": 0.28138727613430403, "grad_norm": 0.3417232036590576, "learning_rate": 1.9056861416477996e-05, "loss": 0.5724, "step": 9160 }, { "epoch": 0.2814179952692532, "grad_norm": 0.3571210205554962, "learning_rate": 1.905665652103889e-05, "loss": 0.564, "step": 9161 }, { "epoch": 0.2814487144042024, "grad_norm": 0.3851800560951233, "learning_rate": 1.905645160444733e-05, "loss": 0.6106, "step": 9162 }, { "epoch": 0.28147943353915156, "grad_norm": 0.34985366463661194, "learning_rate": 1.905624666670379e-05, "loss": 0.5324, "step": 9163 }, { "epoch": 0.2815101526741007, "grad_norm": 0.6735703945159912, "learning_rate": 1.905604170780876e-05, "loss": 0.6053, "step": 9164 }, { "epoch": 0.28154087180904985, "grad_norm": 0.36200132966041565, "learning_rate": 1.905583672776271e-05, "loss": 0.5965, "step": 9165 }, { "epoch": 0.28157159094399903, "grad_norm": 0.38071003556251526, "learning_rate": 1.9055631726566118e-05, "loss": 0.5241, "step": 9166 }, { "epoch": 0.2816023100789482, "grad_norm": 0.3446987569332123, "learning_rate": 1.9055426704219467e-05, "loss": 0.5087, "step": 9167 }, { "epoch": 0.2816330292138973, "grad_norm": 0.339468777179718, "learning_rate": 1.9055221660723237e-05, "loss": 0.584, "step": 9168 }, { "epoch": 0.2816637483488465, "grad_norm": 0.36629000306129456, "learning_rate": 1.9055016596077905e-05, "loss": 0.5384, "step": 9169 }, { "epoch": 0.2816944674837957, "grad_norm": 0.32118692994117737, "learning_rate": 1.9054811510283943e-05, "loss": 0.5814, "step": 9170 }, { "epoch": 0.2817251866187448, "grad_norm": 0.34915217757225037, "learning_rate": 1.905460640334184e-05, "loss": 0.5376, "step": 9171 }, { "epoch": 0.28175590575369397, "grad_norm": 0.37165379524230957, "learning_rate": 1.9054401275252075e-05, "loss": 0.5813, "step": 9172 }, { "epoch": 0.28178662488864314, "grad_norm": 0.3591201603412628, "learning_rate": 1.905419612601512e-05, "loss": 0.528, "step": 9173 }, { "epoch": 0.2818173440235923, "grad_norm": 0.32323089241981506, "learning_rate": 1.905399095563146e-05, "loss": 0.5508, "step": 9174 }, { "epoch": 0.28184806315854144, "grad_norm": 0.36700931191444397, "learning_rate": 1.905378576410157e-05, "loss": 0.6019, "step": 9175 }, { "epoch": 0.2818787822934906, "grad_norm": 0.34973546862602234, "learning_rate": 1.9053580551425928e-05, "loss": 0.592, "step": 9176 }, { "epoch": 0.2819095014284398, "grad_norm": 0.3601730763912201, "learning_rate": 1.905337531760502e-05, "loss": 0.6419, "step": 9177 }, { "epoch": 0.2819402205633889, "grad_norm": 1.3510164022445679, "learning_rate": 1.905317006263932e-05, "loss": 0.5087, "step": 9178 }, { "epoch": 0.2819709396983381, "grad_norm": 0.39943233132362366, "learning_rate": 1.905296478652931e-05, "loss": 0.5275, "step": 9179 }, { "epoch": 0.28200165883328726, "grad_norm": 0.43816235661506653, "learning_rate": 1.905275948927547e-05, "loss": 0.6043, "step": 9180 }, { "epoch": 0.28203237796823644, "grad_norm": 0.35390520095825195, "learning_rate": 1.9052554170878274e-05, "loss": 0.6551, "step": 9181 }, { "epoch": 0.28206309710318556, "grad_norm": 0.3542981743812561, "learning_rate": 1.905234883133821e-05, "loss": 0.5695, "step": 9182 }, { "epoch": 0.28209381623813473, "grad_norm": 0.331887811422348, "learning_rate": 1.9052143470655752e-05, "loss": 0.6555, "step": 9183 }, { "epoch": 0.2821245353730839, "grad_norm": 0.3267354667186737, "learning_rate": 1.9051938088831378e-05, "loss": 0.6066, "step": 9184 }, { "epoch": 0.282155254508033, "grad_norm": 0.3515908718109131, "learning_rate": 1.9051732685865573e-05, "loss": 0.6652, "step": 9185 }, { "epoch": 0.2821859736429822, "grad_norm": 0.3333424925804138, "learning_rate": 1.905152726175881e-05, "loss": 0.566, "step": 9186 }, { "epoch": 0.2822166927779314, "grad_norm": 0.3293896019458771, "learning_rate": 1.9051321816511578e-05, "loss": 0.5469, "step": 9187 }, { "epoch": 0.28224741191288055, "grad_norm": 0.33662623167037964, "learning_rate": 1.905111635012435e-05, "loss": 0.5345, "step": 9188 }, { "epoch": 0.2822781310478297, "grad_norm": 0.35640814900398254, "learning_rate": 1.9050910862597606e-05, "loss": 0.5894, "step": 9189 }, { "epoch": 0.28230885018277885, "grad_norm": 0.3821048438549042, "learning_rate": 1.905070535393183e-05, "loss": 0.5653, "step": 9190 }, { "epoch": 0.282339569317728, "grad_norm": 0.333499938249588, "learning_rate": 1.90504998241275e-05, "loss": 0.554, "step": 9191 }, { "epoch": 0.2823702884526772, "grad_norm": 0.3365623354911804, "learning_rate": 1.9050294273185093e-05, "loss": 0.6299, "step": 9192 }, { "epoch": 0.2824010075876263, "grad_norm": 0.4478514492511749, "learning_rate": 1.905008870110509e-05, "loss": 0.6032, "step": 9193 }, { "epoch": 0.2824317267225755, "grad_norm": 0.3897596001625061, "learning_rate": 1.9049883107887978e-05, "loss": 0.5846, "step": 9194 }, { "epoch": 0.28246244585752467, "grad_norm": 1.0069001913070679, "learning_rate": 1.9049677493534228e-05, "loss": 0.5139, "step": 9195 }, { "epoch": 0.2824931649924738, "grad_norm": 0.31660154461860657, "learning_rate": 1.904947185804433e-05, "loss": 0.5878, "step": 9196 }, { "epoch": 0.28252388412742296, "grad_norm": 0.36523911356925964, "learning_rate": 1.904926620141875e-05, "loss": 0.5316, "step": 9197 }, { "epoch": 0.28255460326237214, "grad_norm": 0.34745100140571594, "learning_rate": 1.904906052365798e-05, "loss": 0.5766, "step": 9198 }, { "epoch": 0.2825853223973213, "grad_norm": 0.3421125113964081, "learning_rate": 1.90488548247625e-05, "loss": 0.6279, "step": 9199 }, { "epoch": 0.28261604153227043, "grad_norm": 0.36627262830734253, "learning_rate": 1.9048649104732785e-05, "loss": 0.6297, "step": 9200 }, { "epoch": 0.2826467606672196, "grad_norm": 0.3289797008037567, "learning_rate": 1.904844336356932e-05, "loss": 0.5045, "step": 9201 }, { "epoch": 0.2826774798021688, "grad_norm": 0.3827930986881256, "learning_rate": 1.9048237601272585e-05, "loss": 0.6533, "step": 9202 }, { "epoch": 0.2827081989371179, "grad_norm": 0.39602360129356384, "learning_rate": 1.9048031817843053e-05, "loss": 0.5575, "step": 9203 }, { "epoch": 0.2827389180720671, "grad_norm": 0.3208671808242798, "learning_rate": 1.9047826013281218e-05, "loss": 0.4543, "step": 9204 }, { "epoch": 0.28276963720701626, "grad_norm": 0.3140404522418976, "learning_rate": 1.9047620187587547e-05, "loss": 0.5448, "step": 9205 }, { "epoch": 0.28280035634196543, "grad_norm": 0.3388734459877014, "learning_rate": 1.9047414340762532e-05, "loss": 0.5414, "step": 9206 }, { "epoch": 0.28283107547691455, "grad_norm": 0.3525868356227875, "learning_rate": 1.9047208472806647e-05, "loss": 0.5943, "step": 9207 }, { "epoch": 0.2828617946118637, "grad_norm": 0.35683268308639526, "learning_rate": 1.9047002583720373e-05, "loss": 0.5465, "step": 9208 }, { "epoch": 0.2828925137468129, "grad_norm": 0.34637200832366943, "learning_rate": 1.9046796673504195e-05, "loss": 0.5505, "step": 9209 }, { "epoch": 0.2829232328817621, "grad_norm": 0.404055655002594, "learning_rate": 1.904659074215859e-05, "loss": 0.5078, "step": 9210 }, { "epoch": 0.2829539520167112, "grad_norm": 0.3247663378715515, "learning_rate": 1.9046384789684044e-05, "loss": 0.499, "step": 9211 }, { "epoch": 0.28298467115166037, "grad_norm": 0.3425362706184387, "learning_rate": 1.9046178816081032e-05, "loss": 0.5904, "step": 9212 }, { "epoch": 0.28301539028660955, "grad_norm": 0.35069555044174194, "learning_rate": 1.9045972821350034e-05, "loss": 0.5522, "step": 9213 }, { "epoch": 0.28304610942155867, "grad_norm": 0.33017054200172424, "learning_rate": 1.904576680549154e-05, "loss": 0.5912, "step": 9214 }, { "epoch": 0.28307682855650784, "grad_norm": 0.3653112053871155, "learning_rate": 1.9045560768506024e-05, "loss": 0.5874, "step": 9215 }, { "epoch": 0.283107547691457, "grad_norm": 0.40580424666404724, "learning_rate": 1.904535471039397e-05, "loss": 0.536, "step": 9216 }, { "epoch": 0.2831382668264062, "grad_norm": 0.3198440670967102, "learning_rate": 1.9045148631155856e-05, "loss": 0.6434, "step": 9217 }, { "epoch": 0.2831689859613553, "grad_norm": 0.35780346393585205, "learning_rate": 1.904494253079217e-05, "loss": 0.6613, "step": 9218 }, { "epoch": 0.2831997050963045, "grad_norm": 0.337893545627594, "learning_rate": 1.9044736409303385e-05, "loss": 0.6066, "step": 9219 }, { "epoch": 0.28323042423125366, "grad_norm": 0.32627835869789124, "learning_rate": 1.904453026668999e-05, "loss": 0.6466, "step": 9220 }, { "epoch": 0.2832611433662028, "grad_norm": 0.3313881456851959, "learning_rate": 1.904432410295246e-05, "loss": 0.5575, "step": 9221 }, { "epoch": 0.28329186250115196, "grad_norm": 0.4146954417228699, "learning_rate": 1.904411791809128e-05, "loss": 0.573, "step": 9222 }, { "epoch": 0.28332258163610113, "grad_norm": 0.38519003987312317, "learning_rate": 1.9043911712106927e-05, "loss": 0.559, "step": 9223 }, { "epoch": 0.2833533007710503, "grad_norm": 0.35593336820602417, "learning_rate": 1.9043705484999894e-05, "loss": 0.6773, "step": 9224 }, { "epoch": 0.28338401990599943, "grad_norm": 0.3379130959510803, "learning_rate": 1.904349923677065e-05, "loss": 0.6276, "step": 9225 }, { "epoch": 0.2834147390409486, "grad_norm": 0.3275161385536194, "learning_rate": 1.9043292967419683e-05, "loss": 0.5958, "step": 9226 }, { "epoch": 0.2834454581758978, "grad_norm": 0.34059518575668335, "learning_rate": 1.9043086676947477e-05, "loss": 0.5753, "step": 9227 }, { "epoch": 0.2834761773108469, "grad_norm": 0.3282456696033478, "learning_rate": 1.9042880365354505e-05, "loss": 0.5503, "step": 9228 }, { "epoch": 0.2835068964457961, "grad_norm": 0.3477804958820343, "learning_rate": 1.904267403264126e-05, "loss": 0.5565, "step": 9229 }, { "epoch": 0.28353761558074525, "grad_norm": 0.36641037464141846, "learning_rate": 1.9042467678808215e-05, "loss": 0.4992, "step": 9230 }, { "epoch": 0.2835683347156944, "grad_norm": 0.3428441286087036, "learning_rate": 1.9042261303855857e-05, "loss": 0.5274, "step": 9231 }, { "epoch": 0.28359905385064355, "grad_norm": 0.35292333364486694, "learning_rate": 1.9042054907784664e-05, "loss": 0.6198, "step": 9232 }, { "epoch": 0.2836297729855927, "grad_norm": 0.41114139556884766, "learning_rate": 1.904184849059512e-05, "loss": 0.5293, "step": 9233 }, { "epoch": 0.2836604921205419, "grad_norm": 0.32805487513542175, "learning_rate": 1.904164205228771e-05, "loss": 0.5652, "step": 9234 }, { "epoch": 0.28369121125549107, "grad_norm": 0.8303053379058838, "learning_rate": 1.9041435592862913e-05, "loss": 0.5233, "step": 9235 }, { "epoch": 0.2837219303904402, "grad_norm": 0.36536163091659546, "learning_rate": 1.904122911232121e-05, "loss": 0.6481, "step": 9236 }, { "epoch": 0.28375264952538937, "grad_norm": 0.4073179364204407, "learning_rate": 1.9041022610663087e-05, "loss": 0.6435, "step": 9237 }, { "epoch": 0.28378336866033854, "grad_norm": 0.39240485429763794, "learning_rate": 1.9040816087889022e-05, "loss": 0.6058, "step": 9238 }, { "epoch": 0.28381408779528766, "grad_norm": 0.3746502697467804, "learning_rate": 1.90406095439995e-05, "loss": 0.6221, "step": 9239 }, { "epoch": 0.28384480693023684, "grad_norm": 0.34633976221084595, "learning_rate": 1.9040402978995006e-05, "loss": 0.5887, "step": 9240 }, { "epoch": 0.283875526065186, "grad_norm": 0.3711487650871277, "learning_rate": 1.9040196392876016e-05, "loss": 0.5934, "step": 9241 }, { "epoch": 0.2839062452001352, "grad_norm": 0.36307963728904724, "learning_rate": 1.903998978564302e-05, "loss": 0.5861, "step": 9242 }, { "epoch": 0.2839369643350843, "grad_norm": 0.3704046607017517, "learning_rate": 1.9039783157296492e-05, "loss": 0.579, "step": 9243 }, { "epoch": 0.2839676834700335, "grad_norm": 0.6482707858085632, "learning_rate": 1.903957650783692e-05, "loss": 0.7307, "step": 9244 }, { "epoch": 0.28399840260498266, "grad_norm": 0.32211390137672424, "learning_rate": 1.9039369837264785e-05, "loss": 0.5432, "step": 9245 }, { "epoch": 0.2840291217399318, "grad_norm": 0.33078062534332275, "learning_rate": 1.9039163145580574e-05, "loss": 0.525, "step": 9246 }, { "epoch": 0.28405984087488095, "grad_norm": 0.3394846022129059, "learning_rate": 1.9038956432784764e-05, "loss": 0.6028, "step": 9247 }, { "epoch": 0.28409056000983013, "grad_norm": 0.4098581373691559, "learning_rate": 1.903874969887784e-05, "loss": 0.6213, "step": 9248 }, { "epoch": 0.2841212791447793, "grad_norm": 0.3759462535381317, "learning_rate": 1.9038542943860284e-05, "loss": 0.5343, "step": 9249 }, { "epoch": 0.2841519982797284, "grad_norm": 0.3840775191783905, "learning_rate": 1.903833616773258e-05, "loss": 0.545, "step": 9250 }, { "epoch": 0.2841827174146776, "grad_norm": 0.3568432629108429, "learning_rate": 1.9038129370495212e-05, "loss": 0.5957, "step": 9251 }, { "epoch": 0.2842134365496268, "grad_norm": 0.3490675687789917, "learning_rate": 1.9037922552148662e-05, "loss": 0.5988, "step": 9252 }, { "epoch": 0.28424415568457595, "grad_norm": 0.35074281692504883, "learning_rate": 1.9037715712693413e-05, "loss": 0.5567, "step": 9253 }, { "epoch": 0.28427487481952507, "grad_norm": 0.3904297947883606, "learning_rate": 1.9037508852129948e-05, "loss": 0.5855, "step": 9254 }, { "epoch": 0.28430559395447424, "grad_norm": 0.3398498296737671, "learning_rate": 1.9037301970458747e-05, "loss": 0.6413, "step": 9255 }, { "epoch": 0.2843363130894234, "grad_norm": 0.35571521520614624, "learning_rate": 1.9037095067680295e-05, "loss": 0.6853, "step": 9256 }, { "epoch": 0.28436703222437254, "grad_norm": 0.38996225595474243, "learning_rate": 1.903688814379508e-05, "loss": 0.6284, "step": 9257 }, { "epoch": 0.2843977513593217, "grad_norm": 0.33855849504470825, "learning_rate": 1.903668119880358e-05, "loss": 0.6221, "step": 9258 }, { "epoch": 0.2844284704942709, "grad_norm": 0.3115846812725067, "learning_rate": 1.903647423270628e-05, "loss": 0.6095, "step": 9259 }, { "epoch": 0.28445918962922007, "grad_norm": 0.3606906235218048, "learning_rate": 1.9036267245503666e-05, "loss": 0.5904, "step": 9260 }, { "epoch": 0.2844899087641692, "grad_norm": 0.33507227897644043, "learning_rate": 1.9036060237196215e-05, "loss": 0.6039, "step": 9261 }, { "epoch": 0.28452062789911836, "grad_norm": 0.5059760808944702, "learning_rate": 1.9035853207784412e-05, "loss": 0.4953, "step": 9262 }, { "epoch": 0.28455134703406754, "grad_norm": 0.35479697585105896, "learning_rate": 1.903564615726875e-05, "loss": 0.5756, "step": 9263 }, { "epoch": 0.28458206616901666, "grad_norm": 0.6511971950531006, "learning_rate": 1.9035439085649702e-05, "loss": 0.5652, "step": 9264 }, { "epoch": 0.28461278530396583, "grad_norm": 0.3750301003456116, "learning_rate": 1.9035231992927757e-05, "loss": 0.6148, "step": 9265 }, { "epoch": 0.284643504438915, "grad_norm": 0.3578222692012787, "learning_rate": 1.9035024879103392e-05, "loss": 0.5647, "step": 9266 }, { "epoch": 0.2846742235738642, "grad_norm": 0.3476320505142212, "learning_rate": 1.90348177441771e-05, "loss": 0.6066, "step": 9267 }, { "epoch": 0.2847049427088133, "grad_norm": 0.35508644580841064, "learning_rate": 1.9034610588149354e-05, "loss": 0.5753, "step": 9268 }, { "epoch": 0.2847356618437625, "grad_norm": 0.3461879789829254, "learning_rate": 1.903440341102065e-05, "loss": 0.569, "step": 9269 }, { "epoch": 0.28476638097871165, "grad_norm": 0.3182808458805084, "learning_rate": 1.9034196212791465e-05, "loss": 0.5471, "step": 9270 }, { "epoch": 0.2847971001136608, "grad_norm": 0.37409618496894836, "learning_rate": 1.903398899346228e-05, "loss": 0.614, "step": 9271 }, { "epoch": 0.28482781924860995, "grad_norm": 0.3373118042945862, "learning_rate": 1.9033781753033587e-05, "loss": 0.5307, "step": 9272 }, { "epoch": 0.2848585383835591, "grad_norm": 0.3816783130168915, "learning_rate": 1.9033574491505863e-05, "loss": 0.5855, "step": 9273 }, { "epoch": 0.2848892575185083, "grad_norm": 0.4373297393321991, "learning_rate": 1.9033367208879598e-05, "loss": 0.5764, "step": 9274 }, { "epoch": 0.2849199766534574, "grad_norm": 0.39109355211257935, "learning_rate": 1.903315990515527e-05, "loss": 0.5369, "step": 9275 }, { "epoch": 0.2849506957884066, "grad_norm": 0.34315603971481323, "learning_rate": 1.903295258033337e-05, "loss": 0.5672, "step": 9276 }, { "epoch": 0.28498141492335577, "grad_norm": 0.35574328899383545, "learning_rate": 1.9032745234414374e-05, "loss": 0.6326, "step": 9277 }, { "epoch": 0.28501213405830494, "grad_norm": 0.7355232834815979, "learning_rate": 1.9032537867398773e-05, "loss": 0.4883, "step": 9278 }, { "epoch": 0.28504285319325406, "grad_norm": 0.325614333152771, "learning_rate": 1.903233047928705e-05, "loss": 0.5696, "step": 9279 }, { "epoch": 0.28507357232820324, "grad_norm": 0.35045966506004333, "learning_rate": 1.9032123070079684e-05, "loss": 0.5879, "step": 9280 }, { "epoch": 0.2851042914631524, "grad_norm": 0.32836273312568665, "learning_rate": 1.903191563977717e-05, "loss": 0.5151, "step": 9281 }, { "epoch": 0.28513501059810153, "grad_norm": 0.33084821701049805, "learning_rate": 1.903170818837998e-05, "loss": 0.5152, "step": 9282 }, { "epoch": 0.2851657297330507, "grad_norm": 0.37283238768577576, "learning_rate": 1.903150071588861e-05, "loss": 0.5749, "step": 9283 }, { "epoch": 0.2851964488679999, "grad_norm": 0.35400035977363586, "learning_rate": 1.9031293222303536e-05, "loss": 0.5763, "step": 9284 }, { "epoch": 0.28522716800294906, "grad_norm": 0.37130653858184814, "learning_rate": 1.9031085707625246e-05, "loss": 0.5864, "step": 9285 }, { "epoch": 0.2852578871378982, "grad_norm": 0.307373970746994, "learning_rate": 1.9030878171854224e-05, "loss": 0.5718, "step": 9286 }, { "epoch": 0.28528860627284736, "grad_norm": 0.338461697101593, "learning_rate": 1.9030670614990957e-05, "loss": 0.6098, "step": 9287 }, { "epoch": 0.28531932540779653, "grad_norm": 0.40855664014816284, "learning_rate": 1.903046303703593e-05, "loss": 0.6019, "step": 9288 }, { "epoch": 0.28535004454274565, "grad_norm": 0.35944223403930664, "learning_rate": 1.903025543798962e-05, "loss": 0.4941, "step": 9289 }, { "epoch": 0.2853807636776948, "grad_norm": 0.39662492275238037, "learning_rate": 1.9030047817852524e-05, "loss": 0.554, "step": 9290 }, { "epoch": 0.285411482812644, "grad_norm": 0.3280997574329376, "learning_rate": 1.9029840176625117e-05, "loss": 0.5401, "step": 9291 }, { "epoch": 0.2854422019475932, "grad_norm": 0.3458532691001892, "learning_rate": 1.902963251430789e-05, "loss": 0.627, "step": 9292 }, { "epoch": 0.2854729210825423, "grad_norm": 0.33751818537712097, "learning_rate": 1.9029424830901323e-05, "loss": 0.648, "step": 9293 }, { "epoch": 0.2855036402174915, "grad_norm": 0.42452800273895264, "learning_rate": 1.902921712640591e-05, "loss": 0.6276, "step": 9294 }, { "epoch": 0.28553435935244065, "grad_norm": 0.3729802370071411, "learning_rate": 1.902900940082212e-05, "loss": 0.5547, "step": 9295 }, { "epoch": 0.28556507848738977, "grad_norm": 0.36892151832580566, "learning_rate": 1.9028801654150456e-05, "loss": 0.5245, "step": 9296 }, { "epoch": 0.28559579762233894, "grad_norm": 0.37191852927207947, "learning_rate": 1.902859388639139e-05, "loss": 0.5353, "step": 9297 }, { "epoch": 0.2856265167572881, "grad_norm": 0.34723421931266785, "learning_rate": 1.9028386097545416e-05, "loss": 0.6368, "step": 9298 }, { "epoch": 0.2856572358922373, "grad_norm": 0.3488089442253113, "learning_rate": 1.9028178287613014e-05, "loss": 0.6932, "step": 9299 }, { "epoch": 0.2856879550271864, "grad_norm": 0.36049705743789673, "learning_rate": 1.902797045659467e-05, "loss": 0.5763, "step": 9300 }, { "epoch": 0.2857186741621356, "grad_norm": 0.3234849274158478, "learning_rate": 1.902776260449087e-05, "loss": 0.6412, "step": 9301 }, { "epoch": 0.28574939329708476, "grad_norm": 0.3377651274204254, "learning_rate": 1.9027554731302102e-05, "loss": 0.5445, "step": 9302 }, { "epoch": 0.28578011243203394, "grad_norm": 0.3206236660480499, "learning_rate": 1.9027346837028854e-05, "loss": 0.5307, "step": 9303 }, { "epoch": 0.28581083156698306, "grad_norm": 0.3338584303855896, "learning_rate": 1.90271389216716e-05, "loss": 0.6497, "step": 9304 }, { "epoch": 0.28584155070193223, "grad_norm": 0.37008991837501526, "learning_rate": 1.9026930985230833e-05, "loss": 0.5968, "step": 9305 }, { "epoch": 0.2858722698368814, "grad_norm": 0.3823692500591278, "learning_rate": 1.9026723027707042e-05, "loss": 0.5703, "step": 9306 }, { "epoch": 0.28590298897183053, "grad_norm": 0.3399040102958679, "learning_rate": 1.9026515049100707e-05, "loss": 0.5965, "step": 9307 }, { "epoch": 0.2859337081067797, "grad_norm": 0.34630969166755676, "learning_rate": 1.9026307049412314e-05, "loss": 0.6437, "step": 9308 }, { "epoch": 0.2859644272417289, "grad_norm": 0.35077065229415894, "learning_rate": 1.9026099028642353e-05, "loss": 0.5729, "step": 9309 }, { "epoch": 0.28599514637667806, "grad_norm": 0.33529430627822876, "learning_rate": 1.9025890986791306e-05, "loss": 0.6529, "step": 9310 }, { "epoch": 0.2860258655116272, "grad_norm": 0.38041603565216064, "learning_rate": 1.9025682923859663e-05, "loss": 0.5619, "step": 9311 }, { "epoch": 0.28605658464657635, "grad_norm": 0.3517540991306305, "learning_rate": 1.9025474839847904e-05, "loss": 0.5736, "step": 9312 }, { "epoch": 0.2860873037815255, "grad_norm": 0.44308212399482727, "learning_rate": 1.9025266734756517e-05, "loss": 0.7029, "step": 9313 }, { "epoch": 0.28611802291647465, "grad_norm": 0.31547999382019043, "learning_rate": 1.9025058608585993e-05, "loss": 0.5753, "step": 9314 }, { "epoch": 0.2861487420514238, "grad_norm": 0.3447605073451996, "learning_rate": 1.902485046133681e-05, "loss": 0.5373, "step": 9315 }, { "epoch": 0.286179461186373, "grad_norm": 0.41334059834480286, "learning_rate": 1.9024642293009465e-05, "loss": 0.5929, "step": 9316 }, { "epoch": 0.28621018032132217, "grad_norm": 0.42987290024757385, "learning_rate": 1.9024434103604432e-05, "loss": 0.6413, "step": 9317 }, { "epoch": 0.2862408994562713, "grad_norm": 0.41721147298812866, "learning_rate": 1.9024225893122207e-05, "loss": 0.5549, "step": 9318 }, { "epoch": 0.28627161859122047, "grad_norm": 0.31250229477882385, "learning_rate": 1.902401766156327e-05, "loss": 0.5685, "step": 9319 }, { "epoch": 0.28630233772616964, "grad_norm": 0.3770241439342499, "learning_rate": 1.902380940892811e-05, "loss": 0.5868, "step": 9320 }, { "epoch": 0.2863330568611188, "grad_norm": 0.3890383243560791, "learning_rate": 1.902360113521721e-05, "loss": 0.6166, "step": 9321 }, { "epoch": 0.28636377599606794, "grad_norm": 0.3839413523674011, "learning_rate": 1.902339284043106e-05, "loss": 0.576, "step": 9322 }, { "epoch": 0.2863944951310171, "grad_norm": 0.3640425205230713, "learning_rate": 1.9023184524570148e-05, "loss": 0.5935, "step": 9323 }, { "epoch": 0.2864252142659663, "grad_norm": 0.3006657361984253, "learning_rate": 1.9022976187634957e-05, "loss": 0.566, "step": 9324 }, { "epoch": 0.2864559334009154, "grad_norm": 0.32806137204170227, "learning_rate": 1.9022767829625976e-05, "loss": 0.5874, "step": 9325 }, { "epoch": 0.2864866525358646, "grad_norm": 0.448210209608078, "learning_rate": 1.9022559450543693e-05, "loss": 0.6282, "step": 9326 }, { "epoch": 0.28651737167081376, "grad_norm": 0.3499283492565155, "learning_rate": 1.902235105038859e-05, "loss": 0.5926, "step": 9327 }, { "epoch": 0.28654809080576293, "grad_norm": 0.36114048957824707, "learning_rate": 1.9022142629161156e-05, "loss": 0.6316, "step": 9328 }, { "epoch": 0.28657880994071205, "grad_norm": 0.39125198125839233, "learning_rate": 1.9021934186861876e-05, "loss": 0.6138, "step": 9329 }, { "epoch": 0.28660952907566123, "grad_norm": 0.511008083820343, "learning_rate": 1.902172572349124e-05, "loss": 0.6685, "step": 9330 }, { "epoch": 0.2866402482106104, "grad_norm": 0.4135851562023163, "learning_rate": 1.9021517239049734e-05, "loss": 0.6528, "step": 9331 }, { "epoch": 0.2866709673455595, "grad_norm": 0.33446189761161804, "learning_rate": 1.9021308733537843e-05, "loss": 0.5372, "step": 9332 }, { "epoch": 0.2867016864805087, "grad_norm": 0.334303081035614, "learning_rate": 1.9021100206956058e-05, "loss": 0.4858, "step": 9333 }, { "epoch": 0.2867324056154579, "grad_norm": 0.38410356640815735, "learning_rate": 1.902089165930486e-05, "loss": 0.5219, "step": 9334 }, { "epoch": 0.28676312475040705, "grad_norm": 0.3804892599582672, "learning_rate": 1.9020683090584745e-05, "loss": 0.6135, "step": 9335 }, { "epoch": 0.28679384388535617, "grad_norm": 0.3023620843887329, "learning_rate": 1.902047450079619e-05, "loss": 0.5603, "step": 9336 }, { "epoch": 0.28682456302030535, "grad_norm": 0.35357919335365295, "learning_rate": 1.9020265889939685e-05, "loss": 0.6175, "step": 9337 }, { "epoch": 0.2868552821552545, "grad_norm": 0.3368571102619171, "learning_rate": 1.9020057258015723e-05, "loss": 0.6156, "step": 9338 }, { "epoch": 0.28688600129020364, "grad_norm": 0.3990270793437958, "learning_rate": 1.9019848605024787e-05, "loss": 0.581, "step": 9339 }, { "epoch": 0.2869167204251528, "grad_norm": 0.3414308428764343, "learning_rate": 1.9019639930967363e-05, "loss": 0.6013, "step": 9340 }, { "epoch": 0.286947439560102, "grad_norm": 0.3435481786727905, "learning_rate": 1.901943123584394e-05, "loss": 0.6667, "step": 9341 }, { "epoch": 0.28697815869505117, "grad_norm": 0.35575971007347107, "learning_rate": 1.9019222519655007e-05, "loss": 0.655, "step": 9342 }, { "epoch": 0.2870088778300003, "grad_norm": 0.3572206199169159, "learning_rate": 1.901901378240105e-05, "loss": 0.6028, "step": 9343 }, { "epoch": 0.28703959696494946, "grad_norm": 0.34801185131073, "learning_rate": 1.9018805024082555e-05, "loss": 0.5665, "step": 9344 }, { "epoch": 0.28707031609989864, "grad_norm": 0.4003714621067047, "learning_rate": 1.9018596244700012e-05, "loss": 0.5772, "step": 9345 }, { "epoch": 0.2871010352348478, "grad_norm": 0.4205717146396637, "learning_rate": 1.901838744425391e-05, "loss": 0.5552, "step": 9346 }, { "epoch": 0.28713175436979693, "grad_norm": 0.33847397565841675, "learning_rate": 1.901817862274473e-05, "loss": 0.6187, "step": 9347 }, { "epoch": 0.2871624735047461, "grad_norm": 0.34234726428985596, "learning_rate": 1.9017969780172966e-05, "loss": 0.6485, "step": 9348 }, { "epoch": 0.2871931926396953, "grad_norm": 0.3132323920726776, "learning_rate": 1.9017760916539102e-05, "loss": 0.5419, "step": 9349 }, { "epoch": 0.2872239117746444, "grad_norm": 0.3304428160190582, "learning_rate": 1.901755203184363e-05, "loss": 0.5927, "step": 9350 }, { "epoch": 0.2872546309095936, "grad_norm": 0.3361068069934845, "learning_rate": 1.9017343126087033e-05, "loss": 0.6007, "step": 9351 }, { "epoch": 0.28728535004454275, "grad_norm": 0.34766003489494324, "learning_rate": 1.9017134199269804e-05, "loss": 0.5377, "step": 9352 }, { "epoch": 0.28731606917949193, "grad_norm": 0.33210045099258423, "learning_rate": 1.9016925251392425e-05, "loss": 0.5818, "step": 9353 }, { "epoch": 0.28734678831444105, "grad_norm": 1.606541633605957, "learning_rate": 1.9016716282455392e-05, "loss": 0.5621, "step": 9354 }, { "epoch": 0.2873775074493902, "grad_norm": 0.33201372623443604, "learning_rate": 1.9016507292459185e-05, "loss": 0.64, "step": 9355 }, { "epoch": 0.2874082265843394, "grad_norm": 0.3337506055831909, "learning_rate": 1.9016298281404295e-05, "loss": 0.5938, "step": 9356 }, { "epoch": 0.2874389457192885, "grad_norm": 0.3602793216705322, "learning_rate": 1.9016089249291214e-05, "loss": 0.5202, "step": 9357 }, { "epoch": 0.2874696648542377, "grad_norm": 0.32055869698524475, "learning_rate": 1.9015880196120424e-05, "loss": 0.5663, "step": 9358 }, { "epoch": 0.28750038398918687, "grad_norm": 0.36625251173973083, "learning_rate": 1.9015671121892417e-05, "loss": 0.6253, "step": 9359 }, { "epoch": 0.28753110312413604, "grad_norm": 0.3435142934322357, "learning_rate": 1.901546202660768e-05, "loss": 0.5318, "step": 9360 }, { "epoch": 0.28756182225908516, "grad_norm": 0.33965981006622314, "learning_rate": 1.9015252910266704e-05, "loss": 0.5699, "step": 9361 }, { "epoch": 0.28759254139403434, "grad_norm": 0.3194831907749176, "learning_rate": 1.9015043772869972e-05, "loss": 0.572, "step": 9362 }, { "epoch": 0.2876232605289835, "grad_norm": 0.34829676151275635, "learning_rate": 1.9014834614417977e-05, "loss": 0.5434, "step": 9363 }, { "epoch": 0.2876539796639327, "grad_norm": 0.33492106199264526, "learning_rate": 1.9014625434911206e-05, "loss": 0.6085, "step": 9364 }, { "epoch": 0.2876846987988818, "grad_norm": 0.33397242426872253, "learning_rate": 1.901441623435015e-05, "loss": 0.6381, "step": 9365 }, { "epoch": 0.287715417933831, "grad_norm": 0.477981299161911, "learning_rate": 1.901420701273529e-05, "loss": 0.5382, "step": 9366 }, { "epoch": 0.28774613706878016, "grad_norm": 0.3303143084049225, "learning_rate": 1.9013997770067125e-05, "loss": 0.5863, "step": 9367 }, { "epoch": 0.2877768562037293, "grad_norm": 0.3454250395298004, "learning_rate": 1.901378850634614e-05, "loss": 0.7058, "step": 9368 }, { "epoch": 0.28780757533867846, "grad_norm": 0.31945088505744934, "learning_rate": 1.9013579221572817e-05, "loss": 0.5837, "step": 9369 }, { "epoch": 0.28783829447362763, "grad_norm": 0.37382662296295166, "learning_rate": 1.9013369915747653e-05, "loss": 0.6121, "step": 9370 }, { "epoch": 0.2878690136085768, "grad_norm": 0.31966260075569153, "learning_rate": 1.9013160588871132e-05, "loss": 0.5455, "step": 9371 }, { "epoch": 0.2878997327435259, "grad_norm": 0.38057276606559753, "learning_rate": 1.9012951240943748e-05, "loss": 0.6135, "step": 9372 }, { "epoch": 0.2879304518784751, "grad_norm": 0.8911390900611877, "learning_rate": 1.9012741871965986e-05, "loss": 0.5589, "step": 9373 }, { "epoch": 0.2879611710134243, "grad_norm": 0.3580016791820526, "learning_rate": 1.9012532481938334e-05, "loss": 0.5436, "step": 9374 }, { "epoch": 0.2879918901483734, "grad_norm": 0.3633563816547394, "learning_rate": 1.9012323070861286e-05, "loss": 0.5991, "step": 9375 }, { "epoch": 0.2880226092833226, "grad_norm": 0.3777547776699066, "learning_rate": 1.9012113638735327e-05, "loss": 0.4652, "step": 9376 }, { "epoch": 0.28805332841827175, "grad_norm": 0.3117181360721588, "learning_rate": 1.9011904185560945e-05, "loss": 0.5646, "step": 9377 }, { "epoch": 0.2880840475532209, "grad_norm": 0.3774385452270508, "learning_rate": 1.9011694711338635e-05, "loss": 0.4911, "step": 9378 }, { "epoch": 0.28811476668817004, "grad_norm": 0.36549118161201477, "learning_rate": 1.901148521606888e-05, "loss": 0.5772, "step": 9379 }, { "epoch": 0.2881454858231192, "grad_norm": 0.3867574632167816, "learning_rate": 1.9011275699752173e-05, "loss": 0.6028, "step": 9380 }, { "epoch": 0.2881762049580684, "grad_norm": 0.30374130606651306, "learning_rate": 1.9011066162389e-05, "loss": 0.5147, "step": 9381 }, { "epoch": 0.2882069240930175, "grad_norm": 0.3855179250240326, "learning_rate": 1.9010856603979856e-05, "loss": 0.576, "step": 9382 }, { "epoch": 0.2882376432279667, "grad_norm": 0.3699890673160553, "learning_rate": 1.9010647024525225e-05, "loss": 0.6336, "step": 9383 }, { "epoch": 0.28826836236291586, "grad_norm": 0.34892505407333374, "learning_rate": 1.90104374240256e-05, "loss": 0.5234, "step": 9384 }, { "epoch": 0.28829908149786504, "grad_norm": 0.37301263213157654, "learning_rate": 1.9010227802481467e-05, "loss": 0.5508, "step": 9385 }, { "epoch": 0.28832980063281416, "grad_norm": 0.40849706530570984, "learning_rate": 1.901001815989332e-05, "loss": 0.644, "step": 9386 }, { "epoch": 0.28836051976776333, "grad_norm": 0.38499918580055237, "learning_rate": 1.900980849626165e-05, "loss": 0.5357, "step": 9387 }, { "epoch": 0.2883912389027125, "grad_norm": 0.36384740471839905, "learning_rate": 1.9009598811586935e-05, "loss": 0.6321, "step": 9388 }, { "epoch": 0.2884219580376617, "grad_norm": 0.30856820940971375, "learning_rate": 1.9009389105869677e-05, "loss": 0.5326, "step": 9389 }, { "epoch": 0.2884526771726108, "grad_norm": 0.35361042618751526, "learning_rate": 1.900917937911036e-05, "loss": 0.6199, "step": 9390 }, { "epoch": 0.28848339630756, "grad_norm": 0.3460847735404968, "learning_rate": 1.9008969631309474e-05, "loss": 0.5334, "step": 9391 }, { "epoch": 0.28851411544250916, "grad_norm": 0.3597692847251892, "learning_rate": 1.9008759862467515e-05, "loss": 0.6134, "step": 9392 }, { "epoch": 0.2885448345774583, "grad_norm": 0.34109750390052795, "learning_rate": 1.9008550072584967e-05, "loss": 0.5629, "step": 9393 }, { "epoch": 0.28857555371240745, "grad_norm": 0.335252583026886, "learning_rate": 1.9008340261662316e-05, "loss": 0.5537, "step": 9394 }, { "epoch": 0.2886062728473566, "grad_norm": 0.3669186532497406, "learning_rate": 1.9008130429700062e-05, "loss": 0.5853, "step": 9395 }, { "epoch": 0.2886369919823058, "grad_norm": 0.36158856749534607, "learning_rate": 1.9007920576698692e-05, "loss": 0.5612, "step": 9396 }, { "epoch": 0.2886677111172549, "grad_norm": 0.6757898330688477, "learning_rate": 1.900771070265869e-05, "loss": 0.5554, "step": 9397 }, { "epoch": 0.2886984302522041, "grad_norm": 0.3661435842514038, "learning_rate": 1.900750080758055e-05, "loss": 0.6394, "step": 9398 }, { "epoch": 0.28872914938715327, "grad_norm": 0.3476350009441376, "learning_rate": 1.9007290891464768e-05, "loss": 0.5459, "step": 9399 }, { "epoch": 0.2887598685221024, "grad_norm": 0.334116667509079, "learning_rate": 1.9007080954311827e-05, "loss": 0.5913, "step": 9400 }, { "epoch": 0.28879058765705157, "grad_norm": 0.35747507214546204, "learning_rate": 1.900687099612222e-05, "loss": 0.6345, "step": 9401 }, { "epoch": 0.28882130679200074, "grad_norm": 0.3479762673377991, "learning_rate": 1.9006661016896432e-05, "loss": 0.6165, "step": 9402 }, { "epoch": 0.2888520259269499, "grad_norm": 0.3553013801574707, "learning_rate": 1.900645101663496e-05, "loss": 0.608, "step": 9403 }, { "epoch": 0.28888274506189904, "grad_norm": 0.35640326142311096, "learning_rate": 1.9006240995338293e-05, "loss": 0.6227, "step": 9404 }, { "epoch": 0.2889134641968482, "grad_norm": 0.40772804617881775, "learning_rate": 1.9006030953006923e-05, "loss": 0.5666, "step": 9405 }, { "epoch": 0.2889441833317974, "grad_norm": 0.3512440621852875, "learning_rate": 1.900582088964134e-05, "loss": 0.6112, "step": 9406 }, { "epoch": 0.28897490246674656, "grad_norm": 0.38386377692222595, "learning_rate": 1.900561080524203e-05, "loss": 0.6801, "step": 9407 }, { "epoch": 0.2890056216016957, "grad_norm": 0.3171926438808441, "learning_rate": 1.9005400699809486e-05, "loss": 0.5791, "step": 9408 }, { "epoch": 0.28903634073664486, "grad_norm": 0.9443144202232361, "learning_rate": 1.9005190573344203e-05, "loss": 0.7347, "step": 9409 }, { "epoch": 0.28906705987159403, "grad_norm": 0.32842209935188293, "learning_rate": 1.900498042584667e-05, "loss": 0.5579, "step": 9410 }, { "epoch": 0.28909777900654315, "grad_norm": 0.3448334038257599, "learning_rate": 1.9004770257317374e-05, "loss": 0.6037, "step": 9411 }, { "epoch": 0.28912849814149233, "grad_norm": 0.31179970502853394, "learning_rate": 1.9004560067756806e-05, "loss": 0.626, "step": 9412 }, { "epoch": 0.2891592172764415, "grad_norm": 0.3652092218399048, "learning_rate": 1.900434985716546e-05, "loss": 0.5241, "step": 9413 }, { "epoch": 0.2891899364113907, "grad_norm": 0.6611483097076416, "learning_rate": 1.9004139625543826e-05, "loss": 0.6478, "step": 9414 }, { "epoch": 0.2892206555463398, "grad_norm": 0.36069566011428833, "learning_rate": 1.9003929372892396e-05, "loss": 0.6164, "step": 9415 }, { "epoch": 0.289251374681289, "grad_norm": 0.3264824151992798, "learning_rate": 1.9003719099211662e-05, "loss": 0.6144, "step": 9416 }, { "epoch": 0.28928209381623815, "grad_norm": 0.3639531135559082, "learning_rate": 1.9003508804502112e-05, "loss": 0.5656, "step": 9417 }, { "epoch": 0.28931281295118727, "grad_norm": 0.41040992736816406, "learning_rate": 1.9003298488764237e-05, "loss": 0.6215, "step": 9418 }, { "epoch": 0.28934353208613645, "grad_norm": 0.3587819039821625, "learning_rate": 1.900308815199853e-05, "loss": 0.5586, "step": 9419 }, { "epoch": 0.2893742512210856, "grad_norm": 0.3278818726539612, "learning_rate": 1.9002877794205482e-05, "loss": 0.595, "step": 9420 }, { "epoch": 0.2894049703560348, "grad_norm": 0.39501529932022095, "learning_rate": 1.9002667415385584e-05, "loss": 0.5116, "step": 9421 }, { "epoch": 0.2894356894909839, "grad_norm": 0.32418352365493774, "learning_rate": 1.900245701553933e-05, "loss": 0.6188, "step": 9422 }, { "epoch": 0.2894664086259331, "grad_norm": 0.3303264081478119, "learning_rate": 1.9002246594667207e-05, "loss": 0.5131, "step": 9423 }, { "epoch": 0.28949712776088227, "grad_norm": 0.39344894886016846, "learning_rate": 1.9002036152769708e-05, "loss": 0.6458, "step": 9424 }, { "epoch": 0.2895278468958314, "grad_norm": 0.4250757098197937, "learning_rate": 1.9001825689847325e-05, "loss": 0.5673, "step": 9425 }, { "epoch": 0.28955856603078056, "grad_norm": 0.3264045715332031, "learning_rate": 1.900161520590055e-05, "loss": 0.5979, "step": 9426 }, { "epoch": 0.28958928516572974, "grad_norm": 0.33560627698898315, "learning_rate": 1.9001404700929872e-05, "loss": 0.4424, "step": 9427 }, { "epoch": 0.2896200043006789, "grad_norm": 0.3638589680194855, "learning_rate": 1.9001194174935787e-05, "loss": 0.5966, "step": 9428 }, { "epoch": 0.28965072343562803, "grad_norm": 0.3742683231830597, "learning_rate": 1.900098362791878e-05, "loss": 0.5835, "step": 9429 }, { "epoch": 0.2896814425705772, "grad_norm": 0.33662042021751404, "learning_rate": 1.9000773059879357e-05, "loss": 0.5593, "step": 9430 }, { "epoch": 0.2897121617055264, "grad_norm": 0.3326091766357422, "learning_rate": 1.900056247081799e-05, "loss": 0.6441, "step": 9431 }, { "epoch": 0.28974288084047556, "grad_norm": 0.33163192868232727, "learning_rate": 1.9000351860735184e-05, "loss": 0.5076, "step": 9432 }, { "epoch": 0.2897735999754247, "grad_norm": 0.3366028070449829, "learning_rate": 1.900014122963143e-05, "loss": 0.622, "step": 9433 }, { "epoch": 0.28980431911037385, "grad_norm": 0.3558137118816376, "learning_rate": 1.8999930577507215e-05, "loss": 0.6605, "step": 9434 }, { "epoch": 0.28983503824532303, "grad_norm": 0.3563781678676605, "learning_rate": 1.8999719904363036e-05, "loss": 0.5989, "step": 9435 }, { "epoch": 0.28986575738027215, "grad_norm": 0.3242925703525543, "learning_rate": 1.899950921019938e-05, "loss": 0.5614, "step": 9436 }, { "epoch": 0.2898964765152213, "grad_norm": 0.424644410610199, "learning_rate": 1.899929849501674e-05, "loss": 0.6731, "step": 9437 }, { "epoch": 0.2899271956501705, "grad_norm": 0.3384351432323456, "learning_rate": 1.8999087758815615e-05, "loss": 0.5738, "step": 9438 }, { "epoch": 0.2899579147851197, "grad_norm": 0.3216378688812256, "learning_rate": 1.899887700159649e-05, "loss": 0.5417, "step": 9439 }, { "epoch": 0.2899886339200688, "grad_norm": 0.3223850727081299, "learning_rate": 1.899866622335986e-05, "loss": 0.6346, "step": 9440 }, { "epoch": 0.29001935305501797, "grad_norm": 0.3380982279777527, "learning_rate": 1.8998455424106213e-05, "loss": 0.6408, "step": 9441 }, { "epoch": 0.29005007218996715, "grad_norm": 0.45507633686065674, "learning_rate": 1.899824460383605e-05, "loss": 0.5965, "step": 9442 }, { "epoch": 0.29008079132491627, "grad_norm": 0.3227490186691284, "learning_rate": 1.8998033762549854e-05, "loss": 0.5883, "step": 9443 }, { "epoch": 0.29011151045986544, "grad_norm": 0.431487113237381, "learning_rate": 1.8997822900248124e-05, "loss": 0.5625, "step": 9444 }, { "epoch": 0.2901422295948146, "grad_norm": 0.3830277621746063, "learning_rate": 1.8997612016931347e-05, "loss": 0.5835, "step": 9445 }, { "epoch": 0.2901729487297638, "grad_norm": 0.35452818870544434, "learning_rate": 1.899740111260002e-05, "loss": 0.66, "step": 9446 }, { "epoch": 0.2902036678647129, "grad_norm": 0.3321070373058319, "learning_rate": 1.899719018725464e-05, "loss": 0.6534, "step": 9447 }, { "epoch": 0.2902343869996621, "grad_norm": 0.3561092019081116, "learning_rate": 1.8996979240895686e-05, "loss": 0.5855, "step": 9448 }, { "epoch": 0.29026510613461126, "grad_norm": 0.35406848788261414, "learning_rate": 1.8996768273523666e-05, "loss": 0.5919, "step": 9449 }, { "epoch": 0.29029582526956044, "grad_norm": 0.3625110685825348, "learning_rate": 1.899655728513906e-05, "loss": 0.5927, "step": 9450 }, { "epoch": 0.29032654440450956, "grad_norm": 0.35873591899871826, "learning_rate": 1.899634627574237e-05, "loss": 0.5385, "step": 9451 }, { "epoch": 0.29035726353945873, "grad_norm": 0.33755919337272644, "learning_rate": 1.899613524533408e-05, "loss": 0.5627, "step": 9452 }, { "epoch": 0.2903879826744079, "grad_norm": 0.3175421357154846, "learning_rate": 1.899592419391469e-05, "loss": 0.5211, "step": 9453 }, { "epoch": 0.290418701809357, "grad_norm": 0.353015273809433, "learning_rate": 1.8995713121484695e-05, "loss": 0.6106, "step": 9454 }, { "epoch": 0.2904494209443062, "grad_norm": 0.34359729290008545, "learning_rate": 1.8995502028044577e-05, "loss": 0.5667, "step": 9455 }, { "epoch": 0.2904801400792554, "grad_norm": 0.48201557993888855, "learning_rate": 1.899529091359484e-05, "loss": 0.5814, "step": 9456 }, { "epoch": 0.29051085921420455, "grad_norm": 0.3213118016719818, "learning_rate": 1.899507977813597e-05, "loss": 0.5919, "step": 9457 }, { "epoch": 0.2905415783491537, "grad_norm": 0.31962600350379944, "learning_rate": 1.8994868621668464e-05, "loss": 0.5529, "step": 9458 }, { "epoch": 0.29057229748410285, "grad_norm": 0.33782291412353516, "learning_rate": 1.8994657444192818e-05, "loss": 0.6445, "step": 9459 }, { "epoch": 0.290603016619052, "grad_norm": 0.3900882601737976, "learning_rate": 1.8994446245709518e-05, "loss": 0.5802, "step": 9460 }, { "epoch": 0.29063373575400114, "grad_norm": 0.3228319585323334, "learning_rate": 1.899423502621906e-05, "loss": 0.581, "step": 9461 }, { "epoch": 0.2906644548889503, "grad_norm": 0.4409694969654083, "learning_rate": 1.899402378572194e-05, "loss": 0.5947, "step": 9462 }, { "epoch": 0.2906951740238995, "grad_norm": 0.33868488669395447, "learning_rate": 1.8993812524218646e-05, "loss": 0.5513, "step": 9463 }, { "epoch": 0.29072589315884867, "grad_norm": 0.3501710891723633, "learning_rate": 1.8993601241709678e-05, "loss": 0.606, "step": 9464 }, { "epoch": 0.2907566122937978, "grad_norm": 0.33608150482177734, "learning_rate": 1.8993389938195522e-05, "loss": 0.6716, "step": 9465 }, { "epoch": 0.29078733142874696, "grad_norm": 0.3239777088165283, "learning_rate": 1.899317861367668e-05, "loss": 0.6297, "step": 9466 }, { "epoch": 0.29081805056369614, "grad_norm": 0.34961315989494324, "learning_rate": 1.8992967268153642e-05, "loss": 0.6624, "step": 9467 }, { "epoch": 0.29084876969864526, "grad_norm": 0.3577212691307068, "learning_rate": 1.89927559016269e-05, "loss": 0.5842, "step": 9468 }, { "epoch": 0.29087948883359444, "grad_norm": 0.30937889218330383, "learning_rate": 1.8992544514096946e-05, "loss": 0.5103, "step": 9469 }, { "epoch": 0.2909102079685436, "grad_norm": 0.3873792588710785, "learning_rate": 1.8992333105564274e-05, "loss": 0.6457, "step": 9470 }, { "epoch": 0.2909409271034928, "grad_norm": 0.33313965797424316, "learning_rate": 1.8992121676029387e-05, "loss": 0.5962, "step": 9471 }, { "epoch": 0.2909716462384419, "grad_norm": 0.33405938744544983, "learning_rate": 1.899191022549277e-05, "loss": 0.5539, "step": 9472 }, { "epoch": 0.2910023653733911, "grad_norm": 0.3451133966445923, "learning_rate": 1.8991698753954913e-05, "loss": 0.609, "step": 9473 }, { "epoch": 0.29103308450834026, "grad_norm": 0.3713354468345642, "learning_rate": 1.8991487261416323e-05, "loss": 0.5884, "step": 9474 }, { "epoch": 0.29106380364328943, "grad_norm": 0.35267654061317444, "learning_rate": 1.8991275747877482e-05, "loss": 0.6004, "step": 9475 }, { "epoch": 0.29109452277823855, "grad_norm": 0.34927988052368164, "learning_rate": 1.899106421333889e-05, "loss": 0.606, "step": 9476 }, { "epoch": 0.2911252419131877, "grad_norm": 0.3376772105693817, "learning_rate": 1.8990852657801037e-05, "loss": 0.5662, "step": 9477 }, { "epoch": 0.2911559610481369, "grad_norm": 0.3216066062450409, "learning_rate": 1.8990641081264422e-05, "loss": 0.5955, "step": 9478 }, { "epoch": 0.291186680183086, "grad_norm": 0.3336585462093353, "learning_rate": 1.8990429483729538e-05, "loss": 0.5374, "step": 9479 }, { "epoch": 0.2912173993180352, "grad_norm": 0.34971824288368225, "learning_rate": 1.8990217865196877e-05, "loss": 0.6124, "step": 9480 }, { "epoch": 0.2912481184529844, "grad_norm": 0.34529784321784973, "learning_rate": 1.8990006225666934e-05, "loss": 0.5933, "step": 9481 }, { "epoch": 0.29127883758793355, "grad_norm": 0.31069839000701904, "learning_rate": 1.8989794565140205e-05, "loss": 0.5733, "step": 9482 }, { "epoch": 0.29130955672288267, "grad_norm": 0.3323630094528198, "learning_rate": 1.898958288361718e-05, "loss": 0.5858, "step": 9483 }, { "epoch": 0.29134027585783184, "grad_norm": 0.32430770993232727, "learning_rate": 1.898937118109836e-05, "loss": 0.5012, "step": 9484 }, { "epoch": 0.291370994992781, "grad_norm": 0.3173278570175171, "learning_rate": 1.8989159457584233e-05, "loss": 0.5343, "step": 9485 }, { "epoch": 0.29140171412773014, "grad_norm": 0.37989574670791626, "learning_rate": 1.89889477130753e-05, "loss": 0.5884, "step": 9486 }, { "epoch": 0.2914324332626793, "grad_norm": 0.6298141479492188, "learning_rate": 1.898873594757205e-05, "loss": 0.6426, "step": 9487 }, { "epoch": 0.2914631523976285, "grad_norm": 0.335166871547699, "learning_rate": 1.898852416107498e-05, "loss": 0.5848, "step": 9488 }, { "epoch": 0.29149387153257766, "grad_norm": 0.34140294790267944, "learning_rate": 1.898831235358458e-05, "loss": 0.5958, "step": 9489 }, { "epoch": 0.2915245906675268, "grad_norm": 0.3721246123313904, "learning_rate": 1.898810052510135e-05, "loss": 0.5936, "step": 9490 }, { "epoch": 0.29155530980247596, "grad_norm": 0.3496953845024109, "learning_rate": 1.8987888675625787e-05, "loss": 0.6623, "step": 9491 }, { "epoch": 0.29158602893742513, "grad_norm": 0.33917510509490967, "learning_rate": 1.8987676805158378e-05, "loss": 0.5757, "step": 9492 }, { "epoch": 0.29161674807237425, "grad_norm": 0.3491023778915405, "learning_rate": 1.8987464913699626e-05, "loss": 0.5178, "step": 9493 }, { "epoch": 0.29164746720732343, "grad_norm": 0.34286242723464966, "learning_rate": 1.8987253001250022e-05, "loss": 0.613, "step": 9494 }, { "epoch": 0.2916781863422726, "grad_norm": 0.3346743881702423, "learning_rate": 1.8987041067810058e-05, "loss": 0.6061, "step": 9495 }, { "epoch": 0.2917089054772218, "grad_norm": 0.3325253427028656, "learning_rate": 1.8986829113380234e-05, "loss": 0.5592, "step": 9496 }, { "epoch": 0.2917396246121709, "grad_norm": 0.3663845956325531, "learning_rate": 1.8986617137961042e-05, "loss": 0.5225, "step": 9497 }, { "epoch": 0.2917703437471201, "grad_norm": 0.3396764099597931, "learning_rate": 1.8986405141552977e-05, "loss": 0.5059, "step": 9498 }, { "epoch": 0.29180106288206925, "grad_norm": 0.3335491120815277, "learning_rate": 1.8986193124156536e-05, "loss": 0.5336, "step": 9499 }, { "epoch": 0.2918317820170184, "grad_norm": 0.3298214375972748, "learning_rate": 1.8985981085772215e-05, "loss": 0.5862, "step": 9500 }, { "epoch": 0.29186250115196755, "grad_norm": 0.3363182544708252, "learning_rate": 1.8985769026400508e-05, "loss": 0.5339, "step": 9501 }, { "epoch": 0.2918932202869167, "grad_norm": 0.34193772077560425, "learning_rate": 1.8985556946041906e-05, "loss": 0.543, "step": 9502 }, { "epoch": 0.2919239394218659, "grad_norm": 0.4068673551082611, "learning_rate": 1.898534484469691e-05, "loss": 0.612, "step": 9503 }, { "epoch": 0.291954658556815, "grad_norm": 0.3414188623428345, "learning_rate": 1.8985132722366014e-05, "loss": 0.6004, "step": 9504 }, { "epoch": 0.2919853776917642, "grad_norm": 0.3507465720176697, "learning_rate": 1.8984920579049714e-05, "loss": 0.5875, "step": 9505 }, { "epoch": 0.29201609682671337, "grad_norm": 0.34816718101501465, "learning_rate": 1.8984708414748503e-05, "loss": 0.57, "step": 9506 }, { "epoch": 0.29204681596166254, "grad_norm": 0.39329907298088074, "learning_rate": 1.898449622946288e-05, "loss": 0.4667, "step": 9507 }, { "epoch": 0.29207753509661166, "grad_norm": 0.33350738883018494, "learning_rate": 1.8984284023193334e-05, "loss": 0.5795, "step": 9508 }, { "epoch": 0.29210825423156084, "grad_norm": 0.33408451080322266, "learning_rate": 1.8984071795940375e-05, "loss": 0.5423, "step": 9509 }, { "epoch": 0.29213897336651, "grad_norm": 0.3309657573699951, "learning_rate": 1.898385954770448e-05, "loss": 0.5992, "step": 9510 }, { "epoch": 0.29216969250145913, "grad_norm": 0.3400677442550659, "learning_rate": 1.8983647278486155e-05, "loss": 0.5075, "step": 9511 }, { "epoch": 0.2922004116364083, "grad_norm": 0.3487856090068817, "learning_rate": 1.8983434988285894e-05, "loss": 0.5478, "step": 9512 }, { "epoch": 0.2922311307713575, "grad_norm": 0.4834105372428894, "learning_rate": 1.8983222677104198e-05, "loss": 0.5457, "step": 9513 }, { "epoch": 0.29226184990630666, "grad_norm": 0.336187481880188, "learning_rate": 1.8983010344941552e-05, "loss": 0.5311, "step": 9514 }, { "epoch": 0.2922925690412558, "grad_norm": 0.35424697399139404, "learning_rate": 1.898279799179846e-05, "loss": 0.6245, "step": 9515 }, { "epoch": 0.29232328817620495, "grad_norm": 0.3801720440387726, "learning_rate": 1.8982585617675418e-05, "loss": 0.5803, "step": 9516 }, { "epoch": 0.29235400731115413, "grad_norm": 0.3366343379020691, "learning_rate": 1.898237322257292e-05, "loss": 0.5386, "step": 9517 }, { "epoch": 0.2923847264461033, "grad_norm": 0.6376914381980896, "learning_rate": 1.898216080649146e-05, "loss": 0.6246, "step": 9518 }, { "epoch": 0.2924154455810524, "grad_norm": 0.3457103967666626, "learning_rate": 1.8981948369431536e-05, "loss": 0.602, "step": 9519 }, { "epoch": 0.2924461647160016, "grad_norm": 0.41022199392318726, "learning_rate": 1.8981735911393645e-05, "loss": 0.5584, "step": 9520 }, { "epoch": 0.2924768838509508, "grad_norm": 0.43180036544799805, "learning_rate": 1.8981523432378282e-05, "loss": 0.5665, "step": 9521 }, { "epoch": 0.2925076029858999, "grad_norm": 0.4509851634502411, "learning_rate": 1.8981310932385945e-05, "loss": 0.5837, "step": 9522 }, { "epoch": 0.29253832212084907, "grad_norm": 0.35788509249687195, "learning_rate": 1.8981098411417128e-05, "loss": 0.5818, "step": 9523 }, { "epoch": 0.29256904125579825, "grad_norm": 0.3508554697036743, "learning_rate": 1.8980885869472326e-05, "loss": 0.6137, "step": 9524 }, { "epoch": 0.2925997603907474, "grad_norm": 0.38057956099510193, "learning_rate": 1.898067330655204e-05, "loss": 0.6173, "step": 9525 }, { "epoch": 0.29263047952569654, "grad_norm": 0.36012351512908936, "learning_rate": 1.8980460722656767e-05, "loss": 0.6551, "step": 9526 }, { "epoch": 0.2926611986606457, "grad_norm": 0.3516409397125244, "learning_rate": 1.8980248117787e-05, "loss": 0.5411, "step": 9527 }, { "epoch": 0.2926919177955949, "grad_norm": 0.32511675357818604, "learning_rate": 1.898003549194323e-05, "loss": 0.5465, "step": 9528 }, { "epoch": 0.292722636930544, "grad_norm": 0.3519776463508606, "learning_rate": 1.897982284512597e-05, "loss": 0.6168, "step": 9529 }, { "epoch": 0.2927533560654932, "grad_norm": 0.29429343342781067, "learning_rate": 1.89796101773357e-05, "loss": 0.544, "step": 9530 }, { "epoch": 0.29278407520044236, "grad_norm": 0.36166876554489136, "learning_rate": 1.8979397488572922e-05, "loss": 0.5988, "step": 9531 }, { "epoch": 0.29281479433539154, "grad_norm": 0.3069896101951599, "learning_rate": 1.8979184778838137e-05, "loss": 0.5335, "step": 9532 }, { "epoch": 0.29284551347034066, "grad_norm": 0.3235434591770172, "learning_rate": 1.897897204813184e-05, "loss": 0.5925, "step": 9533 }, { "epoch": 0.29287623260528983, "grad_norm": 0.36955368518829346, "learning_rate": 1.8978759296454525e-05, "loss": 0.6282, "step": 9534 }, { "epoch": 0.292906951740239, "grad_norm": 0.3482399582862854, "learning_rate": 1.8978546523806693e-05, "loss": 0.525, "step": 9535 }, { "epoch": 0.2929376708751881, "grad_norm": 0.3320288360118866, "learning_rate": 1.8978333730188833e-05, "loss": 0.5872, "step": 9536 }, { "epoch": 0.2929683900101373, "grad_norm": 0.3655318319797516, "learning_rate": 1.8978120915601455e-05, "loss": 0.6175, "step": 9537 }, { "epoch": 0.2929991091450865, "grad_norm": 0.3792562186717987, "learning_rate": 1.8977908080045042e-05, "loss": 0.5634, "step": 9538 }, { "epoch": 0.29302982828003565, "grad_norm": 0.3221607804298401, "learning_rate": 1.89776952235201e-05, "loss": 0.553, "step": 9539 }, { "epoch": 0.2930605474149848, "grad_norm": 0.8123154640197754, "learning_rate": 1.8977482346027125e-05, "loss": 0.6325, "step": 9540 }, { "epoch": 0.29309126654993395, "grad_norm": 0.3780273497104645, "learning_rate": 1.897726944756661e-05, "loss": 0.5688, "step": 9541 }, { "epoch": 0.2931219856848831, "grad_norm": 0.4373965859413147, "learning_rate": 1.897705652813906e-05, "loss": 0.6145, "step": 9542 }, { "epoch": 0.2931527048198323, "grad_norm": 0.3567352890968323, "learning_rate": 1.8976843587744963e-05, "loss": 0.6527, "step": 9543 }, { "epoch": 0.2931834239547814, "grad_norm": 0.33677729964256287, "learning_rate": 1.8976630626384824e-05, "loss": 0.5944, "step": 9544 }, { "epoch": 0.2932141430897306, "grad_norm": 0.33334919810295105, "learning_rate": 1.8976417644059137e-05, "loss": 0.5635, "step": 9545 }, { "epoch": 0.29324486222467977, "grad_norm": 0.4299328029155731, "learning_rate": 1.8976204640768397e-05, "loss": 0.6263, "step": 9546 }, { "epoch": 0.2932755813596289, "grad_norm": 0.31479552388191223, "learning_rate": 1.8975991616513108e-05, "loss": 0.496, "step": 9547 }, { "epoch": 0.29330630049457806, "grad_norm": 0.3650813102722168, "learning_rate": 1.897577857129376e-05, "loss": 0.6395, "step": 9548 }, { "epoch": 0.29333701962952724, "grad_norm": 0.323186457157135, "learning_rate": 1.8975565505110855e-05, "loss": 0.5124, "step": 9549 }, { "epoch": 0.2933677387644764, "grad_norm": 0.429595023393631, "learning_rate": 1.897535241796489e-05, "loss": 0.5703, "step": 9550 }, { "epoch": 0.29339845789942554, "grad_norm": 0.3061355650424957, "learning_rate": 1.8975139309856362e-05, "loss": 0.5919, "step": 9551 }, { "epoch": 0.2934291770343747, "grad_norm": 0.3978032171726227, "learning_rate": 1.8974926180785774e-05, "loss": 0.6421, "step": 9552 }, { "epoch": 0.2934598961693239, "grad_norm": 0.3311902582645416, "learning_rate": 1.8974713030753617e-05, "loss": 0.6024, "step": 9553 }, { "epoch": 0.293490615304273, "grad_norm": 0.34466949105262756, "learning_rate": 1.8974499859760386e-05, "loss": 0.624, "step": 9554 }, { "epoch": 0.2935213344392222, "grad_norm": 0.35075896978378296, "learning_rate": 1.897428666780659e-05, "loss": 0.6283, "step": 9555 }, { "epoch": 0.29355205357417136, "grad_norm": 0.36592888832092285, "learning_rate": 1.897407345489272e-05, "loss": 0.6165, "step": 9556 }, { "epoch": 0.29358277270912053, "grad_norm": 0.3629481792449951, "learning_rate": 1.897386022101927e-05, "loss": 0.6169, "step": 9557 }, { "epoch": 0.29361349184406965, "grad_norm": 0.3128698468208313, "learning_rate": 1.8973646966186746e-05, "loss": 0.6011, "step": 9558 }, { "epoch": 0.2936442109790188, "grad_norm": 0.3430546820163727, "learning_rate": 1.897343369039564e-05, "loss": 0.5418, "step": 9559 }, { "epoch": 0.293674930113968, "grad_norm": 0.3367222547531128, "learning_rate": 1.8973220393646458e-05, "loss": 0.4987, "step": 9560 }, { "epoch": 0.2937056492489172, "grad_norm": 0.32873091101646423, "learning_rate": 1.897300707593969e-05, "loss": 0.5395, "step": 9561 }, { "epoch": 0.2937363683838663, "grad_norm": 0.37361085414886475, "learning_rate": 1.8972793737275837e-05, "loss": 0.5489, "step": 9562 }, { "epoch": 0.2937670875188155, "grad_norm": 0.3759135901927948, "learning_rate": 1.89725803776554e-05, "loss": 0.6122, "step": 9563 }, { "epoch": 0.29379780665376465, "grad_norm": 0.3464308977127075, "learning_rate": 1.8972366997078873e-05, "loss": 0.5868, "step": 9564 }, { "epoch": 0.29382852578871377, "grad_norm": 0.3251703381538391, "learning_rate": 1.8972153595546757e-05, "loss": 0.545, "step": 9565 }, { "epoch": 0.29385924492366294, "grad_norm": 0.3920583426952362, "learning_rate": 1.8971940173059548e-05, "loss": 0.561, "step": 9566 }, { "epoch": 0.2938899640586121, "grad_norm": 0.2970030903816223, "learning_rate": 1.8971726729617746e-05, "loss": 0.5916, "step": 9567 }, { "epoch": 0.2939206831935613, "grad_norm": 0.42063599824905396, "learning_rate": 1.8971513265221854e-05, "loss": 0.7191, "step": 9568 }, { "epoch": 0.2939514023285104, "grad_norm": 0.33417123556137085, "learning_rate": 1.8971299779872362e-05, "loss": 0.5899, "step": 9569 }, { "epoch": 0.2939821214634596, "grad_norm": 0.33266299962997437, "learning_rate": 1.8971086273569775e-05, "loss": 0.5186, "step": 9570 }, { "epoch": 0.29401284059840876, "grad_norm": 0.3238975703716278, "learning_rate": 1.8970872746314585e-05, "loss": 0.5665, "step": 9571 }, { "epoch": 0.2940435597333579, "grad_norm": 0.3370126187801361, "learning_rate": 1.89706591981073e-05, "loss": 0.5685, "step": 9572 }, { "epoch": 0.29407427886830706, "grad_norm": 0.3486659824848175, "learning_rate": 1.897044562894841e-05, "loss": 0.5774, "step": 9573 }, { "epoch": 0.29410499800325623, "grad_norm": 0.3460404872894287, "learning_rate": 1.8970232038838423e-05, "loss": 0.5821, "step": 9574 }, { "epoch": 0.2941357171382054, "grad_norm": 0.36355626583099365, "learning_rate": 1.897001842777783e-05, "loss": 0.6359, "step": 9575 }, { "epoch": 0.29416643627315453, "grad_norm": 0.36081662774086, "learning_rate": 1.8969804795767132e-05, "loss": 0.5519, "step": 9576 }, { "epoch": 0.2941971554081037, "grad_norm": 0.33564263582229614, "learning_rate": 1.896959114280683e-05, "loss": 0.5773, "step": 9577 }, { "epoch": 0.2942278745430529, "grad_norm": 0.3374861180782318, "learning_rate": 1.896937746889742e-05, "loss": 0.5291, "step": 9578 }, { "epoch": 0.294258593678002, "grad_norm": 0.33552759885787964, "learning_rate": 1.8969163774039402e-05, "loss": 0.5894, "step": 9579 }, { "epoch": 0.2942893128129512, "grad_norm": 0.34528395533561707, "learning_rate": 1.8968950058233275e-05, "loss": 0.5608, "step": 9580 }, { "epoch": 0.29432003194790035, "grad_norm": 0.5486834049224854, "learning_rate": 1.8968736321479543e-05, "loss": 0.5766, "step": 9581 }, { "epoch": 0.2943507510828495, "grad_norm": 0.32925066351890564, "learning_rate": 1.89685225637787e-05, "loss": 0.6351, "step": 9582 }, { "epoch": 0.29438147021779865, "grad_norm": 0.4103151559829712, "learning_rate": 1.8968308785131242e-05, "loss": 0.6331, "step": 9583 }, { "epoch": 0.2944121893527478, "grad_norm": 0.36522626876831055, "learning_rate": 1.8968094985537677e-05, "loss": 0.6035, "step": 9584 }, { "epoch": 0.294442908487697, "grad_norm": 0.3412953019142151, "learning_rate": 1.8967881164998494e-05, "loss": 0.6852, "step": 9585 }, { "epoch": 0.2944736276226462, "grad_norm": 0.3408052921295166, "learning_rate": 1.8967667323514203e-05, "loss": 0.5393, "step": 9586 }, { "epoch": 0.2945043467575953, "grad_norm": 0.3126402497291565, "learning_rate": 1.8967453461085298e-05, "loss": 0.5609, "step": 9587 }, { "epoch": 0.29453506589254447, "grad_norm": 0.3628767430782318, "learning_rate": 1.8967239577712276e-05, "loss": 0.6448, "step": 9588 }, { "epoch": 0.29456578502749364, "grad_norm": 0.3629752993583679, "learning_rate": 1.8967025673395643e-05, "loss": 0.5778, "step": 9589 }, { "epoch": 0.29459650416244276, "grad_norm": 0.3381538987159729, "learning_rate": 1.8966811748135897e-05, "loss": 0.6197, "step": 9590 }, { "epoch": 0.29462722329739194, "grad_norm": 0.4506744146347046, "learning_rate": 1.8966597801933533e-05, "loss": 0.5235, "step": 9591 }, { "epoch": 0.2946579424323411, "grad_norm": 0.3358134627342224, "learning_rate": 1.8966383834789054e-05, "loss": 0.5198, "step": 9592 }, { "epoch": 0.2946886615672903, "grad_norm": 0.35360878705978394, "learning_rate": 1.896616984670296e-05, "loss": 0.6326, "step": 9593 }, { "epoch": 0.2947193807022394, "grad_norm": 0.32817041873931885, "learning_rate": 1.896595583767575e-05, "loss": 0.6663, "step": 9594 }, { "epoch": 0.2947500998371886, "grad_norm": 1.1207202672958374, "learning_rate": 1.8965741807707924e-05, "loss": 0.5035, "step": 9595 }, { "epoch": 0.29478081897213776, "grad_norm": 3.716357946395874, "learning_rate": 1.8965527756799978e-05, "loss": 0.6172, "step": 9596 }, { "epoch": 0.2948115381070869, "grad_norm": 0.3494524359703064, "learning_rate": 1.896531368495242e-05, "loss": 0.6362, "step": 9597 }, { "epoch": 0.29484225724203605, "grad_norm": 0.3531571924686432, "learning_rate": 1.8965099592165748e-05, "loss": 0.5716, "step": 9598 }, { "epoch": 0.29487297637698523, "grad_norm": 0.39642706513404846, "learning_rate": 1.8964885478440456e-05, "loss": 0.5422, "step": 9599 }, { "epoch": 0.2949036955119344, "grad_norm": 0.3277052342891693, "learning_rate": 1.896467134377705e-05, "loss": 0.6154, "step": 9600 }, { "epoch": 0.2949344146468835, "grad_norm": 0.40405479073524475, "learning_rate": 1.8964457188176023e-05, "loss": 0.5766, "step": 9601 }, { "epoch": 0.2949651337818327, "grad_norm": 0.45250841975212097, "learning_rate": 1.8964243011637885e-05, "loss": 0.6191, "step": 9602 }, { "epoch": 0.2949958529167819, "grad_norm": 0.3300922214984894, "learning_rate": 1.8964028814163132e-05, "loss": 0.5547, "step": 9603 }, { "epoch": 0.29502657205173105, "grad_norm": 0.34479033946990967, "learning_rate": 1.8963814595752263e-05, "loss": 0.5735, "step": 9604 }, { "epoch": 0.29505729118668017, "grad_norm": 0.30435702204704285, "learning_rate": 1.8963600356405775e-05, "loss": 0.5202, "step": 9605 }, { "epoch": 0.29508801032162935, "grad_norm": 0.33053603768348694, "learning_rate": 1.8963386096124176e-05, "loss": 0.5242, "step": 9606 }, { "epoch": 0.2951187294565785, "grad_norm": 0.566961944103241, "learning_rate": 1.896317181490796e-05, "loss": 0.6261, "step": 9607 }, { "epoch": 0.29514944859152764, "grad_norm": 0.3661292791366577, "learning_rate": 1.8962957512757632e-05, "loss": 0.6484, "step": 9608 }, { "epoch": 0.2951801677264768, "grad_norm": 0.32124316692352295, "learning_rate": 1.8962743189673694e-05, "loss": 0.6562, "step": 9609 }, { "epoch": 0.295210886861426, "grad_norm": 0.3607218563556671, "learning_rate": 1.896252884565664e-05, "loss": 0.6283, "step": 9610 }, { "epoch": 0.29524160599637517, "grad_norm": 0.3653939366340637, "learning_rate": 1.8962314480706974e-05, "loss": 0.605, "step": 9611 }, { "epoch": 0.2952723251313243, "grad_norm": 0.3272026479244232, "learning_rate": 1.89621000948252e-05, "loss": 0.5869, "step": 9612 }, { "epoch": 0.29530304426627346, "grad_norm": 0.34552496671676636, "learning_rate": 1.896188568801181e-05, "loss": 0.6614, "step": 9613 }, { "epoch": 0.29533376340122264, "grad_norm": 0.3519168496131897, "learning_rate": 1.896167126026731e-05, "loss": 0.588, "step": 9614 }, { "epoch": 0.29536448253617176, "grad_norm": 0.3311556279659271, "learning_rate": 1.8961456811592205e-05, "loss": 0.5872, "step": 9615 }, { "epoch": 0.29539520167112093, "grad_norm": 0.3169788420200348, "learning_rate": 1.896124234198699e-05, "loss": 0.5711, "step": 9616 }, { "epoch": 0.2954259208060701, "grad_norm": 0.3222866654396057, "learning_rate": 1.8961027851452166e-05, "loss": 0.5677, "step": 9617 }, { "epoch": 0.2954566399410193, "grad_norm": 0.34067559242248535, "learning_rate": 1.896081333998824e-05, "loss": 0.5206, "step": 9618 }, { "epoch": 0.2954873590759684, "grad_norm": 0.33663129806518555, "learning_rate": 1.8960598807595704e-05, "loss": 0.5706, "step": 9619 }, { "epoch": 0.2955180782109176, "grad_norm": 0.3504725694656372, "learning_rate": 1.8960384254275068e-05, "loss": 0.5916, "step": 9620 }, { "epoch": 0.29554879734586675, "grad_norm": 0.35444918274879456, "learning_rate": 1.8960169680026826e-05, "loss": 0.5963, "step": 9621 }, { "epoch": 0.2955795164808159, "grad_norm": 0.37759289145469666, "learning_rate": 1.895995508485148e-05, "loss": 0.6063, "step": 9622 }, { "epoch": 0.29561023561576505, "grad_norm": 0.37891051173210144, "learning_rate": 1.8959740468749538e-05, "loss": 0.648, "step": 9623 }, { "epoch": 0.2956409547507142, "grad_norm": 0.31931546330451965, "learning_rate": 1.8959525831721494e-05, "loss": 0.5298, "step": 9624 }, { "epoch": 0.2956716738856634, "grad_norm": 0.7510591149330139, "learning_rate": 1.895931117376785e-05, "loss": 0.5803, "step": 9625 }, { "epoch": 0.2957023930206125, "grad_norm": 0.36139360070228577, "learning_rate": 1.895909649488911e-05, "loss": 0.5806, "step": 9626 }, { "epoch": 0.2957331121555617, "grad_norm": 0.3351001441478729, "learning_rate": 1.8958881795085776e-05, "loss": 0.537, "step": 9627 }, { "epoch": 0.29576383129051087, "grad_norm": 0.3678440451622009, "learning_rate": 1.8958667074358348e-05, "loss": 0.6195, "step": 9628 }, { "epoch": 0.29579455042546005, "grad_norm": 0.32193076610565186, "learning_rate": 1.8958452332707325e-05, "loss": 0.5957, "step": 9629 }, { "epoch": 0.29582526956040917, "grad_norm": 0.3359934389591217, "learning_rate": 1.895823757013321e-05, "loss": 0.4976, "step": 9630 }, { "epoch": 0.29585598869535834, "grad_norm": 0.32972991466522217, "learning_rate": 1.895802278663651e-05, "loss": 0.576, "step": 9631 }, { "epoch": 0.2958867078303075, "grad_norm": 0.38143596053123474, "learning_rate": 1.895780798221772e-05, "loss": 0.6471, "step": 9632 }, { "epoch": 0.29591742696525664, "grad_norm": 0.39756229519844055, "learning_rate": 1.8957593156877343e-05, "loss": 0.5226, "step": 9633 }, { "epoch": 0.2959481461002058, "grad_norm": 0.35852378606796265, "learning_rate": 1.895737831061588e-05, "loss": 0.537, "step": 9634 }, { "epoch": 0.295978865235155, "grad_norm": 0.3557642102241516, "learning_rate": 1.8957163443433838e-05, "loss": 0.5717, "step": 9635 }, { "epoch": 0.29600958437010416, "grad_norm": 0.354401558637619, "learning_rate": 1.8956948555331717e-05, "loss": 0.642, "step": 9636 }, { "epoch": 0.2960403035050533, "grad_norm": 0.3682270348072052, "learning_rate": 1.8956733646310013e-05, "loss": 0.5635, "step": 9637 }, { "epoch": 0.29607102264000246, "grad_norm": 0.3520122468471527, "learning_rate": 1.895651871636923e-05, "loss": 0.5169, "step": 9638 }, { "epoch": 0.29610174177495163, "grad_norm": 0.4236225485801697, "learning_rate": 1.8956303765509877e-05, "loss": 0.6088, "step": 9639 }, { "epoch": 0.29613246090990075, "grad_norm": 0.30634164810180664, "learning_rate": 1.895608879373245e-05, "loss": 0.5855, "step": 9640 }, { "epoch": 0.2961631800448499, "grad_norm": 0.4158584177494049, "learning_rate": 1.895587380103745e-05, "loss": 0.6099, "step": 9641 }, { "epoch": 0.2961938991797991, "grad_norm": 0.5082555413246155, "learning_rate": 1.8955658787425383e-05, "loss": 0.6526, "step": 9642 }, { "epoch": 0.2962246183147483, "grad_norm": 0.33939340710639954, "learning_rate": 1.895544375289675e-05, "loss": 0.625, "step": 9643 }, { "epoch": 0.2962553374496974, "grad_norm": 0.35879603028297424, "learning_rate": 1.895522869745205e-05, "loss": 0.5382, "step": 9644 }, { "epoch": 0.2962860565846466, "grad_norm": 0.3787858188152313, "learning_rate": 1.895501362109179e-05, "loss": 0.6582, "step": 9645 }, { "epoch": 0.29631677571959575, "grad_norm": 0.3573126792907715, "learning_rate": 1.8954798523816472e-05, "loss": 0.6583, "step": 9646 }, { "epoch": 0.29634749485454487, "grad_norm": 0.4100559651851654, "learning_rate": 1.8954583405626596e-05, "loss": 0.639, "step": 9647 }, { "epoch": 0.29637821398949404, "grad_norm": 0.3965766727924347, "learning_rate": 1.895436826652266e-05, "loss": 0.6383, "step": 9648 }, { "epoch": 0.2964089331244432, "grad_norm": 0.36630764603614807, "learning_rate": 1.8954153106505178e-05, "loss": 0.5662, "step": 9649 }, { "epoch": 0.2964396522593924, "grad_norm": 0.3609057366847992, "learning_rate": 1.8953937925574643e-05, "loss": 0.54, "step": 9650 }, { "epoch": 0.2964703713943415, "grad_norm": 0.3224005401134491, "learning_rate": 1.8953722723731562e-05, "loss": 0.5115, "step": 9651 }, { "epoch": 0.2965010905292907, "grad_norm": 0.3513984680175781, "learning_rate": 1.8953507500976434e-05, "loss": 0.4943, "step": 9652 }, { "epoch": 0.29653180966423986, "grad_norm": 0.31077805161476135, "learning_rate": 1.8953292257309766e-05, "loss": 0.6143, "step": 9653 }, { "epoch": 0.29656252879918904, "grad_norm": 0.33379340171813965, "learning_rate": 1.8953076992732054e-05, "loss": 0.5687, "step": 9654 }, { "epoch": 0.29659324793413816, "grad_norm": 0.3536103665828705, "learning_rate": 1.895286170724381e-05, "loss": 0.6049, "step": 9655 }, { "epoch": 0.29662396706908734, "grad_norm": 1.3200701475143433, "learning_rate": 1.895264640084553e-05, "loss": 0.5471, "step": 9656 }, { "epoch": 0.2966546862040365, "grad_norm": 0.3239513635635376, "learning_rate": 1.8952431073537723e-05, "loss": 0.4842, "step": 9657 }, { "epoch": 0.29668540533898563, "grad_norm": 0.29912689328193665, "learning_rate": 1.8952215725320885e-05, "loss": 0.5429, "step": 9658 }, { "epoch": 0.2967161244739348, "grad_norm": 0.4870859682559967, "learning_rate": 1.8952000356195522e-05, "loss": 0.5589, "step": 9659 }, { "epoch": 0.296746843608884, "grad_norm": 0.38821282982826233, "learning_rate": 1.8951784966162138e-05, "loss": 0.5068, "step": 9660 }, { "epoch": 0.29677756274383316, "grad_norm": 0.3341291546821594, "learning_rate": 1.8951569555221235e-05, "loss": 0.602, "step": 9661 }, { "epoch": 0.2968082818787823, "grad_norm": 0.370922327041626, "learning_rate": 1.8951354123373317e-05, "loss": 0.6539, "step": 9662 }, { "epoch": 0.29683900101373145, "grad_norm": 0.42636317014694214, "learning_rate": 1.8951138670618884e-05, "loss": 0.6751, "step": 9663 }, { "epoch": 0.2968697201486806, "grad_norm": 0.3206750154495239, "learning_rate": 1.895092319695844e-05, "loss": 0.5513, "step": 9664 }, { "epoch": 0.29690043928362975, "grad_norm": 0.43418148159980774, "learning_rate": 1.8950707702392494e-05, "loss": 0.6398, "step": 9665 }, { "epoch": 0.2969311584185789, "grad_norm": 0.32011228799819946, "learning_rate": 1.895049218692154e-05, "loss": 0.599, "step": 9666 }, { "epoch": 0.2969618775535281, "grad_norm": 0.3483123481273651, "learning_rate": 1.8950276650546093e-05, "loss": 0.6033, "step": 9667 }, { "epoch": 0.2969925966884773, "grad_norm": 0.3314444124698639, "learning_rate": 1.8950061093266645e-05, "loss": 0.5796, "step": 9668 }, { "epoch": 0.2970233158234264, "grad_norm": 0.31698739528656006, "learning_rate": 1.8949845515083706e-05, "loss": 0.536, "step": 9669 }, { "epoch": 0.29705403495837557, "grad_norm": 0.4664519429206848, "learning_rate": 1.8949629915997775e-05, "loss": 0.5823, "step": 9670 }, { "epoch": 0.29708475409332474, "grad_norm": 0.5449690222740173, "learning_rate": 1.8949414296009362e-05, "loss": 0.5268, "step": 9671 }, { "epoch": 0.2971154732282739, "grad_norm": 0.3628597557544708, "learning_rate": 1.8949198655118967e-05, "loss": 0.4785, "step": 9672 }, { "epoch": 0.29714619236322304, "grad_norm": 0.32228782773017883, "learning_rate": 1.894898299332709e-05, "loss": 0.5391, "step": 9673 }, { "epoch": 0.2971769114981722, "grad_norm": 0.3507341742515564, "learning_rate": 1.8948767310634243e-05, "loss": 0.5636, "step": 9674 }, { "epoch": 0.2972076306331214, "grad_norm": 0.36541247367858887, "learning_rate": 1.894855160704092e-05, "loss": 0.5146, "step": 9675 }, { "epoch": 0.2972383497680705, "grad_norm": 0.3069060444831848, "learning_rate": 1.894833588254763e-05, "loss": 0.5857, "step": 9676 }, { "epoch": 0.2972690689030197, "grad_norm": 0.37467294931411743, "learning_rate": 1.8948120137154882e-05, "loss": 0.6485, "step": 9677 }, { "epoch": 0.29729978803796886, "grad_norm": 0.3611754775047302, "learning_rate": 1.894790437086317e-05, "loss": 0.5879, "step": 9678 }, { "epoch": 0.29733050717291803, "grad_norm": 0.35206878185272217, "learning_rate": 1.8947688583673005e-05, "loss": 0.59, "step": 9679 }, { "epoch": 0.29736122630786715, "grad_norm": 0.3507348895072937, "learning_rate": 1.8947472775584886e-05, "loss": 0.5969, "step": 9680 }, { "epoch": 0.29739194544281633, "grad_norm": 0.43686017394065857, "learning_rate": 1.8947256946599323e-05, "loss": 0.6193, "step": 9681 }, { "epoch": 0.2974226645777655, "grad_norm": 0.3680882155895233, "learning_rate": 1.8947041096716813e-05, "loss": 0.5458, "step": 9682 }, { "epoch": 0.2974533837127146, "grad_norm": 0.3411954343318939, "learning_rate": 1.894682522593786e-05, "loss": 0.5411, "step": 9683 }, { "epoch": 0.2974841028476638, "grad_norm": 0.3226383924484253, "learning_rate": 1.894660933426298e-05, "loss": 0.4761, "step": 9684 }, { "epoch": 0.297514821982613, "grad_norm": 0.3287183940410614, "learning_rate": 1.8946393421692666e-05, "loss": 0.5313, "step": 9685 }, { "epoch": 0.29754554111756215, "grad_norm": 0.36605122685432434, "learning_rate": 1.8946177488227425e-05, "loss": 0.6112, "step": 9686 }, { "epoch": 0.29757626025251127, "grad_norm": 0.341158002614975, "learning_rate": 1.8945961533867758e-05, "loss": 0.5711, "step": 9687 }, { "epoch": 0.29760697938746045, "grad_norm": 0.3511313199996948, "learning_rate": 1.894574555861418e-05, "loss": 0.5946, "step": 9688 }, { "epoch": 0.2976376985224096, "grad_norm": 0.3349838852882385, "learning_rate": 1.8945529562467183e-05, "loss": 0.5783, "step": 9689 }, { "epoch": 0.29766841765735874, "grad_norm": 0.33322975039482117, "learning_rate": 1.8945313545427278e-05, "loss": 0.5872, "step": 9690 }, { "epoch": 0.2976991367923079, "grad_norm": 0.3456408977508545, "learning_rate": 1.894509750749497e-05, "loss": 0.5115, "step": 9691 }, { "epoch": 0.2977298559272571, "grad_norm": 0.33262738585472107, "learning_rate": 1.8944881448670764e-05, "loss": 0.6059, "step": 9692 }, { "epoch": 0.29776057506220627, "grad_norm": 0.43280312418937683, "learning_rate": 1.894466536895516e-05, "loss": 0.5944, "step": 9693 }, { "epoch": 0.2977912941971554, "grad_norm": 0.373259961605072, "learning_rate": 1.8944449268348662e-05, "loss": 0.5739, "step": 9694 }, { "epoch": 0.29782201333210456, "grad_norm": 0.3306981921195984, "learning_rate": 1.8944233146851784e-05, "loss": 0.5369, "step": 9695 }, { "epoch": 0.29785273246705374, "grad_norm": 0.35690996050834656, "learning_rate": 1.8944017004465022e-05, "loss": 0.5134, "step": 9696 }, { "epoch": 0.2978834516020029, "grad_norm": 0.3423795998096466, "learning_rate": 1.8943800841188884e-05, "loss": 0.5529, "step": 9697 }, { "epoch": 0.29791417073695203, "grad_norm": 0.3673200309276581, "learning_rate": 1.8943584657023876e-05, "loss": 0.6672, "step": 9698 }, { "epoch": 0.2979448898719012, "grad_norm": 0.7449862957000732, "learning_rate": 1.8943368451970497e-05, "loss": 0.5315, "step": 9699 }, { "epoch": 0.2979756090068504, "grad_norm": 0.3451801538467407, "learning_rate": 1.8943152226029262e-05, "loss": 0.577, "step": 9700 }, { "epoch": 0.2980063281417995, "grad_norm": 0.35647618770599365, "learning_rate": 1.894293597920067e-05, "loss": 0.5921, "step": 9701 }, { "epoch": 0.2980370472767487, "grad_norm": 0.3581673800945282, "learning_rate": 1.8942719711485222e-05, "loss": 0.6615, "step": 9702 }, { "epoch": 0.29806776641169785, "grad_norm": 0.3532733917236328, "learning_rate": 1.894250342288343e-05, "loss": 0.6722, "step": 9703 }, { "epoch": 0.29809848554664703, "grad_norm": 0.3806120455265045, "learning_rate": 1.8942287113395795e-05, "loss": 0.6679, "step": 9704 }, { "epoch": 0.29812920468159615, "grad_norm": 0.38543716073036194, "learning_rate": 1.8942070783022827e-05, "loss": 0.5527, "step": 9705 }, { "epoch": 0.2981599238165453, "grad_norm": 0.42976677417755127, "learning_rate": 1.8941854431765026e-05, "loss": 0.6042, "step": 9706 }, { "epoch": 0.2981906429514945, "grad_norm": 0.36115238070487976, "learning_rate": 1.8941638059622902e-05, "loss": 0.6303, "step": 9707 }, { "epoch": 0.2982213620864436, "grad_norm": 0.34805527329444885, "learning_rate": 1.8941421666596954e-05, "loss": 0.5649, "step": 9708 }, { "epoch": 0.2982520812213928, "grad_norm": 0.30281224846839905, "learning_rate": 1.8941205252687697e-05, "loss": 0.5037, "step": 9709 }, { "epoch": 0.29828280035634197, "grad_norm": 0.3630819618701935, "learning_rate": 1.8940988817895625e-05, "loss": 0.5498, "step": 9710 }, { "epoch": 0.29831351949129115, "grad_norm": 0.5251935124397278, "learning_rate": 1.894077236222125e-05, "loss": 0.5154, "step": 9711 }, { "epoch": 0.29834423862624027, "grad_norm": 0.3680869936943054, "learning_rate": 1.894055588566508e-05, "loss": 0.4976, "step": 9712 }, { "epoch": 0.29837495776118944, "grad_norm": 0.4132069945335388, "learning_rate": 1.8940339388227613e-05, "loss": 0.5496, "step": 9713 }, { "epoch": 0.2984056768961386, "grad_norm": 0.31690049171447754, "learning_rate": 1.894012286990936e-05, "loss": 0.5123, "step": 9714 }, { "epoch": 0.2984363960310878, "grad_norm": 0.32258421182632446, "learning_rate": 1.893990633071083e-05, "loss": 0.5423, "step": 9715 }, { "epoch": 0.2984671151660369, "grad_norm": 0.38948675990104675, "learning_rate": 1.8939689770632518e-05, "loss": 0.5141, "step": 9716 }, { "epoch": 0.2984978343009861, "grad_norm": 0.40721848607063293, "learning_rate": 1.8939473189674943e-05, "loss": 0.5313, "step": 9717 }, { "epoch": 0.29852855343593526, "grad_norm": 0.46411463618278503, "learning_rate": 1.89392565878386e-05, "loss": 0.5724, "step": 9718 }, { "epoch": 0.2985592725708844, "grad_norm": 0.3649371862411499, "learning_rate": 1.8939039965123997e-05, "loss": 0.5826, "step": 9719 }, { "epoch": 0.29858999170583356, "grad_norm": 0.3373148441314697, "learning_rate": 1.8938823321531645e-05, "loss": 0.6442, "step": 9720 }, { "epoch": 0.29862071084078273, "grad_norm": 0.34521934390068054, "learning_rate": 1.8938606657062046e-05, "loss": 0.5778, "step": 9721 }, { "epoch": 0.2986514299757319, "grad_norm": 0.4534342885017395, "learning_rate": 1.8938389971715707e-05, "loss": 0.5956, "step": 9722 }, { "epoch": 0.29868214911068103, "grad_norm": 0.39713844656944275, "learning_rate": 1.8938173265493133e-05, "loss": 0.5674, "step": 9723 }, { "epoch": 0.2987128682456302, "grad_norm": 0.39545366168022156, "learning_rate": 1.8937956538394833e-05, "loss": 0.6148, "step": 9724 }, { "epoch": 0.2987435873805794, "grad_norm": 0.43316736817359924, "learning_rate": 1.893773979042131e-05, "loss": 0.5104, "step": 9725 }, { "epoch": 0.2987743065155285, "grad_norm": 0.35092291235923767, "learning_rate": 1.893752302157307e-05, "loss": 0.5924, "step": 9726 }, { "epoch": 0.2988050256504777, "grad_norm": 0.34616145491600037, "learning_rate": 1.8937306231850626e-05, "loss": 0.5936, "step": 9727 }, { "epoch": 0.29883574478542685, "grad_norm": 0.46127307415008545, "learning_rate": 1.8937089421254474e-05, "loss": 0.6318, "step": 9728 }, { "epoch": 0.298866463920376, "grad_norm": 0.321965754032135, "learning_rate": 1.893687258978513e-05, "loss": 0.5672, "step": 9729 }, { "epoch": 0.29889718305532514, "grad_norm": 0.35799023509025574, "learning_rate": 1.893665573744309e-05, "loss": 0.6111, "step": 9730 }, { "epoch": 0.2989279021902743, "grad_norm": 0.3644561767578125, "learning_rate": 1.893643886422887e-05, "loss": 0.5814, "step": 9731 }, { "epoch": 0.2989586213252235, "grad_norm": 0.32626402378082275, "learning_rate": 1.8936221970142973e-05, "loss": 0.6332, "step": 9732 }, { "epoch": 0.2989893404601726, "grad_norm": 0.37310850620269775, "learning_rate": 1.8936005055185904e-05, "loss": 0.5569, "step": 9733 }, { "epoch": 0.2990200595951218, "grad_norm": 0.3659583628177643, "learning_rate": 1.893578811935817e-05, "loss": 0.5864, "step": 9734 }, { "epoch": 0.29905077873007097, "grad_norm": 0.31997251510620117, "learning_rate": 1.8935571162660283e-05, "loss": 0.6191, "step": 9735 }, { "epoch": 0.29908149786502014, "grad_norm": 0.3536742031574249, "learning_rate": 1.8935354185092744e-05, "loss": 0.5276, "step": 9736 }, { "epoch": 0.29911221699996926, "grad_norm": 0.32654187083244324, "learning_rate": 1.8935137186656062e-05, "loss": 0.5148, "step": 9737 }, { "epoch": 0.29914293613491844, "grad_norm": 0.3629055619239807, "learning_rate": 1.893492016735074e-05, "loss": 0.6056, "step": 9738 }, { "epoch": 0.2991736552698676, "grad_norm": 0.35628142952919006, "learning_rate": 1.893470312717729e-05, "loss": 0.5945, "step": 9739 }, { "epoch": 0.2992043744048168, "grad_norm": 0.3607926368713379, "learning_rate": 1.8934486066136215e-05, "loss": 0.5968, "step": 9740 }, { "epoch": 0.2992350935397659, "grad_norm": 0.3886013627052307, "learning_rate": 1.8934268984228027e-05, "loss": 0.6821, "step": 9741 }, { "epoch": 0.2992658126747151, "grad_norm": 0.33939847350120544, "learning_rate": 1.8934051881453227e-05, "loss": 0.5894, "step": 9742 }, { "epoch": 0.29929653180966426, "grad_norm": 0.5812739729881287, "learning_rate": 1.893383475781233e-05, "loss": 0.6531, "step": 9743 }, { "epoch": 0.2993272509446134, "grad_norm": 0.3648979067802429, "learning_rate": 1.893361761330583e-05, "loss": 0.627, "step": 9744 }, { "epoch": 0.29935797007956255, "grad_norm": 0.3355835974216461, "learning_rate": 1.8933400447934245e-05, "loss": 0.5387, "step": 9745 }, { "epoch": 0.2993886892145117, "grad_norm": 0.32549765706062317, "learning_rate": 1.8933183261698083e-05, "loss": 0.6227, "step": 9746 }, { "epoch": 0.2994194083494609, "grad_norm": 0.3668591380119324, "learning_rate": 1.8932966054597847e-05, "loss": 0.5712, "step": 9747 }, { "epoch": 0.29945012748441, "grad_norm": 0.4148511290550232, "learning_rate": 1.8932748826634043e-05, "loss": 0.6631, "step": 9748 }, { "epoch": 0.2994808466193592, "grad_norm": 0.35653775930404663, "learning_rate": 1.8932531577807178e-05, "loss": 0.5603, "step": 9749 }, { "epoch": 0.2995115657543084, "grad_norm": 0.37883543968200684, "learning_rate": 1.8932314308117762e-05, "loss": 0.5792, "step": 9750 }, { "epoch": 0.2995422848892575, "grad_norm": 0.3593836724758148, "learning_rate": 1.8932097017566308e-05, "loss": 0.5799, "step": 9751 }, { "epoch": 0.29957300402420667, "grad_norm": 0.3820744752883911, "learning_rate": 1.893187970615331e-05, "loss": 0.506, "step": 9752 }, { "epoch": 0.29960372315915584, "grad_norm": 0.3519326448440552, "learning_rate": 1.893166237387929e-05, "loss": 0.5923, "step": 9753 }, { "epoch": 0.299634442294105, "grad_norm": 0.4042578637599945, "learning_rate": 1.8931445020744744e-05, "loss": 0.6344, "step": 9754 }, { "epoch": 0.29966516142905414, "grad_norm": 0.34212157130241394, "learning_rate": 1.8931227646750184e-05, "loss": 0.6168, "step": 9755 }, { "epoch": 0.2996958805640033, "grad_norm": 0.34083041548728943, "learning_rate": 1.8931010251896122e-05, "loss": 0.6382, "step": 9756 }, { "epoch": 0.2997265996989525, "grad_norm": 0.3626774251461029, "learning_rate": 1.893079283618306e-05, "loss": 0.6179, "step": 9757 }, { "epoch": 0.29975731883390166, "grad_norm": 0.3715450167655945, "learning_rate": 1.893057539961151e-05, "loss": 0.5738, "step": 9758 }, { "epoch": 0.2997880379688508, "grad_norm": 0.37936174869537354, "learning_rate": 1.8930357942181974e-05, "loss": 0.5287, "step": 9759 }, { "epoch": 0.29981875710379996, "grad_norm": 0.37417909502983093, "learning_rate": 1.8930140463894964e-05, "loss": 0.597, "step": 9760 }, { "epoch": 0.29984947623874914, "grad_norm": 0.406817764043808, "learning_rate": 1.8929922964750988e-05, "loss": 0.5499, "step": 9761 }, { "epoch": 0.29988019537369826, "grad_norm": 0.35832393169403076, "learning_rate": 1.892970544475055e-05, "loss": 0.5132, "step": 9762 }, { "epoch": 0.29991091450864743, "grad_norm": 0.3213738203048706, "learning_rate": 1.8929487903894164e-05, "loss": 0.528, "step": 9763 }, { "epoch": 0.2999416336435966, "grad_norm": 0.350562185049057, "learning_rate": 1.8929270342182337e-05, "loss": 0.6005, "step": 9764 }, { "epoch": 0.2999723527785458, "grad_norm": 0.401511549949646, "learning_rate": 1.8929052759615573e-05, "loss": 0.5994, "step": 9765 }, { "epoch": 0.3000030719134949, "grad_norm": 0.3740898668766022, "learning_rate": 1.8928835156194383e-05, "loss": 0.5572, "step": 9766 }, { "epoch": 0.3000337910484441, "grad_norm": 0.38960540294647217, "learning_rate": 1.8928617531919276e-05, "loss": 0.6311, "step": 9767 }, { "epoch": 0.30006451018339325, "grad_norm": 0.3660237491130829, "learning_rate": 1.892839988679076e-05, "loss": 0.5382, "step": 9768 }, { "epoch": 0.30009522931834237, "grad_norm": 0.3188658356666565, "learning_rate": 1.892818222080934e-05, "loss": 0.5701, "step": 9769 }, { "epoch": 0.30012594845329155, "grad_norm": 0.3346870541572571, "learning_rate": 1.892796453397553e-05, "loss": 0.6217, "step": 9770 }, { "epoch": 0.3001566675882407, "grad_norm": 0.42022961378097534, "learning_rate": 1.8927746826289833e-05, "loss": 0.5485, "step": 9771 }, { "epoch": 0.3001873867231899, "grad_norm": 0.3893304169178009, "learning_rate": 1.892752909775276e-05, "loss": 0.5919, "step": 9772 }, { "epoch": 0.300218105858139, "grad_norm": 0.3300955593585968, "learning_rate": 1.8927311348364818e-05, "loss": 0.5564, "step": 9773 }, { "epoch": 0.3002488249930882, "grad_norm": 0.3477356433868408, "learning_rate": 1.892709357812652e-05, "loss": 0.5877, "step": 9774 }, { "epoch": 0.30027954412803737, "grad_norm": 0.5334805250167847, "learning_rate": 1.892687578703837e-05, "loss": 0.5469, "step": 9775 }, { "epoch": 0.3003102632629865, "grad_norm": 0.3429693877696991, "learning_rate": 1.892665797510088e-05, "loss": 0.5195, "step": 9776 }, { "epoch": 0.30034098239793566, "grad_norm": 0.34982946515083313, "learning_rate": 1.8926440142314554e-05, "loss": 0.5623, "step": 9777 }, { "epoch": 0.30037170153288484, "grad_norm": 0.351254403591156, "learning_rate": 1.8926222288679903e-05, "loss": 0.5362, "step": 9778 }, { "epoch": 0.300402420667834, "grad_norm": 0.3713347911834717, "learning_rate": 1.892600441419744e-05, "loss": 0.622, "step": 9779 }, { "epoch": 0.30043313980278313, "grad_norm": 0.5005961060523987, "learning_rate": 1.8925786518867666e-05, "loss": 0.5601, "step": 9780 }, { "epoch": 0.3004638589377323, "grad_norm": 0.3979525566101074, "learning_rate": 1.8925568602691098e-05, "loss": 0.6036, "step": 9781 }, { "epoch": 0.3004945780726815, "grad_norm": 0.386653870344162, "learning_rate": 1.8925350665668238e-05, "loss": 0.591, "step": 9782 }, { "epoch": 0.30052529720763066, "grad_norm": 0.313401460647583, "learning_rate": 1.89251327077996e-05, "loss": 0.6257, "step": 9783 }, { "epoch": 0.3005560163425798, "grad_norm": 0.3643539845943451, "learning_rate": 1.892491472908569e-05, "loss": 0.6377, "step": 9784 }, { "epoch": 0.30058673547752895, "grad_norm": 0.3281020522117615, "learning_rate": 1.892469672952702e-05, "loss": 0.5528, "step": 9785 }, { "epoch": 0.30061745461247813, "grad_norm": 0.382314532995224, "learning_rate": 1.8924478709124096e-05, "loss": 0.6075, "step": 9786 }, { "epoch": 0.30064817374742725, "grad_norm": 0.32487237453460693, "learning_rate": 1.892426066787743e-05, "loss": 0.5783, "step": 9787 }, { "epoch": 0.3006788928823764, "grad_norm": 0.3571908473968506, "learning_rate": 1.892404260578753e-05, "loss": 0.5761, "step": 9788 }, { "epoch": 0.3007096120173256, "grad_norm": 0.3519640266895294, "learning_rate": 1.8923824522854904e-05, "loss": 0.6136, "step": 9789 }, { "epoch": 0.3007403311522748, "grad_norm": 0.5259837508201599, "learning_rate": 1.8923606419080067e-05, "loss": 0.7103, "step": 9790 }, { "epoch": 0.3007710502872239, "grad_norm": 0.3696957528591156, "learning_rate": 1.892338829446352e-05, "loss": 0.5925, "step": 9791 }, { "epoch": 0.30080176942217307, "grad_norm": 0.4547974169254303, "learning_rate": 1.8923170149005777e-05, "loss": 0.6638, "step": 9792 }, { "epoch": 0.30083248855712225, "grad_norm": 0.34266743063926697, "learning_rate": 1.8922951982707345e-05, "loss": 0.5936, "step": 9793 }, { "epoch": 0.30086320769207137, "grad_norm": 4.842319488525391, "learning_rate": 1.8922733795568738e-05, "loss": 0.5698, "step": 9794 }, { "epoch": 0.30089392682702054, "grad_norm": 0.3240568935871124, "learning_rate": 1.8922515587590464e-05, "loss": 0.598, "step": 9795 }, { "epoch": 0.3009246459619697, "grad_norm": 0.34860631823539734, "learning_rate": 1.8922297358773028e-05, "loss": 0.6358, "step": 9796 }, { "epoch": 0.3009553650969189, "grad_norm": 0.3302943706512451, "learning_rate": 1.8922079109116945e-05, "loss": 0.597, "step": 9797 }, { "epoch": 0.300986084231868, "grad_norm": 0.33245909214019775, "learning_rate": 1.8921860838622723e-05, "loss": 0.5513, "step": 9798 }, { "epoch": 0.3010168033668172, "grad_norm": 0.352185994386673, "learning_rate": 1.8921642547290874e-05, "loss": 0.6438, "step": 9799 }, { "epoch": 0.30104752250176636, "grad_norm": 0.32795363664627075, "learning_rate": 1.89214242351219e-05, "loss": 0.5527, "step": 9800 }, { "epoch": 0.3010782416367155, "grad_norm": 0.34153270721435547, "learning_rate": 1.8921205902116325e-05, "loss": 0.537, "step": 9801 }, { "epoch": 0.30110896077166466, "grad_norm": 0.3164662718772888, "learning_rate": 1.8920987548274644e-05, "loss": 0.5504, "step": 9802 }, { "epoch": 0.30113967990661383, "grad_norm": 0.36554601788520813, "learning_rate": 1.892076917359738e-05, "loss": 0.6489, "step": 9803 }, { "epoch": 0.301170399041563, "grad_norm": 0.4573613703250885, "learning_rate": 1.892055077808503e-05, "loss": 0.5672, "step": 9804 }, { "epoch": 0.30120111817651213, "grad_norm": 0.30034592747688293, "learning_rate": 1.8920332361738115e-05, "loss": 0.5158, "step": 9805 }, { "epoch": 0.3012318373114613, "grad_norm": 0.32124266028404236, "learning_rate": 1.8920113924557134e-05, "loss": 0.5972, "step": 9806 }, { "epoch": 0.3012625564464105, "grad_norm": 0.33118125796318054, "learning_rate": 1.891989546654261e-05, "loss": 0.5321, "step": 9807 }, { "epoch": 0.30129327558135965, "grad_norm": 0.39375749230384827, "learning_rate": 1.891967698769505e-05, "loss": 0.6092, "step": 9808 }, { "epoch": 0.3013239947163088, "grad_norm": 0.3627880811691284, "learning_rate": 1.8919458488014955e-05, "loss": 0.509, "step": 9809 }, { "epoch": 0.30135471385125795, "grad_norm": 0.4113309681415558, "learning_rate": 1.8919239967502845e-05, "loss": 0.6608, "step": 9810 }, { "epoch": 0.3013854329862071, "grad_norm": 0.3397701382637024, "learning_rate": 1.8919021426159227e-05, "loss": 0.5074, "step": 9811 }, { "epoch": 0.30141615212115624, "grad_norm": 0.36235353350639343, "learning_rate": 1.8918802863984612e-05, "loss": 0.552, "step": 9812 }, { "epoch": 0.3014468712561054, "grad_norm": 0.34509775042533875, "learning_rate": 1.8918584280979506e-05, "loss": 0.5753, "step": 9813 }, { "epoch": 0.3014775903910546, "grad_norm": 0.3207763433456421, "learning_rate": 1.891836567714443e-05, "loss": 0.5214, "step": 9814 }, { "epoch": 0.30150830952600377, "grad_norm": 0.48493248224258423, "learning_rate": 1.8918147052479884e-05, "loss": 0.6393, "step": 9815 }, { "epoch": 0.3015390286609529, "grad_norm": 0.33627498149871826, "learning_rate": 1.8917928406986382e-05, "loss": 0.6011, "step": 9816 }, { "epoch": 0.30156974779590207, "grad_norm": 0.3372403085231781, "learning_rate": 1.8917709740664435e-05, "loss": 0.5266, "step": 9817 }, { "epoch": 0.30160046693085124, "grad_norm": 1.3468095064163208, "learning_rate": 1.8917491053514557e-05, "loss": 0.5322, "step": 9818 }, { "epoch": 0.30163118606580036, "grad_norm": 0.35406410694122314, "learning_rate": 1.8917272345537254e-05, "loss": 0.5138, "step": 9819 }, { "epoch": 0.30166190520074954, "grad_norm": 0.34566235542297363, "learning_rate": 1.891705361673304e-05, "loss": 0.6013, "step": 9820 }, { "epoch": 0.3016926243356987, "grad_norm": 0.3499796390533447, "learning_rate": 1.8916834867102425e-05, "loss": 0.6188, "step": 9821 }, { "epoch": 0.3017233434706479, "grad_norm": 0.3265599310398102, "learning_rate": 1.891661609664592e-05, "loss": 0.4545, "step": 9822 }, { "epoch": 0.301754062605597, "grad_norm": 0.29185914993286133, "learning_rate": 1.891639730536403e-05, "loss": 0.5329, "step": 9823 }, { "epoch": 0.3017847817405462, "grad_norm": 0.33074602484703064, "learning_rate": 1.8916178493257277e-05, "loss": 0.6049, "step": 9824 }, { "epoch": 0.30181550087549536, "grad_norm": 0.35442373156547546, "learning_rate": 1.8915959660326165e-05, "loss": 0.5266, "step": 9825 }, { "epoch": 0.30184622001044453, "grad_norm": 0.39734819531440735, "learning_rate": 1.8915740806571205e-05, "loss": 0.6058, "step": 9826 }, { "epoch": 0.30187693914539365, "grad_norm": 4.812251091003418, "learning_rate": 1.891552193199291e-05, "loss": 0.5574, "step": 9827 }, { "epoch": 0.3019076582803428, "grad_norm": 0.3796232044696808, "learning_rate": 1.8915303036591793e-05, "loss": 0.6319, "step": 9828 }, { "epoch": 0.301938377415292, "grad_norm": 0.41651633381843567, "learning_rate": 1.891508412036836e-05, "loss": 0.5914, "step": 9829 }, { "epoch": 0.3019690965502411, "grad_norm": 0.3399599492549896, "learning_rate": 1.891486518332313e-05, "loss": 0.5971, "step": 9830 }, { "epoch": 0.3019998156851903, "grad_norm": 0.3675498366355896, "learning_rate": 1.8914646225456605e-05, "loss": 0.5656, "step": 9831 }, { "epoch": 0.3020305348201395, "grad_norm": 0.35865461826324463, "learning_rate": 1.8914427246769304e-05, "loss": 0.5999, "step": 9832 }, { "epoch": 0.30206125395508865, "grad_norm": 0.5465919971466064, "learning_rate": 1.8914208247261732e-05, "loss": 0.5834, "step": 9833 }, { "epoch": 0.30209197309003777, "grad_norm": 0.35263094305992126, "learning_rate": 1.891398922693441e-05, "loss": 0.6215, "step": 9834 }, { "epoch": 0.30212269222498694, "grad_norm": 0.3090915083885193, "learning_rate": 1.8913770185787837e-05, "loss": 0.5315, "step": 9835 }, { "epoch": 0.3021534113599361, "grad_norm": 0.3351338505744934, "learning_rate": 1.8913551123822536e-05, "loss": 0.5626, "step": 9836 }, { "epoch": 0.30218413049488524, "grad_norm": 0.3492033779621124, "learning_rate": 1.8913332041039016e-05, "loss": 0.6806, "step": 9837 }, { "epoch": 0.3022148496298344, "grad_norm": 0.446956068277359, "learning_rate": 1.891311293743778e-05, "loss": 0.6928, "step": 9838 }, { "epoch": 0.3022455687647836, "grad_norm": 0.3380007743835449, "learning_rate": 1.891289381301935e-05, "loss": 0.5004, "step": 9839 }, { "epoch": 0.30227628789973277, "grad_norm": 0.3815859258174896, "learning_rate": 1.8912674667784233e-05, "loss": 0.6452, "step": 9840 }, { "epoch": 0.3023070070346819, "grad_norm": 0.33512642979621887, "learning_rate": 1.8912455501732942e-05, "loss": 0.5905, "step": 9841 }, { "epoch": 0.30233772616963106, "grad_norm": 0.3482843041419983, "learning_rate": 1.891223631486599e-05, "loss": 0.5645, "step": 9842 }, { "epoch": 0.30236844530458024, "grad_norm": 0.34430447220802307, "learning_rate": 1.8912017107183886e-05, "loss": 0.5616, "step": 9843 }, { "epoch": 0.30239916443952936, "grad_norm": 2.0647683143615723, "learning_rate": 1.891179787868714e-05, "loss": 0.5531, "step": 9844 }, { "epoch": 0.30242988357447853, "grad_norm": 0.3844621479511261, "learning_rate": 1.8911578629376275e-05, "loss": 0.6194, "step": 9845 }, { "epoch": 0.3024606027094277, "grad_norm": 0.32170072197914124, "learning_rate": 1.891135935925179e-05, "loss": 0.5321, "step": 9846 }, { "epoch": 0.3024913218443769, "grad_norm": 0.3360769748687744, "learning_rate": 1.891114006831421e-05, "loss": 0.5887, "step": 9847 }, { "epoch": 0.302522040979326, "grad_norm": 0.46140265464782715, "learning_rate": 1.891092075656403e-05, "loss": 0.5637, "step": 9848 }, { "epoch": 0.3025527601142752, "grad_norm": 0.4348377585411072, "learning_rate": 1.891070142400178e-05, "loss": 0.575, "step": 9849 }, { "epoch": 0.30258347924922435, "grad_norm": 0.345496267080307, "learning_rate": 1.8910482070627964e-05, "loss": 0.4835, "step": 9850 }, { "epoch": 0.3026141983841735, "grad_norm": 0.35144153237342834, "learning_rate": 1.891026269644309e-05, "loss": 0.5694, "step": 9851 }, { "epoch": 0.30264491751912265, "grad_norm": 0.39945337176322937, "learning_rate": 1.8910043301447677e-05, "loss": 0.5902, "step": 9852 }, { "epoch": 0.3026756366540718, "grad_norm": 0.3443662226200104, "learning_rate": 1.8909823885642236e-05, "loss": 0.5363, "step": 9853 }, { "epoch": 0.302706355789021, "grad_norm": 0.35659876465797424, "learning_rate": 1.8909604449027283e-05, "loss": 0.577, "step": 9854 }, { "epoch": 0.3027370749239701, "grad_norm": 0.3273659348487854, "learning_rate": 1.8909384991603323e-05, "loss": 0.5568, "step": 9855 }, { "epoch": 0.3027677940589193, "grad_norm": 0.33538106083869934, "learning_rate": 1.8909165513370873e-05, "loss": 0.5809, "step": 9856 }, { "epoch": 0.30279851319386847, "grad_norm": 0.3387683629989624, "learning_rate": 1.8908946014330443e-05, "loss": 0.5514, "step": 9857 }, { "epoch": 0.30282923232881764, "grad_norm": 0.39087170362472534, "learning_rate": 1.890872649448255e-05, "loss": 0.6147, "step": 9858 }, { "epoch": 0.30285995146376676, "grad_norm": 0.34757962822914124, "learning_rate": 1.89085069538277e-05, "loss": 0.5368, "step": 9859 }, { "epoch": 0.30289067059871594, "grad_norm": 0.3617590665817261, "learning_rate": 1.8908287392366414e-05, "loss": 0.6018, "step": 9860 }, { "epoch": 0.3029213897336651, "grad_norm": 0.3540836572647095, "learning_rate": 1.89080678100992e-05, "loss": 0.6421, "step": 9861 }, { "epoch": 0.30295210886861423, "grad_norm": 0.4260205328464508, "learning_rate": 1.8907848207026572e-05, "loss": 0.6136, "step": 9862 }, { "epoch": 0.3029828280035634, "grad_norm": 0.5432726144790649, "learning_rate": 1.8907628583149036e-05, "loss": 0.5627, "step": 9863 }, { "epoch": 0.3030135471385126, "grad_norm": 0.3630073070526123, "learning_rate": 1.890740893846712e-05, "loss": 0.6272, "step": 9864 }, { "epoch": 0.30304426627346176, "grad_norm": 0.3523309826850891, "learning_rate": 1.8907189272981325e-05, "loss": 0.5886, "step": 9865 }, { "epoch": 0.3030749854084109, "grad_norm": 0.4383768141269684, "learning_rate": 1.890696958669217e-05, "loss": 0.5681, "step": 9866 }, { "epoch": 0.30310570454336005, "grad_norm": 0.4615125358104706, "learning_rate": 1.890674987960016e-05, "loss": 0.6566, "step": 9867 }, { "epoch": 0.30313642367830923, "grad_norm": 0.3570449948310852, "learning_rate": 1.8906530151705815e-05, "loss": 0.582, "step": 9868 }, { "epoch": 0.3031671428132584, "grad_norm": 0.42377203702926636, "learning_rate": 1.890631040300965e-05, "loss": 0.5363, "step": 9869 }, { "epoch": 0.3031978619482075, "grad_norm": 0.34619399905204773, "learning_rate": 1.890609063351217e-05, "loss": 0.5828, "step": 9870 }, { "epoch": 0.3032285810831567, "grad_norm": 0.3661535084247589, "learning_rate": 1.89058708432139e-05, "loss": 0.6161, "step": 9871 }, { "epoch": 0.3032593002181059, "grad_norm": 0.3999691903591156, "learning_rate": 1.8905651032115343e-05, "loss": 0.5907, "step": 9872 }, { "epoch": 0.303290019353055, "grad_norm": 0.5368747711181641, "learning_rate": 1.8905431200217015e-05, "loss": 0.5761, "step": 9873 }, { "epoch": 0.30332073848800417, "grad_norm": 0.4218517541885376, "learning_rate": 1.890521134751943e-05, "loss": 0.6385, "step": 9874 }, { "epoch": 0.30335145762295335, "grad_norm": 0.32408416271209717, "learning_rate": 1.8904991474023105e-05, "loss": 0.5681, "step": 9875 }, { "epoch": 0.3033821767579025, "grad_norm": 0.3015991151332855, "learning_rate": 1.890477157972855e-05, "loss": 0.5774, "step": 9876 }, { "epoch": 0.30341289589285164, "grad_norm": 0.3125377893447876, "learning_rate": 1.890455166463628e-05, "loss": 0.5334, "step": 9877 }, { "epoch": 0.3034436150278008, "grad_norm": 0.3599199950695038, "learning_rate": 1.8904331728746805e-05, "loss": 0.5616, "step": 9878 }, { "epoch": 0.30347433416275, "grad_norm": 0.39986562728881836, "learning_rate": 1.890411177206064e-05, "loss": 0.5864, "step": 9879 }, { "epoch": 0.3035050532976991, "grad_norm": 0.3243916928768158, "learning_rate": 1.8903891794578303e-05, "loss": 0.6336, "step": 9880 }, { "epoch": 0.3035357724326483, "grad_norm": 0.3348618745803833, "learning_rate": 1.8903671796300306e-05, "loss": 0.5309, "step": 9881 }, { "epoch": 0.30356649156759746, "grad_norm": 0.6293866634368896, "learning_rate": 1.890345177722716e-05, "loss": 0.5773, "step": 9882 }, { "epoch": 0.30359721070254664, "grad_norm": 0.39770057797431946, "learning_rate": 1.890323173735938e-05, "loss": 0.528, "step": 9883 }, { "epoch": 0.30362792983749576, "grad_norm": 0.3408668637275696, "learning_rate": 1.890301167669748e-05, "loss": 0.6154, "step": 9884 }, { "epoch": 0.30365864897244493, "grad_norm": 0.3378402590751648, "learning_rate": 1.8902791595241975e-05, "loss": 0.5815, "step": 9885 }, { "epoch": 0.3036893681073941, "grad_norm": 0.3075171113014221, "learning_rate": 1.8902571492993377e-05, "loss": 0.5143, "step": 9886 }, { "epoch": 0.30372008724234323, "grad_norm": 0.32091692090034485, "learning_rate": 1.8902351369952204e-05, "loss": 0.616, "step": 9887 }, { "epoch": 0.3037508063772924, "grad_norm": 0.3345976769924164, "learning_rate": 1.8902131226118967e-05, "loss": 0.6, "step": 9888 }, { "epoch": 0.3037815255122416, "grad_norm": 0.33128419518470764, "learning_rate": 1.890191106149418e-05, "loss": 0.5967, "step": 9889 }, { "epoch": 0.30381224464719075, "grad_norm": 0.3605402410030365, "learning_rate": 1.890169087607836e-05, "loss": 0.5244, "step": 9890 }, { "epoch": 0.3038429637821399, "grad_norm": 0.398389995098114, "learning_rate": 1.890147066987202e-05, "loss": 0.5616, "step": 9891 }, { "epoch": 0.30387368291708905, "grad_norm": 0.36818987131118774, "learning_rate": 1.890125044287567e-05, "loss": 0.5451, "step": 9892 }, { "epoch": 0.3039044020520382, "grad_norm": 0.5305724143981934, "learning_rate": 1.8901030195089827e-05, "loss": 0.6351, "step": 9893 }, { "epoch": 0.3039351211869874, "grad_norm": 0.3468591272830963, "learning_rate": 1.890080992651501e-05, "loss": 0.5988, "step": 9894 }, { "epoch": 0.3039658403219365, "grad_norm": 0.392211377620697, "learning_rate": 1.890058963715173e-05, "loss": 0.6237, "step": 9895 }, { "epoch": 0.3039965594568857, "grad_norm": 0.40877217054367065, "learning_rate": 1.89003693270005e-05, "loss": 0.5287, "step": 9896 }, { "epoch": 0.30402727859183487, "grad_norm": 0.311713308095932, "learning_rate": 1.8900148996061835e-05, "loss": 0.5368, "step": 9897 }, { "epoch": 0.304057997726784, "grad_norm": 0.340895414352417, "learning_rate": 1.889992864433625e-05, "loss": 0.5648, "step": 9898 }, { "epoch": 0.30408871686173317, "grad_norm": 0.40864527225494385, "learning_rate": 1.8899708271824264e-05, "loss": 0.6436, "step": 9899 }, { "epoch": 0.30411943599668234, "grad_norm": 0.3279697597026825, "learning_rate": 1.8899487878526383e-05, "loss": 0.6533, "step": 9900 }, { "epoch": 0.3041501551316315, "grad_norm": 0.36398983001708984, "learning_rate": 1.889926746444313e-05, "loss": 0.6985, "step": 9901 }, { "epoch": 0.30418087426658064, "grad_norm": 0.3686080276966095, "learning_rate": 1.8899047029575018e-05, "loss": 0.5568, "step": 9902 }, { "epoch": 0.3042115934015298, "grad_norm": 0.33880549669265747, "learning_rate": 1.8898826573922556e-05, "loss": 0.5993, "step": 9903 }, { "epoch": 0.304242312536479, "grad_norm": 0.32105740904808044, "learning_rate": 1.8898606097486264e-05, "loss": 0.5321, "step": 9904 }, { "epoch": 0.3042730316714281, "grad_norm": 0.35520651936531067, "learning_rate": 1.8898385600266657e-05, "loss": 0.545, "step": 9905 }, { "epoch": 0.3043037508063773, "grad_norm": 0.33155369758605957, "learning_rate": 1.8898165082264245e-05, "loss": 0.5629, "step": 9906 }, { "epoch": 0.30433446994132646, "grad_norm": 0.3769979476928711, "learning_rate": 1.8897944543479552e-05, "loss": 0.5596, "step": 9907 }, { "epoch": 0.30436518907627563, "grad_norm": 0.3615506589412689, "learning_rate": 1.8897723983913085e-05, "loss": 0.6341, "step": 9908 }, { "epoch": 0.30439590821122475, "grad_norm": 0.3326992988586426, "learning_rate": 1.8897503403565366e-05, "loss": 0.4492, "step": 9909 }, { "epoch": 0.30442662734617393, "grad_norm": 0.38371315598487854, "learning_rate": 1.8897282802436903e-05, "loss": 0.5841, "step": 9910 }, { "epoch": 0.3044573464811231, "grad_norm": 0.3910810053348541, "learning_rate": 1.8897062180528215e-05, "loss": 0.5869, "step": 9911 }, { "epoch": 0.3044880656160723, "grad_norm": 0.34559473395347595, "learning_rate": 1.8896841537839817e-05, "loss": 0.617, "step": 9912 }, { "epoch": 0.3045187847510214, "grad_norm": 0.34699463844299316, "learning_rate": 1.8896620874372226e-05, "loss": 0.621, "step": 9913 }, { "epoch": 0.3045495038859706, "grad_norm": 0.32255831360816956, "learning_rate": 1.8896400190125955e-05, "loss": 0.5442, "step": 9914 }, { "epoch": 0.30458022302091975, "grad_norm": 0.35639163851737976, "learning_rate": 1.8896179485101518e-05, "loss": 0.6233, "step": 9915 }, { "epoch": 0.30461094215586887, "grad_norm": 0.3640806972980499, "learning_rate": 1.8895958759299435e-05, "loss": 0.6905, "step": 9916 }, { "epoch": 0.30464166129081804, "grad_norm": 0.32711413502693176, "learning_rate": 1.8895738012720216e-05, "loss": 0.5743, "step": 9917 }, { "epoch": 0.3046723804257672, "grad_norm": 0.39371258020401, "learning_rate": 1.889551724536438e-05, "loss": 0.5752, "step": 9918 }, { "epoch": 0.3047030995607164, "grad_norm": 0.3582613468170166, "learning_rate": 1.8895296457232445e-05, "loss": 0.5562, "step": 9919 }, { "epoch": 0.3047338186956655, "grad_norm": 0.33960965275764465, "learning_rate": 1.8895075648324926e-05, "loss": 0.5751, "step": 9920 }, { "epoch": 0.3047645378306147, "grad_norm": 0.3630307614803314, "learning_rate": 1.8894854818642333e-05, "loss": 0.6625, "step": 9921 }, { "epoch": 0.30479525696556387, "grad_norm": 0.34760040044784546, "learning_rate": 1.889463396818518e-05, "loss": 0.6096, "step": 9922 }, { "epoch": 0.304825976100513, "grad_norm": 0.3874387741088867, "learning_rate": 1.8894413096954e-05, "loss": 0.5292, "step": 9923 }, { "epoch": 0.30485669523546216, "grad_norm": 0.3276844918727875, "learning_rate": 1.889419220494929e-05, "loss": 0.5229, "step": 9924 }, { "epoch": 0.30488741437041134, "grad_norm": 0.3395276367664337, "learning_rate": 1.8893971292171574e-05, "loss": 0.6243, "step": 9925 }, { "epoch": 0.3049181335053605, "grad_norm": 0.3286516070365906, "learning_rate": 1.8893750358621365e-05, "loss": 0.632, "step": 9926 }, { "epoch": 0.30494885264030963, "grad_norm": 0.3183748126029968, "learning_rate": 1.8893529404299184e-05, "loss": 0.6163, "step": 9927 }, { "epoch": 0.3049795717752588, "grad_norm": 0.33764857053756714, "learning_rate": 1.8893308429205543e-05, "loss": 0.596, "step": 9928 }, { "epoch": 0.305010290910208, "grad_norm": 0.32921215891838074, "learning_rate": 1.8893087433340956e-05, "loss": 0.5477, "step": 9929 }, { "epoch": 0.3050410100451571, "grad_norm": 0.329491525888443, "learning_rate": 1.8892866416705943e-05, "loss": 0.5129, "step": 9930 }, { "epoch": 0.3050717291801063, "grad_norm": 0.33389899134635925, "learning_rate": 1.889264537930102e-05, "loss": 0.5555, "step": 9931 }, { "epoch": 0.30510244831505545, "grad_norm": 0.35288867354393005, "learning_rate": 1.889242432112671e-05, "loss": 0.6272, "step": 9932 }, { "epoch": 0.3051331674500046, "grad_norm": 0.3469499349594116, "learning_rate": 1.8892203242183515e-05, "loss": 0.6328, "step": 9933 }, { "epoch": 0.30516388658495375, "grad_norm": 0.3566821813583374, "learning_rate": 1.8891982142471957e-05, "loss": 0.6506, "step": 9934 }, { "epoch": 0.3051946057199029, "grad_norm": 0.35370492935180664, "learning_rate": 1.8891761021992558e-05, "loss": 0.6303, "step": 9935 }, { "epoch": 0.3052253248548521, "grad_norm": 0.3763185441493988, "learning_rate": 1.8891539880745824e-05, "loss": 0.5715, "step": 9936 }, { "epoch": 0.3052560439898013, "grad_norm": 0.34496089816093445, "learning_rate": 1.8891318718732283e-05, "loss": 0.5301, "step": 9937 }, { "epoch": 0.3052867631247504, "grad_norm": 0.3579116463661194, "learning_rate": 1.8891097535952445e-05, "loss": 0.61, "step": 9938 }, { "epoch": 0.30531748225969957, "grad_norm": 0.4633854925632477, "learning_rate": 1.8890876332406828e-05, "loss": 0.595, "step": 9939 }, { "epoch": 0.30534820139464874, "grad_norm": 0.37265926599502563, "learning_rate": 1.8890655108095948e-05, "loss": 0.5301, "step": 9940 }, { "epoch": 0.30537892052959786, "grad_norm": 0.3488740622997284, "learning_rate": 1.8890433863020323e-05, "loss": 0.6332, "step": 9941 }, { "epoch": 0.30540963966454704, "grad_norm": 0.43392443656921387, "learning_rate": 1.8890212597180468e-05, "loss": 0.6091, "step": 9942 }, { "epoch": 0.3054403587994962, "grad_norm": 0.3420165479183197, "learning_rate": 1.8889991310576898e-05, "loss": 0.6326, "step": 9943 }, { "epoch": 0.3054710779344454, "grad_norm": 0.36513710021972656, "learning_rate": 1.8889770003210134e-05, "loss": 0.5935, "step": 9944 }, { "epoch": 0.3055017970693945, "grad_norm": 0.31938764452934265, "learning_rate": 1.8889548675080692e-05, "loss": 0.5705, "step": 9945 }, { "epoch": 0.3055325162043437, "grad_norm": 0.3531520366668701, "learning_rate": 1.8889327326189087e-05, "loss": 0.5895, "step": 9946 }, { "epoch": 0.30556323533929286, "grad_norm": 0.3799550533294678, "learning_rate": 1.888910595653584e-05, "loss": 0.6171, "step": 9947 }, { "epoch": 0.305593954474242, "grad_norm": 0.347092866897583, "learning_rate": 1.8888884566121464e-05, "loss": 0.5941, "step": 9948 }, { "epoch": 0.30562467360919116, "grad_norm": 0.41327041387557983, "learning_rate": 1.888866315494648e-05, "loss": 0.6435, "step": 9949 }, { "epoch": 0.30565539274414033, "grad_norm": 0.3668718934059143, "learning_rate": 1.88884417230114e-05, "loss": 0.557, "step": 9950 }, { "epoch": 0.3056861118790895, "grad_norm": 0.32888856530189514, "learning_rate": 1.8888220270316743e-05, "loss": 0.598, "step": 9951 }, { "epoch": 0.3057168310140386, "grad_norm": 0.38188689947128296, "learning_rate": 1.8887998796863024e-05, "loss": 0.6419, "step": 9952 }, { "epoch": 0.3057475501489878, "grad_norm": 0.3214038908481598, "learning_rate": 1.8887777302650767e-05, "loss": 0.5905, "step": 9953 }, { "epoch": 0.305778269283937, "grad_norm": 0.3435250222682953, "learning_rate": 1.8887555787680483e-05, "loss": 0.5726, "step": 9954 }, { "epoch": 0.30580898841888615, "grad_norm": 0.4776763319969177, "learning_rate": 1.8887334251952695e-05, "loss": 0.5335, "step": 9955 }, { "epoch": 0.30583970755383527, "grad_norm": 0.32121801376342773, "learning_rate": 1.8887112695467915e-05, "loss": 0.5696, "step": 9956 }, { "epoch": 0.30587042668878445, "grad_norm": 0.3925301134586334, "learning_rate": 1.8886891118226662e-05, "loss": 0.5791, "step": 9957 }, { "epoch": 0.3059011458237336, "grad_norm": 0.33178433775901794, "learning_rate": 1.8886669520229457e-05, "loss": 0.6259, "step": 9958 }, { "epoch": 0.30593186495868274, "grad_norm": 0.34174516797065735, "learning_rate": 1.888644790147681e-05, "loss": 0.5652, "step": 9959 }, { "epoch": 0.3059625840936319, "grad_norm": 0.41284656524658203, "learning_rate": 1.888622626196925e-05, "loss": 0.542, "step": 9960 }, { "epoch": 0.3059933032285811, "grad_norm": 0.313628613948822, "learning_rate": 1.8886004601707282e-05, "loss": 0.5397, "step": 9961 }, { "epoch": 0.30602402236353027, "grad_norm": 0.8357094526290894, "learning_rate": 1.8885782920691433e-05, "loss": 0.5804, "step": 9962 }, { "epoch": 0.3060547414984794, "grad_norm": 0.3251522183418274, "learning_rate": 1.8885561218922215e-05, "loss": 0.5301, "step": 9963 }, { "epoch": 0.30608546063342856, "grad_norm": 0.3312532305717468, "learning_rate": 1.888533949640015e-05, "loss": 0.5875, "step": 9964 }, { "epoch": 0.30611617976837774, "grad_norm": 0.3194107115268707, "learning_rate": 1.888511775312575e-05, "loss": 0.5937, "step": 9965 }, { "epoch": 0.30614689890332686, "grad_norm": 0.3785006105899811, "learning_rate": 1.888489598909954e-05, "loss": 0.6091, "step": 9966 }, { "epoch": 0.30617761803827603, "grad_norm": 0.2977648973464966, "learning_rate": 1.8884674204322038e-05, "loss": 0.497, "step": 9967 }, { "epoch": 0.3062083371732252, "grad_norm": 0.3456898033618927, "learning_rate": 1.8884452398793754e-05, "loss": 0.6084, "step": 9968 }, { "epoch": 0.3062390563081744, "grad_norm": 0.3789360523223877, "learning_rate": 1.8884230572515214e-05, "loss": 0.6294, "step": 9969 }, { "epoch": 0.3062697754431235, "grad_norm": 0.3968081772327423, "learning_rate": 1.8884008725486934e-05, "loss": 0.5075, "step": 9970 }, { "epoch": 0.3063004945780727, "grad_norm": 0.36105257272720337, "learning_rate": 1.8883786857709427e-05, "loss": 0.5815, "step": 9971 }, { "epoch": 0.30633121371302185, "grad_norm": 0.5641610622406006, "learning_rate": 1.8883564969183218e-05, "loss": 0.6045, "step": 9972 }, { "epoch": 0.306361932847971, "grad_norm": 0.33117684721946716, "learning_rate": 1.8883343059908825e-05, "loss": 0.6432, "step": 9973 }, { "epoch": 0.30639265198292015, "grad_norm": 0.34573259949684143, "learning_rate": 1.888312112988676e-05, "loss": 0.5487, "step": 9974 }, { "epoch": 0.3064233711178693, "grad_norm": 0.3382858633995056, "learning_rate": 1.8882899179117547e-05, "loss": 0.574, "step": 9975 }, { "epoch": 0.3064540902528185, "grad_norm": 0.31208863854408264, "learning_rate": 1.8882677207601702e-05, "loss": 0.5855, "step": 9976 }, { "epoch": 0.3064848093877676, "grad_norm": 0.3321830630302429, "learning_rate": 1.8882455215339744e-05, "loss": 0.6655, "step": 9977 }, { "epoch": 0.3065155285227168, "grad_norm": 0.41753196716308594, "learning_rate": 1.888223320233219e-05, "loss": 0.6121, "step": 9978 }, { "epoch": 0.30654624765766597, "grad_norm": 0.3487551212310791, "learning_rate": 1.888201116857956e-05, "loss": 0.6154, "step": 9979 }, { "epoch": 0.30657696679261515, "grad_norm": 0.32678884267807007, "learning_rate": 1.8881789114082378e-05, "loss": 0.6061, "step": 9980 }, { "epoch": 0.30660768592756427, "grad_norm": 0.3231872022151947, "learning_rate": 1.8881567038841152e-05, "loss": 0.5308, "step": 9981 }, { "epoch": 0.30663840506251344, "grad_norm": 0.43637797236442566, "learning_rate": 1.8881344942856404e-05, "loss": 0.5487, "step": 9982 }, { "epoch": 0.3066691241974626, "grad_norm": 0.32958486676216125, "learning_rate": 1.888112282612866e-05, "loss": 0.5043, "step": 9983 }, { "epoch": 0.30669984333241174, "grad_norm": 0.3088981807231903, "learning_rate": 1.888090068865843e-05, "loss": 0.5117, "step": 9984 }, { "epoch": 0.3067305624673609, "grad_norm": 0.37212008237838745, "learning_rate": 1.888067853044624e-05, "loss": 0.566, "step": 9985 }, { "epoch": 0.3067612816023101, "grad_norm": 0.3864917755126953, "learning_rate": 1.88804563514926e-05, "loss": 0.5928, "step": 9986 }, { "epoch": 0.30679200073725926, "grad_norm": 0.32944992184638977, "learning_rate": 1.8880234151798037e-05, "loss": 0.5172, "step": 9987 }, { "epoch": 0.3068227198722084, "grad_norm": 0.38873451948165894, "learning_rate": 1.888001193136307e-05, "loss": 0.6074, "step": 9988 }, { "epoch": 0.30685343900715756, "grad_norm": 0.3515758216381073, "learning_rate": 1.8879789690188207e-05, "loss": 0.5616, "step": 9989 }, { "epoch": 0.30688415814210673, "grad_norm": 0.33744922280311584, "learning_rate": 1.887956742827398e-05, "loss": 0.6179, "step": 9990 }, { "epoch": 0.30691487727705585, "grad_norm": 0.37170469760894775, "learning_rate": 1.8879345145620905e-05, "loss": 0.6421, "step": 9991 }, { "epoch": 0.30694559641200503, "grad_norm": 0.33720213174819946, "learning_rate": 1.8879122842229495e-05, "loss": 0.5433, "step": 9992 }, { "epoch": 0.3069763155469542, "grad_norm": 0.36064764857292175, "learning_rate": 1.8878900518100275e-05, "loss": 0.6484, "step": 9993 }, { "epoch": 0.3070070346819034, "grad_norm": 0.3280768394470215, "learning_rate": 1.8878678173233765e-05, "loss": 0.6127, "step": 9994 }, { "epoch": 0.3070377538168525, "grad_norm": 0.37556904554367065, "learning_rate": 1.887845580763048e-05, "loss": 0.587, "step": 9995 }, { "epoch": 0.3070684729518017, "grad_norm": 0.3739718496799469, "learning_rate": 1.887823342129094e-05, "loss": 0.6231, "step": 9996 }, { "epoch": 0.30709919208675085, "grad_norm": 0.38045257329940796, "learning_rate": 1.887801101421567e-05, "loss": 0.5637, "step": 9997 }, { "epoch": 0.30712991122169997, "grad_norm": 0.42614972591400146, "learning_rate": 1.887778858640518e-05, "loss": 0.6851, "step": 9998 }, { "epoch": 0.30716063035664914, "grad_norm": 0.3415154814720154, "learning_rate": 1.8877566137859997e-05, "loss": 0.5752, "step": 9999 }, { "epoch": 0.3071913494915983, "grad_norm": 0.3861374258995056, "learning_rate": 1.887734366858064e-05, "loss": 0.5641, "step": 10000 }, { "epoch": 0.3072220686265475, "grad_norm": 0.37221187353134155, "learning_rate": 1.8877121178567623e-05, "loss": 0.6766, "step": 10001 }, { "epoch": 0.3072527877614966, "grad_norm": 0.30903518199920654, "learning_rate": 1.8876898667821474e-05, "loss": 0.5787, "step": 10002 }, { "epoch": 0.3072835068964458, "grad_norm": 0.3347099721431732, "learning_rate": 1.8876676136342705e-05, "loss": 0.5643, "step": 10003 }, { "epoch": 0.30731422603139497, "grad_norm": 0.3674643635749817, "learning_rate": 1.8876453584131843e-05, "loss": 0.5903, "step": 10004 }, { "epoch": 0.30734494516634414, "grad_norm": 0.42166125774383545, "learning_rate": 1.8876231011189402e-05, "loss": 0.5661, "step": 10005 }, { "epoch": 0.30737566430129326, "grad_norm": 0.36162781715393066, "learning_rate": 1.8876008417515898e-05, "loss": 0.4917, "step": 10006 }, { "epoch": 0.30740638343624244, "grad_norm": 0.3370506167411804, "learning_rate": 1.8875785803111865e-05, "loss": 0.5598, "step": 10007 }, { "epoch": 0.3074371025711916, "grad_norm": 0.33867594599723816, "learning_rate": 1.8875563167977806e-05, "loss": 0.5801, "step": 10008 }, { "epoch": 0.30746782170614073, "grad_norm": 0.317330539226532, "learning_rate": 1.8875340512114255e-05, "loss": 0.6385, "step": 10009 }, { "epoch": 0.3074985408410899, "grad_norm": 0.29902172088623047, "learning_rate": 1.8875117835521727e-05, "loss": 0.519, "step": 10010 }, { "epoch": 0.3075292599760391, "grad_norm": 0.5451877117156982, "learning_rate": 1.8874895138200738e-05, "loss": 0.6542, "step": 10011 }, { "epoch": 0.30755997911098826, "grad_norm": 0.37885478138923645, "learning_rate": 1.8874672420151812e-05, "loss": 0.6242, "step": 10012 }, { "epoch": 0.3075906982459374, "grad_norm": 0.4138130247592926, "learning_rate": 1.8874449681375472e-05, "loss": 0.6446, "step": 10013 }, { "epoch": 0.30762141738088655, "grad_norm": 0.3593983054161072, "learning_rate": 1.8874226921872232e-05, "loss": 0.5621, "step": 10014 }, { "epoch": 0.30765213651583573, "grad_norm": 0.35434043407440186, "learning_rate": 1.8874004141642618e-05, "loss": 0.5065, "step": 10015 }, { "epoch": 0.30768285565078485, "grad_norm": 0.3308510482311249, "learning_rate": 1.887378134068714e-05, "loss": 0.5755, "step": 10016 }, { "epoch": 0.307713574785734, "grad_norm": 0.3736622631549835, "learning_rate": 1.8873558519006333e-05, "loss": 0.6554, "step": 10017 }, { "epoch": 0.3077442939206832, "grad_norm": 0.3409211337566376, "learning_rate": 1.887333567660071e-05, "loss": 0.6079, "step": 10018 }, { "epoch": 0.3077750130556324, "grad_norm": 0.37091508507728577, "learning_rate": 1.887311281347079e-05, "loss": 0.5837, "step": 10019 }, { "epoch": 0.3078057321905815, "grad_norm": 0.3164612650871277, "learning_rate": 1.8872889929617098e-05, "loss": 0.5393, "step": 10020 }, { "epoch": 0.30783645132553067, "grad_norm": 0.3782636523246765, "learning_rate": 1.887266702504015e-05, "loss": 0.6013, "step": 10021 }, { "epoch": 0.30786717046047984, "grad_norm": 0.3075632154941559, "learning_rate": 1.8872444099740467e-05, "loss": 0.6226, "step": 10022 }, { "epoch": 0.307897889595429, "grad_norm": 0.36339661478996277, "learning_rate": 1.8872221153718576e-05, "loss": 0.5415, "step": 10023 }, { "epoch": 0.30792860873037814, "grad_norm": 0.3364357054233551, "learning_rate": 1.8871998186974987e-05, "loss": 0.6786, "step": 10024 }, { "epoch": 0.3079593278653273, "grad_norm": 0.33377209305763245, "learning_rate": 1.8871775199510228e-05, "loss": 0.6022, "step": 10025 }, { "epoch": 0.3079900470002765, "grad_norm": 0.35578683018684387, "learning_rate": 1.887155219132482e-05, "loss": 0.6034, "step": 10026 }, { "epoch": 0.3080207661352256, "grad_norm": 0.4250582158565521, "learning_rate": 1.8871329162419282e-05, "loss": 0.5846, "step": 10027 }, { "epoch": 0.3080514852701748, "grad_norm": 0.30402323603630066, "learning_rate": 1.8871106112794136e-05, "loss": 0.581, "step": 10028 }, { "epoch": 0.30808220440512396, "grad_norm": 0.5473057627677917, "learning_rate": 1.8870883042449903e-05, "loss": 0.53, "step": 10029 }, { "epoch": 0.30811292354007314, "grad_norm": 0.35421037673950195, "learning_rate": 1.8870659951387098e-05, "loss": 0.5716, "step": 10030 }, { "epoch": 0.30814364267502226, "grad_norm": 0.3626708686351776, "learning_rate": 1.887043683960625e-05, "loss": 0.588, "step": 10031 }, { "epoch": 0.30817436180997143, "grad_norm": 0.3443436920642853, "learning_rate": 1.8870213707107878e-05, "loss": 0.4816, "step": 10032 }, { "epoch": 0.3082050809449206, "grad_norm": 0.40728700160980225, "learning_rate": 1.8869990553892503e-05, "loss": 0.6237, "step": 10033 }, { "epoch": 0.3082358000798697, "grad_norm": 0.8146523237228394, "learning_rate": 1.8869767379960643e-05, "loss": 0.6209, "step": 10034 }, { "epoch": 0.3082665192148189, "grad_norm": 0.3572864234447479, "learning_rate": 1.8869544185312825e-05, "loss": 0.5387, "step": 10035 }, { "epoch": 0.3082972383497681, "grad_norm": 0.33158814907073975, "learning_rate": 1.8869320969949565e-05, "loss": 0.5454, "step": 10036 }, { "epoch": 0.30832795748471725, "grad_norm": 0.3370170593261719, "learning_rate": 1.8869097733871386e-05, "loss": 0.5476, "step": 10037 }, { "epoch": 0.30835867661966637, "grad_norm": 0.3635900318622589, "learning_rate": 1.8868874477078813e-05, "loss": 0.6192, "step": 10038 }, { "epoch": 0.30838939575461555, "grad_norm": 0.3283371925354004, "learning_rate": 1.886865119957236e-05, "loss": 0.6255, "step": 10039 }, { "epoch": 0.3084201148895647, "grad_norm": 0.35886794328689575, "learning_rate": 1.8868427901352558e-05, "loss": 0.653, "step": 10040 }, { "epoch": 0.30845083402451384, "grad_norm": 0.35882341861724854, "learning_rate": 1.8868204582419917e-05, "loss": 0.5975, "step": 10041 }, { "epoch": 0.308481553159463, "grad_norm": 0.9218908548355103, "learning_rate": 1.886798124277497e-05, "loss": 0.6627, "step": 10042 }, { "epoch": 0.3085122722944122, "grad_norm": 0.34815147519111633, "learning_rate": 1.886775788241823e-05, "loss": 0.607, "step": 10043 }, { "epoch": 0.30854299142936137, "grad_norm": 0.3339928984642029, "learning_rate": 1.8867534501350224e-05, "loss": 0.5714, "step": 10044 }, { "epoch": 0.3085737105643105, "grad_norm": 0.3709262013435364, "learning_rate": 1.8867311099571472e-05, "loss": 0.5093, "step": 10045 }, { "epoch": 0.30860442969925966, "grad_norm": 0.5291286110877991, "learning_rate": 1.8867087677082497e-05, "loss": 0.6351, "step": 10046 }, { "epoch": 0.30863514883420884, "grad_norm": 0.3512604832649231, "learning_rate": 1.8866864233883814e-05, "loss": 0.6609, "step": 10047 }, { "epoch": 0.308665867969158, "grad_norm": 0.35419678688049316, "learning_rate": 1.8866640769975957e-05, "loss": 0.6043, "step": 10048 }, { "epoch": 0.30869658710410713, "grad_norm": 0.336814284324646, "learning_rate": 1.8866417285359436e-05, "loss": 0.5877, "step": 10049 }, { "epoch": 0.3087273062390563, "grad_norm": 0.43308067321777344, "learning_rate": 1.886619378003478e-05, "loss": 0.5061, "step": 10050 }, { "epoch": 0.3087580253740055, "grad_norm": 0.3544464409351349, "learning_rate": 1.886597025400251e-05, "loss": 0.6102, "step": 10051 }, { "epoch": 0.3087887445089546, "grad_norm": 0.34486865997314453, "learning_rate": 1.8865746707263147e-05, "loss": 0.671, "step": 10052 }, { "epoch": 0.3088194636439038, "grad_norm": 0.4074319303035736, "learning_rate": 1.886552313981721e-05, "loss": 0.6297, "step": 10053 }, { "epoch": 0.30885018277885296, "grad_norm": 0.35648417472839355, "learning_rate": 1.886529955166523e-05, "loss": 0.561, "step": 10054 }, { "epoch": 0.30888090191380213, "grad_norm": 0.3280024826526642, "learning_rate": 1.8865075942807723e-05, "loss": 0.5178, "step": 10055 }, { "epoch": 0.30891162104875125, "grad_norm": 0.31049469113349915, "learning_rate": 1.886485231324521e-05, "loss": 0.4671, "step": 10056 }, { "epoch": 0.3089423401837004, "grad_norm": 0.3374369740486145, "learning_rate": 1.8864628662978216e-05, "loss": 0.5848, "step": 10057 }, { "epoch": 0.3089730593186496, "grad_norm": 0.3599373996257782, "learning_rate": 1.886440499200726e-05, "loss": 0.596, "step": 10058 }, { "epoch": 0.3090037784535987, "grad_norm": 0.32661235332489014, "learning_rate": 1.8864181300332875e-05, "loss": 0.4915, "step": 10059 }, { "epoch": 0.3090344975885479, "grad_norm": 0.5037582516670227, "learning_rate": 1.8863957587955567e-05, "loss": 0.6571, "step": 10060 }, { "epoch": 0.30906521672349707, "grad_norm": 0.35929858684539795, "learning_rate": 1.8863733854875873e-05, "loss": 0.6282, "step": 10061 }, { "epoch": 0.30909593585844625, "grad_norm": 0.33099040389060974, "learning_rate": 1.8863510101094304e-05, "loss": 0.5739, "step": 10062 }, { "epoch": 0.30912665499339537, "grad_norm": 0.3487200438976288, "learning_rate": 1.886328632661139e-05, "loss": 0.5745, "step": 10063 }, { "epoch": 0.30915737412834454, "grad_norm": 0.34596195816993713, "learning_rate": 1.8863062531427653e-05, "loss": 0.6115, "step": 10064 }, { "epoch": 0.3091880932632937, "grad_norm": 0.30053335428237915, "learning_rate": 1.8862838715543614e-05, "loss": 0.6857, "step": 10065 }, { "epoch": 0.3092188123982429, "grad_norm": 0.43476206064224243, "learning_rate": 1.8862614878959797e-05, "loss": 0.5091, "step": 10066 }, { "epoch": 0.309249531533192, "grad_norm": 0.37464210391044617, "learning_rate": 1.8862391021676725e-05, "loss": 0.5727, "step": 10067 }, { "epoch": 0.3092802506681412, "grad_norm": 0.34806546568870544, "learning_rate": 1.8862167143694915e-05, "loss": 0.5247, "step": 10068 }, { "epoch": 0.30931096980309036, "grad_norm": 0.3702642023563385, "learning_rate": 1.88619432450149e-05, "loss": 0.6296, "step": 10069 }, { "epoch": 0.3093416889380395, "grad_norm": 0.346971333026886, "learning_rate": 1.8861719325637193e-05, "loss": 0.6063, "step": 10070 }, { "epoch": 0.30937240807298866, "grad_norm": 0.5474721193313599, "learning_rate": 1.8861495385562325e-05, "loss": 0.5756, "step": 10071 }, { "epoch": 0.30940312720793783, "grad_norm": 0.3258747458457947, "learning_rate": 1.8861271424790814e-05, "loss": 0.5888, "step": 10072 }, { "epoch": 0.309433846342887, "grad_norm": 0.34816378355026245, "learning_rate": 1.8861047443323187e-05, "loss": 0.6199, "step": 10073 }, { "epoch": 0.30946456547783613, "grad_norm": 0.333275705575943, "learning_rate": 1.8860823441159964e-05, "loss": 0.5874, "step": 10074 }, { "epoch": 0.3094952846127853, "grad_norm": 0.4470853805541992, "learning_rate": 1.886059941830167e-05, "loss": 0.5859, "step": 10075 }, { "epoch": 0.3095260037477345, "grad_norm": 0.3728362023830414, "learning_rate": 1.8860375374748824e-05, "loss": 0.5939, "step": 10076 }, { "epoch": 0.3095567228826836, "grad_norm": 0.3364766836166382, "learning_rate": 1.8860151310501952e-05, "loss": 0.4505, "step": 10077 }, { "epoch": 0.3095874420176328, "grad_norm": 0.3819984495639801, "learning_rate": 1.885992722556158e-05, "loss": 0.5895, "step": 10078 }, { "epoch": 0.30961816115258195, "grad_norm": 0.37821725010871887, "learning_rate": 1.8859703119928227e-05, "loss": 0.5896, "step": 10079 }, { "epoch": 0.3096488802875311, "grad_norm": 0.33330416679382324, "learning_rate": 1.885947899360242e-05, "loss": 0.6424, "step": 10080 }, { "epoch": 0.30967959942248025, "grad_norm": 0.3147616684436798, "learning_rate": 1.8859254846584686e-05, "loss": 0.5193, "step": 10081 }, { "epoch": 0.3097103185574294, "grad_norm": 0.38529089093208313, "learning_rate": 1.8859030678875537e-05, "loss": 0.5588, "step": 10082 }, { "epoch": 0.3097410376923786, "grad_norm": 0.3720923364162445, "learning_rate": 1.885880649047551e-05, "loss": 0.5961, "step": 10083 }, { "epoch": 0.3097717568273277, "grad_norm": 0.3209877908229828, "learning_rate": 1.8858582281385114e-05, "loss": 0.5233, "step": 10084 }, { "epoch": 0.3098024759622769, "grad_norm": 0.36094576120376587, "learning_rate": 1.8858358051604883e-05, "loss": 0.6017, "step": 10085 }, { "epoch": 0.30983319509722607, "grad_norm": 0.3370627164840698, "learning_rate": 1.8858133801135337e-05, "loss": 0.5723, "step": 10086 }, { "epoch": 0.30986391423217524, "grad_norm": 0.33130931854248047, "learning_rate": 1.8857909529977005e-05, "loss": 0.6039, "step": 10087 }, { "epoch": 0.30989463336712436, "grad_norm": 0.3265426754951477, "learning_rate": 1.8857685238130404e-05, "loss": 0.571, "step": 10088 }, { "epoch": 0.30992535250207354, "grad_norm": 0.3893640339374542, "learning_rate": 1.885746092559606e-05, "loss": 0.6045, "step": 10089 }, { "epoch": 0.3099560716370227, "grad_norm": 0.41857635974884033, "learning_rate": 1.8857236592374496e-05, "loss": 0.4607, "step": 10090 }, { "epoch": 0.3099867907719719, "grad_norm": 0.4230824410915375, "learning_rate": 1.885701223846624e-05, "loss": 0.6095, "step": 10091 }, { "epoch": 0.310017509906921, "grad_norm": 0.3286364674568176, "learning_rate": 1.8856787863871816e-05, "loss": 0.5408, "step": 10092 }, { "epoch": 0.3100482290418702, "grad_norm": 0.45589977502822876, "learning_rate": 1.885656346859174e-05, "loss": 0.6342, "step": 10093 }, { "epoch": 0.31007894817681936, "grad_norm": 0.3060173988342285, "learning_rate": 1.8856339052626544e-05, "loss": 0.6175, "step": 10094 }, { "epoch": 0.3101096673117685, "grad_norm": 0.3755647838115692, "learning_rate": 1.8856114615976748e-05, "loss": 0.5913, "step": 10095 }, { "epoch": 0.31014038644671765, "grad_norm": 0.37145721912384033, "learning_rate": 1.8855890158642884e-05, "loss": 0.6416, "step": 10096 }, { "epoch": 0.31017110558166683, "grad_norm": 0.37311220169067383, "learning_rate": 1.8855665680625465e-05, "loss": 0.5773, "step": 10097 }, { "epoch": 0.310201824716616, "grad_norm": 0.29805198311805725, "learning_rate": 1.8855441181925016e-05, "loss": 0.5409, "step": 10098 }, { "epoch": 0.3102325438515651, "grad_norm": 0.3259282410144806, "learning_rate": 1.8855216662542072e-05, "loss": 0.5376, "step": 10099 }, { "epoch": 0.3102632629865143, "grad_norm": 0.3848120868206024, "learning_rate": 1.885499212247715e-05, "loss": 0.5464, "step": 10100 }, { "epoch": 0.3102939821214635, "grad_norm": 0.33870917558670044, "learning_rate": 1.8854767561730774e-05, "loss": 0.5222, "step": 10101 }, { "epoch": 0.3103247012564126, "grad_norm": 0.38864636421203613, "learning_rate": 1.885454298030347e-05, "loss": 0.7, "step": 10102 }, { "epoch": 0.31035542039136177, "grad_norm": 0.3590394854545593, "learning_rate": 1.8854318378195765e-05, "loss": 0.6193, "step": 10103 }, { "epoch": 0.31038613952631094, "grad_norm": 0.3269961476325989, "learning_rate": 1.8854093755408177e-05, "loss": 0.6101, "step": 10104 }, { "epoch": 0.3104168586612601, "grad_norm": 0.3161408305168152, "learning_rate": 1.885386911194124e-05, "loss": 0.5529, "step": 10105 }, { "epoch": 0.31044757779620924, "grad_norm": 0.32937687635421753, "learning_rate": 1.8853644447795466e-05, "loss": 0.5541, "step": 10106 }, { "epoch": 0.3104782969311584, "grad_norm": 0.33331647515296936, "learning_rate": 1.8853419762971392e-05, "loss": 0.4693, "step": 10107 }, { "epoch": 0.3105090160661076, "grad_norm": 0.3682630956172943, "learning_rate": 1.8853195057469536e-05, "loss": 0.6209, "step": 10108 }, { "epoch": 0.31053973520105677, "grad_norm": 0.3355412781238556, "learning_rate": 1.8852970331290427e-05, "loss": 0.5694, "step": 10109 }, { "epoch": 0.3105704543360059, "grad_norm": 0.3648889362812042, "learning_rate": 1.8852745584434586e-05, "loss": 0.5443, "step": 10110 }, { "epoch": 0.31060117347095506, "grad_norm": 0.37513238191604614, "learning_rate": 1.8852520816902543e-05, "loss": 0.5213, "step": 10111 }, { "epoch": 0.31063189260590424, "grad_norm": 0.3685113191604614, "learning_rate": 1.8852296028694815e-05, "loss": 0.6206, "step": 10112 }, { "epoch": 0.31066261174085336, "grad_norm": 0.3165138065814972, "learning_rate": 1.885207121981193e-05, "loss": 0.638, "step": 10113 }, { "epoch": 0.31069333087580253, "grad_norm": 0.4087908864021301, "learning_rate": 1.885184639025442e-05, "loss": 0.5349, "step": 10114 }, { "epoch": 0.3107240500107517, "grad_norm": 0.41599997878074646, "learning_rate": 1.88516215400228e-05, "loss": 0.5389, "step": 10115 }, { "epoch": 0.3107547691457009, "grad_norm": 0.32947707176208496, "learning_rate": 1.88513966691176e-05, "loss": 0.499, "step": 10116 }, { "epoch": 0.31078548828065, "grad_norm": 0.4281942546367645, "learning_rate": 1.8851171777539346e-05, "loss": 0.5989, "step": 10117 }, { "epoch": 0.3108162074155992, "grad_norm": 0.3466014862060547, "learning_rate": 1.8850946865288562e-05, "loss": 0.6016, "step": 10118 }, { "epoch": 0.31084692655054835, "grad_norm": 0.37585967779159546, "learning_rate": 1.8850721932365774e-05, "loss": 0.5571, "step": 10119 }, { "epoch": 0.3108776456854975, "grad_norm": 0.33437803387641907, "learning_rate": 1.8850496978771505e-05, "loss": 0.4856, "step": 10120 }, { "epoch": 0.31090836482044665, "grad_norm": 0.34240373969078064, "learning_rate": 1.8850272004506288e-05, "loss": 0.6119, "step": 10121 }, { "epoch": 0.3109390839553958, "grad_norm": 0.3444538414478302, "learning_rate": 1.8850047009570635e-05, "loss": 0.5521, "step": 10122 }, { "epoch": 0.310969803090345, "grad_norm": 0.35504409670829773, "learning_rate": 1.8849821993965084e-05, "loss": 0.6462, "step": 10123 }, { "epoch": 0.3110005222252941, "grad_norm": 0.36577844619750977, "learning_rate": 1.8849596957690154e-05, "loss": 0.6359, "step": 10124 }, { "epoch": 0.3110312413602433, "grad_norm": 0.340678334236145, "learning_rate": 1.8849371900746373e-05, "loss": 0.5076, "step": 10125 }, { "epoch": 0.31106196049519247, "grad_norm": 0.344433456659317, "learning_rate": 1.8849146823134263e-05, "loss": 0.5591, "step": 10126 }, { "epoch": 0.3110926796301416, "grad_norm": 0.38046029210090637, "learning_rate": 1.8848921724854356e-05, "loss": 0.6157, "step": 10127 }, { "epoch": 0.31112339876509076, "grad_norm": 0.33657950162887573, "learning_rate": 1.8848696605907174e-05, "loss": 0.5147, "step": 10128 }, { "epoch": 0.31115411790003994, "grad_norm": 0.3590734601020813, "learning_rate": 1.884847146629324e-05, "loss": 0.5903, "step": 10129 }, { "epoch": 0.3111848370349891, "grad_norm": 0.37918850779533386, "learning_rate": 1.8848246306013088e-05, "loss": 0.5205, "step": 10130 }, { "epoch": 0.31121555616993823, "grad_norm": 0.38848331570625305, "learning_rate": 1.8848021125067234e-05, "loss": 0.5442, "step": 10131 }, { "epoch": 0.3112462753048874, "grad_norm": 0.35899046063423157, "learning_rate": 1.8847795923456213e-05, "loss": 0.5914, "step": 10132 }, { "epoch": 0.3112769944398366, "grad_norm": 0.4341716468334198, "learning_rate": 1.8847570701180542e-05, "loss": 0.5763, "step": 10133 }, { "epoch": 0.31130771357478576, "grad_norm": 0.3290901184082031, "learning_rate": 1.8847345458240753e-05, "loss": 0.5466, "step": 10134 }, { "epoch": 0.3113384327097349, "grad_norm": 0.6974817514419556, "learning_rate": 1.8847120194637373e-05, "loss": 0.6019, "step": 10135 }, { "epoch": 0.31136915184468406, "grad_norm": 0.3475024998188019, "learning_rate": 1.8846894910370925e-05, "loss": 0.5711, "step": 10136 }, { "epoch": 0.31139987097963323, "grad_norm": 0.35008350014686584, "learning_rate": 1.8846669605441934e-05, "loss": 0.6326, "step": 10137 }, { "epoch": 0.31143059011458235, "grad_norm": 0.3038123548030853, "learning_rate": 1.884644427985093e-05, "loss": 0.4606, "step": 10138 }, { "epoch": 0.3114613092495315, "grad_norm": 0.38448306918144226, "learning_rate": 1.884621893359844e-05, "loss": 0.6256, "step": 10139 }, { "epoch": 0.3114920283844807, "grad_norm": 0.34443485736846924, "learning_rate": 1.8845993566684987e-05, "loss": 0.5843, "step": 10140 }, { "epoch": 0.3115227475194299, "grad_norm": 0.36280083656311035, "learning_rate": 1.8845768179111094e-05, "loss": 0.5844, "step": 10141 }, { "epoch": 0.311553466654379, "grad_norm": 0.3561347723007202, "learning_rate": 1.8845542770877295e-05, "loss": 0.574, "step": 10142 }, { "epoch": 0.31158418578932817, "grad_norm": 0.38417309522628784, "learning_rate": 1.884531734198411e-05, "loss": 0.5898, "step": 10143 }, { "epoch": 0.31161490492427735, "grad_norm": 0.3686124384403229, "learning_rate": 1.8845091892432076e-05, "loss": 0.5914, "step": 10144 }, { "epoch": 0.31164562405922647, "grad_norm": 0.32857316732406616, "learning_rate": 1.8844866422221706e-05, "loss": 0.6424, "step": 10145 }, { "epoch": 0.31167634319417564, "grad_norm": 0.32416924834251404, "learning_rate": 1.8844640931353534e-05, "loss": 0.5488, "step": 10146 }, { "epoch": 0.3117070623291248, "grad_norm": 0.30492162704467773, "learning_rate": 1.8844415419828085e-05, "loss": 0.5608, "step": 10147 }, { "epoch": 0.311737781464074, "grad_norm": 0.3545151650905609, "learning_rate": 1.8844189887645885e-05, "loss": 0.6414, "step": 10148 }, { "epoch": 0.3117685005990231, "grad_norm": 0.39208006858825684, "learning_rate": 1.8843964334807465e-05, "loss": 0.5411, "step": 10149 }, { "epoch": 0.3117992197339723, "grad_norm": 0.36538007855415344, "learning_rate": 1.8843738761313346e-05, "loss": 0.5718, "step": 10150 }, { "epoch": 0.31182993886892146, "grad_norm": 0.32422593235969543, "learning_rate": 1.884351316716406e-05, "loss": 0.5763, "step": 10151 }, { "epoch": 0.3118606580038706, "grad_norm": 0.3490673899650574, "learning_rate": 1.884328755236013e-05, "loss": 0.5175, "step": 10152 }, { "epoch": 0.31189137713881976, "grad_norm": 0.3489876091480255, "learning_rate": 1.8843061916902082e-05, "loss": 0.6594, "step": 10153 }, { "epoch": 0.31192209627376893, "grad_norm": 0.5540232062339783, "learning_rate": 1.8842836260790447e-05, "loss": 0.7422, "step": 10154 }, { "epoch": 0.3119528154087181, "grad_norm": 0.38867658376693726, "learning_rate": 1.884261058402575e-05, "loss": 0.6033, "step": 10155 }, { "epoch": 0.31198353454366723, "grad_norm": 0.3383646011352539, "learning_rate": 1.8842384886608523e-05, "loss": 0.5329, "step": 10156 }, { "epoch": 0.3120142536786164, "grad_norm": 0.3349343240261078, "learning_rate": 1.8842159168539284e-05, "loss": 0.5855, "step": 10157 }, { "epoch": 0.3120449728135656, "grad_norm": 0.3391203284263611, "learning_rate": 1.8841933429818565e-05, "loss": 0.5312, "step": 10158 }, { "epoch": 0.31207569194851476, "grad_norm": 0.3235531449317932, "learning_rate": 1.8841707670446895e-05, "loss": 0.573, "step": 10159 }, { "epoch": 0.3121064110834639, "grad_norm": 0.3281766474246979, "learning_rate": 1.8841481890424796e-05, "loss": 0.5364, "step": 10160 }, { "epoch": 0.31213713021841305, "grad_norm": 0.3836991786956787, "learning_rate": 1.88412560897528e-05, "loss": 0.5795, "step": 10161 }, { "epoch": 0.3121678493533622, "grad_norm": 0.35236769914627075, "learning_rate": 1.8841030268431433e-05, "loss": 0.6746, "step": 10162 }, { "epoch": 0.31219856848831135, "grad_norm": 0.3586454391479492, "learning_rate": 1.884080442646122e-05, "loss": 0.5855, "step": 10163 }, { "epoch": 0.3122292876232605, "grad_norm": 0.3540007770061493, "learning_rate": 1.8840578563842694e-05, "loss": 0.5771, "step": 10164 }, { "epoch": 0.3122600067582097, "grad_norm": 0.3668319284915924, "learning_rate": 1.884035268057638e-05, "loss": 0.5725, "step": 10165 }, { "epoch": 0.31229072589315887, "grad_norm": 0.3228594958782196, "learning_rate": 1.8840126776662802e-05, "loss": 0.5348, "step": 10166 }, { "epoch": 0.312321445028108, "grad_norm": 0.36075928807258606, "learning_rate": 1.8839900852102496e-05, "loss": 0.5296, "step": 10167 }, { "epoch": 0.31235216416305717, "grad_norm": 0.33458513021469116, "learning_rate": 1.883967490689598e-05, "loss": 0.5741, "step": 10168 }, { "epoch": 0.31238288329800634, "grad_norm": 0.45339933037757874, "learning_rate": 1.8839448941043785e-05, "loss": 0.5772, "step": 10169 }, { "epoch": 0.31241360243295546, "grad_norm": 0.36477193236351013, "learning_rate": 1.883922295454644e-05, "loss": 0.5919, "step": 10170 }, { "epoch": 0.31244432156790464, "grad_norm": 0.351065993309021, "learning_rate": 1.8838996947404474e-05, "loss": 0.5727, "step": 10171 }, { "epoch": 0.3124750407028538, "grad_norm": 0.34001803398132324, "learning_rate": 1.8838770919618414e-05, "loss": 0.6192, "step": 10172 }, { "epoch": 0.312505759837803, "grad_norm": 0.3540021479129791, "learning_rate": 1.8838544871188785e-05, "loss": 0.5319, "step": 10173 }, { "epoch": 0.3125364789727521, "grad_norm": 0.38077518343925476, "learning_rate": 1.8838318802116116e-05, "loss": 0.6232, "step": 10174 }, { "epoch": 0.3125671981077013, "grad_norm": 0.36561235785484314, "learning_rate": 1.883809271240094e-05, "loss": 0.5059, "step": 10175 }, { "epoch": 0.31259791724265046, "grad_norm": 0.35212212800979614, "learning_rate": 1.883786660204378e-05, "loss": 0.6538, "step": 10176 }, { "epoch": 0.31262863637759963, "grad_norm": 0.3425794243812561, "learning_rate": 1.8837640471045163e-05, "loss": 0.6229, "step": 10177 }, { "epoch": 0.31265935551254875, "grad_norm": 0.3151055574417114, "learning_rate": 1.8837414319405623e-05, "loss": 0.5352, "step": 10178 }, { "epoch": 0.31269007464749793, "grad_norm": 0.35629838705062866, "learning_rate": 1.8837188147125683e-05, "loss": 0.5642, "step": 10179 }, { "epoch": 0.3127207937824471, "grad_norm": 0.39631232619285583, "learning_rate": 1.8836961954205874e-05, "loss": 0.5176, "step": 10180 }, { "epoch": 0.3127515129173962, "grad_norm": 0.34926873445510864, "learning_rate": 1.883673574064672e-05, "loss": 0.5469, "step": 10181 }, { "epoch": 0.3127822320523454, "grad_norm": 0.5440989136695862, "learning_rate": 1.8836509506448754e-05, "loss": 0.5191, "step": 10182 }, { "epoch": 0.3128129511872946, "grad_norm": 0.32009851932525635, "learning_rate": 1.8836283251612504e-05, "loss": 0.497, "step": 10183 }, { "epoch": 0.31284367032224375, "grad_norm": 0.33849743008613586, "learning_rate": 1.8836056976138496e-05, "loss": 0.5721, "step": 10184 }, { "epoch": 0.31287438945719287, "grad_norm": 0.3180125653743744, "learning_rate": 1.883583068002726e-05, "loss": 0.5259, "step": 10185 }, { "epoch": 0.31290510859214204, "grad_norm": 0.35506802797317505, "learning_rate": 1.8835604363279326e-05, "loss": 0.5361, "step": 10186 }, { "epoch": 0.3129358277270912, "grad_norm": 0.33124202489852905, "learning_rate": 1.8835378025895223e-05, "loss": 0.5457, "step": 10187 }, { "epoch": 0.31296654686204034, "grad_norm": 0.32262033224105835, "learning_rate": 1.8835151667875472e-05, "loss": 0.5292, "step": 10188 }, { "epoch": 0.3129972659969895, "grad_norm": 0.3302468955516815, "learning_rate": 1.883492528922061e-05, "loss": 0.5042, "step": 10189 }, { "epoch": 0.3130279851319387, "grad_norm": 0.3643263578414917, "learning_rate": 1.8834698889931164e-05, "loss": 0.5847, "step": 10190 }, { "epoch": 0.31305870426688787, "grad_norm": 0.3537326753139496, "learning_rate": 1.883447247000766e-05, "loss": 0.5985, "step": 10191 }, { "epoch": 0.313089423401837, "grad_norm": 0.3610600531101227, "learning_rate": 1.8834246029450627e-05, "loss": 0.5188, "step": 10192 }, { "epoch": 0.31312014253678616, "grad_norm": 0.3216906487941742, "learning_rate": 1.88340195682606e-05, "loss": 0.5984, "step": 10193 }, { "epoch": 0.31315086167173534, "grad_norm": 0.3426816761493683, "learning_rate": 1.88337930864381e-05, "loss": 0.5952, "step": 10194 }, { "epoch": 0.31318158080668446, "grad_norm": 0.3281533718109131, "learning_rate": 1.883356658398366e-05, "loss": 0.6029, "step": 10195 }, { "epoch": 0.31321229994163363, "grad_norm": 0.3344190716743469, "learning_rate": 1.8833340060897807e-05, "loss": 0.5476, "step": 10196 }, { "epoch": 0.3132430190765828, "grad_norm": 0.35871726274490356, "learning_rate": 1.8833113517181075e-05, "loss": 0.5423, "step": 10197 }, { "epoch": 0.313273738211532, "grad_norm": 0.37104782462120056, "learning_rate": 1.8832886952833985e-05, "loss": 0.5539, "step": 10198 }, { "epoch": 0.3133044573464811, "grad_norm": 0.3424052596092224, "learning_rate": 1.8832660367857075e-05, "loss": 0.5813, "step": 10199 }, { "epoch": 0.3133351764814303, "grad_norm": 0.34805747866630554, "learning_rate": 1.8832433762250865e-05, "loss": 0.618, "step": 10200 }, { "epoch": 0.31336589561637945, "grad_norm": 0.3593772053718567, "learning_rate": 1.883220713601589e-05, "loss": 0.6035, "step": 10201 }, { "epoch": 0.31339661475132863, "grad_norm": 0.3299776613712311, "learning_rate": 1.8831980489152685e-05, "loss": 0.509, "step": 10202 }, { "epoch": 0.31342733388627775, "grad_norm": 0.3468387722969055, "learning_rate": 1.8831753821661766e-05, "loss": 0.5912, "step": 10203 }, { "epoch": 0.3134580530212269, "grad_norm": 0.32769569754600525, "learning_rate": 1.8831527133543673e-05, "loss": 0.6339, "step": 10204 }, { "epoch": 0.3134887721561761, "grad_norm": 0.37946128845214844, "learning_rate": 1.883130042479893e-05, "loss": 0.5693, "step": 10205 }, { "epoch": 0.3135194912911252, "grad_norm": 0.3612501621246338, "learning_rate": 1.8831073695428066e-05, "loss": 0.6362, "step": 10206 }, { "epoch": 0.3135502104260744, "grad_norm": 0.3639802932739258, "learning_rate": 1.8830846945431615e-05, "loss": 0.5891, "step": 10207 }, { "epoch": 0.31358092956102357, "grad_norm": 0.3434635400772095, "learning_rate": 1.8830620174810104e-05, "loss": 0.6387, "step": 10208 }, { "epoch": 0.31361164869597274, "grad_norm": 0.3162698447704315, "learning_rate": 1.8830393383564063e-05, "loss": 0.5534, "step": 10209 }, { "epoch": 0.31364236783092186, "grad_norm": 0.3464967608451843, "learning_rate": 1.883016657169402e-05, "loss": 0.5822, "step": 10210 }, { "epoch": 0.31367308696587104, "grad_norm": 0.3357175290584564, "learning_rate": 1.882993973920051e-05, "loss": 0.602, "step": 10211 }, { "epoch": 0.3137038061008202, "grad_norm": 0.36601942777633667, "learning_rate": 1.8829712886084053e-05, "loss": 0.5831, "step": 10212 }, { "epoch": 0.31373452523576933, "grad_norm": 0.32678937911987305, "learning_rate": 1.8829486012345192e-05, "loss": 0.5382, "step": 10213 }, { "epoch": 0.3137652443707185, "grad_norm": 0.39790043234825134, "learning_rate": 1.8829259117984443e-05, "loss": 0.5868, "step": 10214 }, { "epoch": 0.3137959635056677, "grad_norm": 0.34391751885414124, "learning_rate": 1.8829032203002345e-05, "loss": 0.5651, "step": 10215 }, { "epoch": 0.31382668264061686, "grad_norm": 0.4029100835323334, "learning_rate": 1.8828805267399427e-05, "loss": 0.5559, "step": 10216 }, { "epoch": 0.313857401775566, "grad_norm": 0.3164682686328888, "learning_rate": 1.8828578311176216e-05, "loss": 0.6115, "step": 10217 }, { "epoch": 0.31388812091051516, "grad_norm": 0.41359400749206543, "learning_rate": 1.8828351334333246e-05, "loss": 0.655, "step": 10218 }, { "epoch": 0.31391884004546433, "grad_norm": 0.31845858693122864, "learning_rate": 1.8828124336871047e-05, "loss": 0.5347, "step": 10219 }, { "epoch": 0.3139495591804135, "grad_norm": 0.31494998931884766, "learning_rate": 1.882789731879014e-05, "loss": 0.6033, "step": 10220 }, { "epoch": 0.3139802783153626, "grad_norm": 0.3435204327106476, "learning_rate": 1.8827670280091065e-05, "loss": 0.5948, "step": 10221 }, { "epoch": 0.3140109974503118, "grad_norm": 0.35050079226493835, "learning_rate": 1.8827443220774353e-05, "loss": 0.5821, "step": 10222 }, { "epoch": 0.314041716585261, "grad_norm": 0.33773186802864075, "learning_rate": 1.8827216140840527e-05, "loss": 0.5756, "step": 10223 }, { "epoch": 0.3140724357202101, "grad_norm": 0.35729721188545227, "learning_rate": 1.8826989040290126e-05, "loss": 0.567, "step": 10224 }, { "epoch": 0.3141031548551593, "grad_norm": 0.34522542357444763, "learning_rate": 1.882676191912367e-05, "loss": 0.6538, "step": 10225 }, { "epoch": 0.31413387399010845, "grad_norm": 0.320198118686676, "learning_rate": 1.88265347773417e-05, "loss": 0.5461, "step": 10226 }, { "epoch": 0.3141645931250576, "grad_norm": 0.35079413652420044, "learning_rate": 1.8826307614944736e-05, "loss": 0.6021, "step": 10227 }, { "epoch": 0.31419531226000674, "grad_norm": 0.3602257966995239, "learning_rate": 1.882608043193332e-05, "loss": 0.5223, "step": 10228 }, { "epoch": 0.3142260313949559, "grad_norm": 0.3310004770755768, "learning_rate": 1.882585322830797e-05, "loss": 0.637, "step": 10229 }, { "epoch": 0.3142567505299051, "grad_norm": 0.3350307047367096, "learning_rate": 1.882562600406923e-05, "loss": 0.5737, "step": 10230 }, { "epoch": 0.3142874696648542, "grad_norm": 0.3511955440044403, "learning_rate": 1.8825398759217622e-05, "loss": 0.5773, "step": 10231 }, { "epoch": 0.3143181887998034, "grad_norm": 0.44921422004699707, "learning_rate": 1.8825171493753677e-05, "loss": 0.6087, "step": 10232 }, { "epoch": 0.31434890793475256, "grad_norm": 0.3474528193473816, "learning_rate": 1.882494420767793e-05, "loss": 0.5922, "step": 10233 }, { "epoch": 0.31437962706970174, "grad_norm": 0.36127743124961853, "learning_rate": 1.8824716900990908e-05, "loss": 0.4914, "step": 10234 }, { "epoch": 0.31441034620465086, "grad_norm": 0.33839255571365356, "learning_rate": 1.8824489573693144e-05, "loss": 0.5333, "step": 10235 }, { "epoch": 0.31444106533960003, "grad_norm": 0.37911146879196167, "learning_rate": 1.8824262225785168e-05, "loss": 0.5789, "step": 10236 }, { "epoch": 0.3144717844745492, "grad_norm": 0.33953458070755005, "learning_rate": 1.8824034857267512e-05, "loss": 0.6639, "step": 10237 }, { "epoch": 0.31450250360949833, "grad_norm": 0.3384494185447693, "learning_rate": 1.8823807468140704e-05, "loss": 0.5617, "step": 10238 }, { "epoch": 0.3145332227444475, "grad_norm": 0.33919548988342285, "learning_rate": 1.882358005840528e-05, "loss": 0.5423, "step": 10239 }, { "epoch": 0.3145639418793967, "grad_norm": 0.35729315876960754, "learning_rate": 1.882335262806177e-05, "loss": 0.6649, "step": 10240 }, { "epoch": 0.31459466101434586, "grad_norm": 0.3558410704135895, "learning_rate": 1.88231251771107e-05, "loss": 0.5773, "step": 10241 }, { "epoch": 0.314625380149295, "grad_norm": 0.3184834420681, "learning_rate": 1.8822897705552605e-05, "loss": 0.6452, "step": 10242 }, { "epoch": 0.31465609928424415, "grad_norm": 0.31638583540916443, "learning_rate": 1.8822670213388016e-05, "loss": 0.5753, "step": 10243 }, { "epoch": 0.3146868184191933, "grad_norm": 0.3459552824497223, "learning_rate": 1.8822442700617467e-05, "loss": 0.5833, "step": 10244 }, { "epoch": 0.3147175375541425, "grad_norm": 0.4149627387523651, "learning_rate": 1.8822215167241487e-05, "loss": 0.5332, "step": 10245 }, { "epoch": 0.3147482566890916, "grad_norm": 0.3723471760749817, "learning_rate": 1.8821987613260607e-05, "loss": 0.5846, "step": 10246 }, { "epoch": 0.3147789758240408, "grad_norm": 0.37196436524391174, "learning_rate": 1.8821760038675355e-05, "loss": 0.6206, "step": 10247 }, { "epoch": 0.31480969495898997, "grad_norm": 0.3790593445301056, "learning_rate": 1.882153244348627e-05, "loss": 0.5547, "step": 10248 }, { "epoch": 0.3148404140939391, "grad_norm": 0.4034518301486969, "learning_rate": 1.8821304827693884e-05, "loss": 0.619, "step": 10249 }, { "epoch": 0.31487113322888827, "grad_norm": 0.43165987730026245, "learning_rate": 1.8821077191298717e-05, "loss": 0.6153, "step": 10250 }, { "epoch": 0.31490185236383744, "grad_norm": 0.3570561707019806, "learning_rate": 1.882084953430131e-05, "loss": 0.5609, "step": 10251 }, { "epoch": 0.3149325714987866, "grad_norm": 0.3818589746952057, "learning_rate": 1.8820621856702196e-05, "loss": 0.5961, "step": 10252 }, { "epoch": 0.31496329063373574, "grad_norm": 0.36247026920318604, "learning_rate": 1.88203941585019e-05, "loss": 0.5274, "step": 10253 }, { "epoch": 0.3149940097686849, "grad_norm": 0.33482635021209717, "learning_rate": 1.882016643970096e-05, "loss": 0.5654, "step": 10254 }, { "epoch": 0.3150247289036341, "grad_norm": 0.33835846185684204, "learning_rate": 1.8819938700299905e-05, "loss": 0.5891, "step": 10255 }, { "epoch": 0.3150554480385832, "grad_norm": 0.39175713062286377, "learning_rate": 1.8819710940299263e-05, "loss": 0.6082, "step": 10256 }, { "epoch": 0.3150861671735324, "grad_norm": 0.32381680607795715, "learning_rate": 1.8819483159699573e-05, "loss": 0.6043, "step": 10257 }, { "epoch": 0.31511688630848156, "grad_norm": 0.314546138048172, "learning_rate": 1.8819255358501367e-05, "loss": 0.5411, "step": 10258 }, { "epoch": 0.31514760544343073, "grad_norm": 0.35210752487182617, "learning_rate": 1.8819027536705172e-05, "loss": 0.6301, "step": 10259 }, { "epoch": 0.31517832457837985, "grad_norm": 0.37179121375083923, "learning_rate": 1.8818799694311522e-05, "loss": 0.6221, "step": 10260 }, { "epoch": 0.31520904371332903, "grad_norm": 0.3643675148487091, "learning_rate": 1.881857183132095e-05, "loss": 0.5173, "step": 10261 }, { "epoch": 0.3152397628482782, "grad_norm": 0.3668730854988098, "learning_rate": 1.8818343947733987e-05, "loss": 0.6267, "step": 10262 }, { "epoch": 0.3152704819832274, "grad_norm": 0.3464733958244324, "learning_rate": 1.8818116043551167e-05, "loss": 0.5475, "step": 10263 }, { "epoch": 0.3153012011181765, "grad_norm": 0.3539682924747467, "learning_rate": 1.881788811877302e-05, "loss": 0.5906, "step": 10264 }, { "epoch": 0.3153319202531257, "grad_norm": 0.3298884928226471, "learning_rate": 1.8817660173400077e-05, "loss": 0.465, "step": 10265 }, { "epoch": 0.31536263938807485, "grad_norm": 0.31786584854125977, "learning_rate": 1.8817432207432878e-05, "loss": 0.6819, "step": 10266 }, { "epoch": 0.31539335852302397, "grad_norm": 0.3537514805793762, "learning_rate": 1.8817204220871946e-05, "loss": 0.5364, "step": 10267 }, { "epoch": 0.31542407765797315, "grad_norm": 0.34676802158355713, "learning_rate": 1.881697621371782e-05, "loss": 0.5893, "step": 10268 }, { "epoch": 0.3154547967929223, "grad_norm": 0.3786328434944153, "learning_rate": 1.881674818597103e-05, "loss": 0.6028, "step": 10269 }, { "epoch": 0.3154855159278715, "grad_norm": 0.34813159704208374, "learning_rate": 1.881652013763211e-05, "loss": 0.6062, "step": 10270 }, { "epoch": 0.3155162350628206, "grad_norm": 0.3288549780845642, "learning_rate": 1.8816292068701588e-05, "loss": 0.5435, "step": 10271 }, { "epoch": 0.3155469541977698, "grad_norm": 0.4264668822288513, "learning_rate": 1.8816063979180004e-05, "loss": 0.7402, "step": 10272 }, { "epoch": 0.31557767333271897, "grad_norm": 0.3753337562084198, "learning_rate": 1.8815835869067885e-05, "loss": 0.5026, "step": 10273 }, { "epoch": 0.3156083924676681, "grad_norm": 0.3764493465423584, "learning_rate": 1.8815607738365768e-05, "loss": 0.672, "step": 10274 }, { "epoch": 0.31563911160261726, "grad_norm": 0.32946935296058655, "learning_rate": 1.8815379587074177e-05, "loss": 0.5891, "step": 10275 }, { "epoch": 0.31566983073756644, "grad_norm": 0.3478465676307678, "learning_rate": 1.881515141519366e-05, "loss": 0.5777, "step": 10276 }, { "epoch": 0.3157005498725156, "grad_norm": 0.6545723080635071, "learning_rate": 1.8814923222724733e-05, "loss": 0.6545, "step": 10277 }, { "epoch": 0.31573126900746473, "grad_norm": 0.3580240309238434, "learning_rate": 1.8814695009667944e-05, "loss": 0.5796, "step": 10278 }, { "epoch": 0.3157619881424139, "grad_norm": 0.32342785596847534, "learning_rate": 1.8814466776023815e-05, "loss": 0.5825, "step": 10279 }, { "epoch": 0.3157927072773631, "grad_norm": 0.3625243604183197, "learning_rate": 1.8814238521792886e-05, "loss": 0.5491, "step": 10280 }, { "epoch": 0.3158234264123122, "grad_norm": 0.343291312456131, "learning_rate": 1.8814010246975685e-05, "loss": 0.5828, "step": 10281 }, { "epoch": 0.3158541455472614, "grad_norm": 0.3325168490409851, "learning_rate": 1.8813781951572748e-05, "loss": 0.6762, "step": 10282 }, { "epoch": 0.31588486468221055, "grad_norm": 0.537365734577179, "learning_rate": 1.8813553635584606e-05, "loss": 0.5699, "step": 10283 }, { "epoch": 0.31591558381715973, "grad_norm": 0.34361448884010315, "learning_rate": 1.8813325299011795e-05, "loss": 0.6247, "step": 10284 }, { "epoch": 0.31594630295210885, "grad_norm": 0.3684138357639313, "learning_rate": 1.881309694185485e-05, "loss": 0.6204, "step": 10285 }, { "epoch": 0.315977022087058, "grad_norm": 0.3150482177734375, "learning_rate": 1.88128685641143e-05, "loss": 0.4776, "step": 10286 }, { "epoch": 0.3160077412220072, "grad_norm": 0.3180637061595917, "learning_rate": 1.881264016579068e-05, "loss": 0.6006, "step": 10287 }, { "epoch": 0.3160384603569564, "grad_norm": 0.3347220718860626, "learning_rate": 1.881241174688452e-05, "loss": 0.5869, "step": 10288 }, { "epoch": 0.3160691794919055, "grad_norm": 0.3989174962043762, "learning_rate": 1.8812183307396364e-05, "loss": 0.6059, "step": 10289 }, { "epoch": 0.31609989862685467, "grad_norm": 0.345860093832016, "learning_rate": 1.8811954847326732e-05, "loss": 0.6257, "step": 10290 }, { "epoch": 0.31613061776180384, "grad_norm": 0.45442742109298706, "learning_rate": 1.8811726366676168e-05, "loss": 0.5705, "step": 10291 }, { "epoch": 0.31616133689675296, "grad_norm": 0.43948447704315186, "learning_rate": 1.88114978654452e-05, "loss": 0.6268, "step": 10292 }, { "epoch": 0.31619205603170214, "grad_norm": 0.37150612473487854, "learning_rate": 1.881126934363436e-05, "loss": 0.5584, "step": 10293 }, { "epoch": 0.3162227751666513, "grad_norm": 0.32819297909736633, "learning_rate": 1.8811040801244192e-05, "loss": 0.5496, "step": 10294 }, { "epoch": 0.3162534943016005, "grad_norm": 0.415400892496109, "learning_rate": 1.881081223827522e-05, "loss": 0.6886, "step": 10295 }, { "epoch": 0.3162842134365496, "grad_norm": 0.32257699966430664, "learning_rate": 1.881058365472798e-05, "loss": 0.5809, "step": 10296 }, { "epoch": 0.3163149325714988, "grad_norm": 0.3207363486289978, "learning_rate": 1.8810355050603008e-05, "loss": 0.4824, "step": 10297 }, { "epoch": 0.31634565170644796, "grad_norm": 0.35319289565086365, "learning_rate": 1.8810126425900833e-05, "loss": 0.6128, "step": 10298 }, { "epoch": 0.3163763708413971, "grad_norm": 0.35282081365585327, "learning_rate": 1.8809897780621996e-05, "loss": 0.5539, "step": 10299 }, { "epoch": 0.31640708997634626, "grad_norm": 0.3254168629646301, "learning_rate": 1.8809669114767027e-05, "loss": 0.5897, "step": 10300 }, { "epoch": 0.31643780911129543, "grad_norm": 0.4251907765865326, "learning_rate": 1.8809440428336456e-05, "loss": 0.6885, "step": 10301 }, { "epoch": 0.3164685282462446, "grad_norm": 0.39222365617752075, "learning_rate": 1.8809211721330828e-05, "loss": 0.5833, "step": 10302 }, { "epoch": 0.3164992473811937, "grad_norm": 0.3966994881629944, "learning_rate": 1.8808982993750668e-05, "loss": 0.6116, "step": 10303 }, { "epoch": 0.3165299665161429, "grad_norm": 0.4037255346775055, "learning_rate": 1.880875424559651e-05, "loss": 0.5746, "step": 10304 }, { "epoch": 0.3165606856510921, "grad_norm": 0.4391864538192749, "learning_rate": 1.8808525476868897e-05, "loss": 0.6623, "step": 10305 }, { "epoch": 0.31659140478604125, "grad_norm": 0.30929034948349, "learning_rate": 1.8808296687568356e-05, "loss": 0.6098, "step": 10306 }, { "epoch": 0.3166221239209904, "grad_norm": 0.35834476351737976, "learning_rate": 1.8808067877695418e-05, "loss": 0.6192, "step": 10307 }, { "epoch": 0.31665284305593955, "grad_norm": 0.37792113423347473, "learning_rate": 1.8807839047250628e-05, "loss": 0.6472, "step": 10308 }, { "epoch": 0.3166835621908887, "grad_norm": 0.3793802261352539, "learning_rate": 1.8807610196234515e-05, "loss": 0.5165, "step": 10309 }, { "epoch": 0.31671428132583784, "grad_norm": 0.40697750449180603, "learning_rate": 1.880738132464761e-05, "loss": 0.7193, "step": 10310 }, { "epoch": 0.316745000460787, "grad_norm": 0.35743093490600586, "learning_rate": 1.880715243249045e-05, "loss": 0.6725, "step": 10311 }, { "epoch": 0.3167757195957362, "grad_norm": 0.35486525297164917, "learning_rate": 1.8806923519763575e-05, "loss": 0.4943, "step": 10312 }, { "epoch": 0.31680643873068537, "grad_norm": 0.31848639249801636, "learning_rate": 1.880669458646751e-05, "loss": 0.5546, "step": 10313 }, { "epoch": 0.3168371578656345, "grad_norm": 0.37886863946914673, "learning_rate": 1.88064656326028e-05, "loss": 0.5135, "step": 10314 }, { "epoch": 0.31686787700058366, "grad_norm": 0.35622113943099976, "learning_rate": 1.8806236658169972e-05, "loss": 0.5169, "step": 10315 }, { "epoch": 0.31689859613553284, "grad_norm": 0.3233979046344757, "learning_rate": 1.880600766316956e-05, "loss": 0.5308, "step": 10316 }, { "epoch": 0.31692931527048196, "grad_norm": 0.3506705164909363, "learning_rate": 1.8805778647602108e-05, "loss": 0.5282, "step": 10317 }, { "epoch": 0.31696003440543113, "grad_norm": 0.3626779019832611, "learning_rate": 1.880554961146814e-05, "loss": 0.5135, "step": 10318 }, { "epoch": 0.3169907535403803, "grad_norm": 0.34216010570526123, "learning_rate": 1.88053205547682e-05, "loss": 0.5902, "step": 10319 }, { "epoch": 0.3170214726753295, "grad_norm": 0.3438121974468231, "learning_rate": 1.8805091477502816e-05, "loss": 0.5476, "step": 10320 }, { "epoch": 0.3170521918102786, "grad_norm": 0.3156261742115021, "learning_rate": 1.8804862379672527e-05, "loss": 0.5501, "step": 10321 }, { "epoch": 0.3170829109452278, "grad_norm": 0.3951742649078369, "learning_rate": 1.8804633261277866e-05, "loss": 0.5198, "step": 10322 }, { "epoch": 0.31711363008017696, "grad_norm": 0.38282960653305054, "learning_rate": 1.8804404122319367e-05, "loss": 0.5729, "step": 10323 }, { "epoch": 0.3171443492151261, "grad_norm": 0.4546809494495392, "learning_rate": 1.880417496279757e-05, "loss": 0.599, "step": 10324 }, { "epoch": 0.31717506835007525, "grad_norm": 0.3678719103336334, "learning_rate": 1.880394578271301e-05, "loss": 0.6741, "step": 10325 }, { "epoch": 0.3172057874850244, "grad_norm": 0.3521033227443695, "learning_rate": 1.8803716582066216e-05, "loss": 0.6208, "step": 10326 }, { "epoch": 0.3172365066199736, "grad_norm": 0.3485983908176422, "learning_rate": 1.880348736085773e-05, "loss": 0.5446, "step": 10327 }, { "epoch": 0.3172672257549227, "grad_norm": 0.3643217086791992, "learning_rate": 1.8803258119088084e-05, "loss": 0.6113, "step": 10328 }, { "epoch": 0.3172979448898719, "grad_norm": 0.33563220500946045, "learning_rate": 1.8803028856757813e-05, "loss": 0.6112, "step": 10329 }, { "epoch": 0.31732866402482107, "grad_norm": 0.3731679320335388, "learning_rate": 1.880279957386745e-05, "loss": 0.6095, "step": 10330 }, { "epoch": 0.31735938315977025, "grad_norm": 0.38268938660621643, "learning_rate": 1.8802570270417538e-05, "loss": 0.5826, "step": 10331 }, { "epoch": 0.31739010229471937, "grad_norm": 0.33716657757759094, "learning_rate": 1.880234094640861e-05, "loss": 0.5908, "step": 10332 }, { "epoch": 0.31742082142966854, "grad_norm": 0.35238581895828247, "learning_rate": 1.8802111601841196e-05, "loss": 0.6451, "step": 10333 }, { "epoch": 0.3174515405646177, "grad_norm": 0.3362695574760437, "learning_rate": 1.8801882236715837e-05, "loss": 0.5729, "step": 10334 }, { "epoch": 0.31748225969956684, "grad_norm": 0.4412815272808075, "learning_rate": 1.8801652851033066e-05, "loss": 0.5874, "step": 10335 }, { "epoch": 0.317512978834516, "grad_norm": 0.35605287551879883, "learning_rate": 1.8801423444793422e-05, "loss": 0.6101, "step": 10336 }, { "epoch": 0.3175436979694652, "grad_norm": 0.34616634249687195, "learning_rate": 1.8801194017997437e-05, "loss": 0.5173, "step": 10337 }, { "epoch": 0.31757441710441436, "grad_norm": 0.37092649936676025, "learning_rate": 1.880096457064565e-05, "loss": 0.5639, "step": 10338 }, { "epoch": 0.3176051362393635, "grad_norm": 0.4190486669540405, "learning_rate": 1.8800735102738595e-05, "loss": 0.7096, "step": 10339 }, { "epoch": 0.31763585537431266, "grad_norm": 0.3465522825717926, "learning_rate": 1.880050561427681e-05, "loss": 0.5528, "step": 10340 }, { "epoch": 0.31766657450926183, "grad_norm": 0.3458513617515564, "learning_rate": 1.880027610526083e-05, "loss": 0.6736, "step": 10341 }, { "epoch": 0.31769729364421095, "grad_norm": 0.33456161618232727, "learning_rate": 1.8800046575691186e-05, "loss": 0.5333, "step": 10342 }, { "epoch": 0.31772801277916013, "grad_norm": 0.33494603633880615, "learning_rate": 1.879981702556842e-05, "loss": 0.6157, "step": 10343 }, { "epoch": 0.3177587319141093, "grad_norm": 0.47835129499435425, "learning_rate": 1.879958745489307e-05, "loss": 0.6236, "step": 10344 }, { "epoch": 0.3177894510490585, "grad_norm": 0.3685917854309082, "learning_rate": 1.879935786366567e-05, "loss": 0.6129, "step": 10345 }, { "epoch": 0.3178201701840076, "grad_norm": 0.37018319964408875, "learning_rate": 1.879912825188675e-05, "loss": 0.6311, "step": 10346 }, { "epoch": 0.3178508893189568, "grad_norm": 0.651077389717102, "learning_rate": 1.8798898619556857e-05, "loss": 0.5351, "step": 10347 }, { "epoch": 0.31788160845390595, "grad_norm": 0.3516663610935211, "learning_rate": 1.8798668966676516e-05, "loss": 0.6577, "step": 10348 }, { "epoch": 0.31791232758885507, "grad_norm": 0.352861613035202, "learning_rate": 1.8798439293246272e-05, "loss": 0.4871, "step": 10349 }, { "epoch": 0.31794304672380425, "grad_norm": 0.3361690640449524, "learning_rate": 1.879820959926666e-05, "loss": 0.5524, "step": 10350 }, { "epoch": 0.3179737658587534, "grad_norm": 0.3244180977344513, "learning_rate": 1.879797988473821e-05, "loss": 0.5395, "step": 10351 }, { "epoch": 0.3180044849937026, "grad_norm": 0.36683109402656555, "learning_rate": 1.879775014966147e-05, "loss": 0.5757, "step": 10352 }, { "epoch": 0.3180352041286517, "grad_norm": 0.35545703768730164, "learning_rate": 1.8797520394036965e-05, "loss": 0.541, "step": 10353 }, { "epoch": 0.3180659232636009, "grad_norm": 0.33326125144958496, "learning_rate": 1.879729061786524e-05, "loss": 0.5288, "step": 10354 }, { "epoch": 0.31809664239855007, "grad_norm": 0.4097500145435333, "learning_rate": 1.8797060821146825e-05, "loss": 0.5245, "step": 10355 }, { "epoch": 0.31812736153349924, "grad_norm": 0.41521528363227844, "learning_rate": 1.8796831003882265e-05, "loss": 0.5278, "step": 10356 }, { "epoch": 0.31815808066844836, "grad_norm": 0.3441811501979828, "learning_rate": 1.8796601166072087e-05, "loss": 0.5887, "step": 10357 }, { "epoch": 0.31818879980339754, "grad_norm": 0.3411293029785156, "learning_rate": 1.8796371307716837e-05, "loss": 0.5671, "step": 10358 }, { "epoch": 0.3182195189383467, "grad_norm": 0.5855925679206848, "learning_rate": 1.8796141428817043e-05, "loss": 0.5489, "step": 10359 }, { "epoch": 0.31825023807329583, "grad_norm": 0.530379056930542, "learning_rate": 1.8795911529373248e-05, "loss": 0.6221, "step": 10360 }, { "epoch": 0.318280957208245, "grad_norm": 0.4495023190975189, "learning_rate": 1.879568160938599e-05, "loss": 0.6072, "step": 10361 }, { "epoch": 0.3183116763431942, "grad_norm": 0.37415197491645813, "learning_rate": 1.87954516688558e-05, "loss": 0.595, "step": 10362 }, { "epoch": 0.31834239547814336, "grad_norm": 0.3902260959148407, "learning_rate": 1.879522170778322e-05, "loss": 0.5869, "step": 10363 }, { "epoch": 0.3183731146130925, "grad_norm": 0.3843447268009186, "learning_rate": 1.8794991726168783e-05, "loss": 0.6294, "step": 10364 }, { "epoch": 0.31840383374804165, "grad_norm": 0.37625810503959656, "learning_rate": 1.879476172401303e-05, "loss": 0.6159, "step": 10365 }, { "epoch": 0.31843455288299083, "grad_norm": 0.38067394495010376, "learning_rate": 1.87945317013165e-05, "loss": 0.6003, "step": 10366 }, { "epoch": 0.31846527201793995, "grad_norm": 0.3400162160396576, "learning_rate": 1.8794301658079724e-05, "loss": 0.5277, "step": 10367 }, { "epoch": 0.3184959911528891, "grad_norm": 0.323940247297287, "learning_rate": 1.8794071594303243e-05, "loss": 0.5434, "step": 10368 }, { "epoch": 0.3185267102878383, "grad_norm": 0.3974185883998871, "learning_rate": 1.8793841509987592e-05, "loss": 0.6006, "step": 10369 }, { "epoch": 0.3185574294227875, "grad_norm": 0.3514109253883362, "learning_rate": 1.8793611405133313e-05, "loss": 0.4918, "step": 10370 }, { "epoch": 0.3185881485577366, "grad_norm": 0.3499881327152252, "learning_rate": 1.879338127974094e-05, "loss": 0.6489, "step": 10371 }, { "epoch": 0.31861886769268577, "grad_norm": 0.31905511021614075, "learning_rate": 1.8793151133811007e-05, "loss": 0.6239, "step": 10372 }, { "epoch": 0.31864958682763495, "grad_norm": 0.38234943151474, "learning_rate": 1.8792920967344056e-05, "loss": 0.6301, "step": 10373 }, { "epoch": 0.3186803059625841, "grad_norm": 0.3488442599773407, "learning_rate": 1.879269078034063e-05, "loss": 0.5856, "step": 10374 }, { "epoch": 0.31871102509753324, "grad_norm": 0.4142951965332031, "learning_rate": 1.8792460572801254e-05, "loss": 0.56, "step": 10375 }, { "epoch": 0.3187417442324824, "grad_norm": 0.33907902240753174, "learning_rate": 1.8792230344726477e-05, "loss": 0.5932, "step": 10376 }, { "epoch": 0.3187724633674316, "grad_norm": 0.3936256468296051, "learning_rate": 1.8792000096116825e-05, "loss": 0.6562, "step": 10377 }, { "epoch": 0.3188031825023807, "grad_norm": 0.34662601351737976, "learning_rate": 1.8791769826972853e-05, "loss": 0.5714, "step": 10378 }, { "epoch": 0.3188339016373299, "grad_norm": 0.36017748713493347, "learning_rate": 1.879153953729508e-05, "loss": 0.5935, "step": 10379 }, { "epoch": 0.31886462077227906, "grad_norm": 0.47439631819725037, "learning_rate": 1.8791309227084057e-05, "loss": 0.6551, "step": 10380 }, { "epoch": 0.31889533990722824, "grad_norm": 0.48930343985557556, "learning_rate": 1.8791078896340314e-05, "loss": 0.6614, "step": 10381 }, { "epoch": 0.31892605904217736, "grad_norm": 0.3326151371002197, "learning_rate": 1.8790848545064395e-05, "loss": 0.5371, "step": 10382 }, { "epoch": 0.31895677817712653, "grad_norm": 0.4107203781604767, "learning_rate": 1.8790618173256834e-05, "loss": 0.6163, "step": 10383 }, { "epoch": 0.3189874973120757, "grad_norm": 0.39710697531700134, "learning_rate": 1.879038778091817e-05, "loss": 0.6236, "step": 10384 }, { "epoch": 0.3190182164470248, "grad_norm": 0.35794708132743835, "learning_rate": 1.8790157368048945e-05, "loss": 0.5539, "step": 10385 }, { "epoch": 0.319048935581974, "grad_norm": 0.3403283953666687, "learning_rate": 1.878992693464969e-05, "loss": 0.5901, "step": 10386 }, { "epoch": 0.3190796547169232, "grad_norm": 0.3751843273639679, "learning_rate": 1.878969648072095e-05, "loss": 0.6975, "step": 10387 }, { "epoch": 0.31911037385187235, "grad_norm": 0.3646900951862335, "learning_rate": 1.8789466006263254e-05, "loss": 0.5511, "step": 10388 }, { "epoch": 0.3191410929868215, "grad_norm": 0.5699176788330078, "learning_rate": 1.878923551127715e-05, "loss": 0.5605, "step": 10389 }, { "epoch": 0.31917181212177065, "grad_norm": 0.4766837954521179, "learning_rate": 1.878900499576317e-05, "loss": 0.6373, "step": 10390 }, { "epoch": 0.3192025312567198, "grad_norm": 0.3415882885456085, "learning_rate": 1.878877445972186e-05, "loss": 0.5736, "step": 10391 }, { "epoch": 0.31923325039166894, "grad_norm": 0.48607170581817627, "learning_rate": 1.878854390315375e-05, "loss": 0.7226, "step": 10392 }, { "epoch": 0.3192639695266181, "grad_norm": 0.3539358377456665, "learning_rate": 1.8788313326059383e-05, "loss": 0.5621, "step": 10393 }, { "epoch": 0.3192946886615673, "grad_norm": 0.32131141424179077, "learning_rate": 1.8788082728439296e-05, "loss": 0.6331, "step": 10394 }, { "epoch": 0.31932540779651647, "grad_norm": 0.34996354579925537, "learning_rate": 1.878785211029403e-05, "loss": 0.5838, "step": 10395 }, { "epoch": 0.3193561269314656, "grad_norm": 0.3839188814163208, "learning_rate": 1.878762147162412e-05, "loss": 0.5793, "step": 10396 }, { "epoch": 0.31938684606641476, "grad_norm": 0.39505118131637573, "learning_rate": 1.8787390812430105e-05, "loss": 0.5602, "step": 10397 }, { "epoch": 0.31941756520136394, "grad_norm": 0.3634696304798126, "learning_rate": 1.878716013271253e-05, "loss": 0.519, "step": 10398 }, { "epoch": 0.3194482843363131, "grad_norm": 0.3570019602775574, "learning_rate": 1.878692943247192e-05, "loss": 0.635, "step": 10399 }, { "epoch": 0.31947900347126224, "grad_norm": 0.376730352640152, "learning_rate": 1.878669871170883e-05, "loss": 0.5809, "step": 10400 }, { "epoch": 0.3195097226062114, "grad_norm": 0.3459199368953705, "learning_rate": 1.878646797042379e-05, "loss": 0.5691, "step": 10401 }, { "epoch": 0.3195404417411606, "grad_norm": 0.3691335916519165, "learning_rate": 1.8786237208617338e-05, "loss": 0.5362, "step": 10402 }, { "epoch": 0.3195711608761097, "grad_norm": 0.3914206326007843, "learning_rate": 1.8786006426290018e-05, "loss": 0.6286, "step": 10403 }, { "epoch": 0.3196018800110589, "grad_norm": 0.3577726483345032, "learning_rate": 1.8785775623442365e-05, "loss": 0.5687, "step": 10404 }, { "epoch": 0.31963259914600806, "grad_norm": 0.42798563838005066, "learning_rate": 1.878554480007492e-05, "loss": 0.6102, "step": 10405 }, { "epoch": 0.31966331828095723, "grad_norm": 0.3485327363014221, "learning_rate": 1.8785313956188223e-05, "loss": 0.5635, "step": 10406 }, { "epoch": 0.31969403741590635, "grad_norm": 0.3698714077472687, "learning_rate": 1.878508309178281e-05, "loss": 0.5831, "step": 10407 }, { "epoch": 0.3197247565508555, "grad_norm": 0.37608012557029724, "learning_rate": 1.878485220685922e-05, "loss": 0.4726, "step": 10408 }, { "epoch": 0.3197554756858047, "grad_norm": 0.40061071515083313, "learning_rate": 1.8784621301418e-05, "loss": 0.5658, "step": 10409 }, { "epoch": 0.3197861948207538, "grad_norm": 0.35453954339027405, "learning_rate": 1.8784390375459675e-05, "loss": 0.5519, "step": 10410 }, { "epoch": 0.319816913955703, "grad_norm": 0.4018420875072479, "learning_rate": 1.87841594289848e-05, "loss": 0.5603, "step": 10411 }, { "epoch": 0.3198476330906522, "grad_norm": 0.34728437662124634, "learning_rate": 1.8783928461993906e-05, "loss": 0.6214, "step": 10412 }, { "epoch": 0.31987835222560135, "grad_norm": 0.3605533838272095, "learning_rate": 1.878369747448753e-05, "loss": 0.5732, "step": 10413 }, { "epoch": 0.31990907136055047, "grad_norm": 0.3274520933628082, "learning_rate": 1.878346646646622e-05, "loss": 0.6246, "step": 10414 }, { "epoch": 0.31993979049549964, "grad_norm": 0.5785011649131775, "learning_rate": 1.8783235437930507e-05, "loss": 0.659, "step": 10415 }, { "epoch": 0.3199705096304488, "grad_norm": 0.34852394461631775, "learning_rate": 1.8783004388880934e-05, "loss": 0.5732, "step": 10416 }, { "epoch": 0.320001228765398, "grad_norm": 0.3095927834510803, "learning_rate": 1.8782773319318043e-05, "loss": 0.5369, "step": 10417 }, { "epoch": 0.3200319479003471, "grad_norm": 0.3479742705821991, "learning_rate": 1.8782542229242372e-05, "loss": 0.5773, "step": 10418 }, { "epoch": 0.3200626670352963, "grad_norm": 0.3420276641845703, "learning_rate": 1.8782311118654457e-05, "loss": 0.5852, "step": 10419 }, { "epoch": 0.32009338617024546, "grad_norm": 0.3882110118865967, "learning_rate": 1.8782079987554843e-05, "loss": 0.5576, "step": 10420 }, { "epoch": 0.3201241053051946, "grad_norm": 0.34206265211105347, "learning_rate": 1.8781848835944068e-05, "loss": 0.5734, "step": 10421 }, { "epoch": 0.32015482444014376, "grad_norm": 0.35433822870254517, "learning_rate": 1.878161766382267e-05, "loss": 0.523, "step": 10422 }, { "epoch": 0.32018554357509293, "grad_norm": 0.3119700253009796, "learning_rate": 1.8781386471191192e-05, "loss": 0.5769, "step": 10423 }, { "epoch": 0.3202162627100421, "grad_norm": 0.3361133635044098, "learning_rate": 1.8781155258050176e-05, "loss": 0.5952, "step": 10424 }, { "epoch": 0.32024698184499123, "grad_norm": 0.34520527720451355, "learning_rate": 1.8780924024400154e-05, "loss": 0.6, "step": 10425 }, { "epoch": 0.3202777009799404, "grad_norm": 0.33207187056541443, "learning_rate": 1.878069277024167e-05, "loss": 0.608, "step": 10426 }, { "epoch": 0.3203084201148896, "grad_norm": 0.3994479775428772, "learning_rate": 1.8780461495575268e-05, "loss": 0.5091, "step": 10427 }, { "epoch": 0.3203391392498387, "grad_norm": 0.4845339357852936, "learning_rate": 1.8780230200401482e-05, "loss": 0.5907, "step": 10428 }, { "epoch": 0.3203698583847879, "grad_norm": 0.34427645802497864, "learning_rate": 1.877999888472086e-05, "loss": 0.5725, "step": 10429 }, { "epoch": 0.32040057751973705, "grad_norm": 0.3655513525009155, "learning_rate": 1.877976754853393e-05, "loss": 0.5663, "step": 10430 }, { "epoch": 0.3204312966546862, "grad_norm": 0.3395141363143921, "learning_rate": 1.8779536191841247e-05, "loss": 0.5686, "step": 10431 }, { "epoch": 0.32046201578963535, "grad_norm": 0.3447889983654022, "learning_rate": 1.8779304814643338e-05, "loss": 0.5495, "step": 10432 }, { "epoch": 0.3204927349245845, "grad_norm": 0.32104000449180603, "learning_rate": 1.8779073416940758e-05, "loss": 0.5361, "step": 10433 }, { "epoch": 0.3205234540595337, "grad_norm": 0.3978859782218933, "learning_rate": 1.8778841998734033e-05, "loss": 0.5736, "step": 10434 }, { "epoch": 0.3205541731944828, "grad_norm": 0.3821896016597748, "learning_rate": 1.8778610560023708e-05, "loss": 0.6077, "step": 10435 }, { "epoch": 0.320584892329432, "grad_norm": 0.37851133942604065, "learning_rate": 1.8778379100810326e-05, "loss": 0.5903, "step": 10436 }, { "epoch": 0.32061561146438117, "grad_norm": 0.36211201548576355, "learning_rate": 1.8778147621094428e-05, "loss": 0.6628, "step": 10437 }, { "epoch": 0.32064633059933034, "grad_norm": 0.6063089370727539, "learning_rate": 1.877791612087655e-05, "loss": 0.4477, "step": 10438 }, { "epoch": 0.32067704973427946, "grad_norm": 0.37523066997528076, "learning_rate": 1.877768460015724e-05, "loss": 0.5413, "step": 10439 }, { "epoch": 0.32070776886922864, "grad_norm": 0.3735823333263397, "learning_rate": 1.8777453058937033e-05, "loss": 0.5741, "step": 10440 }, { "epoch": 0.3207384880041778, "grad_norm": 0.3618384301662445, "learning_rate": 1.8777221497216473e-05, "loss": 0.5269, "step": 10441 }, { "epoch": 0.320769207139127, "grad_norm": 0.39149394631385803, "learning_rate": 1.87769899149961e-05, "loss": 0.5705, "step": 10442 }, { "epoch": 0.3207999262740761, "grad_norm": 0.3562537431716919, "learning_rate": 1.877675831227645e-05, "loss": 0.6048, "step": 10443 }, { "epoch": 0.3208306454090253, "grad_norm": 0.39754435420036316, "learning_rate": 1.8776526689058067e-05, "loss": 0.5982, "step": 10444 }, { "epoch": 0.32086136454397446, "grad_norm": 0.3652607798576355, "learning_rate": 1.8776295045341496e-05, "loss": 0.4592, "step": 10445 }, { "epoch": 0.3208920836789236, "grad_norm": 0.4440177083015442, "learning_rate": 1.8776063381127276e-05, "loss": 0.6217, "step": 10446 }, { "epoch": 0.32092280281387275, "grad_norm": 0.3403913974761963, "learning_rate": 1.8775831696415946e-05, "loss": 0.6246, "step": 10447 }, { "epoch": 0.32095352194882193, "grad_norm": 0.34125635027885437, "learning_rate": 1.8775599991208046e-05, "loss": 0.4942, "step": 10448 }, { "epoch": 0.3209842410837711, "grad_norm": 0.332894504070282, "learning_rate": 1.8775368265504123e-05, "loss": 0.5073, "step": 10449 }, { "epoch": 0.3210149602187202, "grad_norm": 0.34188079833984375, "learning_rate": 1.8775136519304713e-05, "loss": 0.5598, "step": 10450 }, { "epoch": 0.3210456793536694, "grad_norm": 0.30355602502822876, "learning_rate": 1.877490475261036e-05, "loss": 0.5501, "step": 10451 }, { "epoch": 0.3210763984886186, "grad_norm": 0.3346308767795563, "learning_rate": 1.8774672965421602e-05, "loss": 0.5969, "step": 10452 }, { "epoch": 0.3211071176235677, "grad_norm": 0.4474734365940094, "learning_rate": 1.877444115773898e-05, "loss": 0.5647, "step": 10453 }, { "epoch": 0.32113783675851687, "grad_norm": 0.36739033460617065, "learning_rate": 1.8774209329563042e-05, "loss": 0.5937, "step": 10454 }, { "epoch": 0.32116855589346605, "grad_norm": 0.3273799419403076, "learning_rate": 1.8773977480894327e-05, "loss": 0.6075, "step": 10455 }, { "epoch": 0.3211992750284152, "grad_norm": 0.40559348464012146, "learning_rate": 1.877374561173337e-05, "loss": 0.5578, "step": 10456 }, { "epoch": 0.32122999416336434, "grad_norm": 0.33529654145240784, "learning_rate": 1.8773513722080722e-05, "loss": 0.5062, "step": 10457 }, { "epoch": 0.3212607132983135, "grad_norm": 0.33447501063346863, "learning_rate": 1.8773281811936916e-05, "loss": 0.5716, "step": 10458 }, { "epoch": 0.3212914324332627, "grad_norm": 0.38884878158569336, "learning_rate": 1.87730498813025e-05, "loss": 0.5142, "step": 10459 }, { "epoch": 0.32132215156821187, "grad_norm": 0.39758390188217163, "learning_rate": 1.877281793017801e-05, "loss": 0.626, "step": 10460 }, { "epoch": 0.321352870703161, "grad_norm": 0.3643445670604706, "learning_rate": 1.8772585958563994e-05, "loss": 0.5609, "step": 10461 }, { "epoch": 0.32138358983811016, "grad_norm": 0.37395721673965454, "learning_rate": 1.877235396646099e-05, "loss": 0.5845, "step": 10462 }, { "epoch": 0.32141430897305934, "grad_norm": 0.34857746958732605, "learning_rate": 1.8772121953869542e-05, "loss": 0.6616, "step": 10463 }, { "epoch": 0.32144502810800846, "grad_norm": 0.3876510262489319, "learning_rate": 1.877188992079019e-05, "loss": 0.5812, "step": 10464 }, { "epoch": 0.32147574724295763, "grad_norm": 0.36043038964271545, "learning_rate": 1.8771657867223476e-05, "loss": 0.6066, "step": 10465 }, { "epoch": 0.3215064663779068, "grad_norm": 0.3366979956626892, "learning_rate": 1.8771425793169942e-05, "loss": 0.5796, "step": 10466 }, { "epoch": 0.321537185512856, "grad_norm": 0.33658766746520996, "learning_rate": 1.8771193698630134e-05, "loss": 0.5692, "step": 10467 }, { "epoch": 0.3215679046478051, "grad_norm": 0.3641218841075897, "learning_rate": 1.8770961583604586e-05, "loss": 0.6263, "step": 10468 }, { "epoch": 0.3215986237827543, "grad_norm": 0.33797866106033325, "learning_rate": 1.8770729448093845e-05, "loss": 0.6145, "step": 10469 }, { "epoch": 0.32162934291770345, "grad_norm": 0.38558706641197205, "learning_rate": 1.8770497292098456e-05, "loss": 0.6651, "step": 10470 }, { "epoch": 0.3216600620526526, "grad_norm": 0.38323575258255005, "learning_rate": 1.8770265115618957e-05, "loss": 0.6075, "step": 10471 }, { "epoch": 0.32169078118760175, "grad_norm": 0.3677614629268646, "learning_rate": 1.8770032918655887e-05, "loss": 0.6531, "step": 10472 }, { "epoch": 0.3217215003225509, "grad_norm": 0.3235359489917755, "learning_rate": 1.8769800701209796e-05, "loss": 0.5559, "step": 10473 }, { "epoch": 0.3217522194575001, "grad_norm": 0.3164503872394562, "learning_rate": 1.876956846328122e-05, "loss": 0.4915, "step": 10474 }, { "epoch": 0.3217829385924492, "grad_norm": 0.37993302941322327, "learning_rate": 1.876933620487071e-05, "loss": 0.6184, "step": 10475 }, { "epoch": 0.3218136577273984, "grad_norm": 0.4029066264629364, "learning_rate": 1.87691039259788e-05, "loss": 0.663, "step": 10476 }, { "epoch": 0.32184437686234757, "grad_norm": 0.3376162350177765, "learning_rate": 1.8768871626606036e-05, "loss": 0.5813, "step": 10477 }, { "epoch": 0.3218750959972967, "grad_norm": 0.35696524381637573, "learning_rate": 1.8768639306752957e-05, "loss": 0.5178, "step": 10478 }, { "epoch": 0.32190581513224586, "grad_norm": 0.34637251496315, "learning_rate": 1.8768406966420115e-05, "loss": 0.563, "step": 10479 }, { "epoch": 0.32193653426719504, "grad_norm": 0.3301503360271454, "learning_rate": 1.8768174605608042e-05, "loss": 0.5603, "step": 10480 }, { "epoch": 0.3219672534021442, "grad_norm": 0.31788602471351624, "learning_rate": 1.876794222431728e-05, "loss": 0.4902, "step": 10481 }, { "epoch": 0.32199797253709334, "grad_norm": 0.35312265157699585, "learning_rate": 1.8767709822548387e-05, "loss": 0.633, "step": 10482 }, { "epoch": 0.3220286916720425, "grad_norm": 0.4057193994522095, "learning_rate": 1.876747740030189e-05, "loss": 0.5948, "step": 10483 }, { "epoch": 0.3220594108069917, "grad_norm": 0.3522506654262543, "learning_rate": 1.8767244957578335e-05, "loss": 0.6292, "step": 10484 }, { "epoch": 0.32209012994194086, "grad_norm": 0.36108675599098206, "learning_rate": 1.876701249437827e-05, "loss": 0.5531, "step": 10485 }, { "epoch": 0.32212084907689, "grad_norm": 0.3845078647136688, "learning_rate": 1.8766780010702234e-05, "loss": 0.5234, "step": 10486 }, { "epoch": 0.32215156821183916, "grad_norm": 0.3433791697025299, "learning_rate": 1.876654750655077e-05, "loss": 0.5271, "step": 10487 }, { "epoch": 0.32218228734678833, "grad_norm": 0.3829619884490967, "learning_rate": 1.8766314981924423e-05, "loss": 0.5792, "step": 10488 }, { "epoch": 0.32221300648173745, "grad_norm": 0.34608232975006104, "learning_rate": 1.8766082436823734e-05, "loss": 0.6047, "step": 10489 }, { "epoch": 0.3222437256166866, "grad_norm": 0.307329386472702, "learning_rate": 1.876584987124925e-05, "loss": 0.5365, "step": 10490 }, { "epoch": 0.3222744447516358, "grad_norm": 0.3400796949863434, "learning_rate": 1.876561728520151e-05, "loss": 0.55, "step": 10491 }, { "epoch": 0.322305163886585, "grad_norm": 0.3400638699531555, "learning_rate": 1.8765384678681057e-05, "loss": 0.5602, "step": 10492 }, { "epoch": 0.3223358830215341, "grad_norm": 0.3324618637561798, "learning_rate": 1.876515205168844e-05, "loss": 0.5139, "step": 10493 }, { "epoch": 0.3223666021564833, "grad_norm": 0.3323565125465393, "learning_rate": 1.8764919404224196e-05, "loss": 0.6073, "step": 10494 }, { "epoch": 0.32239732129143245, "grad_norm": 0.3662666380405426, "learning_rate": 1.876468673628887e-05, "loss": 0.6019, "step": 10495 }, { "epoch": 0.32242804042638157, "grad_norm": 0.3799459934234619, "learning_rate": 1.8764454047883007e-05, "loss": 0.6777, "step": 10496 }, { "epoch": 0.32245875956133074, "grad_norm": 0.3329426646232605, "learning_rate": 1.876422133900715e-05, "loss": 0.5717, "step": 10497 }, { "epoch": 0.3224894786962799, "grad_norm": 0.3386916518211365, "learning_rate": 1.876398860966184e-05, "loss": 0.6117, "step": 10498 }, { "epoch": 0.3225201978312291, "grad_norm": 0.3396860361099243, "learning_rate": 1.8763755859847622e-05, "loss": 0.6002, "step": 10499 }, { "epoch": 0.3225509169661782, "grad_norm": 0.3209167718887329, "learning_rate": 1.8763523089565044e-05, "loss": 0.6198, "step": 10500 }, { "epoch": 0.3225816361011274, "grad_norm": 0.3563041090965271, "learning_rate": 1.8763290298814643e-05, "loss": 0.5286, "step": 10501 }, { "epoch": 0.32261235523607656, "grad_norm": 0.3484567701816559, "learning_rate": 1.8763057487596965e-05, "loss": 0.6303, "step": 10502 }, { "epoch": 0.3226430743710257, "grad_norm": 0.40856701135635376, "learning_rate": 1.8762824655912558e-05, "loss": 0.6375, "step": 10503 }, { "epoch": 0.32267379350597486, "grad_norm": 0.32248178124427795, "learning_rate": 1.8762591803761955e-05, "loss": 0.538, "step": 10504 }, { "epoch": 0.32270451264092403, "grad_norm": 0.3276817500591278, "learning_rate": 1.876235893114571e-05, "loss": 0.5806, "step": 10505 }, { "epoch": 0.3227352317758732, "grad_norm": 0.33032435178756714, "learning_rate": 1.876212603806437e-05, "loss": 0.6151, "step": 10506 }, { "epoch": 0.32276595091082233, "grad_norm": 0.40004220604896545, "learning_rate": 1.8761893124518463e-05, "loss": 0.5492, "step": 10507 }, { "epoch": 0.3227966700457715, "grad_norm": 0.45349806547164917, "learning_rate": 1.876166019050855e-05, "loss": 0.6354, "step": 10508 }, { "epoch": 0.3228273891807207, "grad_norm": 0.3451940715312958, "learning_rate": 1.8761427236035167e-05, "loss": 0.6564, "step": 10509 }, { "epoch": 0.32285810831566986, "grad_norm": 0.3290732204914093, "learning_rate": 1.8761194261098854e-05, "loss": 0.5813, "step": 10510 }, { "epoch": 0.322888827450619, "grad_norm": 0.34436333179473877, "learning_rate": 1.8760961265700163e-05, "loss": 0.5666, "step": 10511 }, { "epoch": 0.32291954658556815, "grad_norm": 0.3301370441913605, "learning_rate": 1.8760728249839633e-05, "loss": 0.4631, "step": 10512 }, { "epoch": 0.3229502657205173, "grad_norm": 0.3396921157836914, "learning_rate": 1.8760495213517813e-05, "loss": 0.5426, "step": 10513 }, { "epoch": 0.32298098485546645, "grad_norm": 0.3267461359500885, "learning_rate": 1.8760262156735244e-05, "loss": 0.5881, "step": 10514 }, { "epoch": 0.3230117039904156, "grad_norm": 0.3818013072013855, "learning_rate": 1.876002907949247e-05, "loss": 0.5576, "step": 10515 }, { "epoch": 0.3230424231253648, "grad_norm": 0.34604912996292114, "learning_rate": 1.875979598179004e-05, "loss": 0.5429, "step": 10516 }, { "epoch": 0.323073142260314, "grad_norm": 0.3351454436779022, "learning_rate": 1.875956286362849e-05, "loss": 0.5782, "step": 10517 }, { "epoch": 0.3231038613952631, "grad_norm": 0.3306818902492523, "learning_rate": 1.875932972500837e-05, "loss": 0.5823, "step": 10518 }, { "epoch": 0.32313458053021227, "grad_norm": 0.47395914793014526, "learning_rate": 1.8759096565930224e-05, "loss": 0.5885, "step": 10519 }, { "epoch": 0.32316529966516144, "grad_norm": 0.3053765892982483, "learning_rate": 1.8758863386394597e-05, "loss": 0.5549, "step": 10520 }, { "epoch": 0.32319601880011056, "grad_norm": 0.33267444372177124, "learning_rate": 1.875863018640203e-05, "loss": 0.5991, "step": 10521 }, { "epoch": 0.32322673793505974, "grad_norm": 0.338569700717926, "learning_rate": 1.8758396965953075e-05, "loss": 0.5707, "step": 10522 }, { "epoch": 0.3232574570700089, "grad_norm": 0.4039997458457947, "learning_rate": 1.875816372504827e-05, "loss": 0.6456, "step": 10523 }, { "epoch": 0.3232881762049581, "grad_norm": 0.3354153335094452, "learning_rate": 1.8757930463688164e-05, "loss": 0.5035, "step": 10524 }, { "epoch": 0.3233188953399072, "grad_norm": 0.3468908965587616, "learning_rate": 1.87576971818733e-05, "loss": 0.47, "step": 10525 }, { "epoch": 0.3233496144748564, "grad_norm": 0.3235833942890167, "learning_rate": 1.8757463879604217e-05, "loss": 0.5702, "step": 10526 }, { "epoch": 0.32338033360980556, "grad_norm": 0.38212302327156067, "learning_rate": 1.8757230556881468e-05, "loss": 0.6359, "step": 10527 }, { "epoch": 0.32341105274475473, "grad_norm": 0.3509364724159241, "learning_rate": 1.8756997213705598e-05, "loss": 0.6209, "step": 10528 }, { "epoch": 0.32344177187970385, "grad_norm": 0.3443186581134796, "learning_rate": 1.8756763850077145e-05, "loss": 0.7022, "step": 10529 }, { "epoch": 0.32347249101465303, "grad_norm": 0.3335057497024536, "learning_rate": 1.8756530465996664e-05, "loss": 0.5631, "step": 10530 }, { "epoch": 0.3235032101496022, "grad_norm": 0.37644749879837036, "learning_rate": 1.8756297061464692e-05, "loss": 0.5655, "step": 10531 }, { "epoch": 0.3235339292845513, "grad_norm": 0.32338613271713257, "learning_rate": 1.8756063636481777e-05, "loss": 0.5391, "step": 10532 }, { "epoch": 0.3235646484195005, "grad_norm": 0.36534014344215393, "learning_rate": 1.8755830191048464e-05, "loss": 0.5661, "step": 10533 }, { "epoch": 0.3235953675544497, "grad_norm": 0.3516930639743805, "learning_rate": 1.8755596725165295e-05, "loss": 0.5392, "step": 10534 }, { "epoch": 0.32362608668939885, "grad_norm": 0.398168683052063, "learning_rate": 1.8755363238832824e-05, "loss": 0.5855, "step": 10535 }, { "epoch": 0.32365680582434797, "grad_norm": 0.3500240743160248, "learning_rate": 1.8755129732051584e-05, "loss": 0.5494, "step": 10536 }, { "epoch": 0.32368752495929715, "grad_norm": 0.3421843945980072, "learning_rate": 1.8754896204822133e-05, "loss": 0.5977, "step": 10537 }, { "epoch": 0.3237182440942463, "grad_norm": 0.379447340965271, "learning_rate": 1.8754662657145008e-05, "loss": 0.5889, "step": 10538 }, { "epoch": 0.32374896322919544, "grad_norm": 0.38925641775131226, "learning_rate": 1.8754429089020754e-05, "loss": 0.5777, "step": 10539 }, { "epoch": 0.3237796823641446, "grad_norm": 0.353607177734375, "learning_rate": 1.8754195500449925e-05, "loss": 0.6063, "step": 10540 }, { "epoch": 0.3238104014990938, "grad_norm": 0.3793995678424835, "learning_rate": 1.875396189143306e-05, "loss": 0.5756, "step": 10541 }, { "epoch": 0.32384112063404297, "grad_norm": 0.6309292912483215, "learning_rate": 1.8753728261970702e-05, "loss": 0.5096, "step": 10542 }, { "epoch": 0.3238718397689921, "grad_norm": 0.34683921933174133, "learning_rate": 1.8753494612063402e-05, "loss": 0.604, "step": 10543 }, { "epoch": 0.32390255890394126, "grad_norm": 0.32761454582214355, "learning_rate": 1.8753260941711708e-05, "loss": 0.6437, "step": 10544 }, { "epoch": 0.32393327803889044, "grad_norm": 0.3088163435459137, "learning_rate": 1.8753027250916155e-05, "loss": 0.6332, "step": 10545 }, { "epoch": 0.32396399717383956, "grad_norm": 0.34737327694892883, "learning_rate": 1.87527935396773e-05, "loss": 0.5567, "step": 10546 }, { "epoch": 0.32399471630878873, "grad_norm": 0.3143158257007599, "learning_rate": 1.8752559807995684e-05, "loss": 0.5326, "step": 10547 }, { "epoch": 0.3240254354437379, "grad_norm": 0.31621211767196655, "learning_rate": 1.8752326055871853e-05, "loss": 0.5847, "step": 10548 }, { "epoch": 0.3240561545786871, "grad_norm": 0.3131178915500641, "learning_rate": 1.8752092283306352e-05, "loss": 0.5048, "step": 10549 }, { "epoch": 0.3240868737136362, "grad_norm": 0.3822215497493744, "learning_rate": 1.8751858490299727e-05, "loss": 0.5454, "step": 10550 }, { "epoch": 0.3241175928485854, "grad_norm": 0.3671550452709198, "learning_rate": 1.875162467685253e-05, "loss": 0.5033, "step": 10551 }, { "epoch": 0.32414831198353455, "grad_norm": 0.38618919253349304, "learning_rate": 1.87513908429653e-05, "loss": 0.5985, "step": 10552 }, { "epoch": 0.32417903111848373, "grad_norm": 0.3504545986652374, "learning_rate": 1.8751156988638584e-05, "loss": 0.652, "step": 10553 }, { "epoch": 0.32420975025343285, "grad_norm": 0.37287670373916626, "learning_rate": 1.875092311387293e-05, "loss": 0.622, "step": 10554 }, { "epoch": 0.324240469388382, "grad_norm": 0.3176255226135254, "learning_rate": 1.8750689218668884e-05, "loss": 0.5808, "step": 10555 }, { "epoch": 0.3242711885233312, "grad_norm": 0.33974918723106384, "learning_rate": 1.8750455303026992e-05, "loss": 0.6002, "step": 10556 }, { "epoch": 0.3243019076582803, "grad_norm": 0.3801966905593872, "learning_rate": 1.87502213669478e-05, "loss": 0.6014, "step": 10557 }, { "epoch": 0.3243326267932295, "grad_norm": 0.4835933744907379, "learning_rate": 1.8749987410431857e-05, "loss": 0.49, "step": 10558 }, { "epoch": 0.32436334592817867, "grad_norm": 0.3491896688938141, "learning_rate": 1.8749753433479708e-05, "loss": 0.5353, "step": 10559 }, { "epoch": 0.32439406506312785, "grad_norm": 0.34013205766677856, "learning_rate": 1.87495194360919e-05, "loss": 0.5387, "step": 10560 }, { "epoch": 0.32442478419807697, "grad_norm": 0.33870619535446167, "learning_rate": 1.8749285418268975e-05, "loss": 0.5817, "step": 10561 }, { "epoch": 0.32445550333302614, "grad_norm": 0.337568074464798, "learning_rate": 1.874905138001148e-05, "loss": 0.6003, "step": 10562 }, { "epoch": 0.3244862224679753, "grad_norm": 0.3554910123348236, "learning_rate": 1.8748817321319972e-05, "loss": 0.5299, "step": 10563 }, { "epoch": 0.32451694160292444, "grad_norm": 0.4804076850414276, "learning_rate": 1.8748583242194986e-05, "loss": 0.6233, "step": 10564 }, { "epoch": 0.3245476607378736, "grad_norm": 0.395429402589798, "learning_rate": 1.874834914263707e-05, "loss": 0.5888, "step": 10565 }, { "epoch": 0.3245783798728228, "grad_norm": 0.3135973811149597, "learning_rate": 1.8748115022646776e-05, "loss": 0.5589, "step": 10566 }, { "epoch": 0.32460909900777196, "grad_norm": 0.34097617864608765, "learning_rate": 1.874788088222465e-05, "loss": 0.5962, "step": 10567 }, { "epoch": 0.3246398181427211, "grad_norm": 0.31954723596572876, "learning_rate": 1.8747646721371238e-05, "loss": 0.5776, "step": 10568 }, { "epoch": 0.32467053727767026, "grad_norm": 0.32285502552986145, "learning_rate": 1.8747412540087085e-05, "loss": 0.5916, "step": 10569 }, { "epoch": 0.32470125641261943, "grad_norm": 0.4999246299266815, "learning_rate": 1.8747178338372738e-05, "loss": 0.5855, "step": 10570 }, { "epoch": 0.3247319755475686, "grad_norm": 0.3397347927093506, "learning_rate": 1.8746944116228745e-05, "loss": 0.5719, "step": 10571 }, { "epoch": 0.3247626946825177, "grad_norm": 0.3430301249027252, "learning_rate": 1.8746709873655656e-05, "loss": 0.55, "step": 10572 }, { "epoch": 0.3247934138174669, "grad_norm": 0.36109620332717896, "learning_rate": 1.874647561065401e-05, "loss": 0.5967, "step": 10573 }, { "epoch": 0.3248241329524161, "grad_norm": 0.40350741147994995, "learning_rate": 1.874624132722436e-05, "loss": 0.5382, "step": 10574 }, { "epoch": 0.3248548520873652, "grad_norm": 0.3613792061805725, "learning_rate": 1.874600702336726e-05, "loss": 0.6162, "step": 10575 }, { "epoch": 0.3248855712223144, "grad_norm": 0.41203629970550537, "learning_rate": 1.8745772699083243e-05, "loss": 0.626, "step": 10576 }, { "epoch": 0.32491629035726355, "grad_norm": 0.6378921270370483, "learning_rate": 1.8745538354372862e-05, "loss": 0.6058, "step": 10577 }, { "epoch": 0.3249470094922127, "grad_norm": 0.36279022693634033, "learning_rate": 1.874530398923667e-05, "loss": 0.5939, "step": 10578 }, { "epoch": 0.32497772862716184, "grad_norm": 0.36846721172332764, "learning_rate": 1.874506960367521e-05, "loss": 0.6337, "step": 10579 }, { "epoch": 0.325008447762111, "grad_norm": 0.3302701711654663, "learning_rate": 1.8744835197689023e-05, "loss": 0.5302, "step": 10580 }, { "epoch": 0.3250391668970602, "grad_norm": 0.39234647154808044, "learning_rate": 1.874460077127867e-05, "loss": 0.6003, "step": 10581 }, { "epoch": 0.3250698860320093, "grad_norm": 0.3444295823574066, "learning_rate": 1.8744366324444688e-05, "loss": 0.5851, "step": 10582 }, { "epoch": 0.3251006051669585, "grad_norm": 0.3630450665950775, "learning_rate": 1.8744131857187626e-05, "loss": 0.5542, "step": 10583 }, { "epoch": 0.32513132430190766, "grad_norm": 0.32693299651145935, "learning_rate": 1.8743897369508034e-05, "loss": 0.6134, "step": 10584 }, { "epoch": 0.32516204343685684, "grad_norm": 0.34906721115112305, "learning_rate": 1.8743662861406464e-05, "loss": 0.5319, "step": 10585 }, { "epoch": 0.32519276257180596, "grad_norm": 0.5232595205307007, "learning_rate": 1.8743428332883454e-05, "loss": 0.5143, "step": 10586 }, { "epoch": 0.32522348170675514, "grad_norm": 0.3465864062309265, "learning_rate": 1.8743193783939558e-05, "loss": 0.553, "step": 10587 }, { "epoch": 0.3252542008417043, "grad_norm": 0.38344210386276245, "learning_rate": 1.8742959214575317e-05, "loss": 0.6173, "step": 10588 }, { "epoch": 0.32528491997665343, "grad_norm": 0.3510704040527344, "learning_rate": 1.874272462479129e-05, "loss": 0.562, "step": 10589 }, { "epoch": 0.3253156391116026, "grad_norm": 0.33947932720184326, "learning_rate": 1.874249001458802e-05, "loss": 0.6487, "step": 10590 }, { "epoch": 0.3253463582465518, "grad_norm": 0.3177601099014282, "learning_rate": 1.874225538396605e-05, "loss": 0.6862, "step": 10591 }, { "epoch": 0.32537707738150096, "grad_norm": 0.38161271810531616, "learning_rate": 1.8742020732925933e-05, "loss": 0.5578, "step": 10592 }, { "epoch": 0.3254077965164501, "grad_norm": 0.34065550565719604, "learning_rate": 1.8741786061468218e-05, "loss": 0.5831, "step": 10593 }, { "epoch": 0.32543851565139925, "grad_norm": 0.3902231454849243, "learning_rate": 1.8741551369593448e-05, "loss": 0.6531, "step": 10594 }, { "epoch": 0.3254692347863484, "grad_norm": 0.4177478551864624, "learning_rate": 1.874131665730218e-05, "loss": 0.6114, "step": 10595 }, { "epoch": 0.3254999539212976, "grad_norm": 0.3598543405532837, "learning_rate": 1.8741081924594952e-05, "loss": 0.5709, "step": 10596 }, { "epoch": 0.3255306730562467, "grad_norm": 0.3625763952732086, "learning_rate": 1.8740847171472317e-05, "loss": 0.6235, "step": 10597 }, { "epoch": 0.3255613921911959, "grad_norm": 0.34656277298927307, "learning_rate": 1.8740612397934823e-05, "loss": 0.5941, "step": 10598 }, { "epoch": 0.3255921113261451, "grad_norm": 0.33417102694511414, "learning_rate": 1.8740377603983018e-05, "loss": 0.5924, "step": 10599 }, { "epoch": 0.3256228304610942, "grad_norm": 0.36694541573524475, "learning_rate": 1.874014278961745e-05, "loss": 0.6747, "step": 10600 }, { "epoch": 0.32565354959604337, "grad_norm": 0.3144792318344116, "learning_rate": 1.8739907954838668e-05, "loss": 0.5609, "step": 10601 }, { "epoch": 0.32568426873099254, "grad_norm": 0.33663344383239746, "learning_rate": 1.8739673099647225e-05, "loss": 0.5639, "step": 10602 }, { "epoch": 0.3257149878659417, "grad_norm": 0.3847487270832062, "learning_rate": 1.8739438224043663e-05, "loss": 0.549, "step": 10603 }, { "epoch": 0.32574570700089084, "grad_norm": 0.36885571479797363, "learning_rate": 1.8739203328028532e-05, "loss": 0.6118, "step": 10604 }, { "epoch": 0.32577642613584, "grad_norm": 0.357280969619751, "learning_rate": 1.873896841160238e-05, "loss": 0.544, "step": 10605 }, { "epoch": 0.3258071452707892, "grad_norm": 0.3556792438030243, "learning_rate": 1.873873347476576e-05, "loss": 0.5867, "step": 10606 }, { "epoch": 0.3258378644057383, "grad_norm": 0.3673684597015381, "learning_rate": 1.8738498517519216e-05, "loss": 0.6115, "step": 10607 }, { "epoch": 0.3258685835406875, "grad_norm": 0.3714742362499237, "learning_rate": 1.87382635398633e-05, "loss": 0.6725, "step": 10608 }, { "epoch": 0.32589930267563666, "grad_norm": 0.34037017822265625, "learning_rate": 1.8738028541798556e-05, "loss": 0.6613, "step": 10609 }, { "epoch": 0.32593002181058583, "grad_norm": 0.3501949906349182, "learning_rate": 1.8737793523325533e-05, "loss": 0.592, "step": 10610 }, { "epoch": 0.32596074094553495, "grad_norm": 0.37736231088638306, "learning_rate": 1.873755848444479e-05, "loss": 0.6664, "step": 10611 }, { "epoch": 0.32599146008048413, "grad_norm": 0.4172481894493103, "learning_rate": 1.8737323425156865e-05, "loss": 0.6545, "step": 10612 }, { "epoch": 0.3260221792154333, "grad_norm": 0.34072646498680115, "learning_rate": 1.8737088345462314e-05, "loss": 0.6501, "step": 10613 }, { "epoch": 0.3260528983503825, "grad_norm": 0.35689839720726013, "learning_rate": 1.8736853245361682e-05, "loss": 0.6019, "step": 10614 }, { "epoch": 0.3260836174853316, "grad_norm": 0.36337536573410034, "learning_rate": 1.8736618124855518e-05, "loss": 0.4636, "step": 10615 }, { "epoch": 0.3261143366202808, "grad_norm": 0.3420611619949341, "learning_rate": 1.873638298394437e-05, "loss": 0.5215, "step": 10616 }, { "epoch": 0.32614505575522995, "grad_norm": 0.37100815773010254, "learning_rate": 1.8736147822628793e-05, "loss": 0.5276, "step": 10617 }, { "epoch": 0.32617577489017907, "grad_norm": 0.38689830899238586, "learning_rate": 1.873591264090933e-05, "loss": 0.616, "step": 10618 }, { "epoch": 0.32620649402512825, "grad_norm": 0.40596750378608704, "learning_rate": 1.8735677438786538e-05, "loss": 0.5844, "step": 10619 }, { "epoch": 0.3262372131600774, "grad_norm": 0.3629716634750366, "learning_rate": 1.8735442216260957e-05, "loss": 0.5409, "step": 10620 }, { "epoch": 0.3262679322950266, "grad_norm": 0.3378835618495941, "learning_rate": 1.873520697333314e-05, "loss": 0.6169, "step": 10621 }, { "epoch": 0.3262986514299757, "grad_norm": 0.3328899145126343, "learning_rate": 1.873497171000364e-05, "loss": 0.5753, "step": 10622 }, { "epoch": 0.3263293705649249, "grad_norm": 0.3297642469406128, "learning_rate": 1.8734736426273e-05, "loss": 0.6285, "step": 10623 }, { "epoch": 0.32636008969987407, "grad_norm": 0.41585639119148254, "learning_rate": 1.8734501122141776e-05, "loss": 0.6763, "step": 10624 }, { "epoch": 0.3263908088348232, "grad_norm": 0.36052653193473816, "learning_rate": 1.8734265797610515e-05, "loss": 0.6174, "step": 10625 }, { "epoch": 0.32642152796977236, "grad_norm": 0.33579128980636597, "learning_rate": 1.8734030452679763e-05, "loss": 0.6374, "step": 10626 }, { "epoch": 0.32645224710472154, "grad_norm": 0.47292041778564453, "learning_rate": 1.8733795087350076e-05, "loss": 0.5936, "step": 10627 }, { "epoch": 0.3264829662396707, "grad_norm": 0.34830424189567566, "learning_rate": 1.8733559701622e-05, "loss": 0.5874, "step": 10628 }, { "epoch": 0.32651368537461983, "grad_norm": 0.3463381826877594, "learning_rate": 1.8733324295496085e-05, "loss": 0.6374, "step": 10629 }, { "epoch": 0.326544404509569, "grad_norm": 0.3429344594478607, "learning_rate": 1.873308886897288e-05, "loss": 0.5277, "step": 10630 }, { "epoch": 0.3265751236445182, "grad_norm": 0.37598368525505066, "learning_rate": 1.8732853422052936e-05, "loss": 0.5292, "step": 10631 }, { "epoch": 0.3266058427794673, "grad_norm": 0.34578216075897217, "learning_rate": 1.8732617954736806e-05, "loss": 0.5905, "step": 10632 }, { "epoch": 0.3266365619144165, "grad_norm": 0.32971879839897156, "learning_rate": 1.8732382467025034e-05, "loss": 0.5383, "step": 10633 }, { "epoch": 0.32666728104936565, "grad_norm": 0.3679504990577698, "learning_rate": 1.8732146958918175e-05, "loss": 0.6421, "step": 10634 }, { "epoch": 0.32669800018431483, "grad_norm": 0.39869388937950134, "learning_rate": 1.8731911430416776e-05, "loss": 0.5585, "step": 10635 }, { "epoch": 0.32672871931926395, "grad_norm": 0.3706117570400238, "learning_rate": 1.8731675881521385e-05, "loss": 0.5164, "step": 10636 }, { "epoch": 0.3267594384542131, "grad_norm": 0.366694837808609, "learning_rate": 1.8731440312232558e-05, "loss": 0.6436, "step": 10637 }, { "epoch": 0.3267901575891623, "grad_norm": 0.342157244682312, "learning_rate": 1.8731204722550843e-05, "loss": 0.562, "step": 10638 }, { "epoch": 0.3268208767241115, "grad_norm": 0.5613229870796204, "learning_rate": 1.873096911247679e-05, "loss": 0.5865, "step": 10639 }, { "epoch": 0.3268515958590606, "grad_norm": 0.3509702682495117, "learning_rate": 1.8730733482010946e-05, "loss": 0.6017, "step": 10640 }, { "epoch": 0.32688231499400977, "grad_norm": 0.3775944411754608, "learning_rate": 1.8730497831153866e-05, "loss": 0.5659, "step": 10641 }, { "epoch": 0.32691303412895895, "grad_norm": 0.4076125919818878, "learning_rate": 1.8730262159906097e-05, "loss": 0.5946, "step": 10642 }, { "epoch": 0.32694375326390807, "grad_norm": 0.3452241122722626, "learning_rate": 1.8730026468268195e-05, "loss": 0.5869, "step": 10643 }, { "epoch": 0.32697447239885724, "grad_norm": 0.33952343463897705, "learning_rate": 1.8729790756240703e-05, "loss": 0.6203, "step": 10644 }, { "epoch": 0.3270051915338064, "grad_norm": 0.36473217606544495, "learning_rate": 1.8729555023824175e-05, "loss": 0.571, "step": 10645 }, { "epoch": 0.3270359106687556, "grad_norm": 0.34806913137435913, "learning_rate": 1.872931927101916e-05, "loss": 0.6055, "step": 10646 }, { "epoch": 0.3270666298037047, "grad_norm": 0.3367176055908203, "learning_rate": 1.8729083497826213e-05, "loss": 0.6254, "step": 10647 }, { "epoch": 0.3270973489386539, "grad_norm": 0.3585708737373352, "learning_rate": 1.8728847704245884e-05, "loss": 0.6614, "step": 10648 }, { "epoch": 0.32712806807360306, "grad_norm": 0.3506544828414917, "learning_rate": 1.872861189027872e-05, "loss": 0.5887, "step": 10649 }, { "epoch": 0.3271587872085522, "grad_norm": 0.33110272884368896, "learning_rate": 1.8728376055925272e-05, "loss": 0.5391, "step": 10650 }, { "epoch": 0.32718950634350136, "grad_norm": 0.37524256110191345, "learning_rate": 1.8728140201186094e-05, "loss": 0.5168, "step": 10651 }, { "epoch": 0.32722022547845053, "grad_norm": 0.34931573271751404, "learning_rate": 1.8727904326061733e-05, "loss": 0.5146, "step": 10652 }, { "epoch": 0.3272509446133997, "grad_norm": 0.3322073817253113, "learning_rate": 1.8727668430552742e-05, "loss": 0.535, "step": 10653 }, { "epoch": 0.32728166374834883, "grad_norm": 0.36246514320373535, "learning_rate": 1.8727432514659672e-05, "loss": 0.5498, "step": 10654 }, { "epoch": 0.327312382883298, "grad_norm": 0.3730427920818329, "learning_rate": 1.8727196578383077e-05, "loss": 0.6042, "step": 10655 }, { "epoch": 0.3273431020182472, "grad_norm": 0.3390539884567261, "learning_rate": 1.87269606217235e-05, "loss": 0.5829, "step": 10656 }, { "epoch": 0.3273738211531963, "grad_norm": 0.37792879343032837, "learning_rate": 1.87267246446815e-05, "loss": 0.5645, "step": 10657 }, { "epoch": 0.3274045402881455, "grad_norm": 0.3017767369747162, "learning_rate": 1.8726488647257623e-05, "loss": 0.5857, "step": 10658 }, { "epoch": 0.32743525942309465, "grad_norm": 0.37412741780281067, "learning_rate": 1.8726252629452425e-05, "loss": 0.5453, "step": 10659 }, { "epoch": 0.3274659785580438, "grad_norm": 0.32777178287506104, "learning_rate": 1.872601659126645e-05, "loss": 0.5971, "step": 10660 }, { "epoch": 0.32749669769299294, "grad_norm": 0.40001529455184937, "learning_rate": 1.872578053270026e-05, "loss": 0.5927, "step": 10661 }, { "epoch": 0.3275274168279421, "grad_norm": 0.4078710675239563, "learning_rate": 1.8725544453754394e-05, "loss": 0.6445, "step": 10662 }, { "epoch": 0.3275581359628913, "grad_norm": 0.313996821641922, "learning_rate": 1.8725308354429414e-05, "loss": 0.5882, "step": 10663 }, { "epoch": 0.32758885509784047, "grad_norm": 0.3578588664531708, "learning_rate": 1.8725072234725865e-05, "loss": 0.5398, "step": 10664 }, { "epoch": 0.3276195742327896, "grad_norm": 0.36366888880729675, "learning_rate": 1.87248360946443e-05, "loss": 0.5992, "step": 10665 }, { "epoch": 0.32765029336773877, "grad_norm": 0.3819820284843445, "learning_rate": 1.8724599934185273e-05, "loss": 0.5472, "step": 10666 }, { "epoch": 0.32768101250268794, "grad_norm": 0.3554011583328247, "learning_rate": 1.8724363753349333e-05, "loss": 0.6054, "step": 10667 }, { "epoch": 0.32771173163763706, "grad_norm": 0.37807202339172363, "learning_rate": 1.872412755213703e-05, "loss": 0.5691, "step": 10668 }, { "epoch": 0.32774245077258624, "grad_norm": 0.4579107165336609, "learning_rate": 1.8723891330548923e-05, "loss": 0.5653, "step": 10669 }, { "epoch": 0.3277731699075354, "grad_norm": 0.36247122287750244, "learning_rate": 1.8723655088585554e-05, "loss": 0.5806, "step": 10670 }, { "epoch": 0.3278038890424846, "grad_norm": 0.42966148257255554, "learning_rate": 1.872341882624748e-05, "loss": 0.5778, "step": 10671 }, { "epoch": 0.3278346081774337, "grad_norm": 0.38631224632263184, "learning_rate": 1.8723182543535253e-05, "loss": 0.5904, "step": 10672 }, { "epoch": 0.3278653273123829, "grad_norm": 0.37173765897750854, "learning_rate": 1.8722946240449424e-05, "loss": 0.6373, "step": 10673 }, { "epoch": 0.32789604644733206, "grad_norm": 0.3452288806438446, "learning_rate": 1.872270991699054e-05, "loss": 0.549, "step": 10674 }, { "epoch": 0.3279267655822812, "grad_norm": 0.31921160221099854, "learning_rate": 1.8722473573159164e-05, "loss": 0.5176, "step": 10675 }, { "epoch": 0.32795748471723035, "grad_norm": 0.3393431305885315, "learning_rate": 1.872223720895584e-05, "loss": 0.5412, "step": 10676 }, { "epoch": 0.3279882038521795, "grad_norm": 0.33237138390541077, "learning_rate": 1.8722000824381123e-05, "loss": 0.5639, "step": 10677 }, { "epoch": 0.3280189229871287, "grad_norm": 0.38065606355667114, "learning_rate": 1.8721764419435563e-05, "loss": 0.683, "step": 10678 }, { "epoch": 0.3280496421220778, "grad_norm": 0.3393557667732239, "learning_rate": 1.872152799411971e-05, "loss": 0.5159, "step": 10679 }, { "epoch": 0.328080361257027, "grad_norm": 0.34869417548179626, "learning_rate": 1.8721291548434123e-05, "loss": 0.53, "step": 10680 }, { "epoch": 0.3281110803919762, "grad_norm": 0.3947012424468994, "learning_rate": 1.8721055082379348e-05, "loss": 0.6059, "step": 10681 }, { "epoch": 0.32814179952692535, "grad_norm": 0.3635810613632202, "learning_rate": 1.8720818595955944e-05, "loss": 0.5273, "step": 10682 }, { "epoch": 0.32817251866187447, "grad_norm": 0.3817093074321747, "learning_rate": 1.8720582089164457e-05, "loss": 0.5959, "step": 10683 }, { "epoch": 0.32820323779682364, "grad_norm": 0.33807286620140076, "learning_rate": 1.8720345562005436e-05, "loss": 0.5521, "step": 10684 }, { "epoch": 0.3282339569317728, "grad_norm": 0.3850533962249756, "learning_rate": 1.8720109014479447e-05, "loss": 0.5615, "step": 10685 }, { "epoch": 0.32826467606672194, "grad_norm": 0.35778698325157166, "learning_rate": 1.8719872446587032e-05, "loss": 0.6171, "step": 10686 }, { "epoch": 0.3282953952016711, "grad_norm": 0.43501678109169006, "learning_rate": 1.8719635858328743e-05, "loss": 0.5335, "step": 10687 }, { "epoch": 0.3283261143366203, "grad_norm": 0.3563230633735657, "learning_rate": 1.871939924970514e-05, "loss": 0.5446, "step": 10688 }, { "epoch": 0.32835683347156946, "grad_norm": 0.35284650325775146, "learning_rate": 1.8719162620716767e-05, "loss": 0.6584, "step": 10689 }, { "epoch": 0.3283875526065186, "grad_norm": 0.40835055708885193, "learning_rate": 1.871892597136418e-05, "loss": 0.6342, "step": 10690 }, { "epoch": 0.32841827174146776, "grad_norm": 0.3492220342159271, "learning_rate": 1.8718689301647935e-05, "loss": 0.5332, "step": 10691 }, { "epoch": 0.32844899087641694, "grad_norm": 0.4193934202194214, "learning_rate": 1.8718452611568585e-05, "loss": 0.6797, "step": 10692 }, { "epoch": 0.32847971001136606, "grad_norm": 0.3828330934047699, "learning_rate": 1.8718215901126674e-05, "loss": 0.5424, "step": 10693 }, { "epoch": 0.32851042914631523, "grad_norm": 0.3417256474494934, "learning_rate": 1.8717979170322766e-05, "loss": 0.5488, "step": 10694 }, { "epoch": 0.3285411482812644, "grad_norm": 0.41494297981262207, "learning_rate": 1.871774241915741e-05, "loss": 0.5957, "step": 10695 }, { "epoch": 0.3285718674162136, "grad_norm": 0.3286798894405365, "learning_rate": 1.871750564763115e-05, "loss": 0.5808, "step": 10696 }, { "epoch": 0.3286025865511627, "grad_norm": 0.3678062856197357, "learning_rate": 1.8717268855744553e-05, "loss": 0.5898, "step": 10697 }, { "epoch": 0.3286333056861119, "grad_norm": 0.36581316590309143, "learning_rate": 1.8717032043498165e-05, "loss": 0.5828, "step": 10698 }, { "epoch": 0.32866402482106105, "grad_norm": 0.34290972352027893, "learning_rate": 1.871679521089254e-05, "loss": 0.6835, "step": 10699 }, { "epoch": 0.32869474395601017, "grad_norm": 0.3774523437023163, "learning_rate": 1.8716558357928227e-05, "loss": 0.5638, "step": 10700 }, { "epoch": 0.32872546309095935, "grad_norm": 0.3280346691608429, "learning_rate": 1.8716321484605787e-05, "loss": 0.5972, "step": 10701 }, { "epoch": 0.3287561822259085, "grad_norm": 0.3625257611274719, "learning_rate": 1.8716084590925768e-05, "loss": 0.5455, "step": 10702 }, { "epoch": 0.3287869013608577, "grad_norm": 0.35756370425224304, "learning_rate": 1.8715847676888724e-05, "loss": 0.5806, "step": 10703 }, { "epoch": 0.3288176204958068, "grad_norm": 0.33832335472106934, "learning_rate": 1.8715610742495212e-05, "loss": 0.5511, "step": 10704 }, { "epoch": 0.328848339630756, "grad_norm": 0.3505648076534271, "learning_rate": 1.871537378774578e-05, "loss": 0.5068, "step": 10705 }, { "epoch": 0.32887905876570517, "grad_norm": 0.47498634457588196, "learning_rate": 1.8715136812640985e-05, "loss": 0.7084, "step": 10706 }, { "epoch": 0.32890977790065434, "grad_norm": 0.39254215359687805, "learning_rate": 1.871489981718138e-05, "loss": 0.6062, "step": 10707 }, { "epoch": 0.32894049703560346, "grad_norm": 0.35227489471435547, "learning_rate": 1.8714662801367516e-05, "loss": 0.5666, "step": 10708 }, { "epoch": 0.32897121617055264, "grad_norm": 0.35651397705078125, "learning_rate": 1.871442576519995e-05, "loss": 0.5654, "step": 10709 }, { "epoch": 0.3290019353055018, "grad_norm": 0.34810325503349304, "learning_rate": 1.871418870867923e-05, "loss": 0.4831, "step": 10710 }, { "epoch": 0.32903265444045093, "grad_norm": 0.35779282450675964, "learning_rate": 1.871395163180592e-05, "loss": 0.6215, "step": 10711 }, { "epoch": 0.3290633735754001, "grad_norm": 0.3724839985370636, "learning_rate": 1.8713714534580564e-05, "loss": 0.6107, "step": 10712 }, { "epoch": 0.3290940927103493, "grad_norm": 0.3303056061267853, "learning_rate": 1.871347741700372e-05, "loss": 0.5437, "step": 10713 }, { "epoch": 0.32912481184529846, "grad_norm": 0.4173175096511841, "learning_rate": 1.8713240279075938e-05, "loss": 0.5628, "step": 10714 }, { "epoch": 0.3291555309802476, "grad_norm": 0.3347472548484802, "learning_rate": 1.8713003120797778e-05, "loss": 0.5944, "step": 10715 }, { "epoch": 0.32918625011519675, "grad_norm": 0.3835369646549225, "learning_rate": 1.871276594216979e-05, "loss": 0.5688, "step": 10716 }, { "epoch": 0.32921696925014593, "grad_norm": 0.3942273259162903, "learning_rate": 1.871252874319253e-05, "loss": 0.6437, "step": 10717 }, { "epoch": 0.32924768838509505, "grad_norm": 0.3356528878211975, "learning_rate": 1.8712291523866546e-05, "loss": 0.594, "step": 10718 }, { "epoch": 0.3292784075200442, "grad_norm": 0.33523091673851013, "learning_rate": 1.87120542841924e-05, "loss": 0.4623, "step": 10719 }, { "epoch": 0.3293091266549934, "grad_norm": 0.3324328362941742, "learning_rate": 1.8711817024170643e-05, "loss": 0.5786, "step": 10720 }, { "epoch": 0.3293398457899426, "grad_norm": 0.3859851658344269, "learning_rate": 1.8711579743801828e-05, "loss": 0.5877, "step": 10721 }, { "epoch": 0.3293705649248917, "grad_norm": 0.3619343340396881, "learning_rate": 1.8711342443086512e-05, "loss": 0.6209, "step": 10722 }, { "epoch": 0.32940128405984087, "grad_norm": 0.3476257622241974, "learning_rate": 1.8711105122025245e-05, "loss": 0.5586, "step": 10723 }, { "epoch": 0.32943200319479005, "grad_norm": 0.350563645362854, "learning_rate": 1.8710867780618585e-05, "loss": 0.6027, "step": 10724 }, { "epoch": 0.3294627223297392, "grad_norm": 0.3395825922489166, "learning_rate": 1.8710630418867085e-05, "loss": 0.5832, "step": 10725 }, { "epoch": 0.32949344146468834, "grad_norm": 0.3227066993713379, "learning_rate": 1.87103930367713e-05, "loss": 0.5983, "step": 10726 }, { "epoch": 0.3295241605996375, "grad_norm": 0.46847808361053467, "learning_rate": 1.871015563433178e-05, "loss": 0.5409, "step": 10727 }, { "epoch": 0.3295548797345867, "grad_norm": 0.33713844418525696, "learning_rate": 1.870991821154909e-05, "loss": 0.6004, "step": 10728 }, { "epoch": 0.3295855988695358, "grad_norm": 0.29889196157455444, "learning_rate": 1.870968076842377e-05, "loss": 0.5445, "step": 10729 }, { "epoch": 0.329616318004485, "grad_norm": 0.3569268584251404, "learning_rate": 1.8709443304956386e-05, "loss": 0.5388, "step": 10730 }, { "epoch": 0.32964703713943416, "grad_norm": 0.4334929883480072, "learning_rate": 1.870920582114749e-05, "loss": 0.6189, "step": 10731 }, { "epoch": 0.32967775627438334, "grad_norm": 0.34755197167396545, "learning_rate": 1.870896831699763e-05, "loss": 0.5075, "step": 10732 }, { "epoch": 0.32970847540933246, "grad_norm": 0.4003734588623047, "learning_rate": 1.870873079250737e-05, "loss": 0.5331, "step": 10733 }, { "epoch": 0.32973919454428163, "grad_norm": 0.3313238024711609, "learning_rate": 1.8708493247677265e-05, "loss": 0.5068, "step": 10734 }, { "epoch": 0.3297699136792308, "grad_norm": 0.3736873269081116, "learning_rate": 1.870825568250786e-05, "loss": 0.5512, "step": 10735 }, { "epoch": 0.32980063281417993, "grad_norm": 0.30627554655075073, "learning_rate": 1.870801809699972e-05, "loss": 0.4557, "step": 10736 }, { "epoch": 0.3298313519491291, "grad_norm": 0.3260178565979004, "learning_rate": 1.8707780491153392e-05, "loss": 0.5973, "step": 10737 }, { "epoch": 0.3298620710840783, "grad_norm": 0.35606032609939575, "learning_rate": 1.8707542864969437e-05, "loss": 0.493, "step": 10738 }, { "epoch": 0.32989279021902745, "grad_norm": 0.36151257157325745, "learning_rate": 1.8707305218448405e-05, "loss": 0.5949, "step": 10739 }, { "epoch": 0.3299235093539766, "grad_norm": 0.3384949266910553, "learning_rate": 1.870706755159086e-05, "loss": 0.6118, "step": 10740 }, { "epoch": 0.32995422848892575, "grad_norm": 0.4093090295791626, "learning_rate": 1.870682986439734e-05, "loss": 0.6238, "step": 10741 }, { "epoch": 0.3299849476238749, "grad_norm": 0.3672820031642914, "learning_rate": 1.870659215686842e-05, "loss": 0.6648, "step": 10742 }, { "epoch": 0.33001566675882404, "grad_norm": 0.35856547951698303, "learning_rate": 1.870635442900464e-05, "loss": 0.61, "step": 10743 }, { "epoch": 0.3300463858937732, "grad_norm": 0.44638675451278687, "learning_rate": 1.8706116680806564e-05, "loss": 0.5886, "step": 10744 }, { "epoch": 0.3300771050287224, "grad_norm": 0.3471231460571289, "learning_rate": 1.8705878912274745e-05, "loss": 0.583, "step": 10745 }, { "epoch": 0.33010782416367157, "grad_norm": 0.36316269636154175, "learning_rate": 1.8705641123409737e-05, "loss": 0.6077, "step": 10746 }, { "epoch": 0.3301385432986207, "grad_norm": 0.3631818890571594, "learning_rate": 1.8705403314212097e-05, "loss": 0.5559, "step": 10747 }, { "epoch": 0.33016926243356987, "grad_norm": 0.3552229404449463, "learning_rate": 1.870516548468238e-05, "loss": 0.5778, "step": 10748 }, { "epoch": 0.33019998156851904, "grad_norm": 0.3626943528652191, "learning_rate": 1.8704927634821136e-05, "loss": 0.5475, "step": 10749 }, { "epoch": 0.3302307007034682, "grad_norm": 0.5007736682891846, "learning_rate": 1.870468976462893e-05, "loss": 0.5597, "step": 10750 }, { "epoch": 0.33026141983841734, "grad_norm": 0.3488624393939972, "learning_rate": 1.8704451874106312e-05, "loss": 0.6162, "step": 10751 }, { "epoch": 0.3302921389733665, "grad_norm": 0.3394465446472168, "learning_rate": 1.8704213963253842e-05, "loss": 0.549, "step": 10752 }, { "epoch": 0.3303228581083157, "grad_norm": 0.3779711425304413, "learning_rate": 1.870397603207207e-05, "loss": 0.537, "step": 10753 }, { "epoch": 0.3303535772432648, "grad_norm": 0.3807176649570465, "learning_rate": 1.8703738080561553e-05, "loss": 0.5852, "step": 10754 }, { "epoch": 0.330384296378214, "grad_norm": 0.43642377853393555, "learning_rate": 1.8703500108722847e-05, "loss": 0.5486, "step": 10755 }, { "epoch": 0.33041501551316316, "grad_norm": 0.3626044988632202, "learning_rate": 1.870326211655651e-05, "loss": 0.5495, "step": 10756 }, { "epoch": 0.33044573464811233, "grad_norm": 0.6367958784103394, "learning_rate": 1.8703024104063096e-05, "loss": 0.6783, "step": 10757 }, { "epoch": 0.33047645378306145, "grad_norm": 0.38847169280052185, "learning_rate": 1.8702786071243164e-05, "loss": 0.5973, "step": 10758 }, { "epoch": 0.3305071729180106, "grad_norm": 0.34670236706733704, "learning_rate": 1.8702548018097263e-05, "loss": 0.6137, "step": 10759 }, { "epoch": 0.3305378920529598, "grad_norm": 0.3769471049308777, "learning_rate": 1.870230994462596e-05, "loss": 0.5496, "step": 10760 }, { "epoch": 0.3305686111879089, "grad_norm": 0.3451610207557678, "learning_rate": 1.87020718508298e-05, "loss": 0.6665, "step": 10761 }, { "epoch": 0.3305993303228581, "grad_norm": 0.32633426785469055, "learning_rate": 1.8701833736709343e-05, "loss": 0.5424, "step": 10762 }, { "epoch": 0.3306300494578073, "grad_norm": 0.31399422883987427, "learning_rate": 1.8701595602265142e-05, "loss": 0.6208, "step": 10763 }, { "epoch": 0.33066076859275645, "grad_norm": 0.3511022627353668, "learning_rate": 1.8701357447497765e-05, "loss": 0.5727, "step": 10764 }, { "epoch": 0.33069148772770557, "grad_norm": 0.36166709661483765, "learning_rate": 1.8701119272407756e-05, "loss": 0.5813, "step": 10765 }, { "epoch": 0.33072220686265474, "grad_norm": 0.3299630582332611, "learning_rate": 1.870088107699567e-05, "loss": 0.5675, "step": 10766 }, { "epoch": 0.3307529259976039, "grad_norm": 0.35226187109947205, "learning_rate": 1.8700642861262074e-05, "loss": 0.5967, "step": 10767 }, { "epoch": 0.3307836451325531, "grad_norm": 0.37466368079185486, "learning_rate": 1.870040462520752e-05, "loss": 0.6119, "step": 10768 }, { "epoch": 0.3308143642675022, "grad_norm": 2.747154951095581, "learning_rate": 1.8700166368832563e-05, "loss": 0.5626, "step": 10769 }, { "epoch": 0.3308450834024514, "grad_norm": 0.3685750365257263, "learning_rate": 1.869992809213776e-05, "loss": 0.5871, "step": 10770 }, { "epoch": 0.33087580253740057, "grad_norm": 0.3678615093231201, "learning_rate": 1.8699689795123664e-05, "loss": 0.538, "step": 10771 }, { "epoch": 0.3309065216723497, "grad_norm": 0.33392995595932007, "learning_rate": 1.8699451477790837e-05, "loss": 0.5955, "step": 10772 }, { "epoch": 0.33093724080729886, "grad_norm": 0.34633758664131165, "learning_rate": 1.8699213140139837e-05, "loss": 0.6536, "step": 10773 }, { "epoch": 0.33096795994224804, "grad_norm": 0.38190239667892456, "learning_rate": 1.869897478217121e-05, "loss": 0.5923, "step": 10774 }, { "epoch": 0.3309986790771972, "grad_norm": 0.31362736225128174, "learning_rate": 1.8698736403885526e-05, "loss": 0.5561, "step": 10775 }, { "epoch": 0.33102939821214633, "grad_norm": 0.3279740512371063, "learning_rate": 1.8698498005283333e-05, "loss": 0.5015, "step": 10776 }, { "epoch": 0.3310601173470955, "grad_norm": 0.3533557653427124, "learning_rate": 1.869825958636519e-05, "loss": 0.6598, "step": 10777 }, { "epoch": 0.3310908364820447, "grad_norm": 0.37287917733192444, "learning_rate": 1.8698021147131656e-05, "loss": 0.6042, "step": 10778 }, { "epoch": 0.3311215556169938, "grad_norm": 0.30979570746421814, "learning_rate": 1.8697782687583284e-05, "loss": 0.6009, "step": 10779 }, { "epoch": 0.331152274751943, "grad_norm": 0.33390170335769653, "learning_rate": 1.8697544207720636e-05, "loss": 0.4967, "step": 10780 }, { "epoch": 0.33118299388689215, "grad_norm": 0.33535197377204895, "learning_rate": 1.869730570754426e-05, "loss": 0.5494, "step": 10781 }, { "epoch": 0.3312137130218413, "grad_norm": 0.3805495500564575, "learning_rate": 1.8697067187054725e-05, "loss": 0.6402, "step": 10782 }, { "epoch": 0.33124443215679045, "grad_norm": 0.32948389649391174, "learning_rate": 1.869682864625258e-05, "loss": 0.5782, "step": 10783 }, { "epoch": 0.3312751512917396, "grad_norm": 0.38789892196655273, "learning_rate": 1.8696590085138388e-05, "loss": 0.6276, "step": 10784 }, { "epoch": 0.3313058704266888, "grad_norm": 0.34525445103645325, "learning_rate": 1.8696351503712698e-05, "loss": 0.513, "step": 10785 }, { "epoch": 0.3313365895616379, "grad_norm": 0.35260438919067383, "learning_rate": 1.8696112901976073e-05, "loss": 0.571, "step": 10786 }, { "epoch": 0.3313673086965871, "grad_norm": 0.3353536128997803, "learning_rate": 1.869587427992907e-05, "loss": 0.5906, "step": 10787 }, { "epoch": 0.33139802783153627, "grad_norm": 0.33226028084754944, "learning_rate": 1.8695635637572243e-05, "loss": 0.5055, "step": 10788 }, { "epoch": 0.33142874696648544, "grad_norm": 0.3630063235759735, "learning_rate": 1.8695396974906153e-05, "loss": 0.5706, "step": 10789 }, { "epoch": 0.33145946610143456, "grad_norm": 0.3309083580970764, "learning_rate": 1.8695158291931354e-05, "loss": 0.5284, "step": 10790 }, { "epoch": 0.33149018523638374, "grad_norm": 0.3475259840488434, "learning_rate": 1.869491958864841e-05, "loss": 0.5758, "step": 10791 }, { "epoch": 0.3315209043713329, "grad_norm": 0.29768988490104675, "learning_rate": 1.869468086505787e-05, "loss": 0.5855, "step": 10792 }, { "epoch": 0.3315516235062821, "grad_norm": 0.34590888023376465, "learning_rate": 1.8694442121160298e-05, "loss": 0.5433, "step": 10793 }, { "epoch": 0.3315823426412312, "grad_norm": 0.36587777733802795, "learning_rate": 1.8694203356956248e-05, "loss": 0.5591, "step": 10794 }, { "epoch": 0.3316130617761804, "grad_norm": 0.3860008120536804, "learning_rate": 1.869396457244628e-05, "loss": 0.6475, "step": 10795 }, { "epoch": 0.33164378091112956, "grad_norm": 0.519795298576355, "learning_rate": 1.869372576763095e-05, "loss": 0.6711, "step": 10796 }, { "epoch": 0.3316745000460787, "grad_norm": 0.35144469141960144, "learning_rate": 1.8693486942510818e-05, "loss": 0.5992, "step": 10797 }, { "epoch": 0.33170521918102785, "grad_norm": 0.3557735085487366, "learning_rate": 1.8693248097086436e-05, "loss": 0.5739, "step": 10798 }, { "epoch": 0.33173593831597703, "grad_norm": 0.30258288979530334, "learning_rate": 1.869300923135837e-05, "loss": 0.545, "step": 10799 }, { "epoch": 0.3317666574509262, "grad_norm": 0.3355347514152527, "learning_rate": 1.869277034532717e-05, "loss": 0.4997, "step": 10800 }, { "epoch": 0.3317973765858753, "grad_norm": 0.3408590257167816, "learning_rate": 1.86925314389934e-05, "loss": 0.586, "step": 10801 }, { "epoch": 0.3318280957208245, "grad_norm": 12.224002838134766, "learning_rate": 1.8692292512357614e-05, "loss": 0.5359, "step": 10802 }, { "epoch": 0.3318588148557737, "grad_norm": 0.3779182732105255, "learning_rate": 1.8692053565420376e-05, "loss": 0.501, "step": 10803 }, { "epoch": 0.3318895339907228, "grad_norm": 0.35320040583610535, "learning_rate": 1.8691814598182238e-05, "loss": 0.6041, "step": 10804 }, { "epoch": 0.33192025312567197, "grad_norm": 0.3651285767555237, "learning_rate": 1.869157561064376e-05, "loss": 0.5444, "step": 10805 }, { "epoch": 0.33195097226062115, "grad_norm": 0.3797574043273926, "learning_rate": 1.86913366028055e-05, "loss": 0.6674, "step": 10806 }, { "epoch": 0.3319816913955703, "grad_norm": 0.34154045581817627, "learning_rate": 1.8691097574668012e-05, "loss": 0.5742, "step": 10807 }, { "epoch": 0.33201241053051944, "grad_norm": 0.3455929756164551, "learning_rate": 1.8690858526231866e-05, "loss": 0.6231, "step": 10808 }, { "epoch": 0.3320431296654686, "grad_norm": 0.314819872379303, "learning_rate": 1.8690619457497608e-05, "loss": 0.4538, "step": 10809 }, { "epoch": 0.3320738488004178, "grad_norm": 0.33903348445892334, "learning_rate": 1.8690380368465802e-05, "loss": 0.5933, "step": 10810 }, { "epoch": 0.33210456793536697, "grad_norm": 0.3888603746891022, "learning_rate": 1.8690141259137006e-05, "loss": 0.6127, "step": 10811 }, { "epoch": 0.3321352870703161, "grad_norm": 0.3223203122615814, "learning_rate": 1.868990212951178e-05, "loss": 0.5673, "step": 10812 }, { "epoch": 0.33216600620526526, "grad_norm": 0.3116982579231262, "learning_rate": 1.8689662979590677e-05, "loss": 0.5096, "step": 10813 }, { "epoch": 0.33219672534021444, "grad_norm": 0.35707351565361023, "learning_rate": 1.8689423809374263e-05, "loss": 0.6217, "step": 10814 }, { "epoch": 0.33222744447516356, "grad_norm": 0.37554264068603516, "learning_rate": 1.868918461886309e-05, "loss": 0.6583, "step": 10815 }, { "epoch": 0.33225816361011273, "grad_norm": 0.4192793667316437, "learning_rate": 1.8688945408057722e-05, "loss": 0.5947, "step": 10816 }, { "epoch": 0.3322888827450619, "grad_norm": 0.3308614492416382, "learning_rate": 1.8688706176958714e-05, "loss": 0.5444, "step": 10817 }, { "epoch": 0.3323196018800111, "grad_norm": 0.34052881598472595, "learning_rate": 1.8688466925566623e-05, "loss": 0.6079, "step": 10818 }, { "epoch": 0.3323503210149602, "grad_norm": 0.3226393461227417, "learning_rate": 1.8688227653882017e-05, "loss": 0.592, "step": 10819 }, { "epoch": 0.3323810401499094, "grad_norm": 0.4906481206417084, "learning_rate": 1.8687988361905444e-05, "loss": 0.5658, "step": 10820 }, { "epoch": 0.33241175928485855, "grad_norm": 0.38401439785957336, "learning_rate": 1.868774904963747e-05, "loss": 0.5879, "step": 10821 }, { "epoch": 0.3324424784198077, "grad_norm": 0.3387812376022339, "learning_rate": 1.868750971707865e-05, "loss": 0.6676, "step": 10822 }, { "epoch": 0.33247319755475685, "grad_norm": 0.32928329706192017, "learning_rate": 1.8687270364229545e-05, "loss": 0.492, "step": 10823 }, { "epoch": 0.332503916689706, "grad_norm": 0.34988105297088623, "learning_rate": 1.8687030991090708e-05, "loss": 0.4899, "step": 10824 }, { "epoch": 0.3325346358246552, "grad_norm": 0.3623489439487457, "learning_rate": 1.868679159766271e-05, "loss": 0.6367, "step": 10825 }, { "epoch": 0.3325653549596043, "grad_norm": 0.34259331226348877, "learning_rate": 1.86865521839461e-05, "loss": 0.6272, "step": 10826 }, { "epoch": 0.3325960740945535, "grad_norm": 0.37767159938812256, "learning_rate": 1.8686312749941444e-05, "loss": 0.5589, "step": 10827 }, { "epoch": 0.33262679322950267, "grad_norm": 0.32660403847694397, "learning_rate": 1.8686073295649295e-05, "loss": 0.4939, "step": 10828 }, { "epoch": 0.3326575123644518, "grad_norm": 0.3226092755794525, "learning_rate": 1.8685833821070215e-05, "loss": 0.5889, "step": 10829 }, { "epoch": 0.33268823149940097, "grad_norm": 0.3906758725643158, "learning_rate": 1.8685594326204768e-05, "loss": 0.681, "step": 10830 }, { "epoch": 0.33271895063435014, "grad_norm": 0.3242807686328888, "learning_rate": 1.8685354811053505e-05, "loss": 0.6347, "step": 10831 }, { "epoch": 0.3327496697692993, "grad_norm": 0.3344544470310211, "learning_rate": 1.868511527561699e-05, "loss": 0.6014, "step": 10832 }, { "epoch": 0.33278038890424844, "grad_norm": 0.3356534540653229, "learning_rate": 1.868487571989578e-05, "loss": 0.554, "step": 10833 }, { "epoch": 0.3328111080391976, "grad_norm": 0.373982310295105, "learning_rate": 1.868463614389044e-05, "loss": 0.6177, "step": 10834 }, { "epoch": 0.3328418271741468, "grad_norm": 0.4723977744579315, "learning_rate": 1.8684396547601524e-05, "loss": 0.5116, "step": 10835 }, { "epoch": 0.33287254630909596, "grad_norm": 0.4226072132587433, "learning_rate": 1.8684156931029592e-05, "loss": 0.6077, "step": 10836 }, { "epoch": 0.3329032654440451, "grad_norm": 0.3025416433811188, "learning_rate": 1.8683917294175206e-05, "loss": 0.5768, "step": 10837 }, { "epoch": 0.33293398457899426, "grad_norm": 0.3488057255744934, "learning_rate": 1.8683677637038923e-05, "loss": 0.6533, "step": 10838 }, { "epoch": 0.33296470371394343, "grad_norm": 0.333935022354126, "learning_rate": 1.8683437959621308e-05, "loss": 0.5826, "step": 10839 }, { "epoch": 0.33299542284889255, "grad_norm": 0.360336571931839, "learning_rate": 1.868319826192291e-05, "loss": 0.6315, "step": 10840 }, { "epoch": 0.33302614198384173, "grad_norm": 0.3025347888469696, "learning_rate": 1.8682958543944307e-05, "loss": 0.562, "step": 10841 }, { "epoch": 0.3330568611187909, "grad_norm": 0.3501327931880951, "learning_rate": 1.868271880568604e-05, "loss": 0.5515, "step": 10842 }, { "epoch": 0.3330875802537401, "grad_norm": 0.3521806597709656, "learning_rate": 1.868247904714868e-05, "loss": 0.5394, "step": 10843 }, { "epoch": 0.3331182993886892, "grad_norm": 0.35016387701034546, "learning_rate": 1.8682239268332782e-05, "loss": 0.5446, "step": 10844 }, { "epoch": 0.3331490185236384, "grad_norm": 0.32804715633392334, "learning_rate": 1.8681999469238908e-05, "loss": 0.5483, "step": 10845 }, { "epoch": 0.33317973765858755, "grad_norm": 0.3784356415271759, "learning_rate": 1.868175964986762e-05, "loss": 0.6144, "step": 10846 }, { "epoch": 0.33321045679353667, "grad_norm": 0.3240474760532379, "learning_rate": 1.8681519810219475e-05, "loss": 0.5551, "step": 10847 }, { "epoch": 0.33324117592848584, "grad_norm": 0.3785206973552704, "learning_rate": 1.868127995029503e-05, "loss": 0.5997, "step": 10848 }, { "epoch": 0.333271895063435, "grad_norm": 0.341886430978775, "learning_rate": 1.868104007009486e-05, "loss": 0.5829, "step": 10849 }, { "epoch": 0.3333026141983842, "grad_norm": 0.318952351808548, "learning_rate": 1.8680800169619506e-05, "loss": 0.5486, "step": 10850 }, { "epoch": 0.3333333333333333, "grad_norm": 0.3860340714454651, "learning_rate": 1.8680560248869538e-05, "loss": 0.6275, "step": 10851 }, { "epoch": 0.3333640524682825, "grad_norm": 0.34685030579566956, "learning_rate": 1.8680320307845515e-05, "loss": 0.5494, "step": 10852 }, { "epoch": 0.33339477160323167, "grad_norm": 0.31490716338157654, "learning_rate": 1.8680080346548e-05, "loss": 0.5663, "step": 10853 }, { "epoch": 0.3334254907381808, "grad_norm": 0.37204214930534363, "learning_rate": 1.867984036497755e-05, "loss": 0.5671, "step": 10854 }, { "epoch": 0.33345620987312996, "grad_norm": 0.3750583827495575, "learning_rate": 1.8679600363134724e-05, "loss": 0.5827, "step": 10855 }, { "epoch": 0.33348692900807914, "grad_norm": 0.34068822860717773, "learning_rate": 1.8679360341020086e-05, "loss": 0.5583, "step": 10856 }, { "epoch": 0.3335176481430283, "grad_norm": 0.3412506580352783, "learning_rate": 1.8679120298634198e-05, "loss": 0.5842, "step": 10857 }, { "epoch": 0.33354836727797743, "grad_norm": 0.33057186007499695, "learning_rate": 1.867888023597762e-05, "loss": 0.5407, "step": 10858 }, { "epoch": 0.3335790864129266, "grad_norm": 0.32028576731681824, "learning_rate": 1.867864015305091e-05, "loss": 0.5899, "step": 10859 }, { "epoch": 0.3336098055478758, "grad_norm": 0.34127864241600037, "learning_rate": 1.867840004985463e-05, "loss": 0.5967, "step": 10860 }, { "epoch": 0.33364052468282496, "grad_norm": 0.3202706277370453, "learning_rate": 1.8678159926389338e-05, "loss": 0.4617, "step": 10861 }, { "epoch": 0.3336712438177741, "grad_norm": 0.36463484168052673, "learning_rate": 1.86779197826556e-05, "loss": 0.554, "step": 10862 }, { "epoch": 0.33370196295272325, "grad_norm": 0.3657853901386261, "learning_rate": 1.867767961865397e-05, "loss": 0.5855, "step": 10863 }, { "epoch": 0.3337326820876724, "grad_norm": 0.34839653968811035, "learning_rate": 1.8677439434385016e-05, "loss": 0.594, "step": 10864 }, { "epoch": 0.33376340122262155, "grad_norm": 0.36807185411453247, "learning_rate": 1.8677199229849297e-05, "loss": 0.6144, "step": 10865 }, { "epoch": 0.3337941203575707, "grad_norm": 0.37034061551094055, "learning_rate": 1.8676959005047373e-05, "loss": 0.5769, "step": 10866 }, { "epoch": 0.3338248394925199, "grad_norm": 0.3780219256877899, "learning_rate": 1.8676718759979806e-05, "loss": 0.6318, "step": 10867 }, { "epoch": 0.3338555586274691, "grad_norm": 0.3350030183792114, "learning_rate": 1.8676478494647156e-05, "loss": 0.5824, "step": 10868 }, { "epoch": 0.3338862777624182, "grad_norm": 0.39518478512763977, "learning_rate": 1.867623820904998e-05, "loss": 0.4551, "step": 10869 }, { "epoch": 0.33391699689736737, "grad_norm": 0.6111534237861633, "learning_rate": 1.867599790318885e-05, "loss": 0.5592, "step": 10870 }, { "epoch": 0.33394771603231654, "grad_norm": 0.31226202845573425, "learning_rate": 1.8675757577064317e-05, "loss": 0.5408, "step": 10871 }, { "epoch": 0.33397843516726566, "grad_norm": 0.35071665048599243, "learning_rate": 1.867551723067695e-05, "loss": 0.5816, "step": 10872 }, { "epoch": 0.33400915430221484, "grad_norm": 0.32620468735694885, "learning_rate": 1.8675276864027303e-05, "loss": 0.6619, "step": 10873 }, { "epoch": 0.334039873437164, "grad_norm": 0.4871514141559601, "learning_rate": 1.867503647711594e-05, "loss": 0.6035, "step": 10874 }, { "epoch": 0.3340705925721132, "grad_norm": 0.336295485496521, "learning_rate": 1.867479606994343e-05, "loss": 0.5322, "step": 10875 }, { "epoch": 0.3341013117070623, "grad_norm": 0.3334338963031769, "learning_rate": 1.8674555642510323e-05, "loss": 0.5551, "step": 10876 }, { "epoch": 0.3341320308420115, "grad_norm": 0.3878858983516693, "learning_rate": 1.8674315194817184e-05, "loss": 0.5836, "step": 10877 }, { "epoch": 0.33416274997696066, "grad_norm": 0.3123989701271057, "learning_rate": 1.8674074726864576e-05, "loss": 0.5518, "step": 10878 }, { "epoch": 0.33419346911190984, "grad_norm": 0.36274293065071106, "learning_rate": 1.8673834238653065e-05, "loss": 0.6297, "step": 10879 }, { "epoch": 0.33422418824685896, "grad_norm": 0.3462165892124176, "learning_rate": 1.8673593730183208e-05, "loss": 0.5925, "step": 10880 }, { "epoch": 0.33425490738180813, "grad_norm": 0.36019420623779297, "learning_rate": 1.8673353201455563e-05, "loss": 0.584, "step": 10881 }, { "epoch": 0.3342856265167573, "grad_norm": 0.353059858083725, "learning_rate": 1.86731126524707e-05, "loss": 0.5714, "step": 10882 }, { "epoch": 0.3343163456517064, "grad_norm": 0.3824971616268158, "learning_rate": 1.8672872083229175e-05, "loss": 0.6061, "step": 10883 }, { "epoch": 0.3343470647866556, "grad_norm": 0.33549046516418457, "learning_rate": 1.867263149373155e-05, "loss": 0.5839, "step": 10884 }, { "epoch": 0.3343777839216048, "grad_norm": 0.3835899531841278, "learning_rate": 1.867239088397839e-05, "loss": 0.6295, "step": 10885 }, { "epoch": 0.33440850305655395, "grad_norm": 0.33937525749206543, "learning_rate": 1.8672150253970256e-05, "loss": 0.5181, "step": 10886 }, { "epoch": 0.33443922219150307, "grad_norm": 0.35673198103904724, "learning_rate": 1.8671909603707708e-05, "loss": 0.5745, "step": 10887 }, { "epoch": 0.33446994132645225, "grad_norm": 0.3678250312805176, "learning_rate": 1.8671668933191308e-05, "loss": 0.6088, "step": 10888 }, { "epoch": 0.3345006604614014, "grad_norm": 0.3162587285041809, "learning_rate": 1.8671428242421624e-05, "loss": 0.578, "step": 10889 }, { "epoch": 0.33453137959635054, "grad_norm": 0.3558366894721985, "learning_rate": 1.8671187531399214e-05, "loss": 0.5411, "step": 10890 }, { "epoch": 0.3345620987312997, "grad_norm": 0.36291614174842834, "learning_rate": 1.8670946800124636e-05, "loss": 0.5984, "step": 10891 }, { "epoch": 0.3345928178662489, "grad_norm": 0.34553563594818115, "learning_rate": 1.8670706048598458e-05, "loss": 0.5831, "step": 10892 }, { "epoch": 0.33462353700119807, "grad_norm": 0.3777993619441986, "learning_rate": 1.8670465276821237e-05, "loss": 0.5844, "step": 10893 }, { "epoch": 0.3346542561361472, "grad_norm": 0.3236714005470276, "learning_rate": 1.8670224484793543e-05, "loss": 0.5152, "step": 10894 }, { "epoch": 0.33468497527109636, "grad_norm": 0.3524039685726166, "learning_rate": 1.8669983672515933e-05, "loss": 0.4997, "step": 10895 }, { "epoch": 0.33471569440604554, "grad_norm": 0.36430954933166504, "learning_rate": 1.8669742839988974e-05, "loss": 0.6655, "step": 10896 }, { "epoch": 0.33474641354099466, "grad_norm": 0.3671779930591583, "learning_rate": 1.8669501987213218e-05, "loss": 0.5747, "step": 10897 }, { "epoch": 0.33477713267594383, "grad_norm": 0.34294915199279785, "learning_rate": 1.8669261114189238e-05, "loss": 0.5839, "step": 10898 }, { "epoch": 0.334807851810893, "grad_norm": 0.33036041259765625, "learning_rate": 1.86690202209176e-05, "loss": 0.5367, "step": 10899 }, { "epoch": 0.3348385709458422, "grad_norm": 0.3706984221935272, "learning_rate": 1.866877930739885e-05, "loss": 0.6118, "step": 10900 }, { "epoch": 0.3348692900807913, "grad_norm": 0.33208152651786804, "learning_rate": 1.8668538373633565e-05, "loss": 0.619, "step": 10901 }, { "epoch": 0.3349000092157405, "grad_norm": 0.4025624990463257, "learning_rate": 1.86682974196223e-05, "loss": 0.5615, "step": 10902 }, { "epoch": 0.33493072835068965, "grad_norm": 0.3630326986312866, "learning_rate": 1.8668056445365622e-05, "loss": 0.5889, "step": 10903 }, { "epoch": 0.33496144748563883, "grad_norm": 0.30965954065322876, "learning_rate": 1.8667815450864097e-05, "loss": 0.5818, "step": 10904 }, { "epoch": 0.33499216662058795, "grad_norm": 0.33696281909942627, "learning_rate": 1.866757443611828e-05, "loss": 0.5817, "step": 10905 }, { "epoch": 0.3350228857555371, "grad_norm": 0.9126240611076355, "learning_rate": 1.866733340112874e-05, "loss": 0.5708, "step": 10906 }, { "epoch": 0.3350536048904863, "grad_norm": 0.42187410593032837, "learning_rate": 1.8667092345896036e-05, "loss": 0.6849, "step": 10907 }, { "epoch": 0.3350843240254354, "grad_norm": 0.32993242144584656, "learning_rate": 1.866685127042073e-05, "loss": 0.6177, "step": 10908 }, { "epoch": 0.3351150431603846, "grad_norm": 0.34601879119873047, "learning_rate": 1.866661017470339e-05, "loss": 0.5594, "step": 10909 }, { "epoch": 0.33514576229533377, "grad_norm": 0.4783385694026947, "learning_rate": 1.8666369058744574e-05, "loss": 0.5806, "step": 10910 }, { "epoch": 0.33517648143028295, "grad_norm": 0.3539155125617981, "learning_rate": 1.866612792254485e-05, "loss": 0.544, "step": 10911 }, { "epoch": 0.33520720056523207, "grad_norm": 0.34251606464385986, "learning_rate": 1.8665886766104782e-05, "loss": 0.5475, "step": 10912 }, { "epoch": 0.33523791970018124, "grad_norm": 0.3415076732635498, "learning_rate": 1.8665645589424928e-05, "loss": 0.5813, "step": 10913 }, { "epoch": 0.3352686388351304, "grad_norm": 0.33746659755706787, "learning_rate": 1.8665404392505856e-05, "loss": 0.5616, "step": 10914 }, { "epoch": 0.33529935797007954, "grad_norm": 0.7081437110900879, "learning_rate": 1.8665163175348123e-05, "loss": 0.5666, "step": 10915 }, { "epoch": 0.3353300771050287, "grad_norm": 0.3497101962566376, "learning_rate": 1.8664921937952297e-05, "loss": 0.5869, "step": 10916 }, { "epoch": 0.3353607962399779, "grad_norm": 0.3300389349460602, "learning_rate": 1.866468068031894e-05, "loss": 0.5962, "step": 10917 }, { "epoch": 0.33539151537492706, "grad_norm": 0.33430016040802, "learning_rate": 1.8664439402448617e-05, "loss": 0.6818, "step": 10918 }, { "epoch": 0.3354222345098762, "grad_norm": 0.34055614471435547, "learning_rate": 1.866419810434189e-05, "loss": 0.5176, "step": 10919 }, { "epoch": 0.33545295364482536, "grad_norm": 0.3307061195373535, "learning_rate": 1.8663956785999323e-05, "loss": 0.5127, "step": 10920 }, { "epoch": 0.33548367277977453, "grad_norm": 0.3276344835758209, "learning_rate": 1.866371544742148e-05, "loss": 0.6273, "step": 10921 }, { "epoch": 0.3355143919147237, "grad_norm": 0.3619306683540344, "learning_rate": 1.8663474088608927e-05, "loss": 0.5606, "step": 10922 }, { "epoch": 0.33554511104967283, "grad_norm": 0.39632904529571533, "learning_rate": 1.8663232709562226e-05, "loss": 0.5379, "step": 10923 }, { "epoch": 0.335575830184622, "grad_norm": 0.3501565754413605, "learning_rate": 1.8662991310281934e-05, "loss": 0.5334, "step": 10924 }, { "epoch": 0.3356065493195712, "grad_norm": 0.37133729457855225, "learning_rate": 1.8662749890768627e-05, "loss": 0.5859, "step": 10925 }, { "epoch": 0.3356372684545203, "grad_norm": 0.3497900068759918, "learning_rate": 1.8662508451022857e-05, "loss": 0.5146, "step": 10926 }, { "epoch": 0.3356679875894695, "grad_norm": 0.36382007598876953, "learning_rate": 1.86622669910452e-05, "loss": 0.6073, "step": 10927 }, { "epoch": 0.33569870672441865, "grad_norm": 0.46482035517692566, "learning_rate": 1.866202551083621e-05, "loss": 0.6333, "step": 10928 }, { "epoch": 0.3357294258593678, "grad_norm": 0.3581355810165405, "learning_rate": 1.8661784010396453e-05, "loss": 0.6324, "step": 10929 }, { "epoch": 0.33576014499431694, "grad_norm": 0.40294739603996277, "learning_rate": 1.8661542489726497e-05, "loss": 0.6211, "step": 10930 }, { "epoch": 0.3357908641292661, "grad_norm": 0.36245110630989075, "learning_rate": 1.86613009488269e-05, "loss": 0.5247, "step": 10931 }, { "epoch": 0.3358215832642153, "grad_norm": 0.3166002035140991, "learning_rate": 1.8661059387698233e-05, "loss": 0.573, "step": 10932 }, { "epoch": 0.3358523023991644, "grad_norm": 0.3686404526233673, "learning_rate": 1.8660817806341053e-05, "loss": 0.6263, "step": 10933 }, { "epoch": 0.3358830215341136, "grad_norm": 0.3514450490474701, "learning_rate": 1.8660576204755934e-05, "loss": 0.5458, "step": 10934 }, { "epoch": 0.33591374066906277, "grad_norm": 0.32614681124687195, "learning_rate": 1.866033458294343e-05, "loss": 0.487, "step": 10935 }, { "epoch": 0.33594445980401194, "grad_norm": 0.3215641975402832, "learning_rate": 1.8660092940904112e-05, "loss": 0.5575, "step": 10936 }, { "epoch": 0.33597517893896106, "grad_norm": 0.35702359676361084, "learning_rate": 1.8659851278638542e-05, "loss": 0.5214, "step": 10937 }, { "epoch": 0.33600589807391024, "grad_norm": 0.3821672797203064, "learning_rate": 1.8659609596147285e-05, "loss": 0.6394, "step": 10938 }, { "epoch": 0.3360366172088594, "grad_norm": 0.381294310092926, "learning_rate": 1.8659367893430904e-05, "loss": 0.6423, "step": 10939 }, { "epoch": 0.33606733634380853, "grad_norm": 0.3565676212310791, "learning_rate": 1.865912617048996e-05, "loss": 0.6072, "step": 10940 }, { "epoch": 0.3360980554787577, "grad_norm": 0.3549792170524597, "learning_rate": 1.8658884427325033e-05, "loss": 0.5684, "step": 10941 }, { "epoch": 0.3361287746137069, "grad_norm": 0.38265424966812134, "learning_rate": 1.865864266393667e-05, "loss": 0.6225, "step": 10942 }, { "epoch": 0.33615949374865606, "grad_norm": 0.35470449924468994, "learning_rate": 1.865840088032544e-05, "loss": 0.5816, "step": 10943 }, { "epoch": 0.3361902128836052, "grad_norm": 0.3419837951660156, "learning_rate": 1.8658159076491915e-05, "loss": 0.5592, "step": 10944 }, { "epoch": 0.33622093201855435, "grad_norm": 0.3367605209350586, "learning_rate": 1.865791725243665e-05, "loss": 0.5645, "step": 10945 }, { "epoch": 0.33625165115350353, "grad_norm": 0.32456347346305847, "learning_rate": 1.8657675408160218e-05, "loss": 0.5555, "step": 10946 }, { "epoch": 0.3362823702884527, "grad_norm": 0.34183183312416077, "learning_rate": 1.865743354366318e-05, "loss": 0.5278, "step": 10947 }, { "epoch": 0.3363130894234018, "grad_norm": 0.34094947576522827, "learning_rate": 1.86571916589461e-05, "loss": 0.6778, "step": 10948 }, { "epoch": 0.336343808558351, "grad_norm": 0.36560675501823425, "learning_rate": 1.8656949754009548e-05, "loss": 0.5464, "step": 10949 }, { "epoch": 0.3363745276933002, "grad_norm": 0.3206081986427307, "learning_rate": 1.865670782885408e-05, "loss": 0.5567, "step": 10950 }, { "epoch": 0.3364052468282493, "grad_norm": 0.3212815225124359, "learning_rate": 1.865646588348027e-05, "loss": 0.5431, "step": 10951 }, { "epoch": 0.33643596596319847, "grad_norm": 0.36628374457359314, "learning_rate": 1.865622391788868e-05, "loss": 0.5209, "step": 10952 }, { "epoch": 0.33646668509814764, "grad_norm": 0.35054340958595276, "learning_rate": 1.865598193207987e-05, "loss": 0.6049, "step": 10953 }, { "epoch": 0.3364974042330968, "grad_norm": 0.3340693414211273, "learning_rate": 1.865573992605441e-05, "loss": 0.608, "step": 10954 }, { "epoch": 0.33652812336804594, "grad_norm": 0.36932793259620667, "learning_rate": 1.8655497899812867e-05, "loss": 0.6004, "step": 10955 }, { "epoch": 0.3365588425029951, "grad_norm": 0.37458693981170654, "learning_rate": 1.865525585335581e-05, "loss": 0.5845, "step": 10956 }, { "epoch": 0.3365895616379443, "grad_norm": 0.3339962661266327, "learning_rate": 1.865501378668379e-05, "loss": 0.6057, "step": 10957 }, { "epoch": 0.3366202807728934, "grad_norm": 0.3687262237071991, "learning_rate": 1.865477169979738e-05, "loss": 0.6378, "step": 10958 }, { "epoch": 0.3366509999078426, "grad_norm": 0.34732627868652344, "learning_rate": 1.865452959269715e-05, "loss": 0.6538, "step": 10959 }, { "epoch": 0.33668171904279176, "grad_norm": 0.494842529296875, "learning_rate": 1.8654287465383665e-05, "loss": 0.5504, "step": 10960 }, { "epoch": 0.33671243817774094, "grad_norm": 0.3244752585887909, "learning_rate": 1.8654045317857483e-05, "loss": 0.575, "step": 10961 }, { "epoch": 0.33674315731269006, "grad_norm": 0.4256282150745392, "learning_rate": 1.8653803150119177e-05, "loss": 0.6437, "step": 10962 }, { "epoch": 0.33677387644763923, "grad_norm": 0.3013494610786438, "learning_rate": 1.8653560962169307e-05, "loss": 0.5412, "step": 10963 }, { "epoch": 0.3368045955825884, "grad_norm": 0.3456588387489319, "learning_rate": 1.865331875400844e-05, "loss": 0.5643, "step": 10964 }, { "epoch": 0.3368353147175376, "grad_norm": 0.3726348578929901, "learning_rate": 1.8653076525637145e-05, "loss": 0.6001, "step": 10965 }, { "epoch": 0.3368660338524867, "grad_norm": 0.34074866771698, "learning_rate": 1.8652834277055985e-05, "loss": 0.6311, "step": 10966 }, { "epoch": 0.3368967529874359, "grad_norm": 0.3440648019313812, "learning_rate": 1.8652592008265523e-05, "loss": 0.506, "step": 10967 }, { "epoch": 0.33692747212238505, "grad_norm": 0.42854443192481995, "learning_rate": 1.8652349719266333e-05, "loss": 0.6239, "step": 10968 }, { "epoch": 0.33695819125733417, "grad_norm": 0.33472809195518494, "learning_rate": 1.865210741005897e-05, "loss": 0.6469, "step": 10969 }, { "epoch": 0.33698891039228335, "grad_norm": 0.3574744760990143, "learning_rate": 1.865186508064401e-05, "loss": 0.6638, "step": 10970 }, { "epoch": 0.3370196295272325, "grad_norm": 0.3538910150527954, "learning_rate": 1.8651622731022017e-05, "loss": 0.6148, "step": 10971 }, { "epoch": 0.3370503486621817, "grad_norm": 0.33094072341918945, "learning_rate": 1.865138036119355e-05, "loss": 0.5947, "step": 10972 }, { "epoch": 0.3370810677971308, "grad_norm": 0.45138198137283325, "learning_rate": 1.8651137971159184e-05, "loss": 0.5634, "step": 10973 }, { "epoch": 0.33711178693208, "grad_norm": 0.3563874363899231, "learning_rate": 1.8650895560919476e-05, "loss": 0.6736, "step": 10974 }, { "epoch": 0.33714250606702917, "grad_norm": 0.3239794075489044, "learning_rate": 1.8650653130475e-05, "loss": 0.5734, "step": 10975 }, { "epoch": 0.3371732252019783, "grad_norm": 0.370481014251709, "learning_rate": 1.8650410679826323e-05, "loss": 0.6031, "step": 10976 }, { "epoch": 0.33720394433692746, "grad_norm": 0.35725533962249756, "learning_rate": 1.8650168208974e-05, "loss": 0.5987, "step": 10977 }, { "epoch": 0.33723466347187664, "grad_norm": 0.3613501191139221, "learning_rate": 1.864992571791861e-05, "loss": 0.663, "step": 10978 }, { "epoch": 0.3372653826068258, "grad_norm": 0.35239166021347046, "learning_rate": 1.8649683206660717e-05, "loss": 0.6276, "step": 10979 }, { "epoch": 0.33729610174177493, "grad_norm": 0.3890765607357025, "learning_rate": 1.8649440675200883e-05, "loss": 0.6288, "step": 10980 }, { "epoch": 0.3373268208767241, "grad_norm": 0.34113267064094543, "learning_rate": 1.8649198123539673e-05, "loss": 0.5985, "step": 10981 }, { "epoch": 0.3373575400116733, "grad_norm": 0.44092419743537903, "learning_rate": 1.864895555167766e-05, "loss": 0.5744, "step": 10982 }, { "epoch": 0.3373882591466224, "grad_norm": 0.2994008958339691, "learning_rate": 1.8648712959615406e-05, "loss": 0.5526, "step": 10983 }, { "epoch": 0.3374189782815716, "grad_norm": 0.3473685681819916, "learning_rate": 1.8648470347353477e-05, "loss": 0.6463, "step": 10984 }, { "epoch": 0.33744969741652076, "grad_norm": 0.3143885135650635, "learning_rate": 1.8648227714892442e-05, "loss": 0.592, "step": 10985 }, { "epoch": 0.33748041655146993, "grad_norm": 0.37550538778305054, "learning_rate": 1.864798506223287e-05, "loss": 0.6264, "step": 10986 }, { "epoch": 0.33751113568641905, "grad_norm": 0.31911855936050415, "learning_rate": 1.8647742389375323e-05, "loss": 0.577, "step": 10987 }, { "epoch": 0.3375418548213682, "grad_norm": 0.35962316393852234, "learning_rate": 1.8647499696320373e-05, "loss": 0.6209, "step": 10988 }, { "epoch": 0.3375725739563174, "grad_norm": 0.3588690161705017, "learning_rate": 1.8647256983068576e-05, "loss": 0.5322, "step": 10989 }, { "epoch": 0.3376032930912666, "grad_norm": 0.3337661623954773, "learning_rate": 1.8647014249620516e-05, "loss": 0.5666, "step": 10990 }, { "epoch": 0.3376340122262157, "grad_norm": 0.41163039207458496, "learning_rate": 1.8646771495976744e-05, "loss": 0.6076, "step": 10991 }, { "epoch": 0.33766473136116487, "grad_norm": 0.69966721534729, "learning_rate": 1.8646528722137836e-05, "loss": 0.5944, "step": 10992 }, { "epoch": 0.33769545049611405, "grad_norm": 0.35384923219680786, "learning_rate": 1.864628592810435e-05, "loss": 0.5824, "step": 10993 }, { "epoch": 0.33772616963106317, "grad_norm": 2.094980478286743, "learning_rate": 1.864604311387687e-05, "loss": 0.5665, "step": 10994 }, { "epoch": 0.33775688876601234, "grad_norm": 0.3644348084926605, "learning_rate": 1.8645800279455946e-05, "loss": 0.5883, "step": 10995 }, { "epoch": 0.3377876079009615, "grad_norm": 0.3173195421695709, "learning_rate": 1.864555742484215e-05, "loss": 0.6402, "step": 10996 }, { "epoch": 0.3378183270359107, "grad_norm": 0.3439981937408447, "learning_rate": 1.8645314550036054e-05, "loss": 0.5555, "step": 10997 }, { "epoch": 0.3378490461708598, "grad_norm": 0.34280335903167725, "learning_rate": 1.864507165503822e-05, "loss": 0.6046, "step": 10998 }, { "epoch": 0.337879765305809, "grad_norm": 0.38314467668533325, "learning_rate": 1.864482873984922e-05, "loss": 0.5658, "step": 10999 }, { "epoch": 0.33791048444075816, "grad_norm": 0.3391181528568268, "learning_rate": 1.8644585804469616e-05, "loss": 0.6004, "step": 11000 }, { "epoch": 0.3379412035757073, "grad_norm": 0.3410094976425171, "learning_rate": 1.8644342848899983e-05, "loss": 0.6088, "step": 11001 }, { "epoch": 0.33797192271065646, "grad_norm": 0.30816569924354553, "learning_rate": 1.864409987314088e-05, "loss": 0.5516, "step": 11002 }, { "epoch": 0.33800264184560563, "grad_norm": 0.4419858157634735, "learning_rate": 1.8643856877192876e-05, "loss": 0.6223, "step": 11003 }, { "epoch": 0.3380333609805548, "grad_norm": 0.3581450879573822, "learning_rate": 1.8643613861056544e-05, "loss": 0.5593, "step": 11004 }, { "epoch": 0.33806408011550393, "grad_norm": 1.5918728113174438, "learning_rate": 1.8643370824732446e-05, "loss": 0.5961, "step": 11005 }, { "epoch": 0.3380947992504531, "grad_norm": 0.33549004793167114, "learning_rate": 1.864312776822115e-05, "loss": 0.5692, "step": 11006 }, { "epoch": 0.3381255183854023, "grad_norm": 0.33342182636260986, "learning_rate": 1.864288469152323e-05, "loss": 0.4973, "step": 11007 }, { "epoch": 0.3381562375203514, "grad_norm": 0.3189191222190857, "learning_rate": 1.864264159463925e-05, "loss": 0.5396, "step": 11008 }, { "epoch": 0.3381869566553006, "grad_norm": 0.4685843288898468, "learning_rate": 1.8642398477569774e-05, "loss": 0.5651, "step": 11009 }, { "epoch": 0.33821767579024975, "grad_norm": 0.34129664301872253, "learning_rate": 1.864215534031537e-05, "loss": 0.5725, "step": 11010 }, { "epoch": 0.3382483949251989, "grad_norm": 0.35004666447639465, "learning_rate": 1.8641912182876614e-05, "loss": 0.6296, "step": 11011 }, { "epoch": 0.33827911406014805, "grad_norm": 0.3638405203819275, "learning_rate": 1.8641669005254067e-05, "loss": 0.5628, "step": 11012 }, { "epoch": 0.3383098331950972, "grad_norm": 0.33280259370803833, "learning_rate": 1.8641425807448298e-05, "loss": 0.6158, "step": 11013 }, { "epoch": 0.3383405523300464, "grad_norm": 0.3526116609573364, "learning_rate": 1.8641182589459875e-05, "loss": 0.5496, "step": 11014 }, { "epoch": 0.33837127146499557, "grad_norm": 0.35919174551963806, "learning_rate": 1.8640939351289367e-05, "loss": 0.4933, "step": 11015 }, { "epoch": 0.3384019905999447, "grad_norm": 0.3377506136894226, "learning_rate": 1.864069609293734e-05, "loss": 0.5154, "step": 11016 }, { "epoch": 0.33843270973489387, "grad_norm": 0.33010929822921753, "learning_rate": 1.8640452814404368e-05, "loss": 0.5765, "step": 11017 }, { "epoch": 0.33846342886984304, "grad_norm": 0.38478830456733704, "learning_rate": 1.8640209515691012e-05, "loss": 0.5133, "step": 11018 }, { "epoch": 0.33849414800479216, "grad_norm": 0.3408884108066559, "learning_rate": 1.8639966196797844e-05, "loss": 0.5467, "step": 11019 }, { "epoch": 0.33852486713974134, "grad_norm": 0.49715012311935425, "learning_rate": 1.863972285772543e-05, "loss": 0.5156, "step": 11020 }, { "epoch": 0.3385555862746905, "grad_norm": 0.31690165400505066, "learning_rate": 1.8639479498474342e-05, "loss": 0.5857, "step": 11021 }, { "epoch": 0.3385863054096397, "grad_norm": 0.4242798388004303, "learning_rate": 1.8639236119045146e-05, "loss": 0.5237, "step": 11022 }, { "epoch": 0.3386170245445888, "grad_norm": 0.3345758020877838, "learning_rate": 1.8638992719438408e-05, "loss": 0.5842, "step": 11023 }, { "epoch": 0.338647743679538, "grad_norm": 0.32147929072380066, "learning_rate": 1.8638749299654702e-05, "loss": 0.5299, "step": 11024 }, { "epoch": 0.33867846281448716, "grad_norm": 0.35453954339027405, "learning_rate": 1.8638505859694594e-05, "loss": 0.6693, "step": 11025 }, { "epoch": 0.3387091819494363, "grad_norm": 0.47281602025032043, "learning_rate": 1.863826239955865e-05, "loss": 0.6733, "step": 11026 }, { "epoch": 0.33873990108438545, "grad_norm": 0.34411466121673584, "learning_rate": 1.8638018919247442e-05, "loss": 0.644, "step": 11027 }, { "epoch": 0.33877062021933463, "grad_norm": 0.35835275053977966, "learning_rate": 1.8637775418761537e-05, "loss": 0.6007, "step": 11028 }, { "epoch": 0.3388013393542838, "grad_norm": 0.3387131094932556, "learning_rate": 1.8637531898101503e-05, "loss": 0.6317, "step": 11029 }, { "epoch": 0.3388320584892329, "grad_norm": 0.3222666084766388, "learning_rate": 1.8637288357267915e-05, "loss": 0.5497, "step": 11030 }, { "epoch": 0.3388627776241821, "grad_norm": 0.3386552333831787, "learning_rate": 1.8637044796261333e-05, "loss": 0.5295, "step": 11031 }, { "epoch": 0.3388934967591313, "grad_norm": 0.38005682826042175, "learning_rate": 1.863680121508233e-05, "loss": 0.5673, "step": 11032 }, { "epoch": 0.33892421589408045, "grad_norm": 0.3063769042491913, "learning_rate": 1.8636557613731474e-05, "loss": 0.6039, "step": 11033 }, { "epoch": 0.33895493502902957, "grad_norm": 0.3599638342857361, "learning_rate": 1.8636313992209335e-05, "loss": 0.4753, "step": 11034 }, { "epoch": 0.33898565416397874, "grad_norm": 0.33506882190704346, "learning_rate": 1.8636070350516485e-05, "loss": 0.64, "step": 11035 }, { "epoch": 0.3390163732989279, "grad_norm": 0.33065932989120483, "learning_rate": 1.8635826688653487e-05, "loss": 0.5524, "step": 11036 }, { "epoch": 0.33904709243387704, "grad_norm": 0.40019503235816956, "learning_rate": 1.8635583006620913e-05, "loss": 0.6273, "step": 11037 }, { "epoch": 0.3390778115688262, "grad_norm": 0.3395794630050659, "learning_rate": 1.863533930441933e-05, "loss": 0.6404, "step": 11038 }, { "epoch": 0.3391085307037754, "grad_norm": 0.31555625796318054, "learning_rate": 1.8635095582049308e-05, "loss": 0.542, "step": 11039 }, { "epoch": 0.33913924983872457, "grad_norm": 0.3160875141620636, "learning_rate": 1.863485183951142e-05, "loss": 0.567, "step": 11040 }, { "epoch": 0.3391699689736737, "grad_norm": 0.332449734210968, "learning_rate": 1.8634608076806232e-05, "loss": 0.5305, "step": 11041 }, { "epoch": 0.33920068810862286, "grad_norm": 0.3688943684101105, "learning_rate": 1.8634364293934314e-05, "loss": 0.6316, "step": 11042 }, { "epoch": 0.33923140724357204, "grad_norm": 0.343375563621521, "learning_rate": 1.8634120490896233e-05, "loss": 0.4962, "step": 11043 }, { "epoch": 0.33926212637852116, "grad_norm": 0.2909402847290039, "learning_rate": 1.8633876667692564e-05, "loss": 0.5429, "step": 11044 }, { "epoch": 0.33929284551347033, "grad_norm": 0.35118696093559265, "learning_rate": 1.863363282432387e-05, "loss": 0.5873, "step": 11045 }, { "epoch": 0.3393235646484195, "grad_norm": 0.3540039360523224, "learning_rate": 1.8633388960790726e-05, "loss": 0.6079, "step": 11046 }, { "epoch": 0.3393542837833687, "grad_norm": 0.6016457676887512, "learning_rate": 1.8633145077093696e-05, "loss": 0.5813, "step": 11047 }, { "epoch": 0.3393850029183178, "grad_norm": 0.3109283745288849, "learning_rate": 1.8632901173233354e-05, "loss": 0.52, "step": 11048 }, { "epoch": 0.339415722053267, "grad_norm": 0.35506173968315125, "learning_rate": 1.863265724921027e-05, "loss": 0.6037, "step": 11049 }, { "epoch": 0.33944644118821615, "grad_norm": 0.3438050448894501, "learning_rate": 1.863241330502501e-05, "loss": 0.5872, "step": 11050 }, { "epoch": 0.3394771603231653, "grad_norm": 0.4035172760486603, "learning_rate": 1.863216934067815e-05, "loss": 0.6325, "step": 11051 }, { "epoch": 0.33950787945811445, "grad_norm": 0.3438054323196411, "learning_rate": 1.863192535617025e-05, "loss": 0.6495, "step": 11052 }, { "epoch": 0.3395385985930636, "grad_norm": 0.34196579456329346, "learning_rate": 1.863168135150189e-05, "loss": 0.6008, "step": 11053 }, { "epoch": 0.3395693177280128, "grad_norm": 0.349669486284256, "learning_rate": 1.8631437326673635e-05, "loss": 0.562, "step": 11054 }, { "epoch": 0.3396000368629619, "grad_norm": 0.3356345295906067, "learning_rate": 1.863119328168605e-05, "loss": 0.6225, "step": 11055 }, { "epoch": 0.3396307559979111, "grad_norm": 0.37015679478645325, "learning_rate": 1.8630949216539716e-05, "loss": 0.6329, "step": 11056 }, { "epoch": 0.33966147513286027, "grad_norm": 0.36511096358299255, "learning_rate": 1.8630705131235193e-05, "loss": 0.5771, "step": 11057 }, { "epoch": 0.33969219426780944, "grad_norm": 0.3766273856163025, "learning_rate": 1.8630461025773062e-05, "loss": 0.572, "step": 11058 }, { "epoch": 0.33972291340275856, "grad_norm": 0.3525831699371338, "learning_rate": 1.863021690015388e-05, "loss": 0.5089, "step": 11059 }, { "epoch": 0.33975363253770774, "grad_norm": 0.4090384840965271, "learning_rate": 1.862997275437823e-05, "loss": 0.6026, "step": 11060 }, { "epoch": 0.3397843516726569, "grad_norm": 0.3799320459365845, "learning_rate": 1.8629728588446666e-05, "loss": 0.6246, "step": 11061 }, { "epoch": 0.33981507080760603, "grad_norm": 0.33971408009529114, "learning_rate": 1.8629484402359777e-05, "loss": 0.5315, "step": 11062 }, { "epoch": 0.3398457899425552, "grad_norm": 0.349869042634964, "learning_rate": 1.8629240196118123e-05, "loss": 0.4634, "step": 11063 }, { "epoch": 0.3398765090775044, "grad_norm": 0.3581966757774353, "learning_rate": 1.8628995969722275e-05, "loss": 0.6208, "step": 11064 }, { "epoch": 0.33990722821245356, "grad_norm": 0.33979761600494385, "learning_rate": 1.8628751723172803e-05, "loss": 0.5982, "step": 11065 }, { "epoch": 0.3399379473474027, "grad_norm": 0.38578012585639954, "learning_rate": 1.8628507456470276e-05, "loss": 0.5067, "step": 11066 }, { "epoch": 0.33996866648235186, "grad_norm": 0.32075613737106323, "learning_rate": 1.8628263169615273e-05, "loss": 0.5767, "step": 11067 }, { "epoch": 0.33999938561730103, "grad_norm": 0.3237263858318329, "learning_rate": 1.8628018862608356e-05, "loss": 0.5462, "step": 11068 }, { "epoch": 0.34003010475225015, "grad_norm": 0.3333624005317688, "learning_rate": 1.86277745354501e-05, "loss": 0.5709, "step": 11069 }, { "epoch": 0.3400608238871993, "grad_norm": 0.353435218334198, "learning_rate": 1.8627530188141074e-05, "loss": 0.5212, "step": 11070 }, { "epoch": 0.3400915430221485, "grad_norm": 0.3702336549758911, "learning_rate": 1.8627285820681846e-05, "loss": 0.5122, "step": 11071 }, { "epoch": 0.3401222621570977, "grad_norm": 0.3219890892505646, "learning_rate": 1.8627041433072993e-05, "loss": 0.5975, "step": 11072 }, { "epoch": 0.3401529812920468, "grad_norm": 0.3582148551940918, "learning_rate": 1.8626797025315078e-05, "loss": 0.5776, "step": 11073 }, { "epoch": 0.34018370042699597, "grad_norm": 0.33101049065589905, "learning_rate": 1.862655259740868e-05, "loss": 0.6125, "step": 11074 }, { "epoch": 0.34021441956194515, "grad_norm": 0.3608841001987457, "learning_rate": 1.8626308149354365e-05, "loss": 0.6563, "step": 11075 }, { "epoch": 0.3402451386968943, "grad_norm": 0.37662819027900696, "learning_rate": 1.86260636811527e-05, "loss": 0.5603, "step": 11076 }, { "epoch": 0.34027585783184344, "grad_norm": 0.33038198947906494, "learning_rate": 1.8625819192804268e-05, "loss": 0.6225, "step": 11077 }, { "epoch": 0.3403065769667926, "grad_norm": 0.3986881375312805, "learning_rate": 1.862557468430963e-05, "loss": 0.6234, "step": 11078 }, { "epoch": 0.3403372961017418, "grad_norm": 0.3569243550300598, "learning_rate": 1.862533015566936e-05, "loss": 0.6045, "step": 11079 }, { "epoch": 0.3403680152366909, "grad_norm": 0.397916704416275, "learning_rate": 1.8625085606884027e-05, "loss": 0.6103, "step": 11080 }, { "epoch": 0.3403987343716401, "grad_norm": 0.3846897780895233, "learning_rate": 1.8624841037954205e-05, "loss": 0.5336, "step": 11081 }, { "epoch": 0.34042945350658926, "grad_norm": 0.3215053379535675, "learning_rate": 1.862459644888047e-05, "loss": 0.551, "step": 11082 }, { "epoch": 0.34046017264153844, "grad_norm": 0.3341764509677887, "learning_rate": 1.8624351839663378e-05, "loss": 0.5819, "step": 11083 }, { "epoch": 0.34049089177648756, "grad_norm": 0.3392646014690399, "learning_rate": 1.8624107210303518e-05, "loss": 0.5757, "step": 11084 }, { "epoch": 0.34052161091143673, "grad_norm": 0.38126063346862793, "learning_rate": 1.8623862560801452e-05, "loss": 0.6286, "step": 11085 }, { "epoch": 0.3405523300463859, "grad_norm": 0.43984255194664, "learning_rate": 1.8623617891157752e-05, "loss": 0.5892, "step": 11086 }, { "epoch": 0.34058304918133503, "grad_norm": 1.0877629518508911, "learning_rate": 1.8623373201372988e-05, "loss": 0.5688, "step": 11087 }, { "epoch": 0.3406137683162842, "grad_norm": 0.34341078996658325, "learning_rate": 1.8623128491447735e-05, "loss": 0.6009, "step": 11088 }, { "epoch": 0.3406444874512334, "grad_norm": 0.38723084330558777, "learning_rate": 1.8622883761382562e-05, "loss": 0.5808, "step": 11089 }, { "epoch": 0.34067520658618256, "grad_norm": 0.34634119272232056, "learning_rate": 1.862263901117804e-05, "loss": 0.5435, "step": 11090 }, { "epoch": 0.3407059257211317, "grad_norm": 0.36457398533821106, "learning_rate": 1.862239424083475e-05, "loss": 0.5423, "step": 11091 }, { "epoch": 0.34073664485608085, "grad_norm": 0.37311074137687683, "learning_rate": 1.862214945035325e-05, "loss": 0.6413, "step": 11092 }, { "epoch": 0.34076736399103, "grad_norm": 0.34568992257118225, "learning_rate": 1.862190463973412e-05, "loss": 0.5629, "step": 11093 }, { "epoch": 0.34079808312597915, "grad_norm": 0.34738636016845703, "learning_rate": 1.8621659808977925e-05, "loss": 0.5576, "step": 11094 }, { "epoch": 0.3408288022609283, "grad_norm": 0.32529547810554504, "learning_rate": 1.8621414958085247e-05, "loss": 0.5925, "step": 11095 }, { "epoch": 0.3408595213958775, "grad_norm": 0.3720070421695709, "learning_rate": 1.862117008705665e-05, "loss": 0.6468, "step": 11096 }, { "epoch": 0.34089024053082667, "grad_norm": 0.31440281867980957, "learning_rate": 1.8620925195892708e-05, "loss": 0.5314, "step": 11097 }, { "epoch": 0.3409209596657758, "grad_norm": 0.436015248298645, "learning_rate": 1.862068028459399e-05, "loss": 0.6796, "step": 11098 }, { "epoch": 0.34095167880072497, "grad_norm": 0.387199342250824, "learning_rate": 1.8620435353161078e-05, "loss": 0.5944, "step": 11099 }, { "epoch": 0.34098239793567414, "grad_norm": 0.3803299367427826, "learning_rate": 1.8620190401594534e-05, "loss": 0.6009, "step": 11100 }, { "epoch": 0.3410131170706233, "grad_norm": 0.3533187508583069, "learning_rate": 1.861994542989493e-05, "loss": 0.5797, "step": 11101 }, { "epoch": 0.34104383620557244, "grad_norm": 0.329806387424469, "learning_rate": 1.861970043806285e-05, "loss": 0.5087, "step": 11102 }, { "epoch": 0.3410745553405216, "grad_norm": 0.3524596393108368, "learning_rate": 1.861945542609885e-05, "loss": 0.6064, "step": 11103 }, { "epoch": 0.3411052744754708, "grad_norm": 0.32363978028297424, "learning_rate": 1.861921039400351e-05, "loss": 0.6324, "step": 11104 }, { "epoch": 0.3411359936104199, "grad_norm": 0.3449985384941101, "learning_rate": 1.8618965341777408e-05, "loss": 0.5818, "step": 11105 }, { "epoch": 0.3411667127453691, "grad_norm": 0.32888537645339966, "learning_rate": 1.8618720269421106e-05, "loss": 0.5699, "step": 11106 }, { "epoch": 0.34119743188031826, "grad_norm": 0.3665076196193695, "learning_rate": 1.861847517693518e-05, "loss": 0.5566, "step": 11107 }, { "epoch": 0.34122815101526743, "grad_norm": 0.31529533863067627, "learning_rate": 1.8618230064320205e-05, "loss": 0.6221, "step": 11108 }, { "epoch": 0.34125887015021655, "grad_norm": 0.3532223701477051, "learning_rate": 1.8617984931576752e-05, "loss": 0.618, "step": 11109 }, { "epoch": 0.34128958928516573, "grad_norm": 0.42688506841659546, "learning_rate": 1.861773977870539e-05, "loss": 0.5504, "step": 11110 }, { "epoch": 0.3413203084201149, "grad_norm": 0.37073132395744324, "learning_rate": 1.86174946057067e-05, "loss": 0.6374, "step": 11111 }, { "epoch": 0.341351027555064, "grad_norm": 0.3597749173641205, "learning_rate": 1.861724941258125e-05, "loss": 0.6309, "step": 11112 }, { "epoch": 0.3413817466900132, "grad_norm": 0.3593454658985138, "learning_rate": 1.861700419932961e-05, "loss": 0.5908, "step": 11113 }, { "epoch": 0.3414124658249624, "grad_norm": 0.37456005811691284, "learning_rate": 1.8616758965952354e-05, "loss": 0.579, "step": 11114 }, { "epoch": 0.34144318495991155, "grad_norm": 0.3402197062969208, "learning_rate": 1.8616513712450057e-05, "loss": 0.5968, "step": 11115 }, { "epoch": 0.34147390409486067, "grad_norm": 0.309853732585907, "learning_rate": 1.861626843882329e-05, "loss": 0.5528, "step": 11116 }, { "epoch": 0.34150462322980984, "grad_norm": 0.4240471422672272, "learning_rate": 1.8616023145072626e-05, "loss": 0.6023, "step": 11117 }, { "epoch": 0.341535342364759, "grad_norm": 0.41890910267829895, "learning_rate": 1.8615777831198635e-05, "loss": 0.5387, "step": 11118 }, { "epoch": 0.3415660614997082, "grad_norm": 0.3436014652252197, "learning_rate": 1.8615532497201898e-05, "loss": 0.4585, "step": 11119 }, { "epoch": 0.3415967806346573, "grad_norm": 0.35184210538864136, "learning_rate": 1.861528714308298e-05, "loss": 0.5669, "step": 11120 }, { "epoch": 0.3416274997696065, "grad_norm": 0.3153057098388672, "learning_rate": 1.861504176884246e-05, "loss": 0.6079, "step": 11121 }, { "epoch": 0.34165821890455567, "grad_norm": 0.35655996203422546, "learning_rate": 1.8614796374480905e-05, "loss": 0.6727, "step": 11122 }, { "epoch": 0.3416889380395048, "grad_norm": 0.353419691324234, "learning_rate": 1.8614550959998895e-05, "loss": 0.5936, "step": 11123 }, { "epoch": 0.34171965717445396, "grad_norm": 0.3351269066333771, "learning_rate": 1.8614305525397e-05, "loss": 0.5111, "step": 11124 }, { "epoch": 0.34175037630940314, "grad_norm": 0.3439008593559265, "learning_rate": 1.8614060070675788e-05, "loss": 0.5423, "step": 11125 }, { "epoch": 0.3417810954443523, "grad_norm": 0.35411331057548523, "learning_rate": 1.861381459583584e-05, "loss": 0.6396, "step": 11126 }, { "epoch": 0.34181181457930143, "grad_norm": 0.3278159201145172, "learning_rate": 1.8613569100877727e-05, "loss": 0.5909, "step": 11127 }, { "epoch": 0.3418425337142506, "grad_norm": 0.3639346659183502, "learning_rate": 1.861332358580202e-05, "loss": 0.5921, "step": 11128 }, { "epoch": 0.3418732528491998, "grad_norm": 0.384227454662323, "learning_rate": 1.8613078050609297e-05, "loss": 0.596, "step": 11129 }, { "epoch": 0.3419039719841489, "grad_norm": 0.37164050340652466, "learning_rate": 1.8612832495300127e-05, "loss": 0.5539, "step": 11130 }, { "epoch": 0.3419346911190981, "grad_norm": 0.3934893012046814, "learning_rate": 1.8612586919875088e-05, "loss": 0.6164, "step": 11131 }, { "epoch": 0.34196541025404725, "grad_norm": 0.33525359630584717, "learning_rate": 1.8612341324334747e-05, "loss": 0.6407, "step": 11132 }, { "epoch": 0.34199612938899643, "grad_norm": 0.32964587211608887, "learning_rate": 1.861209570867968e-05, "loss": 0.5533, "step": 11133 }, { "epoch": 0.34202684852394555, "grad_norm": 0.36004775762557983, "learning_rate": 1.8611850072910465e-05, "loss": 0.5956, "step": 11134 }, { "epoch": 0.3420575676588947, "grad_norm": 0.33482539653778076, "learning_rate": 1.8611604417027673e-05, "loss": 0.595, "step": 11135 }, { "epoch": 0.3420882867938439, "grad_norm": 0.3980102241039276, "learning_rate": 1.8611358741031874e-05, "loss": 0.6425, "step": 11136 }, { "epoch": 0.342119005928793, "grad_norm": 0.34282219409942627, "learning_rate": 1.861111304492365e-05, "loss": 0.5576, "step": 11137 }, { "epoch": 0.3421497250637422, "grad_norm": 0.34741151332855225, "learning_rate": 1.8610867328703568e-05, "loss": 0.5464, "step": 11138 }, { "epoch": 0.34218044419869137, "grad_norm": 0.3520882725715637, "learning_rate": 1.8610621592372202e-05, "loss": 0.5824, "step": 11139 }, { "epoch": 0.34221116333364054, "grad_norm": 0.47586777806282043, "learning_rate": 1.861037583593013e-05, "loss": 0.5916, "step": 11140 }, { "epoch": 0.34224188246858966, "grad_norm": 0.36276936531066895, "learning_rate": 1.8610130059377923e-05, "loss": 0.5722, "step": 11141 }, { "epoch": 0.34227260160353884, "grad_norm": 0.35634395480155945, "learning_rate": 1.8609884262716158e-05, "loss": 0.5475, "step": 11142 }, { "epoch": 0.342303320738488, "grad_norm": 0.4217076897621155, "learning_rate": 1.8609638445945407e-05, "loss": 0.5927, "step": 11143 }, { "epoch": 0.3423340398734372, "grad_norm": 0.3871571123600006, "learning_rate": 1.860939260906624e-05, "loss": 0.5914, "step": 11144 }, { "epoch": 0.3423647590083863, "grad_norm": 0.33531180024147034, "learning_rate": 1.860914675207924e-05, "loss": 0.5456, "step": 11145 }, { "epoch": 0.3423954781433355, "grad_norm": 0.48474451899528503, "learning_rate": 1.8608900874984974e-05, "loss": 0.5879, "step": 11146 }, { "epoch": 0.34242619727828466, "grad_norm": 0.3741089701652527, "learning_rate": 1.8608654977784017e-05, "loss": 0.548, "step": 11147 }, { "epoch": 0.3424569164132338, "grad_norm": 0.38475582003593445, "learning_rate": 1.8608409060476947e-05, "loss": 0.5686, "step": 11148 }, { "epoch": 0.34248763554818296, "grad_norm": 0.35391002893447876, "learning_rate": 1.860816312306434e-05, "loss": 0.5365, "step": 11149 }, { "epoch": 0.34251835468313213, "grad_norm": 0.33900707960128784, "learning_rate": 1.860791716554676e-05, "loss": 0.6506, "step": 11150 }, { "epoch": 0.3425490738180813, "grad_norm": 0.40120187401771545, "learning_rate": 1.8607671187924795e-05, "loss": 0.6486, "step": 11151 }, { "epoch": 0.3425797929530304, "grad_norm": 0.41863295435905457, "learning_rate": 1.8607425190199008e-05, "loss": 0.4805, "step": 11152 }, { "epoch": 0.3426105120879796, "grad_norm": 0.33172130584716797, "learning_rate": 1.860717917236998e-05, "loss": 0.6059, "step": 11153 }, { "epoch": 0.3426412312229288, "grad_norm": 0.36837539076805115, "learning_rate": 1.8606933134438284e-05, "loss": 0.5573, "step": 11154 }, { "epoch": 0.3426719503578779, "grad_norm": 2.698664903640747, "learning_rate": 1.8606687076404497e-05, "loss": 0.5937, "step": 11155 }, { "epoch": 0.3427026694928271, "grad_norm": 0.3552975654602051, "learning_rate": 1.8606440998269187e-05, "loss": 0.6694, "step": 11156 }, { "epoch": 0.34273338862777625, "grad_norm": 0.38292258977890015, "learning_rate": 1.8606194900032936e-05, "loss": 0.5444, "step": 11157 }, { "epoch": 0.3427641077627254, "grad_norm": 0.32747316360473633, "learning_rate": 1.860594878169631e-05, "loss": 0.6881, "step": 11158 }, { "epoch": 0.34279482689767454, "grad_norm": 0.3625706732273102, "learning_rate": 1.8605702643259896e-05, "loss": 0.6028, "step": 11159 }, { "epoch": 0.3428255460326237, "grad_norm": 0.34614983201026917, "learning_rate": 1.8605456484724263e-05, "loss": 0.6218, "step": 11160 }, { "epoch": 0.3428562651675729, "grad_norm": 0.3427746593952179, "learning_rate": 1.860521030608998e-05, "loss": 0.5447, "step": 11161 }, { "epoch": 0.34288698430252207, "grad_norm": 0.4864635169506073, "learning_rate": 1.8604964107357632e-05, "loss": 0.6335, "step": 11162 }, { "epoch": 0.3429177034374712, "grad_norm": 0.3370722532272339, "learning_rate": 1.860471788852779e-05, "loss": 0.6029, "step": 11163 }, { "epoch": 0.34294842257242036, "grad_norm": 0.4125654995441437, "learning_rate": 1.8604471649601024e-05, "loss": 0.5331, "step": 11164 }, { "epoch": 0.34297914170736954, "grad_norm": 0.3870820105075836, "learning_rate": 1.8604225390577916e-05, "loss": 0.6349, "step": 11165 }, { "epoch": 0.34300986084231866, "grad_norm": 0.376215398311615, "learning_rate": 1.8603979111459035e-05, "loss": 0.5467, "step": 11166 }, { "epoch": 0.34304057997726783, "grad_norm": 0.3559834659099579, "learning_rate": 1.8603732812244966e-05, "loss": 0.5686, "step": 11167 }, { "epoch": 0.343071299112217, "grad_norm": 0.5057979226112366, "learning_rate": 1.8603486492936276e-05, "loss": 0.604, "step": 11168 }, { "epoch": 0.3431020182471662, "grad_norm": 0.34592190384864807, "learning_rate": 1.8603240153533542e-05, "loss": 0.5617, "step": 11169 }, { "epoch": 0.3431327373821153, "grad_norm": 0.30156487226486206, "learning_rate": 1.8602993794037337e-05, "loss": 0.5494, "step": 11170 }, { "epoch": 0.3431634565170645, "grad_norm": 0.34259510040283203, "learning_rate": 1.8602747414448243e-05, "loss": 0.6202, "step": 11171 }, { "epoch": 0.34319417565201366, "grad_norm": 0.40559443831443787, "learning_rate": 1.860250101476683e-05, "loss": 0.5545, "step": 11172 }, { "epoch": 0.3432248947869628, "grad_norm": 0.3801218569278717, "learning_rate": 1.8602254594993673e-05, "loss": 0.513, "step": 11173 }, { "epoch": 0.34325561392191195, "grad_norm": 0.3341814875602722, "learning_rate": 1.8602008155129355e-05, "loss": 0.6334, "step": 11174 }, { "epoch": 0.3432863330568611, "grad_norm": 0.3737727403640747, "learning_rate": 1.8601761695174443e-05, "loss": 0.635, "step": 11175 }, { "epoch": 0.3433170521918103, "grad_norm": 0.45473435521125793, "learning_rate": 1.8601515215129515e-05, "loss": 0.4685, "step": 11176 }, { "epoch": 0.3433477713267594, "grad_norm": 0.3221685290336609, "learning_rate": 1.8601268714995148e-05, "loss": 0.5156, "step": 11177 }, { "epoch": 0.3433784904617086, "grad_norm": 0.34333279728889465, "learning_rate": 1.860102219477192e-05, "loss": 0.5286, "step": 11178 }, { "epoch": 0.34340920959665777, "grad_norm": 0.35848498344421387, "learning_rate": 1.8600775654460398e-05, "loss": 0.5283, "step": 11179 }, { "epoch": 0.3434399287316069, "grad_norm": 0.3704570531845093, "learning_rate": 1.860052909406117e-05, "loss": 0.5726, "step": 11180 }, { "epoch": 0.34347064786655607, "grad_norm": 0.3448808193206787, "learning_rate": 1.86002825135748e-05, "loss": 0.5344, "step": 11181 }, { "epoch": 0.34350136700150524, "grad_norm": 0.34196579456329346, "learning_rate": 1.8600035913001876e-05, "loss": 0.571, "step": 11182 }, { "epoch": 0.3435320861364544, "grad_norm": 0.34442174434661865, "learning_rate": 1.8599789292342964e-05, "loss": 0.6118, "step": 11183 }, { "epoch": 0.34356280527140354, "grad_norm": 0.3427630662918091, "learning_rate": 1.8599542651598643e-05, "loss": 0.5678, "step": 11184 }, { "epoch": 0.3435935244063527, "grad_norm": 0.3552021384239197, "learning_rate": 1.859929599076949e-05, "loss": 0.583, "step": 11185 }, { "epoch": 0.3436242435413019, "grad_norm": 0.35238149762153625, "learning_rate": 1.859904930985608e-05, "loss": 0.5585, "step": 11186 }, { "epoch": 0.34365496267625106, "grad_norm": 0.33924660086631775, "learning_rate": 1.8598802608858993e-05, "loss": 0.6018, "step": 11187 }, { "epoch": 0.3436856818112002, "grad_norm": 0.34806764125823975, "learning_rate": 1.85985558877788e-05, "loss": 0.6826, "step": 11188 }, { "epoch": 0.34371640094614936, "grad_norm": 0.33460476994514465, "learning_rate": 1.859830914661608e-05, "loss": 0.5426, "step": 11189 }, { "epoch": 0.34374712008109853, "grad_norm": 0.3422606587409973, "learning_rate": 1.8598062385371405e-05, "loss": 0.5795, "step": 11190 }, { "epoch": 0.34377783921604765, "grad_norm": 0.33373022079467773, "learning_rate": 1.8597815604045356e-05, "loss": 0.5494, "step": 11191 }, { "epoch": 0.34380855835099683, "grad_norm": 0.3859928846359253, "learning_rate": 1.859756880263851e-05, "loss": 0.6635, "step": 11192 }, { "epoch": 0.343839277485946, "grad_norm": 0.31303292512893677, "learning_rate": 1.8597321981151443e-05, "loss": 0.5164, "step": 11193 }, { "epoch": 0.3438699966208952, "grad_norm": 0.32232704758644104, "learning_rate": 1.8597075139584725e-05, "loss": 0.5215, "step": 11194 }, { "epoch": 0.3439007157558443, "grad_norm": 0.34424033761024475, "learning_rate": 1.859682827793894e-05, "loss": 0.5741, "step": 11195 }, { "epoch": 0.3439314348907935, "grad_norm": 0.34334006905555725, "learning_rate": 1.8596581396214666e-05, "loss": 0.5322, "step": 11196 }, { "epoch": 0.34396215402574265, "grad_norm": 0.439343124628067, "learning_rate": 1.859633449441247e-05, "loss": 0.619, "step": 11197 }, { "epoch": 0.34399287316069177, "grad_norm": 0.31768932938575745, "learning_rate": 1.8596087572532938e-05, "loss": 0.5914, "step": 11198 }, { "epoch": 0.34402359229564095, "grad_norm": 0.3302557170391083, "learning_rate": 1.8595840630576644e-05, "loss": 0.5807, "step": 11199 }, { "epoch": 0.3440543114305901, "grad_norm": 0.35169729590415955, "learning_rate": 1.8595593668544162e-05, "loss": 0.5746, "step": 11200 }, { "epoch": 0.3440850305655393, "grad_norm": 0.3714078962802887, "learning_rate": 1.859534668643607e-05, "loss": 0.592, "step": 11201 }, { "epoch": 0.3441157497004884, "grad_norm": 0.34859949350357056, "learning_rate": 1.8595099684252945e-05, "loss": 0.5723, "step": 11202 }, { "epoch": 0.3441464688354376, "grad_norm": 0.38575413823127747, "learning_rate": 1.8594852661995367e-05, "loss": 0.5724, "step": 11203 }, { "epoch": 0.34417718797038677, "grad_norm": 0.33201536536216736, "learning_rate": 1.8594605619663907e-05, "loss": 0.6505, "step": 11204 }, { "epoch": 0.3442079071053359, "grad_norm": 0.3625660240650177, "learning_rate": 1.859435855725915e-05, "loss": 0.5085, "step": 11205 }, { "epoch": 0.34423862624028506, "grad_norm": 0.36338937282562256, "learning_rate": 1.8594111474781664e-05, "loss": 0.5191, "step": 11206 }, { "epoch": 0.34426934537523424, "grad_norm": 0.36481937766075134, "learning_rate": 1.8593864372232034e-05, "loss": 0.6289, "step": 11207 }, { "epoch": 0.3443000645101834, "grad_norm": 0.33946773409843445, "learning_rate": 1.8593617249610832e-05, "loss": 0.5288, "step": 11208 }, { "epoch": 0.34433078364513253, "grad_norm": 0.3741496205329895, "learning_rate": 1.8593370106918636e-05, "loss": 0.546, "step": 11209 }, { "epoch": 0.3443615027800817, "grad_norm": 0.3364458680152893, "learning_rate": 1.8593122944156022e-05, "loss": 0.53, "step": 11210 }, { "epoch": 0.3443922219150309, "grad_norm": 0.32655152678489685, "learning_rate": 1.859287576132357e-05, "loss": 0.5286, "step": 11211 }, { "epoch": 0.34442294104998006, "grad_norm": 0.331542044878006, "learning_rate": 1.8592628558421858e-05, "loss": 0.5469, "step": 11212 }, { "epoch": 0.3444536601849292, "grad_norm": 0.34452229738235474, "learning_rate": 1.8592381335451468e-05, "loss": 0.4848, "step": 11213 }, { "epoch": 0.34448437931987835, "grad_norm": 0.3501024842262268, "learning_rate": 1.8592134092412962e-05, "loss": 0.5621, "step": 11214 }, { "epoch": 0.34451509845482753, "grad_norm": 0.3410688638687134, "learning_rate": 1.8591886829306932e-05, "loss": 0.5929, "step": 11215 }, { "epoch": 0.34454581758977665, "grad_norm": 0.41633424162864685, "learning_rate": 1.859163954613395e-05, "loss": 0.5701, "step": 11216 }, { "epoch": 0.3445765367247258, "grad_norm": 0.3624337315559387, "learning_rate": 1.859139224289459e-05, "loss": 0.7357, "step": 11217 }, { "epoch": 0.344607255859675, "grad_norm": 0.4070107340812683, "learning_rate": 1.8591144919589433e-05, "loss": 0.5536, "step": 11218 }, { "epoch": 0.3446379749946242, "grad_norm": 0.36096134781837463, "learning_rate": 1.8590897576219062e-05, "loss": 0.5927, "step": 11219 }, { "epoch": 0.3446686941295733, "grad_norm": 0.3674006462097168, "learning_rate": 1.8590650212784046e-05, "loss": 0.6853, "step": 11220 }, { "epoch": 0.34469941326452247, "grad_norm": 0.34657496213912964, "learning_rate": 1.8590402829284967e-05, "loss": 0.5833, "step": 11221 }, { "epoch": 0.34473013239947164, "grad_norm": 0.35482463240623474, "learning_rate": 1.8590155425722405e-05, "loss": 0.5512, "step": 11222 }, { "epoch": 0.34476085153442076, "grad_norm": 0.3270860016345978, "learning_rate": 1.8589908002096933e-05, "loss": 0.5893, "step": 11223 }, { "epoch": 0.34479157066936994, "grad_norm": 0.3118964433670044, "learning_rate": 1.858966055840913e-05, "loss": 0.5692, "step": 11224 }, { "epoch": 0.3448222898043191, "grad_norm": 0.32091259956359863, "learning_rate": 1.8589413094659575e-05, "loss": 0.6167, "step": 11225 }, { "epoch": 0.3448530089392683, "grad_norm": 0.38866788148880005, "learning_rate": 1.8589165610848848e-05, "loss": 0.6251, "step": 11226 }, { "epoch": 0.3448837280742174, "grad_norm": 0.3217463195323944, "learning_rate": 1.8588918106977524e-05, "loss": 0.5833, "step": 11227 }, { "epoch": 0.3449144472091666, "grad_norm": 0.36134350299835205, "learning_rate": 1.8588670583046183e-05, "loss": 0.6199, "step": 11228 }, { "epoch": 0.34494516634411576, "grad_norm": 0.3493403494358063, "learning_rate": 1.8588423039055398e-05, "loss": 0.63, "step": 11229 }, { "epoch": 0.34497588547906494, "grad_norm": 0.3260267674922943, "learning_rate": 1.8588175475005754e-05, "loss": 0.5485, "step": 11230 }, { "epoch": 0.34500660461401406, "grad_norm": 0.35145947337150574, "learning_rate": 1.858792789089783e-05, "loss": 0.601, "step": 11231 }, { "epoch": 0.34503732374896323, "grad_norm": 0.33157092332839966, "learning_rate": 1.8587680286732196e-05, "loss": 0.5897, "step": 11232 }, { "epoch": 0.3450680428839124, "grad_norm": 0.3617112338542938, "learning_rate": 1.8587432662509434e-05, "loss": 0.5532, "step": 11233 }, { "epoch": 0.3450987620188615, "grad_norm": 0.44221076369285583, "learning_rate": 1.8587185018230124e-05, "loss": 0.557, "step": 11234 }, { "epoch": 0.3451294811538107, "grad_norm": 0.33483797311782837, "learning_rate": 1.8586937353894846e-05, "loss": 0.5695, "step": 11235 }, { "epoch": 0.3451602002887599, "grad_norm": 0.3388766944408417, "learning_rate": 1.8586689669504175e-05, "loss": 0.5792, "step": 11236 }, { "epoch": 0.34519091942370905, "grad_norm": 0.3923039734363556, "learning_rate": 1.8586441965058692e-05, "loss": 0.5251, "step": 11237 }, { "epoch": 0.3452216385586582, "grad_norm": 0.3503727614879608, "learning_rate": 1.8586194240558972e-05, "loss": 0.5661, "step": 11238 }, { "epoch": 0.34525235769360735, "grad_norm": 0.33487585186958313, "learning_rate": 1.8585946496005594e-05, "loss": 0.5886, "step": 11239 }, { "epoch": 0.3452830768285565, "grad_norm": 0.4074653685092926, "learning_rate": 1.858569873139914e-05, "loss": 0.6235, "step": 11240 }, { "epoch": 0.34531379596350564, "grad_norm": 0.3628336787223816, "learning_rate": 1.8585450946740187e-05, "loss": 0.5189, "step": 11241 }, { "epoch": 0.3453445150984548, "grad_norm": 0.34823763370513916, "learning_rate": 1.8585203142029317e-05, "loss": 0.5449, "step": 11242 }, { "epoch": 0.345375234233404, "grad_norm": 0.3484329283237457, "learning_rate": 1.8584955317267103e-05, "loss": 0.5512, "step": 11243 }, { "epoch": 0.34540595336835317, "grad_norm": 0.34514352679252625, "learning_rate": 1.8584707472454125e-05, "loss": 0.5893, "step": 11244 }, { "epoch": 0.3454366725033023, "grad_norm": 0.4540562927722931, "learning_rate": 1.8584459607590966e-05, "loss": 0.5766, "step": 11245 }, { "epoch": 0.34546739163825146, "grad_norm": 0.3405349850654602, "learning_rate": 1.8584211722678198e-05, "loss": 0.658, "step": 11246 }, { "epoch": 0.34549811077320064, "grad_norm": 0.3715977966785431, "learning_rate": 1.8583963817716408e-05, "loss": 0.5637, "step": 11247 }, { "epoch": 0.34552882990814976, "grad_norm": 0.3134032189846039, "learning_rate": 1.8583715892706167e-05, "loss": 0.5459, "step": 11248 }, { "epoch": 0.34555954904309893, "grad_norm": 0.3011392056941986, "learning_rate": 1.8583467947648063e-05, "loss": 0.5599, "step": 11249 }, { "epoch": 0.3455902681780481, "grad_norm": 0.3482937514781952, "learning_rate": 1.8583219982542668e-05, "loss": 0.561, "step": 11250 }, { "epoch": 0.3456209873129973, "grad_norm": 0.34786826372146606, "learning_rate": 1.858297199739056e-05, "loss": 0.6864, "step": 11251 }, { "epoch": 0.3456517064479464, "grad_norm": 0.3709131181240082, "learning_rate": 1.8582723992192325e-05, "loss": 0.619, "step": 11252 }, { "epoch": 0.3456824255828956, "grad_norm": 0.3258141875267029, "learning_rate": 1.8582475966948535e-05, "loss": 0.5608, "step": 11253 }, { "epoch": 0.34571314471784476, "grad_norm": 0.5525398850440979, "learning_rate": 1.858222792165978e-05, "loss": 0.545, "step": 11254 }, { "epoch": 0.34574386385279393, "grad_norm": 0.3218780755996704, "learning_rate": 1.8581979856326625e-05, "loss": 0.5334, "step": 11255 }, { "epoch": 0.34577458298774305, "grad_norm": 0.3381626307964325, "learning_rate": 1.8581731770949663e-05, "loss": 0.5897, "step": 11256 }, { "epoch": 0.3458053021226922, "grad_norm": 0.3144895136356354, "learning_rate": 1.8581483665529462e-05, "loss": 0.5814, "step": 11257 }, { "epoch": 0.3458360212576414, "grad_norm": 0.36351871490478516, "learning_rate": 1.8581235540066608e-05, "loss": 0.551, "step": 11258 }, { "epoch": 0.3458667403925905, "grad_norm": 0.4331835210323334, "learning_rate": 1.858098739456168e-05, "loss": 0.5963, "step": 11259 }, { "epoch": 0.3458974595275397, "grad_norm": 0.33595946431159973, "learning_rate": 1.8580739229015256e-05, "loss": 0.5256, "step": 11260 }, { "epoch": 0.34592817866248887, "grad_norm": 0.33455654978752136, "learning_rate": 1.8580491043427916e-05, "loss": 0.5773, "step": 11261 }, { "epoch": 0.34595889779743805, "grad_norm": 0.32490256428718567, "learning_rate": 1.858024283780024e-05, "loss": 0.5742, "step": 11262 }, { "epoch": 0.34598961693238717, "grad_norm": 0.39453595876693726, "learning_rate": 1.857999461213281e-05, "loss": 0.5796, "step": 11263 }, { "epoch": 0.34602033606733634, "grad_norm": 0.31992843747138977, "learning_rate": 1.8579746366426202e-05, "loss": 0.5449, "step": 11264 }, { "epoch": 0.3460510552022855, "grad_norm": 0.3947654366493225, "learning_rate": 1.8579498100680994e-05, "loss": 0.5444, "step": 11265 }, { "epoch": 0.34608177433723464, "grad_norm": 0.35084033012390137, "learning_rate": 1.857924981489777e-05, "loss": 0.5085, "step": 11266 }, { "epoch": 0.3461124934721838, "grad_norm": 0.347378671169281, "learning_rate": 1.8579001509077114e-05, "loss": 0.5353, "step": 11267 }, { "epoch": 0.346143212607133, "grad_norm": 0.417138010263443, "learning_rate": 1.8578753183219597e-05, "loss": 0.6675, "step": 11268 }, { "epoch": 0.34617393174208216, "grad_norm": 0.3344566524028778, "learning_rate": 1.8578504837325803e-05, "loss": 0.5295, "step": 11269 }, { "epoch": 0.3462046508770313, "grad_norm": 0.3077961802482605, "learning_rate": 1.8578256471396313e-05, "loss": 0.5289, "step": 11270 }, { "epoch": 0.34623537001198046, "grad_norm": 0.47217461466789246, "learning_rate": 1.8578008085431705e-05, "loss": 0.5669, "step": 11271 }, { "epoch": 0.34626608914692963, "grad_norm": 0.35591238737106323, "learning_rate": 1.857775967943256e-05, "loss": 0.6573, "step": 11272 }, { "epoch": 0.3462968082818788, "grad_norm": 0.3537255525588989, "learning_rate": 1.8577511253399457e-05, "loss": 0.569, "step": 11273 }, { "epoch": 0.34632752741682793, "grad_norm": 0.35315176844596863, "learning_rate": 1.857726280733298e-05, "loss": 0.5449, "step": 11274 }, { "epoch": 0.3463582465517771, "grad_norm": 0.33679524064064026, "learning_rate": 1.8577014341233704e-05, "loss": 0.5971, "step": 11275 }, { "epoch": 0.3463889656867263, "grad_norm": 0.35834091901779175, "learning_rate": 1.8576765855102212e-05, "loss": 0.5032, "step": 11276 }, { "epoch": 0.3464196848216754, "grad_norm": 0.3583539128303528, "learning_rate": 1.8576517348939086e-05, "loss": 0.6096, "step": 11277 }, { "epoch": 0.3464504039566246, "grad_norm": 0.39821240305900574, "learning_rate": 1.8576268822744904e-05, "loss": 0.6257, "step": 11278 }, { "epoch": 0.34648112309157375, "grad_norm": 0.3785853683948517, "learning_rate": 1.857602027652025e-05, "loss": 0.551, "step": 11279 }, { "epoch": 0.3465118422265229, "grad_norm": 0.36794379353523254, "learning_rate": 1.85757717102657e-05, "loss": 0.5194, "step": 11280 }, { "epoch": 0.34654256136147205, "grad_norm": 0.3680347502231598, "learning_rate": 1.8575523123981834e-05, "loss": 0.6241, "step": 11281 }, { "epoch": 0.3465732804964212, "grad_norm": 0.39371201395988464, "learning_rate": 1.8575274517669234e-05, "loss": 0.4955, "step": 11282 }, { "epoch": 0.3466039996313704, "grad_norm": 0.36055275797843933, "learning_rate": 1.8575025891328487e-05, "loss": 0.5721, "step": 11283 }, { "epoch": 0.3466347187663195, "grad_norm": 0.3374411165714264, "learning_rate": 1.8574777244960164e-05, "loss": 0.5884, "step": 11284 }, { "epoch": 0.3466654379012687, "grad_norm": 0.33182451128959656, "learning_rate": 1.857452857856485e-05, "loss": 0.6147, "step": 11285 }, { "epoch": 0.34669615703621787, "grad_norm": 0.3455299139022827, "learning_rate": 1.8574279892143125e-05, "loss": 0.5635, "step": 11286 }, { "epoch": 0.34672687617116704, "grad_norm": 0.3219197690486908, "learning_rate": 1.8574031185695572e-05, "loss": 0.5795, "step": 11287 }, { "epoch": 0.34675759530611616, "grad_norm": 0.3226257264614105, "learning_rate": 1.857378245922277e-05, "loss": 0.6214, "step": 11288 }, { "epoch": 0.34678831444106534, "grad_norm": 0.37457162141799927, "learning_rate": 1.8573533712725304e-05, "loss": 0.5472, "step": 11289 }, { "epoch": 0.3468190335760145, "grad_norm": 0.3191659450531006, "learning_rate": 1.8573284946203748e-05, "loss": 0.6239, "step": 11290 }, { "epoch": 0.34684975271096363, "grad_norm": 0.38813430070877075, "learning_rate": 1.8573036159658685e-05, "loss": 0.5099, "step": 11291 }, { "epoch": 0.3468804718459128, "grad_norm": 0.32210323214530945, "learning_rate": 1.85727873530907e-05, "loss": 0.5163, "step": 11292 }, { "epoch": 0.346911190980862, "grad_norm": 0.35284456610679626, "learning_rate": 1.857253852650037e-05, "loss": 0.54, "step": 11293 }, { "epoch": 0.34694191011581116, "grad_norm": 0.3450183868408203, "learning_rate": 1.8572289679888276e-05, "loss": 0.6215, "step": 11294 }, { "epoch": 0.3469726292507603, "grad_norm": 0.36493393778800964, "learning_rate": 1.8572040813255002e-05, "loss": 0.6076, "step": 11295 }, { "epoch": 0.34700334838570945, "grad_norm": 0.32535120844841003, "learning_rate": 1.8571791926601128e-05, "loss": 0.5745, "step": 11296 }, { "epoch": 0.34703406752065863, "grad_norm": 0.32826805114746094, "learning_rate": 1.8571543019927234e-05, "loss": 0.5308, "step": 11297 }, { "epoch": 0.3470647866556078, "grad_norm": 0.3406274914741516, "learning_rate": 1.8571294093233906e-05, "loss": 0.5135, "step": 11298 }, { "epoch": 0.3470955057905569, "grad_norm": 0.3948979079723358, "learning_rate": 1.857104514652172e-05, "loss": 0.5909, "step": 11299 }, { "epoch": 0.3471262249255061, "grad_norm": 0.44431060552597046, "learning_rate": 1.857079617979126e-05, "loss": 0.5837, "step": 11300 }, { "epoch": 0.3471569440604553, "grad_norm": 0.3312034606933594, "learning_rate": 1.8570547193043108e-05, "loss": 0.5273, "step": 11301 }, { "epoch": 0.3471876631954044, "grad_norm": 0.3460303246974945, "learning_rate": 1.857029818627784e-05, "loss": 0.6054, "step": 11302 }, { "epoch": 0.34721838233035357, "grad_norm": 0.3718481659889221, "learning_rate": 1.8570049159496047e-05, "loss": 0.6351, "step": 11303 }, { "epoch": 0.34724910146530275, "grad_norm": 0.3493027985095978, "learning_rate": 1.8569800112698302e-05, "loss": 0.5778, "step": 11304 }, { "epoch": 0.3472798206002519, "grad_norm": 0.37516704201698303, "learning_rate": 1.8569551045885193e-05, "loss": 0.5555, "step": 11305 }, { "epoch": 0.34731053973520104, "grad_norm": 0.31815439462661743, "learning_rate": 1.8569301959057298e-05, "loss": 0.6083, "step": 11306 }, { "epoch": 0.3473412588701502, "grad_norm": 0.3229027986526489, "learning_rate": 1.85690528522152e-05, "loss": 0.571, "step": 11307 }, { "epoch": 0.3473719780050994, "grad_norm": 0.3448328673839569, "learning_rate": 1.8568803725359482e-05, "loss": 0.6108, "step": 11308 }, { "epoch": 0.3474026971400485, "grad_norm": 0.39303791522979736, "learning_rate": 1.856855457849072e-05, "loss": 0.5247, "step": 11309 }, { "epoch": 0.3474334162749977, "grad_norm": 0.3434562385082245, "learning_rate": 1.8568305411609505e-05, "loss": 0.6242, "step": 11310 }, { "epoch": 0.34746413540994686, "grad_norm": 0.3166676461696625, "learning_rate": 1.856805622471641e-05, "loss": 0.6165, "step": 11311 }, { "epoch": 0.34749485454489604, "grad_norm": 0.37675273418426514, "learning_rate": 1.8567807017812027e-05, "loss": 0.6063, "step": 11312 }, { "epoch": 0.34752557367984516, "grad_norm": 0.3328113853931427, "learning_rate": 1.8567557790896927e-05, "loss": 0.4963, "step": 11313 }, { "epoch": 0.34755629281479433, "grad_norm": 0.3584270477294922, "learning_rate": 1.8567308543971704e-05, "loss": 0.5822, "step": 11314 }, { "epoch": 0.3475870119497435, "grad_norm": 0.3785799443721771, "learning_rate": 1.8567059277036926e-05, "loss": 0.6188, "step": 11315 }, { "epoch": 0.3476177310846927, "grad_norm": 0.3279271125793457, "learning_rate": 1.8566809990093188e-05, "loss": 0.5432, "step": 11316 }, { "epoch": 0.3476484502196418, "grad_norm": 0.33291903138160706, "learning_rate": 1.8566560683141067e-05, "loss": 0.5421, "step": 11317 }, { "epoch": 0.347679169354591, "grad_norm": 0.32083624601364136, "learning_rate": 1.856631135618114e-05, "loss": 0.5139, "step": 11318 }, { "epoch": 0.34770988848954015, "grad_norm": 0.3569769561290741, "learning_rate": 1.8566062009214e-05, "loss": 0.6054, "step": 11319 }, { "epoch": 0.3477406076244893, "grad_norm": 0.4451017677783966, "learning_rate": 1.8565812642240222e-05, "loss": 0.5733, "step": 11320 }, { "epoch": 0.34777132675943845, "grad_norm": 0.5139105916023254, "learning_rate": 1.8565563255260388e-05, "loss": 0.5306, "step": 11321 }, { "epoch": 0.3478020458943876, "grad_norm": 0.3682486414909363, "learning_rate": 1.8565313848275086e-05, "loss": 0.6392, "step": 11322 }, { "epoch": 0.3478327650293368, "grad_norm": 0.31385675072669983, "learning_rate": 1.8565064421284895e-05, "loss": 0.5103, "step": 11323 }, { "epoch": 0.3478634841642859, "grad_norm": 0.32131722569465637, "learning_rate": 1.8564814974290395e-05, "loss": 0.6123, "step": 11324 }, { "epoch": 0.3478942032992351, "grad_norm": 0.32916051149368286, "learning_rate": 1.8564565507292175e-05, "loss": 0.5857, "step": 11325 }, { "epoch": 0.34792492243418427, "grad_norm": 0.3693629503250122, "learning_rate": 1.856431602029081e-05, "loss": 0.5967, "step": 11326 }, { "epoch": 0.3479556415691334, "grad_norm": 0.330618679523468, "learning_rate": 1.856406651328689e-05, "loss": 0.5569, "step": 11327 }, { "epoch": 0.34798636070408256, "grad_norm": 0.35617756843566895, "learning_rate": 1.8563816986280997e-05, "loss": 0.6025, "step": 11328 }, { "epoch": 0.34801707983903174, "grad_norm": 0.3227764368057251, "learning_rate": 1.8563567439273708e-05, "loss": 0.6119, "step": 11329 }, { "epoch": 0.3480477989739809, "grad_norm": 0.3970007002353668, "learning_rate": 1.856331787226561e-05, "loss": 0.5393, "step": 11330 }, { "epoch": 0.34807851810893004, "grad_norm": 0.35579559206962585, "learning_rate": 1.8563068285257282e-05, "loss": 0.511, "step": 11331 }, { "epoch": 0.3481092372438792, "grad_norm": 0.3367045223712921, "learning_rate": 1.8562818678249314e-05, "loss": 0.5178, "step": 11332 }, { "epoch": 0.3481399563788284, "grad_norm": 0.33038878440856934, "learning_rate": 1.8562569051242284e-05, "loss": 0.5467, "step": 11333 }, { "epoch": 0.3481706755137775, "grad_norm": 0.36526498198509216, "learning_rate": 1.8562319404236776e-05, "loss": 0.5896, "step": 11334 }, { "epoch": 0.3482013946487267, "grad_norm": 0.3464156985282898, "learning_rate": 1.856206973723337e-05, "loss": 0.5854, "step": 11335 }, { "epoch": 0.34823211378367586, "grad_norm": 0.4402366578578949, "learning_rate": 1.8561820050232656e-05, "loss": 0.5993, "step": 11336 }, { "epoch": 0.34826283291862503, "grad_norm": 0.397234171628952, "learning_rate": 1.8561570343235213e-05, "loss": 0.5778, "step": 11337 }, { "epoch": 0.34829355205357415, "grad_norm": 0.37768489122390747, "learning_rate": 1.8561320616241625e-05, "loss": 0.4939, "step": 11338 }, { "epoch": 0.3483242711885233, "grad_norm": 0.32640519738197327, "learning_rate": 1.856107086925247e-05, "loss": 0.5473, "step": 11339 }, { "epoch": 0.3483549903234725, "grad_norm": 0.3244088292121887, "learning_rate": 1.8560821102268343e-05, "loss": 0.5466, "step": 11340 }, { "epoch": 0.3483857094584217, "grad_norm": 0.3455619215965271, "learning_rate": 1.8560571315289814e-05, "loss": 0.6108, "step": 11341 }, { "epoch": 0.3484164285933708, "grad_norm": 0.34417280554771423, "learning_rate": 1.8560321508317475e-05, "loss": 0.5716, "step": 11342 }, { "epoch": 0.34844714772832, "grad_norm": 0.33532771468162537, "learning_rate": 1.856007168135191e-05, "loss": 0.5971, "step": 11343 }, { "epoch": 0.34847786686326915, "grad_norm": 0.34179261326789856, "learning_rate": 1.8559821834393697e-05, "loss": 0.6285, "step": 11344 }, { "epoch": 0.34850858599821827, "grad_norm": 0.34417346119880676, "learning_rate": 1.8559571967443422e-05, "loss": 0.5188, "step": 11345 }, { "epoch": 0.34853930513316744, "grad_norm": 0.3438042998313904, "learning_rate": 1.8559322080501673e-05, "loss": 0.6149, "step": 11346 }, { "epoch": 0.3485700242681166, "grad_norm": 0.37867894768714905, "learning_rate": 1.8559072173569025e-05, "loss": 0.5938, "step": 11347 }, { "epoch": 0.3486007434030658, "grad_norm": 0.3460482954978943, "learning_rate": 1.8558822246646066e-05, "loss": 0.6034, "step": 11348 }, { "epoch": 0.3486314625380149, "grad_norm": 0.3162902593612671, "learning_rate": 1.855857229973338e-05, "loss": 0.5812, "step": 11349 }, { "epoch": 0.3486621816729641, "grad_norm": 0.33971893787384033, "learning_rate": 1.8558322332831556e-05, "loss": 0.5433, "step": 11350 }, { "epoch": 0.34869290080791326, "grad_norm": 0.3647651672363281, "learning_rate": 1.8558072345941166e-05, "loss": 0.554, "step": 11351 }, { "epoch": 0.3487236199428624, "grad_norm": 0.9307363629341125, "learning_rate": 1.8557822339062807e-05, "loss": 0.5837, "step": 11352 }, { "epoch": 0.34875433907781156, "grad_norm": 0.327669233083725, "learning_rate": 1.855757231219705e-05, "loss": 0.591, "step": 11353 }, { "epoch": 0.34878505821276073, "grad_norm": 0.34422361850738525, "learning_rate": 1.8557322265344488e-05, "loss": 0.5592, "step": 11354 }, { "epoch": 0.3488157773477099, "grad_norm": 0.3470379710197449, "learning_rate": 1.85570721985057e-05, "loss": 0.5555, "step": 11355 }, { "epoch": 0.34884649648265903, "grad_norm": 0.33371609449386597, "learning_rate": 1.8556822111681276e-05, "loss": 0.6048, "step": 11356 }, { "epoch": 0.3488772156176082, "grad_norm": 0.30926525592803955, "learning_rate": 1.8556572004871793e-05, "loss": 0.5409, "step": 11357 }, { "epoch": 0.3489079347525574, "grad_norm": 0.3215295672416687, "learning_rate": 1.855632187807784e-05, "loss": 0.6052, "step": 11358 }, { "epoch": 0.3489386538875065, "grad_norm": 0.3876514136791229, "learning_rate": 1.85560717313e-05, "loss": 0.5042, "step": 11359 }, { "epoch": 0.3489693730224557, "grad_norm": 0.3220072388648987, "learning_rate": 1.8555821564538857e-05, "loss": 0.5836, "step": 11360 }, { "epoch": 0.34900009215740485, "grad_norm": 0.37900614738464355, "learning_rate": 1.8555571377794993e-05, "loss": 0.6616, "step": 11361 }, { "epoch": 0.349030811292354, "grad_norm": 0.37210026383399963, "learning_rate": 1.8555321171068996e-05, "loss": 0.6522, "step": 11362 }, { "epoch": 0.34906153042730315, "grad_norm": 0.40646421909332275, "learning_rate": 1.855507094436145e-05, "loss": 0.555, "step": 11363 }, { "epoch": 0.3490922495622523, "grad_norm": 0.709987461566925, "learning_rate": 1.8554820697672936e-05, "loss": 0.5895, "step": 11364 }, { "epoch": 0.3491229686972015, "grad_norm": 0.32982030510902405, "learning_rate": 1.8554570431004043e-05, "loss": 0.4806, "step": 11365 }, { "epoch": 0.34915368783215067, "grad_norm": 0.3467276096343994, "learning_rate": 1.8554320144355354e-05, "loss": 0.6252, "step": 11366 }, { "epoch": 0.3491844069670998, "grad_norm": 0.3455829620361328, "learning_rate": 1.855406983772745e-05, "loss": 0.6042, "step": 11367 }, { "epoch": 0.34921512610204897, "grad_norm": 0.33869993686676025, "learning_rate": 1.855381951112092e-05, "loss": 0.5617, "step": 11368 }, { "epoch": 0.34924584523699814, "grad_norm": 0.3459802269935608, "learning_rate": 1.855356916453635e-05, "loss": 0.5827, "step": 11369 }, { "epoch": 0.34927656437194726, "grad_norm": 0.36970004439353943, "learning_rate": 1.8553318797974317e-05, "loss": 0.6649, "step": 11370 }, { "epoch": 0.34930728350689644, "grad_norm": 0.3584221601486206, "learning_rate": 1.855306841143541e-05, "loss": 0.5953, "step": 11371 }, { "epoch": 0.3493380026418456, "grad_norm": 0.3171955645084381, "learning_rate": 1.855281800492022e-05, "loss": 0.6348, "step": 11372 }, { "epoch": 0.3493687217767948, "grad_norm": 0.329387366771698, "learning_rate": 1.8552567578429322e-05, "loss": 0.5438, "step": 11373 }, { "epoch": 0.3493994409117439, "grad_norm": 0.3319295048713684, "learning_rate": 1.8552317131963307e-05, "loss": 0.6549, "step": 11374 }, { "epoch": 0.3494301600466931, "grad_norm": 0.36993980407714844, "learning_rate": 1.8552066665522757e-05, "loss": 0.4858, "step": 11375 }, { "epoch": 0.34946087918164226, "grad_norm": 0.3740242123603821, "learning_rate": 1.855181617910826e-05, "loss": 0.6252, "step": 11376 }, { "epoch": 0.3494915983165914, "grad_norm": 0.2974475026130676, "learning_rate": 1.8551565672720395e-05, "loss": 0.5845, "step": 11377 }, { "epoch": 0.34952231745154055, "grad_norm": 0.3549022972583771, "learning_rate": 1.8551315146359755e-05, "loss": 0.5595, "step": 11378 }, { "epoch": 0.34955303658648973, "grad_norm": 0.3620690107345581, "learning_rate": 1.855106460002692e-05, "loss": 0.6163, "step": 11379 }, { "epoch": 0.3495837557214389, "grad_norm": 0.3406987190246582, "learning_rate": 1.8550814033722476e-05, "loss": 0.5375, "step": 11380 }, { "epoch": 0.349614474856388, "grad_norm": 0.36824026703834534, "learning_rate": 1.8550563447447008e-05, "loss": 0.5264, "step": 11381 }, { "epoch": 0.3496451939913372, "grad_norm": 0.34925124049186707, "learning_rate": 1.8550312841201102e-05, "loss": 0.612, "step": 11382 }, { "epoch": 0.3496759131262864, "grad_norm": 0.3438829779624939, "learning_rate": 1.8550062214985344e-05, "loss": 0.649, "step": 11383 }, { "epoch": 0.34970663226123555, "grad_norm": 0.3503747582435608, "learning_rate": 1.8549811568800317e-05, "loss": 0.4798, "step": 11384 }, { "epoch": 0.34973735139618467, "grad_norm": 0.409397691488266, "learning_rate": 1.854956090264661e-05, "loss": 0.5689, "step": 11385 }, { "epoch": 0.34976807053113385, "grad_norm": 0.3222496807575226, "learning_rate": 1.8549310216524806e-05, "loss": 0.4894, "step": 11386 }, { "epoch": 0.349798789666083, "grad_norm": 0.33247601985931396, "learning_rate": 1.854905951043549e-05, "loss": 0.5379, "step": 11387 }, { "epoch": 0.34982950880103214, "grad_norm": 0.4208299219608307, "learning_rate": 1.854880878437925e-05, "loss": 0.5376, "step": 11388 }, { "epoch": 0.3498602279359813, "grad_norm": 0.5223402380943298, "learning_rate": 1.854855803835667e-05, "loss": 0.5184, "step": 11389 }, { "epoch": 0.3498909470709305, "grad_norm": 0.40320107340812683, "learning_rate": 1.8548307272368332e-05, "loss": 0.6402, "step": 11390 }, { "epoch": 0.34992166620587967, "grad_norm": 0.3758806884288788, "learning_rate": 1.8548056486414832e-05, "loss": 0.6097, "step": 11391 }, { "epoch": 0.3499523853408288, "grad_norm": 0.4362967610359192, "learning_rate": 1.8547805680496745e-05, "loss": 0.5726, "step": 11392 }, { "epoch": 0.34998310447577796, "grad_norm": 0.3200242519378662, "learning_rate": 1.8547554854614664e-05, "loss": 0.6144, "step": 11393 }, { "epoch": 0.35001382361072714, "grad_norm": 0.3994579613208771, "learning_rate": 1.8547304008769168e-05, "loss": 0.5823, "step": 11394 }, { "epoch": 0.35004454274567626, "grad_norm": 0.3615278899669647, "learning_rate": 1.8547053142960844e-05, "loss": 0.5702, "step": 11395 }, { "epoch": 0.35007526188062543, "grad_norm": 0.33120134472846985, "learning_rate": 1.8546802257190288e-05, "loss": 0.6288, "step": 11396 }, { "epoch": 0.3501059810155746, "grad_norm": 0.36457139253616333, "learning_rate": 1.8546551351458072e-05, "loss": 0.6334, "step": 11397 }, { "epoch": 0.3501367001505238, "grad_norm": 0.35045766830444336, "learning_rate": 1.8546300425764793e-05, "loss": 0.6763, "step": 11398 }, { "epoch": 0.3501674192854729, "grad_norm": 0.3679502010345459, "learning_rate": 1.8546049480111026e-05, "loss": 0.5791, "step": 11399 }, { "epoch": 0.3501981384204221, "grad_norm": 0.5291891098022461, "learning_rate": 1.854579851449737e-05, "loss": 0.5257, "step": 11400 }, { "epoch": 0.35022885755537125, "grad_norm": 0.37934985756874084, "learning_rate": 1.8545547528924404e-05, "loss": 0.5747, "step": 11401 }, { "epoch": 0.3502595766903204, "grad_norm": 0.4006384015083313, "learning_rate": 1.854529652339271e-05, "loss": 0.543, "step": 11402 }, { "epoch": 0.35029029582526955, "grad_norm": 0.35804417729377747, "learning_rate": 1.8545045497902886e-05, "loss": 0.5736, "step": 11403 }, { "epoch": 0.3503210149602187, "grad_norm": 0.34525272250175476, "learning_rate": 1.854479445245551e-05, "loss": 0.5306, "step": 11404 }, { "epoch": 0.3503517340951679, "grad_norm": 0.32720622420310974, "learning_rate": 1.8544543387051166e-05, "loss": 0.567, "step": 11405 }, { "epoch": 0.350382453230117, "grad_norm": 0.3479689061641693, "learning_rate": 1.8544292301690445e-05, "loss": 0.4996, "step": 11406 }, { "epoch": 0.3504131723650662, "grad_norm": 0.4191547632217407, "learning_rate": 1.8544041196373933e-05, "loss": 0.5763, "step": 11407 }, { "epoch": 0.35044389150001537, "grad_norm": 0.32668542861938477, "learning_rate": 1.8543790071102215e-05, "loss": 0.5865, "step": 11408 }, { "epoch": 0.35047461063496455, "grad_norm": 0.3522021472454071, "learning_rate": 1.8543538925875882e-05, "loss": 0.6242, "step": 11409 }, { "epoch": 0.35050532976991366, "grad_norm": 0.3100605010986328, "learning_rate": 1.8543287760695516e-05, "loss": 0.5011, "step": 11410 }, { "epoch": 0.35053604890486284, "grad_norm": 0.3536990284919739, "learning_rate": 1.8543036575561704e-05, "loss": 0.5774, "step": 11411 }, { "epoch": 0.350566768039812, "grad_norm": 0.30476662516593933, "learning_rate": 1.8542785370475034e-05, "loss": 0.5256, "step": 11412 }, { "epoch": 0.35059748717476114, "grad_norm": 0.36229774355888367, "learning_rate": 1.8542534145436093e-05, "loss": 0.624, "step": 11413 }, { "epoch": 0.3506282063097103, "grad_norm": 0.33306655287742615, "learning_rate": 1.8542282900445464e-05, "loss": 0.5838, "step": 11414 }, { "epoch": 0.3506589254446595, "grad_norm": 0.38411885499954224, "learning_rate": 1.854203163550374e-05, "loss": 0.6649, "step": 11415 }, { "epoch": 0.35068964457960866, "grad_norm": 0.7171433568000793, "learning_rate": 1.8541780350611502e-05, "loss": 0.5477, "step": 11416 }, { "epoch": 0.3507203637145578, "grad_norm": 0.34940046072006226, "learning_rate": 1.8541529045769342e-05, "loss": 0.5472, "step": 11417 }, { "epoch": 0.35075108284950696, "grad_norm": 0.4714857041835785, "learning_rate": 1.854127772097784e-05, "loss": 0.5235, "step": 11418 }, { "epoch": 0.35078180198445613, "grad_norm": 0.3419811725616455, "learning_rate": 1.8541026376237594e-05, "loss": 0.5619, "step": 11419 }, { "epoch": 0.35081252111940525, "grad_norm": 0.32352641224861145, "learning_rate": 1.8540775011549178e-05, "loss": 0.4987, "step": 11420 }, { "epoch": 0.3508432402543544, "grad_norm": 0.3277982175350189, "learning_rate": 1.854052362691319e-05, "loss": 0.6212, "step": 11421 }, { "epoch": 0.3508739593893036, "grad_norm": 0.32353901863098145, "learning_rate": 1.854027222233021e-05, "loss": 0.5577, "step": 11422 }, { "epoch": 0.3509046785242528, "grad_norm": 0.36119136214256287, "learning_rate": 1.854002079780083e-05, "loss": 0.5652, "step": 11423 }, { "epoch": 0.3509353976592019, "grad_norm": 0.34616777300834656, "learning_rate": 1.8539769353325635e-05, "loss": 0.6008, "step": 11424 }, { "epoch": 0.3509661167941511, "grad_norm": 0.3428565561771393, "learning_rate": 1.853951788890521e-05, "loss": 0.5055, "step": 11425 }, { "epoch": 0.35099683592910025, "grad_norm": 0.3552015721797943, "learning_rate": 1.8539266404540145e-05, "loss": 0.5952, "step": 11426 }, { "epoch": 0.3510275550640494, "grad_norm": 0.3203696012496948, "learning_rate": 1.8539014900231026e-05, "loss": 0.5584, "step": 11427 }, { "epoch": 0.35105827419899854, "grad_norm": 0.48179933428764343, "learning_rate": 1.8538763375978445e-05, "loss": 0.5958, "step": 11428 }, { "epoch": 0.3510889933339477, "grad_norm": 0.3545813262462616, "learning_rate": 1.8538511831782982e-05, "loss": 0.4833, "step": 11429 }, { "epoch": 0.3511197124688969, "grad_norm": 0.3908509314060211, "learning_rate": 1.853826026764523e-05, "loss": 0.6058, "step": 11430 }, { "epoch": 0.351150431603846, "grad_norm": 0.3349266052246094, "learning_rate": 1.8538008683565776e-05, "loss": 0.5974, "step": 11431 }, { "epoch": 0.3511811507387952, "grad_norm": 0.356124609708786, "learning_rate": 1.8537757079545203e-05, "loss": 0.5624, "step": 11432 }, { "epoch": 0.35121186987374436, "grad_norm": 0.3393619954586029, "learning_rate": 1.8537505455584105e-05, "loss": 0.5769, "step": 11433 }, { "epoch": 0.35124258900869354, "grad_norm": 0.37571656703948975, "learning_rate": 1.853725381168307e-05, "loss": 0.6098, "step": 11434 }, { "epoch": 0.35127330814364266, "grad_norm": 0.33763784170150757, "learning_rate": 1.8537002147842673e-05, "loss": 0.6007, "step": 11435 }, { "epoch": 0.35130402727859183, "grad_norm": 0.34126365184783936, "learning_rate": 1.853675046406352e-05, "loss": 0.5798, "step": 11436 }, { "epoch": 0.351334746413541, "grad_norm": 0.3419937193393707, "learning_rate": 1.8536498760346185e-05, "loss": 0.5837, "step": 11437 }, { "epoch": 0.35136546554849013, "grad_norm": 0.3030388057231903, "learning_rate": 1.853624703669126e-05, "loss": 0.5001, "step": 11438 }, { "epoch": 0.3513961846834393, "grad_norm": 0.3724290430545807, "learning_rate": 1.853599529309934e-05, "loss": 0.5261, "step": 11439 }, { "epoch": 0.3514269038183885, "grad_norm": 0.3508874475955963, "learning_rate": 1.8535743529571003e-05, "loss": 0.6041, "step": 11440 }, { "epoch": 0.35145762295333766, "grad_norm": 0.4091031551361084, "learning_rate": 1.8535491746106838e-05, "loss": 0.5956, "step": 11441 }, { "epoch": 0.3514883420882868, "grad_norm": 0.3116965591907501, "learning_rate": 1.853523994270744e-05, "loss": 0.5343, "step": 11442 }, { "epoch": 0.35151906122323595, "grad_norm": 0.3288998603820801, "learning_rate": 1.8534988119373395e-05, "loss": 0.5692, "step": 11443 }, { "epoch": 0.3515497803581851, "grad_norm": 0.3593905568122864, "learning_rate": 1.8534736276105286e-05, "loss": 0.6196, "step": 11444 }, { "epoch": 0.35158049949313425, "grad_norm": 0.3350728154182434, "learning_rate": 1.8534484412903703e-05, "loss": 0.5877, "step": 11445 }, { "epoch": 0.3516112186280834, "grad_norm": 0.404083251953125, "learning_rate": 1.853423252976924e-05, "loss": 0.5913, "step": 11446 }, { "epoch": 0.3516419377630326, "grad_norm": 0.3559688627719879, "learning_rate": 1.8533980626702477e-05, "loss": 0.663, "step": 11447 }, { "epoch": 0.3516726568979818, "grad_norm": 0.3542003929615021, "learning_rate": 1.8533728703704007e-05, "loss": 0.5822, "step": 11448 }, { "epoch": 0.3517033760329309, "grad_norm": 0.3604080379009247, "learning_rate": 1.8533476760774418e-05, "loss": 0.5982, "step": 11449 }, { "epoch": 0.35173409516788007, "grad_norm": 0.35006746649742126, "learning_rate": 1.85332247979143e-05, "loss": 0.5279, "step": 11450 }, { "epoch": 0.35176481430282924, "grad_norm": 0.30903640389442444, "learning_rate": 1.8532972815124237e-05, "loss": 0.5206, "step": 11451 }, { "epoch": 0.3517955334377784, "grad_norm": 0.33051788806915283, "learning_rate": 1.8532720812404822e-05, "loss": 0.5449, "step": 11452 }, { "epoch": 0.35182625257272754, "grad_norm": 0.36778247356414795, "learning_rate": 1.853246878975664e-05, "loss": 0.5955, "step": 11453 }, { "epoch": 0.3518569717076767, "grad_norm": 0.34637144207954407, "learning_rate": 1.8532216747180277e-05, "loss": 0.4836, "step": 11454 }, { "epoch": 0.3518876908426259, "grad_norm": 0.3609873652458191, "learning_rate": 1.8531964684676335e-05, "loss": 0.5534, "step": 11455 }, { "epoch": 0.351918409977575, "grad_norm": 0.3656162917613983, "learning_rate": 1.8531712602245386e-05, "loss": 0.6411, "step": 11456 }, { "epoch": 0.3519491291125242, "grad_norm": 0.33143141865730286, "learning_rate": 1.853146049988803e-05, "loss": 0.5956, "step": 11457 }, { "epoch": 0.35197984824747336, "grad_norm": 0.3211456537246704, "learning_rate": 1.8531208377604853e-05, "loss": 0.5191, "step": 11458 }, { "epoch": 0.35201056738242253, "grad_norm": 0.3926007151603699, "learning_rate": 1.8530956235396442e-05, "loss": 0.6057, "step": 11459 }, { "epoch": 0.35204128651737165, "grad_norm": 0.3720177710056305, "learning_rate": 1.8530704073263386e-05, "loss": 0.5869, "step": 11460 }, { "epoch": 0.35207200565232083, "grad_norm": 0.41847288608551025, "learning_rate": 1.8530451891206275e-05, "loss": 0.6043, "step": 11461 }, { "epoch": 0.35210272478727, "grad_norm": 0.367041677236557, "learning_rate": 1.85301996892257e-05, "loss": 0.4682, "step": 11462 }, { "epoch": 0.3521334439222191, "grad_norm": 0.3595093786716461, "learning_rate": 1.8529947467322245e-05, "loss": 0.5884, "step": 11463 }, { "epoch": 0.3521641630571683, "grad_norm": 0.34937435388565063, "learning_rate": 1.85296952254965e-05, "loss": 0.5948, "step": 11464 }, { "epoch": 0.3521948821921175, "grad_norm": 0.3384823203086853, "learning_rate": 1.8529442963749064e-05, "loss": 0.6592, "step": 11465 }, { "epoch": 0.35222560132706665, "grad_norm": 0.35371148586273193, "learning_rate": 1.8529190682080512e-05, "loss": 0.5823, "step": 11466 }, { "epoch": 0.35225632046201577, "grad_norm": 0.3468400835990906, "learning_rate": 1.852893838049144e-05, "loss": 0.5768, "step": 11467 }, { "epoch": 0.35228703959696495, "grad_norm": 0.31745636463165283, "learning_rate": 1.8528686058982437e-05, "loss": 0.5729, "step": 11468 }, { "epoch": 0.3523177587319141, "grad_norm": 0.32052767276763916, "learning_rate": 1.8528433717554092e-05, "loss": 0.5325, "step": 11469 }, { "epoch": 0.3523484778668633, "grad_norm": 0.31184476613998413, "learning_rate": 1.8528181356207e-05, "loss": 0.5263, "step": 11470 }, { "epoch": 0.3523791970018124, "grad_norm": 0.36517980694770813, "learning_rate": 1.852792897494174e-05, "loss": 0.6142, "step": 11471 }, { "epoch": 0.3524099161367616, "grad_norm": 0.36476296186447144, "learning_rate": 1.8527676573758905e-05, "loss": 0.6372, "step": 11472 }, { "epoch": 0.35244063527171077, "grad_norm": 0.4000464975833893, "learning_rate": 1.8527424152659085e-05, "loss": 0.4544, "step": 11473 }, { "epoch": 0.3524713544066599, "grad_norm": 0.3541850447654724, "learning_rate": 1.8527171711642874e-05, "loss": 0.5383, "step": 11474 }, { "epoch": 0.35250207354160906, "grad_norm": 0.3406923711299896, "learning_rate": 1.852691925071086e-05, "loss": 0.6073, "step": 11475 }, { "epoch": 0.35253279267655824, "grad_norm": 0.339729905128479, "learning_rate": 1.8526666769863625e-05, "loss": 0.6065, "step": 11476 }, { "epoch": 0.3525635118115074, "grad_norm": 0.33906829357147217, "learning_rate": 1.8526414269101765e-05, "loss": 0.5372, "step": 11477 }, { "epoch": 0.35259423094645653, "grad_norm": 0.3264419436454773, "learning_rate": 1.852616174842587e-05, "loss": 0.5938, "step": 11478 }, { "epoch": 0.3526249500814057, "grad_norm": 0.33565816283226013, "learning_rate": 1.852590920783653e-05, "loss": 0.5895, "step": 11479 }, { "epoch": 0.3526556692163549, "grad_norm": 0.3542287349700928, "learning_rate": 1.8525656647334333e-05, "loss": 0.592, "step": 11480 }, { "epoch": 0.352686388351304, "grad_norm": 0.3261890411376953, "learning_rate": 1.8525404066919867e-05, "loss": 0.6024, "step": 11481 }, { "epoch": 0.3527171074862532, "grad_norm": 0.38986608386039734, "learning_rate": 1.8525151466593726e-05, "loss": 0.6664, "step": 11482 }, { "epoch": 0.35274782662120235, "grad_norm": 0.3316037654876709, "learning_rate": 1.85248988463565e-05, "loss": 0.585, "step": 11483 }, { "epoch": 0.35277854575615153, "grad_norm": 0.40001174807548523, "learning_rate": 1.852464620620878e-05, "loss": 0.557, "step": 11484 }, { "epoch": 0.35280926489110065, "grad_norm": 0.3321998715400696, "learning_rate": 1.8524393546151146e-05, "loss": 0.5267, "step": 11485 }, { "epoch": 0.3528399840260498, "grad_norm": 0.38968223333358765, "learning_rate": 1.85241408661842e-05, "loss": 0.6284, "step": 11486 }, { "epoch": 0.352870703160999, "grad_norm": 0.3427540361881256, "learning_rate": 1.8523888166308527e-05, "loss": 0.6477, "step": 11487 }, { "epoch": 0.3529014222959481, "grad_norm": 0.34098300337791443, "learning_rate": 1.8523635446524717e-05, "loss": 0.5267, "step": 11488 }, { "epoch": 0.3529321414308973, "grad_norm": 0.34810203313827515, "learning_rate": 1.8523382706833365e-05, "loss": 0.5821, "step": 11489 }, { "epoch": 0.35296286056584647, "grad_norm": 0.34803032875061035, "learning_rate": 1.8523129947235053e-05, "loss": 0.5048, "step": 11490 }, { "epoch": 0.35299357970079565, "grad_norm": 0.7327767610549927, "learning_rate": 1.8522877167730378e-05, "loss": 0.5974, "step": 11491 }, { "epoch": 0.35302429883574477, "grad_norm": 0.3427952229976654, "learning_rate": 1.8522624368319928e-05, "loss": 0.6301, "step": 11492 }, { "epoch": 0.35305501797069394, "grad_norm": 0.3678911030292511, "learning_rate": 1.852237154900429e-05, "loss": 0.5979, "step": 11493 }, { "epoch": 0.3530857371056431, "grad_norm": 0.369081974029541, "learning_rate": 1.852211870978406e-05, "loss": 0.6133, "step": 11494 }, { "epoch": 0.3531164562405923, "grad_norm": 0.34198662638664246, "learning_rate": 1.852186585065983e-05, "loss": 0.6056, "step": 11495 }, { "epoch": 0.3531471753755414, "grad_norm": 0.3411914110183716, "learning_rate": 1.8521612971632183e-05, "loss": 0.5445, "step": 11496 }, { "epoch": 0.3531778945104906, "grad_norm": 0.30093953013420105, "learning_rate": 1.8521360072701715e-05, "loss": 0.5369, "step": 11497 }, { "epoch": 0.35320861364543976, "grad_norm": 0.31739476323127747, "learning_rate": 1.8521107153869017e-05, "loss": 0.578, "step": 11498 }, { "epoch": 0.3532393327803889, "grad_norm": 0.3721875846385956, "learning_rate": 1.8520854215134678e-05, "loss": 0.6078, "step": 11499 }, { "epoch": 0.35327005191533806, "grad_norm": 0.3360198140144348, "learning_rate": 1.8520601256499286e-05, "loss": 0.6866, "step": 11500 }, { "epoch": 0.35330077105028723, "grad_norm": 0.3670846223831177, "learning_rate": 1.8520348277963437e-05, "loss": 0.5955, "step": 11501 }, { "epoch": 0.3533314901852364, "grad_norm": 0.33760786056518555, "learning_rate": 1.8520095279527722e-05, "loss": 0.575, "step": 11502 }, { "epoch": 0.3533622093201855, "grad_norm": 0.3524162769317627, "learning_rate": 1.8519842261192723e-05, "loss": 0.6122, "step": 11503 }, { "epoch": 0.3533929284551347, "grad_norm": 0.3725300431251526, "learning_rate": 1.8519589222959043e-05, "loss": 0.5406, "step": 11504 }, { "epoch": 0.3534236475900839, "grad_norm": 0.4205290675163269, "learning_rate": 1.8519336164827268e-05, "loss": 0.6016, "step": 11505 }, { "epoch": 0.353454366725033, "grad_norm": 0.38610199093818665, "learning_rate": 1.851908308679798e-05, "loss": 0.6052, "step": 11506 }, { "epoch": 0.3534850858599822, "grad_norm": 0.3163902461528778, "learning_rate": 1.8518829988871788e-05, "loss": 0.5759, "step": 11507 }, { "epoch": 0.35351580499493135, "grad_norm": 0.32458093762397766, "learning_rate": 1.851857687104927e-05, "loss": 0.4406, "step": 11508 }, { "epoch": 0.3535465241298805, "grad_norm": 0.3575226962566376, "learning_rate": 1.8518323733331017e-05, "loss": 0.6388, "step": 11509 }, { "epoch": 0.35357724326482964, "grad_norm": 0.343838632106781, "learning_rate": 1.851807057571763e-05, "loss": 0.5979, "step": 11510 }, { "epoch": 0.3536079623997788, "grad_norm": 0.31983163952827454, "learning_rate": 1.851781739820969e-05, "loss": 0.5195, "step": 11511 }, { "epoch": 0.353638681534728, "grad_norm": 0.37681853771209717, "learning_rate": 1.8517564200807797e-05, "loss": 0.5674, "step": 11512 }, { "epoch": 0.3536694006696771, "grad_norm": 0.38879549503326416, "learning_rate": 1.8517310983512534e-05, "loss": 0.5688, "step": 11513 }, { "epoch": 0.3537001198046263, "grad_norm": 0.3684677481651306, "learning_rate": 1.85170577463245e-05, "loss": 0.641, "step": 11514 }, { "epoch": 0.35373083893957546, "grad_norm": 0.5679585933685303, "learning_rate": 1.851680448924428e-05, "loss": 0.5682, "step": 11515 }, { "epoch": 0.35376155807452464, "grad_norm": 0.33732616901397705, "learning_rate": 1.8516551212272468e-05, "loss": 0.6082, "step": 11516 }, { "epoch": 0.35379227720947376, "grad_norm": 0.35099220275878906, "learning_rate": 1.8516297915409658e-05, "loss": 0.5703, "step": 11517 }, { "epoch": 0.35382299634442294, "grad_norm": 0.37042465806007385, "learning_rate": 1.8516044598656437e-05, "loss": 0.5875, "step": 11518 }, { "epoch": 0.3538537154793721, "grad_norm": 0.3431532680988312, "learning_rate": 1.85157912620134e-05, "loss": 0.4817, "step": 11519 }, { "epoch": 0.3538844346143213, "grad_norm": 0.356351763010025, "learning_rate": 1.8515537905481138e-05, "loss": 0.5867, "step": 11520 }, { "epoch": 0.3539151537492704, "grad_norm": 0.40702322125434875, "learning_rate": 1.8515284529060243e-05, "loss": 0.5282, "step": 11521 }, { "epoch": 0.3539458728842196, "grad_norm": 0.5621986985206604, "learning_rate": 1.8515031132751306e-05, "loss": 0.6001, "step": 11522 }, { "epoch": 0.35397659201916876, "grad_norm": 0.40977662801742554, "learning_rate": 1.8514777716554916e-05, "loss": 0.6062, "step": 11523 }, { "epoch": 0.3540073111541179, "grad_norm": 0.3304763436317444, "learning_rate": 1.8514524280471673e-05, "loss": 0.544, "step": 11524 }, { "epoch": 0.35403803028906705, "grad_norm": 0.3564305901527405, "learning_rate": 1.8514270824502163e-05, "loss": 0.5655, "step": 11525 }, { "epoch": 0.3540687494240162, "grad_norm": 0.32620295882225037, "learning_rate": 1.8514017348646976e-05, "loss": 0.5789, "step": 11526 }, { "epoch": 0.3540994685589654, "grad_norm": 0.3867633640766144, "learning_rate": 1.851376385290671e-05, "loss": 0.5793, "step": 11527 }, { "epoch": 0.3541301876939145, "grad_norm": 0.3682733178138733, "learning_rate": 1.8513510337281952e-05, "loss": 0.5924, "step": 11528 }, { "epoch": 0.3541609068288637, "grad_norm": 0.4305447041988373, "learning_rate": 1.8513256801773298e-05, "loss": 0.5872, "step": 11529 }, { "epoch": 0.3541916259638129, "grad_norm": 0.35685843229293823, "learning_rate": 1.8513003246381337e-05, "loss": 0.5464, "step": 11530 }, { "epoch": 0.354222345098762, "grad_norm": 0.3484806716442108, "learning_rate": 1.851274967110666e-05, "loss": 0.5674, "step": 11531 }, { "epoch": 0.35425306423371117, "grad_norm": 0.36485713720321655, "learning_rate": 1.8512496075949865e-05, "loss": 0.5934, "step": 11532 }, { "epoch": 0.35428378336866034, "grad_norm": 0.32841429114341736, "learning_rate": 1.851224246091154e-05, "loss": 0.564, "step": 11533 }, { "epoch": 0.3543145025036095, "grad_norm": 0.3329140245914459, "learning_rate": 1.851198882599228e-05, "loss": 0.6468, "step": 11534 }, { "epoch": 0.35434522163855864, "grad_norm": 0.33404651284217834, "learning_rate": 1.8511735171192667e-05, "loss": 0.4884, "step": 11535 }, { "epoch": 0.3543759407735078, "grad_norm": 0.512713611125946, "learning_rate": 1.8511481496513314e-05, "loss": 0.5211, "step": 11536 }, { "epoch": 0.354406659908457, "grad_norm": 0.37229400873184204, "learning_rate": 1.8511227801954793e-05, "loss": 0.6149, "step": 11537 }, { "epoch": 0.35443737904340616, "grad_norm": 0.8387864232063293, "learning_rate": 1.851097408751771e-05, "loss": 0.6292, "step": 11538 }, { "epoch": 0.3544680981783553, "grad_norm": 0.363537073135376, "learning_rate": 1.851072035320265e-05, "loss": 0.608, "step": 11539 }, { "epoch": 0.35449881731330446, "grad_norm": 0.39967870712280273, "learning_rate": 1.851046659901021e-05, "loss": 0.6181, "step": 11540 }, { "epoch": 0.35452953644825363, "grad_norm": 0.503963828086853, "learning_rate": 1.851021282494098e-05, "loss": 0.5306, "step": 11541 }, { "epoch": 0.35456025558320275, "grad_norm": 0.38950490951538086, "learning_rate": 1.850995903099555e-05, "loss": 0.6003, "step": 11542 }, { "epoch": 0.35459097471815193, "grad_norm": 0.35672727227211, "learning_rate": 1.850970521717452e-05, "loss": 0.6366, "step": 11543 }, { "epoch": 0.3546216938531011, "grad_norm": 0.5695829391479492, "learning_rate": 1.8509451383478482e-05, "loss": 0.5116, "step": 11544 }, { "epoch": 0.3546524129880503, "grad_norm": 0.6924694180488586, "learning_rate": 1.850919752990802e-05, "loss": 0.4818, "step": 11545 }, { "epoch": 0.3546831321229994, "grad_norm": 0.38127410411834717, "learning_rate": 1.8508943656463737e-05, "loss": 0.5358, "step": 11546 }, { "epoch": 0.3547138512579486, "grad_norm": 0.35982370376586914, "learning_rate": 1.850868976314622e-05, "loss": 0.6266, "step": 11547 }, { "epoch": 0.35474457039289775, "grad_norm": 0.341376394033432, "learning_rate": 1.8508435849956064e-05, "loss": 0.5238, "step": 11548 }, { "epoch": 0.35477528952784687, "grad_norm": 0.4522336423397064, "learning_rate": 1.8508181916893865e-05, "loss": 0.6384, "step": 11549 }, { "epoch": 0.35480600866279605, "grad_norm": 0.37664806842803955, "learning_rate": 1.8507927963960208e-05, "loss": 0.604, "step": 11550 }, { "epoch": 0.3548367277977452, "grad_norm": 0.3381824791431427, "learning_rate": 1.8507673991155693e-05, "loss": 0.6151, "step": 11551 }, { "epoch": 0.3548674469326944, "grad_norm": 0.37792062759399414, "learning_rate": 1.8507419998480914e-05, "loss": 0.5944, "step": 11552 }, { "epoch": 0.3548981660676435, "grad_norm": 0.31625279784202576, "learning_rate": 1.8507165985936456e-05, "loss": 0.6786, "step": 11553 }, { "epoch": 0.3549288852025927, "grad_norm": 0.4614350199699402, "learning_rate": 1.8506911953522923e-05, "loss": 0.5852, "step": 11554 }, { "epoch": 0.35495960433754187, "grad_norm": 0.31837478280067444, "learning_rate": 1.85066579012409e-05, "loss": 0.5849, "step": 11555 }, { "epoch": 0.354990323472491, "grad_norm": 0.3598499298095703, "learning_rate": 1.8506403829090985e-05, "loss": 0.5626, "step": 11556 }, { "epoch": 0.35502104260744016, "grad_norm": 0.3209093511104584, "learning_rate": 1.850614973707377e-05, "loss": 0.6366, "step": 11557 }, { "epoch": 0.35505176174238934, "grad_norm": 0.42452552914619446, "learning_rate": 1.8505895625189846e-05, "loss": 0.5825, "step": 11558 }, { "epoch": 0.3550824808773385, "grad_norm": 0.3284940719604492, "learning_rate": 1.850564149343981e-05, "loss": 0.4991, "step": 11559 }, { "epoch": 0.35511320001228763, "grad_norm": 0.32355445623397827, "learning_rate": 1.8505387341824254e-05, "loss": 0.5908, "step": 11560 }, { "epoch": 0.3551439191472368, "grad_norm": 0.3206007778644562, "learning_rate": 1.850513317034377e-05, "loss": 0.5376, "step": 11561 }, { "epoch": 0.355174638282186, "grad_norm": 0.37981000542640686, "learning_rate": 1.8504878978998955e-05, "loss": 0.519, "step": 11562 }, { "epoch": 0.35520535741713516, "grad_norm": 0.3483045995235443, "learning_rate": 1.85046247677904e-05, "loss": 0.5141, "step": 11563 }, { "epoch": 0.3552360765520843, "grad_norm": 0.35553765296936035, "learning_rate": 1.8504370536718707e-05, "loss": 0.539, "step": 11564 }, { "epoch": 0.35526679568703345, "grad_norm": 0.36032044887542725, "learning_rate": 1.8504116285784456e-05, "loss": 0.596, "step": 11565 }, { "epoch": 0.35529751482198263, "grad_norm": 0.37693607807159424, "learning_rate": 1.8503862014988248e-05, "loss": 0.5776, "step": 11566 }, { "epoch": 0.35532823395693175, "grad_norm": 0.3534729778766632, "learning_rate": 1.8503607724330676e-05, "loss": 0.6845, "step": 11567 }, { "epoch": 0.3553589530918809, "grad_norm": 0.3586033880710602, "learning_rate": 1.8503353413812333e-05, "loss": 0.5548, "step": 11568 }, { "epoch": 0.3553896722268301, "grad_norm": 0.6165415644645691, "learning_rate": 1.850309908343382e-05, "loss": 0.657, "step": 11569 }, { "epoch": 0.3554203913617793, "grad_norm": 0.38743531703948975, "learning_rate": 1.850284473319572e-05, "loss": 0.6361, "step": 11570 }, { "epoch": 0.3554511104967284, "grad_norm": 0.358366996049881, "learning_rate": 1.8502590363098637e-05, "loss": 0.5201, "step": 11571 }, { "epoch": 0.35548182963167757, "grad_norm": 0.3286268413066864, "learning_rate": 1.8502335973143156e-05, "loss": 0.429, "step": 11572 }, { "epoch": 0.35551254876662675, "grad_norm": 0.34214577078819275, "learning_rate": 1.8502081563329878e-05, "loss": 0.5651, "step": 11573 }, { "epoch": 0.35554326790157587, "grad_norm": 0.33934396505355835, "learning_rate": 1.850182713365939e-05, "loss": 0.5848, "step": 11574 }, { "epoch": 0.35557398703652504, "grad_norm": 0.33535683155059814, "learning_rate": 1.85015726841323e-05, "loss": 0.6079, "step": 11575 }, { "epoch": 0.3556047061714742, "grad_norm": 0.6228156685829163, "learning_rate": 1.8501318214749186e-05, "loss": 0.5165, "step": 11576 }, { "epoch": 0.3556354253064234, "grad_norm": 0.3579408824443817, "learning_rate": 1.8501063725510652e-05, "loss": 0.5282, "step": 11577 }, { "epoch": 0.3556661444413725, "grad_norm": 0.35360994935035706, "learning_rate": 1.850080921641729e-05, "loss": 0.5677, "step": 11578 }, { "epoch": 0.3556968635763217, "grad_norm": 0.3685913681983948, "learning_rate": 1.850055468746969e-05, "loss": 0.5281, "step": 11579 }, { "epoch": 0.35572758271127086, "grad_norm": 0.34141677618026733, "learning_rate": 1.8500300138668458e-05, "loss": 0.5444, "step": 11580 }, { "epoch": 0.35575830184622004, "grad_norm": 0.3426637351512909, "learning_rate": 1.850004557001418e-05, "loss": 0.5869, "step": 11581 }, { "epoch": 0.35578902098116916, "grad_norm": 0.3443811535835266, "learning_rate": 1.849979098150745e-05, "loss": 0.5148, "step": 11582 }, { "epoch": 0.35581974011611833, "grad_norm": 0.40011927485466003, "learning_rate": 1.8499536373148862e-05, "loss": 0.5776, "step": 11583 }, { "epoch": 0.3558504592510675, "grad_norm": 0.3567568361759186, "learning_rate": 1.8499281744939015e-05, "loss": 0.5705, "step": 11584 }, { "epoch": 0.35588117838601663, "grad_norm": 0.32651814818382263, "learning_rate": 1.84990270968785e-05, "loss": 0.5883, "step": 11585 }, { "epoch": 0.3559118975209658, "grad_norm": 0.35985854268074036, "learning_rate": 1.849877242896792e-05, "loss": 0.5879, "step": 11586 }, { "epoch": 0.355942616655915, "grad_norm": 0.36706897616386414, "learning_rate": 1.849851774120786e-05, "loss": 0.7047, "step": 11587 }, { "epoch": 0.35597333579086415, "grad_norm": 0.35213524103164673, "learning_rate": 1.8498263033598918e-05, "loss": 0.5813, "step": 11588 }, { "epoch": 0.3560040549258133, "grad_norm": 0.4593988358974457, "learning_rate": 1.849800830614169e-05, "loss": 0.6584, "step": 11589 }, { "epoch": 0.35603477406076245, "grad_norm": 0.3547353148460388, "learning_rate": 1.8497753558836766e-05, "loss": 0.5955, "step": 11590 }, { "epoch": 0.3560654931957116, "grad_norm": 0.42281806468963623, "learning_rate": 1.8497498791684753e-05, "loss": 0.6292, "step": 11591 }, { "epoch": 0.35609621233066074, "grad_norm": 0.3334449529647827, "learning_rate": 1.849724400468623e-05, "loss": 0.4666, "step": 11592 }, { "epoch": 0.3561269314656099, "grad_norm": 0.34311679005622864, "learning_rate": 1.8496989197841807e-05, "loss": 0.5764, "step": 11593 }, { "epoch": 0.3561576506005591, "grad_norm": 0.3677627444267273, "learning_rate": 1.8496734371152065e-05, "loss": 0.5919, "step": 11594 }, { "epoch": 0.35618836973550827, "grad_norm": 0.3686603903770447, "learning_rate": 1.8496479524617612e-05, "loss": 0.6146, "step": 11595 }, { "epoch": 0.3562190888704574, "grad_norm": 0.3793373703956604, "learning_rate": 1.8496224658239037e-05, "loss": 0.6876, "step": 11596 }, { "epoch": 0.35624980800540657, "grad_norm": 0.38275212049484253, "learning_rate": 1.8495969772016934e-05, "loss": 0.5018, "step": 11597 }, { "epoch": 0.35628052714035574, "grad_norm": 0.310195654630661, "learning_rate": 1.84957148659519e-05, "loss": 0.563, "step": 11598 }, { "epoch": 0.35631124627530486, "grad_norm": 0.3128136992454529, "learning_rate": 1.8495459940044535e-05, "loss": 0.6123, "step": 11599 }, { "epoch": 0.35634196541025404, "grad_norm": 0.34654533863067627, "learning_rate": 1.8495204994295425e-05, "loss": 0.6166, "step": 11600 }, { "epoch": 0.3563726845452032, "grad_norm": 0.33667364716529846, "learning_rate": 1.8494950028705176e-05, "loss": 0.5761, "step": 11601 }, { "epoch": 0.3564034036801524, "grad_norm": 0.3667745590209961, "learning_rate": 1.8494695043274372e-05, "loss": 0.5291, "step": 11602 }, { "epoch": 0.3564341228151015, "grad_norm": 0.39484214782714844, "learning_rate": 1.8494440038003617e-05, "loss": 0.5903, "step": 11603 }, { "epoch": 0.3564648419500507, "grad_norm": 0.347368448972702, "learning_rate": 1.84941850128935e-05, "loss": 0.5359, "step": 11604 }, { "epoch": 0.35649556108499986, "grad_norm": 0.3288450539112091, "learning_rate": 1.8493929967944626e-05, "loss": 0.5185, "step": 11605 }, { "epoch": 0.35652628021994903, "grad_norm": 0.39362528920173645, "learning_rate": 1.8493674903157585e-05, "loss": 0.6382, "step": 11606 }, { "epoch": 0.35655699935489815, "grad_norm": 0.3316851556301117, "learning_rate": 1.8493419818532976e-05, "loss": 0.5942, "step": 11607 }, { "epoch": 0.3565877184898473, "grad_norm": 0.46946170926094055, "learning_rate": 1.8493164714071383e-05, "loss": 0.67, "step": 11608 }, { "epoch": 0.3566184376247965, "grad_norm": 0.3725631833076477, "learning_rate": 1.8492909589773417e-05, "loss": 0.6534, "step": 11609 }, { "epoch": 0.3566491567597456, "grad_norm": 1.6233924627304077, "learning_rate": 1.8492654445639667e-05, "loss": 0.5559, "step": 11610 }, { "epoch": 0.3566798758946948, "grad_norm": 0.34608110785484314, "learning_rate": 1.8492399281670727e-05, "loss": 0.5992, "step": 11611 }, { "epoch": 0.356710595029644, "grad_norm": 0.3288954794406891, "learning_rate": 1.84921440978672e-05, "loss": 0.5372, "step": 11612 }, { "epoch": 0.35674131416459315, "grad_norm": 0.3373855948448181, "learning_rate": 1.849188889422967e-05, "loss": 0.5462, "step": 11613 }, { "epoch": 0.35677203329954227, "grad_norm": 0.5265805721282959, "learning_rate": 1.8491633670758745e-05, "loss": 0.5814, "step": 11614 }, { "epoch": 0.35680275243449144, "grad_norm": 0.39878928661346436, "learning_rate": 1.8491378427455018e-05, "loss": 0.6456, "step": 11615 }, { "epoch": 0.3568334715694406, "grad_norm": 0.4096943438053131, "learning_rate": 1.849112316431908e-05, "loss": 0.5835, "step": 11616 }, { "epoch": 0.35686419070438974, "grad_norm": 0.3363865911960602, "learning_rate": 1.8490867881351533e-05, "loss": 0.5008, "step": 11617 }, { "epoch": 0.3568949098393389, "grad_norm": 0.346894234418869, "learning_rate": 1.849061257855297e-05, "loss": 0.5403, "step": 11618 }, { "epoch": 0.3569256289742881, "grad_norm": 0.3581332564353943, "learning_rate": 1.849035725592399e-05, "loss": 0.5926, "step": 11619 }, { "epoch": 0.35695634810923726, "grad_norm": 0.3502283990383148, "learning_rate": 1.8490101913465186e-05, "loss": 0.5832, "step": 11620 }, { "epoch": 0.3569870672441864, "grad_norm": 0.3850718140602112, "learning_rate": 1.8489846551177156e-05, "loss": 0.6334, "step": 11621 }, { "epoch": 0.35701778637913556, "grad_norm": 0.3347374498844147, "learning_rate": 1.8489591169060497e-05, "loss": 0.6291, "step": 11622 }, { "epoch": 0.35704850551408474, "grad_norm": 0.3877977430820465, "learning_rate": 1.8489335767115807e-05, "loss": 0.6389, "step": 11623 }, { "epoch": 0.3570792246490339, "grad_norm": 0.3602772653102875, "learning_rate": 1.848908034534368e-05, "loss": 0.5769, "step": 11624 }, { "epoch": 0.35710994378398303, "grad_norm": 0.31323307752609253, "learning_rate": 1.8488824903744707e-05, "loss": 0.5221, "step": 11625 }, { "epoch": 0.3571406629189322, "grad_norm": 0.37536412477493286, "learning_rate": 1.8488569442319497e-05, "loss": 0.6052, "step": 11626 }, { "epoch": 0.3571713820538814, "grad_norm": 0.3558522164821625, "learning_rate": 1.8488313961068637e-05, "loss": 0.6148, "step": 11627 }, { "epoch": 0.3572021011888305, "grad_norm": 0.40029868483543396, "learning_rate": 1.848805845999273e-05, "loss": 0.5683, "step": 11628 }, { "epoch": 0.3572328203237797, "grad_norm": 0.40381908416748047, "learning_rate": 1.848780293909237e-05, "loss": 0.6395, "step": 11629 }, { "epoch": 0.35726353945872885, "grad_norm": 0.3321738541126251, "learning_rate": 1.8487547398368147e-05, "loss": 0.5362, "step": 11630 }, { "epoch": 0.357294258593678, "grad_norm": 0.3116963803768158, "learning_rate": 1.848729183782067e-05, "loss": 0.5794, "step": 11631 }, { "epoch": 0.35732497772862715, "grad_norm": 0.4029768705368042, "learning_rate": 1.8487036257450527e-05, "loss": 0.5798, "step": 11632 }, { "epoch": 0.3573556968635763, "grad_norm": 0.4539543390274048, "learning_rate": 1.848678065725832e-05, "loss": 0.6249, "step": 11633 }, { "epoch": 0.3573864159985255, "grad_norm": 0.35968858003616333, "learning_rate": 1.8486525037244643e-05, "loss": 0.5941, "step": 11634 }, { "epoch": 0.3574171351334746, "grad_norm": 0.3408176302909851, "learning_rate": 1.8486269397410096e-05, "loss": 0.6362, "step": 11635 }, { "epoch": 0.3574478542684238, "grad_norm": 0.344531387090683, "learning_rate": 1.8486013737755276e-05, "loss": 0.5188, "step": 11636 }, { "epoch": 0.35747857340337297, "grad_norm": 0.4240998327732086, "learning_rate": 1.8485758058280773e-05, "loss": 0.6627, "step": 11637 }, { "epoch": 0.35750929253832214, "grad_norm": 0.3494747281074524, "learning_rate": 1.8485502358987193e-05, "loss": 0.5536, "step": 11638 }, { "epoch": 0.35754001167327126, "grad_norm": 0.3498668074607849, "learning_rate": 1.848524663987513e-05, "loss": 0.52, "step": 11639 }, { "epoch": 0.35757073080822044, "grad_norm": 0.31525862216949463, "learning_rate": 1.8484990900945177e-05, "loss": 0.5196, "step": 11640 }, { "epoch": 0.3576014499431696, "grad_norm": 0.33531031012535095, "learning_rate": 1.8484735142197938e-05, "loss": 0.5601, "step": 11641 }, { "epoch": 0.35763216907811873, "grad_norm": 0.3574948310852051, "learning_rate": 1.8484479363634007e-05, "loss": 0.6111, "step": 11642 }, { "epoch": 0.3576628882130679, "grad_norm": 0.5686089992523193, "learning_rate": 1.848422356525398e-05, "loss": 0.5916, "step": 11643 }, { "epoch": 0.3576936073480171, "grad_norm": 0.36022162437438965, "learning_rate": 1.848396774705846e-05, "loss": 0.5763, "step": 11644 }, { "epoch": 0.35772432648296626, "grad_norm": 0.3505399227142334, "learning_rate": 1.8483711909048038e-05, "loss": 0.5939, "step": 11645 }, { "epoch": 0.3577550456179154, "grad_norm": 0.4645322859287262, "learning_rate": 1.8483456051223318e-05, "loss": 0.5793, "step": 11646 }, { "epoch": 0.35778576475286455, "grad_norm": 0.38813725113868713, "learning_rate": 1.848320017358489e-05, "loss": 0.5374, "step": 11647 }, { "epoch": 0.35781648388781373, "grad_norm": 0.3335341513156891, "learning_rate": 1.8482944276133357e-05, "loss": 0.651, "step": 11648 }, { "epoch": 0.3578472030227629, "grad_norm": 0.34052199125289917, "learning_rate": 1.8482688358869315e-05, "loss": 0.6361, "step": 11649 }, { "epoch": 0.357877922157712, "grad_norm": 0.34598883986473083, "learning_rate": 1.848243242179336e-05, "loss": 0.5734, "step": 11650 }, { "epoch": 0.3579086412926612, "grad_norm": 0.34892889857292175, "learning_rate": 1.8482176464906097e-05, "loss": 0.5903, "step": 11651 }, { "epoch": 0.3579393604276104, "grad_norm": 0.3182014524936676, "learning_rate": 1.8481920488208115e-05, "loss": 0.6114, "step": 11652 }, { "epoch": 0.3579700795625595, "grad_norm": 0.33331069350242615, "learning_rate": 1.8481664491700015e-05, "loss": 0.5851, "step": 11653 }, { "epoch": 0.35800079869750867, "grad_norm": 0.3161930739879608, "learning_rate": 1.8481408475382396e-05, "loss": 0.5569, "step": 11654 }, { "epoch": 0.35803151783245785, "grad_norm": 0.3309761583805084, "learning_rate": 1.8481152439255854e-05, "loss": 0.5772, "step": 11655 }, { "epoch": 0.358062236967407, "grad_norm": 0.4448913633823395, "learning_rate": 1.8480896383320986e-05, "loss": 0.6744, "step": 11656 }, { "epoch": 0.35809295610235614, "grad_norm": 0.548290491104126, "learning_rate": 1.84806403075784e-05, "loss": 0.5877, "step": 11657 }, { "epoch": 0.3581236752373053, "grad_norm": 0.36135393381118774, "learning_rate": 1.848038421202868e-05, "loss": 0.4878, "step": 11658 }, { "epoch": 0.3581543943722545, "grad_norm": 0.4675532579421997, "learning_rate": 1.848012809667243e-05, "loss": 0.61, "step": 11659 }, { "epoch": 0.3581851135072036, "grad_norm": 0.34075579047203064, "learning_rate": 1.8479871961510253e-05, "loss": 0.6245, "step": 11660 }, { "epoch": 0.3582158326421528, "grad_norm": 0.33287501335144043, "learning_rate": 1.847961580654274e-05, "loss": 0.5722, "step": 11661 }, { "epoch": 0.35824655177710196, "grad_norm": 0.3130570948123932, "learning_rate": 1.847935963177049e-05, "loss": 0.5991, "step": 11662 }, { "epoch": 0.35827727091205114, "grad_norm": 0.36148953437805176, "learning_rate": 1.8479103437194106e-05, "loss": 0.5638, "step": 11663 }, { "epoch": 0.35830799004700026, "grad_norm": 0.3345212936401367, "learning_rate": 1.8478847222814184e-05, "loss": 0.5441, "step": 11664 }, { "epoch": 0.35833870918194943, "grad_norm": 0.3704197406768799, "learning_rate": 1.8478590988631322e-05, "loss": 0.5402, "step": 11665 }, { "epoch": 0.3583694283168986, "grad_norm": 0.3576642572879791, "learning_rate": 1.8478334734646117e-05, "loss": 0.6156, "step": 11666 }, { "epoch": 0.3584001474518478, "grad_norm": 0.3286057710647583, "learning_rate": 1.847807846085917e-05, "loss": 0.5222, "step": 11667 }, { "epoch": 0.3584308665867969, "grad_norm": 0.3608143925666809, "learning_rate": 1.847782216727108e-05, "loss": 0.6025, "step": 11668 }, { "epoch": 0.3584615857217461, "grad_norm": 0.3678131103515625, "learning_rate": 1.8477565853882444e-05, "loss": 0.6068, "step": 11669 }, { "epoch": 0.35849230485669525, "grad_norm": 0.3440263569355011, "learning_rate": 1.847730952069386e-05, "loss": 0.6395, "step": 11670 }, { "epoch": 0.3585230239916444, "grad_norm": 0.36154410243034363, "learning_rate": 1.8477053167705928e-05, "loss": 0.5564, "step": 11671 }, { "epoch": 0.35855374312659355, "grad_norm": 0.3672161102294922, "learning_rate": 1.8476796794919247e-05, "loss": 0.6747, "step": 11672 }, { "epoch": 0.3585844622615427, "grad_norm": 0.3448673486709595, "learning_rate": 1.847654040233441e-05, "loss": 0.6132, "step": 11673 }, { "epoch": 0.3586151813964919, "grad_norm": 0.9930031299591064, "learning_rate": 1.8476283989952025e-05, "loss": 0.5749, "step": 11674 }, { "epoch": 0.358645900531441, "grad_norm": 0.34139832854270935, "learning_rate": 1.847602755777269e-05, "loss": 0.5917, "step": 11675 }, { "epoch": 0.3586766196663902, "grad_norm": 0.3703128695487976, "learning_rate": 1.8475771105796994e-05, "loss": 0.5882, "step": 11676 }, { "epoch": 0.35870733880133937, "grad_norm": 0.34542617201805115, "learning_rate": 1.8475514634025548e-05, "loss": 0.5805, "step": 11677 }, { "epoch": 0.3587380579362885, "grad_norm": 0.34635254740715027, "learning_rate": 1.8475258142458945e-05, "loss": 0.6125, "step": 11678 }, { "epoch": 0.35876877707123767, "grad_norm": 0.390459269285202, "learning_rate": 1.8475001631097786e-05, "loss": 0.5915, "step": 11679 }, { "epoch": 0.35879949620618684, "grad_norm": 0.5761540532112122, "learning_rate": 1.8474745099942662e-05, "loss": 0.5337, "step": 11680 }, { "epoch": 0.358830215341136, "grad_norm": 0.3574976623058319, "learning_rate": 1.8474488548994184e-05, "loss": 0.5338, "step": 11681 }, { "epoch": 0.35886093447608514, "grad_norm": 0.3608132600784302, "learning_rate": 1.8474231978252945e-05, "loss": 0.5725, "step": 11682 }, { "epoch": 0.3588916536110343, "grad_norm": 0.32292333245277405, "learning_rate": 1.8473975387719548e-05, "loss": 0.6122, "step": 11683 }, { "epoch": 0.3589223727459835, "grad_norm": 0.3710794150829315, "learning_rate": 1.8473718777394586e-05, "loss": 0.5591, "step": 11684 }, { "epoch": 0.3589530918809326, "grad_norm": 0.34265628457069397, "learning_rate": 1.847346214727866e-05, "loss": 0.4679, "step": 11685 }, { "epoch": 0.3589838110158818, "grad_norm": 0.3529175817966461, "learning_rate": 1.8473205497372378e-05, "loss": 0.5258, "step": 11686 }, { "epoch": 0.35901453015083096, "grad_norm": 0.4560835659503937, "learning_rate": 1.847294882767633e-05, "loss": 0.6576, "step": 11687 }, { "epoch": 0.35904524928578013, "grad_norm": 0.3602703809738159, "learning_rate": 1.8472692138191116e-05, "loss": 0.5805, "step": 11688 }, { "epoch": 0.35907596842072925, "grad_norm": 0.3633745610713959, "learning_rate": 1.847243542891734e-05, "loss": 0.5367, "step": 11689 }, { "epoch": 0.3591066875556784, "grad_norm": 0.3564493656158447, "learning_rate": 1.8472178699855598e-05, "loss": 0.6797, "step": 11690 }, { "epoch": 0.3591374066906276, "grad_norm": 0.3669878840446472, "learning_rate": 1.847192195100649e-05, "loss": 0.5921, "step": 11691 }, { "epoch": 0.3591681258255768, "grad_norm": 0.4042571783065796, "learning_rate": 1.847166518237062e-05, "loss": 0.5985, "step": 11692 }, { "epoch": 0.3591988449605259, "grad_norm": 0.33851373195648193, "learning_rate": 1.8471408393948586e-05, "loss": 0.516, "step": 11693 }, { "epoch": 0.3592295640954751, "grad_norm": 0.4881036579608917, "learning_rate": 1.8471151585740982e-05, "loss": 0.6427, "step": 11694 }, { "epoch": 0.35926028323042425, "grad_norm": 0.3767602741718292, "learning_rate": 1.8470894757748414e-05, "loss": 0.6679, "step": 11695 }, { "epoch": 0.35929100236537337, "grad_norm": 0.32512032985687256, "learning_rate": 1.8470637909971476e-05, "loss": 0.5773, "step": 11696 }, { "epoch": 0.35932172150032254, "grad_norm": 0.34986332058906555, "learning_rate": 1.8470381042410774e-05, "loss": 0.563, "step": 11697 }, { "epoch": 0.3593524406352717, "grad_norm": 0.3612254858016968, "learning_rate": 1.8470124155066905e-05, "loss": 0.5948, "step": 11698 }, { "epoch": 0.3593831597702209, "grad_norm": 0.330427885055542, "learning_rate": 1.846986724794047e-05, "loss": 0.6128, "step": 11699 }, { "epoch": 0.35941387890517, "grad_norm": 0.37628844380378723, "learning_rate": 1.8469610321032068e-05, "loss": 0.6784, "step": 11700 }, { "epoch": 0.3594445980401192, "grad_norm": 0.3277653753757477, "learning_rate": 1.8469353374342302e-05, "loss": 0.5736, "step": 11701 }, { "epoch": 0.35947531717506837, "grad_norm": 0.3490961194038391, "learning_rate": 1.8469096407871766e-05, "loss": 0.5704, "step": 11702 }, { "epoch": 0.3595060363100175, "grad_norm": 0.3874584138393402, "learning_rate": 1.8468839421621066e-05, "loss": 0.5958, "step": 11703 }, { "epoch": 0.35953675544496666, "grad_norm": 0.3736210763454437, "learning_rate": 1.8468582415590802e-05, "loss": 0.6311, "step": 11704 }, { "epoch": 0.35956747457991584, "grad_norm": 0.3222174048423767, "learning_rate": 1.846832538978157e-05, "loss": 0.5532, "step": 11705 }, { "epoch": 0.359598193714865, "grad_norm": 0.3440132737159729, "learning_rate": 1.8468068344193972e-05, "loss": 0.6094, "step": 11706 }, { "epoch": 0.35962891284981413, "grad_norm": 0.37193572521209717, "learning_rate": 1.846781127882861e-05, "loss": 0.5793, "step": 11707 }, { "epoch": 0.3596596319847633, "grad_norm": 0.4482770562171936, "learning_rate": 1.846755419368608e-05, "loss": 0.6265, "step": 11708 }, { "epoch": 0.3596903511197125, "grad_norm": 0.3558589220046997, "learning_rate": 1.846729708876699e-05, "loss": 0.6208, "step": 11709 }, { "epoch": 0.3597210702546616, "grad_norm": 0.40075141191482544, "learning_rate": 1.8467039964071936e-05, "loss": 0.5948, "step": 11710 }, { "epoch": 0.3597517893896108, "grad_norm": 0.345641553401947, "learning_rate": 1.846678281960152e-05, "loss": 0.5895, "step": 11711 }, { "epoch": 0.35978250852455995, "grad_norm": 0.376119464635849, "learning_rate": 1.846652565535634e-05, "loss": 0.5373, "step": 11712 }, { "epoch": 0.3598132276595091, "grad_norm": 0.34080782532691956, "learning_rate": 1.8466268471336995e-05, "loss": 0.6081, "step": 11713 }, { "epoch": 0.35984394679445825, "grad_norm": 0.31522583961486816, "learning_rate": 1.846601126754409e-05, "loss": 0.5593, "step": 11714 }, { "epoch": 0.3598746659294074, "grad_norm": 0.36093562841415405, "learning_rate": 1.8465754043978225e-05, "loss": 0.532, "step": 11715 }, { "epoch": 0.3599053850643566, "grad_norm": 0.37767502665519714, "learning_rate": 1.846549680064e-05, "loss": 0.5495, "step": 11716 }, { "epoch": 0.3599361041993058, "grad_norm": 0.34518328309059143, "learning_rate": 1.846523953753002e-05, "loss": 0.5695, "step": 11717 }, { "epoch": 0.3599668233342549, "grad_norm": 0.3545691967010498, "learning_rate": 1.846498225464888e-05, "loss": 0.5154, "step": 11718 }, { "epoch": 0.35999754246920407, "grad_norm": 0.3569391369819641, "learning_rate": 1.846472495199718e-05, "loss": 0.5872, "step": 11719 }, { "epoch": 0.36002826160415324, "grad_norm": 0.35374027490615845, "learning_rate": 1.8464467629575523e-05, "loss": 0.63, "step": 11720 }, { "epoch": 0.36005898073910236, "grad_norm": 0.4996431767940521, "learning_rate": 1.8464210287384516e-05, "loss": 0.636, "step": 11721 }, { "epoch": 0.36008969987405154, "grad_norm": 0.31261909008026123, "learning_rate": 1.846395292542475e-05, "loss": 0.5588, "step": 11722 }, { "epoch": 0.3601204190090007, "grad_norm": 0.34321102499961853, "learning_rate": 1.8463695543696832e-05, "loss": 0.5617, "step": 11723 }, { "epoch": 0.3601511381439499, "grad_norm": 0.3565045893192291, "learning_rate": 1.8463438142201362e-05, "loss": 0.5099, "step": 11724 }, { "epoch": 0.360181857278899, "grad_norm": 0.32138797640800476, "learning_rate": 1.8463180720938942e-05, "loss": 0.5709, "step": 11725 }, { "epoch": 0.3602125764138482, "grad_norm": 0.3534223437309265, "learning_rate": 1.846292327991017e-05, "loss": 0.6011, "step": 11726 }, { "epoch": 0.36024329554879736, "grad_norm": 0.42619574069976807, "learning_rate": 1.8462665819115652e-05, "loss": 0.6102, "step": 11727 }, { "epoch": 0.3602740146837465, "grad_norm": 0.46899691224098206, "learning_rate": 1.846240833855599e-05, "loss": 0.631, "step": 11728 }, { "epoch": 0.36030473381869565, "grad_norm": 0.4168407618999481, "learning_rate": 1.8462150838231778e-05, "loss": 0.5691, "step": 11729 }, { "epoch": 0.36033545295364483, "grad_norm": 0.34950071573257446, "learning_rate": 1.846189331814362e-05, "loss": 0.5858, "step": 11730 }, { "epoch": 0.360366172088594, "grad_norm": 0.3997926414012909, "learning_rate": 1.8461635778292123e-05, "loss": 0.5638, "step": 11731 }, { "epoch": 0.3603968912235431, "grad_norm": 0.32428717613220215, "learning_rate": 1.846137821867788e-05, "loss": 0.5547, "step": 11732 }, { "epoch": 0.3604276103584923, "grad_norm": 0.38384801149368286, "learning_rate": 1.84611206393015e-05, "loss": 0.566, "step": 11733 }, { "epoch": 0.3604583294934415, "grad_norm": 0.3421359062194824, "learning_rate": 1.8460863040163585e-05, "loss": 0.5948, "step": 11734 }, { "epoch": 0.36048904862839065, "grad_norm": 0.3239459693431854, "learning_rate": 1.846060542126473e-05, "loss": 0.5079, "step": 11735 }, { "epoch": 0.36051976776333977, "grad_norm": 0.3780747056007385, "learning_rate": 1.8460347782605543e-05, "loss": 0.5738, "step": 11736 }, { "epoch": 0.36055048689828895, "grad_norm": 0.3340228497982025, "learning_rate": 1.846009012418662e-05, "loss": 0.6009, "step": 11737 }, { "epoch": 0.3605812060332381, "grad_norm": 0.3393832743167877, "learning_rate": 1.8459832446008566e-05, "loss": 0.5689, "step": 11738 }, { "epoch": 0.36061192516818724, "grad_norm": 0.3381108343601227, "learning_rate": 1.8459574748071985e-05, "loss": 0.551, "step": 11739 }, { "epoch": 0.3606426443031364, "grad_norm": 0.33748963475227356, "learning_rate": 1.845931703037747e-05, "loss": 0.5666, "step": 11740 }, { "epoch": 0.3606733634380856, "grad_norm": 0.34414440393447876, "learning_rate": 1.8459059292925635e-05, "loss": 0.5919, "step": 11741 }, { "epoch": 0.36070408257303477, "grad_norm": 0.31701144576072693, "learning_rate": 1.8458801535717076e-05, "loss": 0.54, "step": 11742 }, { "epoch": 0.3607348017079839, "grad_norm": 0.3418810963630676, "learning_rate": 1.8458543758752396e-05, "loss": 0.523, "step": 11743 }, { "epoch": 0.36076552084293306, "grad_norm": 0.3167153298854828, "learning_rate": 1.8458285962032193e-05, "loss": 0.5438, "step": 11744 }, { "epoch": 0.36079623997788224, "grad_norm": 0.3444059193134308, "learning_rate": 1.8458028145557074e-05, "loss": 0.5843, "step": 11745 }, { "epoch": 0.36082695911283136, "grad_norm": 0.395694375038147, "learning_rate": 1.845777030932764e-05, "loss": 0.5315, "step": 11746 }, { "epoch": 0.36085767824778053, "grad_norm": 0.35943877696990967, "learning_rate": 1.8457512453344492e-05, "loss": 0.6575, "step": 11747 }, { "epoch": 0.3608883973827297, "grad_norm": 0.42896050214767456, "learning_rate": 1.8457254577608234e-05, "loss": 0.5875, "step": 11748 }, { "epoch": 0.3609191165176789, "grad_norm": 0.35356003046035767, "learning_rate": 1.8456996682119466e-05, "loss": 0.6081, "step": 11749 }, { "epoch": 0.360949835652628, "grad_norm": 0.3356860876083374, "learning_rate": 1.845673876687879e-05, "loss": 0.5833, "step": 11750 }, { "epoch": 0.3609805547875772, "grad_norm": 0.3371163606643677, "learning_rate": 1.8456480831886813e-05, "loss": 0.6341, "step": 11751 }, { "epoch": 0.36101127392252635, "grad_norm": 0.47714465856552124, "learning_rate": 1.8456222877144133e-05, "loss": 0.6423, "step": 11752 }, { "epoch": 0.3610419930574755, "grad_norm": 0.32031431794166565, "learning_rate": 1.8455964902651356e-05, "loss": 0.5327, "step": 11753 }, { "epoch": 0.36107271219242465, "grad_norm": 0.4019853174686432, "learning_rate": 1.845570690840908e-05, "loss": 0.649, "step": 11754 }, { "epoch": 0.3611034313273738, "grad_norm": 0.3390180468559265, "learning_rate": 1.8455448894417913e-05, "loss": 0.5607, "step": 11755 }, { "epoch": 0.361134150462323, "grad_norm": 0.34782758355140686, "learning_rate": 1.845519086067845e-05, "loss": 0.6434, "step": 11756 }, { "epoch": 0.3611648695972721, "grad_norm": 0.36065974831581116, "learning_rate": 1.8454932807191302e-05, "loss": 0.5498, "step": 11757 }, { "epoch": 0.3611955887322213, "grad_norm": 0.39988359808921814, "learning_rate": 1.8454674733957064e-05, "loss": 0.6454, "step": 11758 }, { "epoch": 0.36122630786717047, "grad_norm": 0.357385516166687, "learning_rate": 1.8454416640976348e-05, "loss": 0.5179, "step": 11759 }, { "epoch": 0.36125702700211965, "grad_norm": 0.48375433683395386, "learning_rate": 1.8454158528249748e-05, "loss": 0.5407, "step": 11760 }, { "epoch": 0.36128774613706877, "grad_norm": 0.34277579188346863, "learning_rate": 1.8453900395777872e-05, "loss": 0.6115, "step": 11761 }, { "epoch": 0.36131846527201794, "grad_norm": 0.4010598659515381, "learning_rate": 1.845364224356132e-05, "loss": 0.5794, "step": 11762 }, { "epoch": 0.3613491844069671, "grad_norm": 0.31356751918792725, "learning_rate": 1.8453384071600696e-05, "loss": 0.531, "step": 11763 }, { "epoch": 0.36137990354191624, "grad_norm": 0.37324443459510803, "learning_rate": 1.8453125879896605e-05, "loss": 0.6206, "step": 11764 }, { "epoch": 0.3614106226768654, "grad_norm": 0.3590243458747864, "learning_rate": 1.8452867668449644e-05, "loss": 0.6099, "step": 11765 }, { "epoch": 0.3614413418118146, "grad_norm": 0.35223227739334106, "learning_rate": 1.8452609437260425e-05, "loss": 0.5893, "step": 11766 }, { "epoch": 0.36147206094676376, "grad_norm": 0.3552345335483551, "learning_rate": 1.8452351186329544e-05, "loss": 0.7082, "step": 11767 }, { "epoch": 0.3615027800817129, "grad_norm": 0.510814368724823, "learning_rate": 1.8452092915657606e-05, "loss": 0.594, "step": 11768 }, { "epoch": 0.36153349921666206, "grad_norm": 0.3880462646484375, "learning_rate": 1.8451834625245215e-05, "loss": 0.5904, "step": 11769 }, { "epoch": 0.36156421835161123, "grad_norm": 0.3834557831287384, "learning_rate": 1.8451576315092972e-05, "loss": 0.609, "step": 11770 }, { "epoch": 0.36159493748656035, "grad_norm": 0.3290703296661377, "learning_rate": 1.8451317985201487e-05, "loss": 0.5858, "step": 11771 }, { "epoch": 0.36162565662150953, "grad_norm": 0.3098900318145752, "learning_rate": 1.8451059635571356e-05, "loss": 0.5548, "step": 11772 }, { "epoch": 0.3616563757564587, "grad_norm": 0.3525715172290802, "learning_rate": 1.8450801266203186e-05, "loss": 0.5924, "step": 11773 }, { "epoch": 0.3616870948914079, "grad_norm": 0.34760451316833496, "learning_rate": 1.8450542877097583e-05, "loss": 0.6714, "step": 11774 }, { "epoch": 0.361717814026357, "grad_norm": 0.4033433198928833, "learning_rate": 1.845028446825514e-05, "loss": 0.5357, "step": 11775 }, { "epoch": 0.3617485331613062, "grad_norm": 0.3559158444404602, "learning_rate": 1.845002603967647e-05, "loss": 0.5346, "step": 11776 }, { "epoch": 0.36177925229625535, "grad_norm": 0.37289223074913025, "learning_rate": 1.844976759136217e-05, "loss": 0.5718, "step": 11777 }, { "epoch": 0.3618099714312045, "grad_norm": 0.3542799651622772, "learning_rate": 1.8449509123312857e-05, "loss": 0.6234, "step": 11778 }, { "epoch": 0.36184069056615364, "grad_norm": 0.3097146451473236, "learning_rate": 1.8449250635529118e-05, "loss": 0.6243, "step": 11779 }, { "epoch": 0.3618714097011028, "grad_norm": 0.3591543436050415, "learning_rate": 1.8448992128011568e-05, "loss": 0.6352, "step": 11780 }, { "epoch": 0.361902128836052, "grad_norm": 0.36390185356140137, "learning_rate": 1.8448733600760803e-05, "loss": 0.5989, "step": 11781 }, { "epoch": 0.3619328479710011, "grad_norm": 0.3217698633670807, "learning_rate": 1.8448475053777434e-05, "loss": 0.5514, "step": 11782 }, { "epoch": 0.3619635671059503, "grad_norm": 0.5795149207115173, "learning_rate": 1.8448216487062062e-05, "loss": 0.6071, "step": 11783 }, { "epoch": 0.36199428624089947, "grad_norm": 0.31856992840766907, "learning_rate": 1.844795790061529e-05, "loss": 0.659, "step": 11784 }, { "epoch": 0.36202500537584864, "grad_norm": 0.35359975695610046, "learning_rate": 1.8447699294437716e-05, "loss": 0.5299, "step": 11785 }, { "epoch": 0.36205572451079776, "grad_norm": 0.34842702746391296, "learning_rate": 1.8447440668529958e-05, "loss": 0.5012, "step": 11786 }, { "epoch": 0.36208644364574694, "grad_norm": 0.41335752606391907, "learning_rate": 1.844718202289261e-05, "loss": 0.5535, "step": 11787 }, { "epoch": 0.3621171627806961, "grad_norm": 0.3241906762123108, "learning_rate": 1.844692335752628e-05, "loss": 0.5828, "step": 11788 }, { "epoch": 0.36214788191564523, "grad_norm": 0.3808067739009857, "learning_rate": 1.8446664672431565e-05, "loss": 0.5974, "step": 11789 }, { "epoch": 0.3621786010505944, "grad_norm": 0.36013904213905334, "learning_rate": 1.844640596760908e-05, "loss": 0.5634, "step": 11790 }, { "epoch": 0.3622093201855436, "grad_norm": 0.38249385356903076, "learning_rate": 1.844614724305942e-05, "loss": 0.6572, "step": 11791 }, { "epoch": 0.36224003932049276, "grad_norm": 0.36368465423583984, "learning_rate": 1.8445888498783197e-05, "loss": 0.5703, "step": 11792 }, { "epoch": 0.3622707584554419, "grad_norm": 0.3404916524887085, "learning_rate": 1.844562973478101e-05, "loss": 0.6509, "step": 11793 }, { "epoch": 0.36230147759039105, "grad_norm": 0.3582603931427002, "learning_rate": 1.8445370951053465e-05, "loss": 0.5179, "step": 11794 }, { "epoch": 0.3623321967253402, "grad_norm": 0.3905051648616791, "learning_rate": 1.8445112147601166e-05, "loss": 0.5509, "step": 11795 }, { "epoch": 0.36236291586028935, "grad_norm": 0.6167184710502625, "learning_rate": 1.8444853324424717e-05, "loss": 0.5896, "step": 11796 }, { "epoch": 0.3623936349952385, "grad_norm": 0.5049555897712708, "learning_rate": 1.8444594481524725e-05, "loss": 0.6538, "step": 11797 }, { "epoch": 0.3624243541301877, "grad_norm": 0.322294145822525, "learning_rate": 1.8444335618901795e-05, "loss": 0.6052, "step": 11798 }, { "epoch": 0.3624550732651369, "grad_norm": 0.33478739857673645, "learning_rate": 1.8444076736556523e-05, "loss": 0.5385, "step": 11799 }, { "epoch": 0.362485792400086, "grad_norm": 0.3377440273761749, "learning_rate": 1.8443817834489525e-05, "loss": 0.5185, "step": 11800 }, { "epoch": 0.36251651153503517, "grad_norm": 0.33605456352233887, "learning_rate": 1.84435589127014e-05, "loss": 0.545, "step": 11801 }, { "epoch": 0.36254723066998434, "grad_norm": 0.3490135967731476, "learning_rate": 1.8443299971192755e-05, "loss": 0.5185, "step": 11802 }, { "epoch": 0.3625779498049335, "grad_norm": 0.31062743067741394, "learning_rate": 1.844304100996419e-05, "loss": 0.5096, "step": 11803 }, { "epoch": 0.36260866893988264, "grad_norm": 0.3322119116783142, "learning_rate": 1.8442782029016313e-05, "loss": 0.5666, "step": 11804 }, { "epoch": 0.3626393880748318, "grad_norm": 0.3489355444908142, "learning_rate": 1.8442523028349726e-05, "loss": 0.5294, "step": 11805 }, { "epoch": 0.362670107209781, "grad_norm": 0.33937618136405945, "learning_rate": 1.8442264007965047e-05, "loss": 0.5422, "step": 11806 }, { "epoch": 0.3627008263447301, "grad_norm": 0.374136745929718, "learning_rate": 1.8442004967862863e-05, "loss": 0.6159, "step": 11807 }, { "epoch": 0.3627315454796793, "grad_norm": 0.310507208108902, "learning_rate": 1.8441745908043788e-05, "loss": 0.5868, "step": 11808 }, { "epoch": 0.36276226461462846, "grad_norm": 0.32051563262939453, "learning_rate": 1.844148682850843e-05, "loss": 0.5783, "step": 11809 }, { "epoch": 0.36279298374957764, "grad_norm": 0.34414660930633545, "learning_rate": 1.8441227729257383e-05, "loss": 0.6195, "step": 11810 }, { "epoch": 0.36282370288452676, "grad_norm": 0.34124505519866943, "learning_rate": 1.8440968610291265e-05, "loss": 0.5754, "step": 11811 }, { "epoch": 0.36285442201947593, "grad_norm": 0.3917175531387329, "learning_rate": 1.844070947161067e-05, "loss": 0.5983, "step": 11812 }, { "epoch": 0.3628851411544251, "grad_norm": 0.3770959675312042, "learning_rate": 1.8440450313216213e-05, "loss": 0.5566, "step": 11813 }, { "epoch": 0.3629158602893742, "grad_norm": 0.47437870502471924, "learning_rate": 1.8440191135108493e-05, "loss": 0.5853, "step": 11814 }, { "epoch": 0.3629465794243234, "grad_norm": 0.31868767738342285, "learning_rate": 1.8439931937288117e-05, "loss": 0.5575, "step": 11815 }, { "epoch": 0.3629772985592726, "grad_norm": 0.34456491470336914, "learning_rate": 1.843967271975569e-05, "loss": 0.6013, "step": 11816 }, { "epoch": 0.36300801769422175, "grad_norm": 0.35280242562294006, "learning_rate": 1.8439413482511818e-05, "loss": 0.5629, "step": 11817 }, { "epoch": 0.36303873682917087, "grad_norm": 0.3844134211540222, "learning_rate": 1.843915422555711e-05, "loss": 0.4704, "step": 11818 }, { "epoch": 0.36306945596412005, "grad_norm": 0.33375969529151917, "learning_rate": 1.8438894948892163e-05, "loss": 0.6177, "step": 11819 }, { "epoch": 0.3631001750990692, "grad_norm": 0.3329445421695709, "learning_rate": 1.843863565251759e-05, "loss": 0.5712, "step": 11820 }, { "epoch": 0.3631308942340184, "grad_norm": 0.319169282913208, "learning_rate": 1.8438376336433993e-05, "loss": 0.5459, "step": 11821 }, { "epoch": 0.3631616133689675, "grad_norm": 0.3861388564109802, "learning_rate": 1.843811700064198e-05, "loss": 0.5395, "step": 11822 }, { "epoch": 0.3631923325039167, "grad_norm": 0.38738569617271423, "learning_rate": 1.8437857645142157e-05, "loss": 0.5131, "step": 11823 }, { "epoch": 0.36322305163886587, "grad_norm": 0.34546878933906555, "learning_rate": 1.8437598269935125e-05, "loss": 0.5497, "step": 11824 }, { "epoch": 0.363253770773815, "grad_norm": 0.42154034972190857, "learning_rate": 1.8437338875021495e-05, "loss": 0.5665, "step": 11825 }, { "epoch": 0.36328448990876416, "grad_norm": 0.35521626472473145, "learning_rate": 1.843707946040187e-05, "loss": 0.5881, "step": 11826 }, { "epoch": 0.36331520904371334, "grad_norm": 0.33657553791999817, "learning_rate": 1.8436820026076855e-05, "loss": 0.5243, "step": 11827 }, { "epoch": 0.3633459281786625, "grad_norm": 0.33101460337638855, "learning_rate": 1.8436560572047058e-05, "loss": 0.4928, "step": 11828 }, { "epoch": 0.36337664731361163, "grad_norm": 0.5288845896720886, "learning_rate": 1.8436301098313086e-05, "loss": 0.5749, "step": 11829 }, { "epoch": 0.3634073664485608, "grad_norm": 0.32967710494995117, "learning_rate": 1.8436041604875545e-05, "loss": 0.4904, "step": 11830 }, { "epoch": 0.36343808558351, "grad_norm": 0.3471973240375519, "learning_rate": 1.843578209173504e-05, "loss": 0.6119, "step": 11831 }, { "epoch": 0.3634688047184591, "grad_norm": 0.3713919520378113, "learning_rate": 1.8435522558892173e-05, "loss": 0.5969, "step": 11832 }, { "epoch": 0.3634995238534083, "grad_norm": 0.35779061913490295, "learning_rate": 1.8435263006347558e-05, "loss": 0.6056, "step": 11833 }, { "epoch": 0.36353024298835745, "grad_norm": 0.3294345736503601, "learning_rate": 1.8435003434101794e-05, "loss": 0.5714, "step": 11834 }, { "epoch": 0.36356096212330663, "grad_norm": 0.3550017178058624, "learning_rate": 1.843474384215549e-05, "loss": 0.5344, "step": 11835 }, { "epoch": 0.36359168125825575, "grad_norm": 0.3302602469921112, "learning_rate": 1.8434484230509253e-05, "loss": 0.6095, "step": 11836 }, { "epoch": 0.3636224003932049, "grad_norm": 0.3615990877151489, "learning_rate": 1.843422459916369e-05, "loss": 0.5443, "step": 11837 }, { "epoch": 0.3636531195281541, "grad_norm": 0.3646582067012787, "learning_rate": 1.8433964948119406e-05, "loss": 0.5477, "step": 11838 }, { "epoch": 0.3636838386631032, "grad_norm": 0.3466217517852783, "learning_rate": 1.8433705277377008e-05, "loss": 0.7047, "step": 11839 }, { "epoch": 0.3637145577980524, "grad_norm": 0.34893137216567993, "learning_rate": 1.8433445586937103e-05, "loss": 0.5684, "step": 11840 }, { "epoch": 0.36374527693300157, "grad_norm": 0.3317178785800934, "learning_rate": 1.8433185876800298e-05, "loss": 0.5547, "step": 11841 }, { "epoch": 0.36377599606795075, "grad_norm": 0.3273610472679138, "learning_rate": 1.8432926146967195e-05, "loss": 0.533, "step": 11842 }, { "epoch": 0.36380671520289987, "grad_norm": 0.39357906579971313, "learning_rate": 1.8432666397438407e-05, "loss": 0.559, "step": 11843 }, { "epoch": 0.36383743433784904, "grad_norm": 0.33497288823127747, "learning_rate": 1.8432406628214538e-05, "loss": 0.5325, "step": 11844 }, { "epoch": 0.3638681534727982, "grad_norm": 0.34948399662971497, "learning_rate": 1.843214683929619e-05, "loss": 0.6687, "step": 11845 }, { "epoch": 0.3638988726077474, "grad_norm": 0.34257856011390686, "learning_rate": 1.843188703068398e-05, "loss": 0.5082, "step": 11846 }, { "epoch": 0.3639295917426965, "grad_norm": 0.32667967677116394, "learning_rate": 1.8431627202378504e-05, "loss": 0.6257, "step": 11847 }, { "epoch": 0.3639603108776457, "grad_norm": 0.37662214040756226, "learning_rate": 1.8431367354380375e-05, "loss": 0.5553, "step": 11848 }, { "epoch": 0.36399103001259486, "grad_norm": 0.3293911814689636, "learning_rate": 1.84311074866902e-05, "loss": 0.5481, "step": 11849 }, { "epoch": 0.364021749147544, "grad_norm": 0.3705604076385498, "learning_rate": 1.8430847599308585e-05, "loss": 0.5869, "step": 11850 }, { "epoch": 0.36405246828249316, "grad_norm": 0.3218510150909424, "learning_rate": 1.8430587692236135e-05, "loss": 0.5621, "step": 11851 }, { "epoch": 0.36408318741744233, "grad_norm": 0.3663003146648407, "learning_rate": 1.843032776547346e-05, "loss": 0.4907, "step": 11852 }, { "epoch": 0.3641139065523915, "grad_norm": 0.4038190245628357, "learning_rate": 1.8430067819021166e-05, "loss": 0.6114, "step": 11853 }, { "epoch": 0.36414462568734063, "grad_norm": 0.3271123766899109, "learning_rate": 1.8429807852879857e-05, "loss": 0.5277, "step": 11854 }, { "epoch": 0.3641753448222898, "grad_norm": 0.35887765884399414, "learning_rate": 1.8429547867050147e-05, "loss": 0.516, "step": 11855 }, { "epoch": 0.364206063957239, "grad_norm": 0.37054744362831116, "learning_rate": 1.8429287861532638e-05, "loss": 0.5826, "step": 11856 }, { "epoch": 0.3642367830921881, "grad_norm": 0.35418465733528137, "learning_rate": 1.8429027836327935e-05, "loss": 0.6142, "step": 11857 }, { "epoch": 0.3642675022271373, "grad_norm": 0.3547789752483368, "learning_rate": 1.8428767791436654e-05, "loss": 0.556, "step": 11858 }, { "epoch": 0.36429822136208645, "grad_norm": 0.36122819781303406, "learning_rate": 1.8428507726859393e-05, "loss": 0.6309, "step": 11859 }, { "epoch": 0.3643289404970356, "grad_norm": 0.4027959108352661, "learning_rate": 1.8428247642596768e-05, "loss": 0.4748, "step": 11860 }, { "epoch": 0.36435965963198474, "grad_norm": 0.3527657687664032, "learning_rate": 1.8427987538649378e-05, "loss": 0.5003, "step": 11861 }, { "epoch": 0.3643903787669339, "grad_norm": 0.3592173755168915, "learning_rate": 1.8427727415017837e-05, "loss": 0.6174, "step": 11862 }, { "epoch": 0.3644210979018831, "grad_norm": 0.36379390954971313, "learning_rate": 1.8427467271702747e-05, "loss": 0.6308, "step": 11863 }, { "epoch": 0.3644518170368322, "grad_norm": 0.3731750547885895, "learning_rate": 1.842720710870472e-05, "loss": 0.6338, "step": 11864 }, { "epoch": 0.3644825361717814, "grad_norm": 0.3795950412750244, "learning_rate": 1.8426946926024364e-05, "loss": 0.6134, "step": 11865 }, { "epoch": 0.36451325530673057, "grad_norm": 0.36661672592163086, "learning_rate": 1.8426686723662283e-05, "loss": 0.5935, "step": 11866 }, { "epoch": 0.36454397444167974, "grad_norm": 0.3783034682273865, "learning_rate": 1.842642650161909e-05, "loss": 0.5554, "step": 11867 }, { "epoch": 0.36457469357662886, "grad_norm": 0.37170377373695374, "learning_rate": 1.8426166259895387e-05, "loss": 0.5856, "step": 11868 }, { "epoch": 0.36460541271157804, "grad_norm": 0.30854669213294983, "learning_rate": 1.8425905998491782e-05, "loss": 0.5274, "step": 11869 }, { "epoch": 0.3646361318465272, "grad_norm": 0.3480738699436188, "learning_rate": 1.8425645717408888e-05, "loss": 0.624, "step": 11870 }, { "epoch": 0.3646668509814764, "grad_norm": 0.3386436402797699, "learning_rate": 1.8425385416647308e-05, "loss": 0.6266, "step": 11871 }, { "epoch": 0.3646975701164255, "grad_norm": 0.3255176246166229, "learning_rate": 1.8425125096207655e-05, "loss": 0.5877, "step": 11872 }, { "epoch": 0.3647282892513747, "grad_norm": 0.3033462166786194, "learning_rate": 1.8424864756090532e-05, "loss": 0.606, "step": 11873 }, { "epoch": 0.36475900838632386, "grad_norm": 0.3566072881221771, "learning_rate": 1.8424604396296545e-05, "loss": 0.6003, "step": 11874 }, { "epoch": 0.364789727521273, "grad_norm": 0.38538694381713867, "learning_rate": 1.8424344016826312e-05, "loss": 0.5856, "step": 11875 }, { "epoch": 0.36482044665622215, "grad_norm": 0.3610716164112091, "learning_rate": 1.8424083617680432e-05, "loss": 0.5368, "step": 11876 }, { "epoch": 0.36485116579117133, "grad_norm": 0.47198888659477234, "learning_rate": 1.842382319885952e-05, "loss": 0.6036, "step": 11877 }, { "epoch": 0.3648818849261205, "grad_norm": 0.33774733543395996, "learning_rate": 1.8423562760364174e-05, "loss": 0.581, "step": 11878 }, { "epoch": 0.3649126040610696, "grad_norm": 0.32840263843536377, "learning_rate": 1.8423302302195015e-05, "loss": 0.5502, "step": 11879 }, { "epoch": 0.3649433231960188, "grad_norm": 0.3575284779071808, "learning_rate": 1.8423041824352642e-05, "loss": 0.5695, "step": 11880 }, { "epoch": 0.364974042330968, "grad_norm": 0.38024014234542847, "learning_rate": 1.8422781326837672e-05, "loss": 0.636, "step": 11881 }, { "epoch": 0.3650047614659171, "grad_norm": 0.3673113286495209, "learning_rate": 1.84225208096507e-05, "loss": 0.6216, "step": 11882 }, { "epoch": 0.36503548060086627, "grad_norm": 0.3681805431842804, "learning_rate": 1.8422260272792346e-05, "loss": 0.5413, "step": 11883 }, { "epoch": 0.36506619973581544, "grad_norm": 0.3104393780231476, "learning_rate": 1.8421999716263218e-05, "loss": 0.5316, "step": 11884 }, { "epoch": 0.3650969188707646, "grad_norm": 0.4147748053073883, "learning_rate": 1.8421739140063918e-05, "loss": 0.5921, "step": 11885 }, { "epoch": 0.36512763800571374, "grad_norm": 0.4474312663078308, "learning_rate": 1.8421478544195056e-05, "loss": 0.5423, "step": 11886 }, { "epoch": 0.3651583571406629, "grad_norm": 0.36704373359680176, "learning_rate": 1.842121792865725e-05, "loss": 0.5402, "step": 11887 }, { "epoch": 0.3651890762756121, "grad_norm": 0.3954010307788849, "learning_rate": 1.8420957293451093e-05, "loss": 0.6846, "step": 11888 }, { "epoch": 0.36521979541056127, "grad_norm": 0.3363669216632843, "learning_rate": 1.8420696638577208e-05, "loss": 0.6263, "step": 11889 }, { "epoch": 0.3652505145455104, "grad_norm": 0.3122166693210602, "learning_rate": 1.8420435964036193e-05, "loss": 0.6427, "step": 11890 }, { "epoch": 0.36528123368045956, "grad_norm": 0.40260156989097595, "learning_rate": 1.8420175269828666e-05, "loss": 0.5464, "step": 11891 }, { "epoch": 0.36531195281540874, "grad_norm": 0.3528652489185333, "learning_rate": 1.841991455595523e-05, "loss": 0.5358, "step": 11892 }, { "epoch": 0.36534267195035786, "grad_norm": 0.3829364776611328, "learning_rate": 1.8419653822416497e-05, "loss": 0.5993, "step": 11893 }, { "epoch": 0.36537339108530703, "grad_norm": 0.363869845867157, "learning_rate": 1.841939306921307e-05, "loss": 0.5768, "step": 11894 }, { "epoch": 0.3654041102202562, "grad_norm": 0.36213886737823486, "learning_rate": 1.8419132296345568e-05, "loss": 0.5624, "step": 11895 }, { "epoch": 0.3654348293552054, "grad_norm": 0.3711516559123993, "learning_rate": 1.8418871503814592e-05, "loss": 0.6214, "step": 11896 }, { "epoch": 0.3654655484901545, "grad_norm": 0.3959082067012787, "learning_rate": 1.8418610691620756e-05, "loss": 0.5961, "step": 11897 }, { "epoch": 0.3654962676251037, "grad_norm": 0.3881227374076843, "learning_rate": 1.8418349859764662e-05, "loss": 0.5391, "step": 11898 }, { "epoch": 0.36552698676005285, "grad_norm": 0.32823678851127625, "learning_rate": 1.8418089008246925e-05, "loss": 0.62, "step": 11899 }, { "epoch": 0.36555770589500197, "grad_norm": 0.33642178773880005, "learning_rate": 1.8417828137068157e-05, "loss": 0.554, "step": 11900 }, { "epoch": 0.36558842502995115, "grad_norm": 0.5282217860221863, "learning_rate": 1.8417567246228962e-05, "loss": 0.5838, "step": 11901 }, { "epoch": 0.3656191441649003, "grad_norm": 0.34916791319847107, "learning_rate": 1.841730633572995e-05, "loss": 0.6438, "step": 11902 }, { "epoch": 0.3656498632998495, "grad_norm": 0.36850032210350037, "learning_rate": 1.8417045405571728e-05, "loss": 0.5725, "step": 11903 }, { "epoch": 0.3656805824347986, "grad_norm": 0.3249133229255676, "learning_rate": 1.8416784455754916e-05, "loss": 0.5929, "step": 11904 }, { "epoch": 0.3657113015697478, "grad_norm": 0.43784278631210327, "learning_rate": 1.841652348628011e-05, "loss": 0.574, "step": 11905 }, { "epoch": 0.36574202070469697, "grad_norm": 0.335622638463974, "learning_rate": 1.8416262497147928e-05, "loss": 0.5708, "step": 11906 }, { "epoch": 0.3657727398396461, "grad_norm": 0.33738917112350464, "learning_rate": 1.8416001488358977e-05, "loss": 0.5797, "step": 11907 }, { "epoch": 0.36580345897459526, "grad_norm": 0.4214390218257904, "learning_rate": 1.8415740459913866e-05, "loss": 0.6019, "step": 11908 }, { "epoch": 0.36583417810954444, "grad_norm": 0.37157219648361206, "learning_rate": 1.8415479411813205e-05, "loss": 0.5458, "step": 11909 }, { "epoch": 0.3658648972444936, "grad_norm": 0.381492018699646, "learning_rate": 1.8415218344057606e-05, "loss": 0.5507, "step": 11910 }, { "epoch": 0.36589561637944273, "grad_norm": 0.39769214391708374, "learning_rate": 1.8414957256647678e-05, "loss": 0.623, "step": 11911 }, { "epoch": 0.3659263355143919, "grad_norm": 0.35276323556900024, "learning_rate": 1.8414696149584027e-05, "loss": 0.5382, "step": 11912 }, { "epoch": 0.3659570546493411, "grad_norm": 0.3170086741447449, "learning_rate": 1.8414435022867264e-05, "loss": 0.667, "step": 11913 }, { "epoch": 0.36598777378429026, "grad_norm": 0.42976346611976624, "learning_rate": 1.8414173876498e-05, "loss": 0.5804, "step": 11914 }, { "epoch": 0.3660184929192394, "grad_norm": 0.35241779685020447, "learning_rate": 1.841391271047685e-05, "loss": 0.5729, "step": 11915 }, { "epoch": 0.36604921205418856, "grad_norm": 0.32764607667922974, "learning_rate": 1.8413651524804418e-05, "loss": 0.5341, "step": 11916 }, { "epoch": 0.36607993118913773, "grad_norm": 0.3538931906223297, "learning_rate": 1.8413390319481314e-05, "loss": 0.5807, "step": 11917 }, { "epoch": 0.36611065032408685, "grad_norm": 0.3512170612812042, "learning_rate": 1.841312909450815e-05, "loss": 0.5695, "step": 11918 }, { "epoch": 0.366141369459036, "grad_norm": 0.3725009262561798, "learning_rate": 1.841286784988553e-05, "loss": 0.5482, "step": 11919 }, { "epoch": 0.3661720885939852, "grad_norm": 0.40060776472091675, "learning_rate": 1.841260658561408e-05, "loss": 0.5652, "step": 11920 }, { "epoch": 0.3662028077289344, "grad_norm": 0.31050506234169006, "learning_rate": 1.8412345301694392e-05, "loss": 0.569, "step": 11921 }, { "epoch": 0.3662335268638835, "grad_norm": 0.35549959540367126, "learning_rate": 1.8412083998127087e-05, "loss": 0.6102, "step": 11922 }, { "epoch": 0.36626424599883267, "grad_norm": 0.35277268290519714, "learning_rate": 1.841182267491277e-05, "loss": 0.5471, "step": 11923 }, { "epoch": 0.36629496513378185, "grad_norm": 0.3853762149810791, "learning_rate": 1.8411561332052055e-05, "loss": 0.5033, "step": 11924 }, { "epoch": 0.36632568426873097, "grad_norm": 0.33365949988365173, "learning_rate": 1.841129996954555e-05, "loss": 0.532, "step": 11925 }, { "epoch": 0.36635640340368014, "grad_norm": 0.39100852608680725, "learning_rate": 1.8411038587393867e-05, "loss": 0.5928, "step": 11926 }, { "epoch": 0.3663871225386293, "grad_norm": 0.341845840215683, "learning_rate": 1.8410777185597618e-05, "loss": 0.5599, "step": 11927 }, { "epoch": 0.3664178416735785, "grad_norm": 0.4131210744380951, "learning_rate": 1.8410515764157414e-05, "loss": 0.5985, "step": 11928 }, { "epoch": 0.3664485608085276, "grad_norm": 0.5987859964370728, "learning_rate": 1.8410254323073856e-05, "loss": 0.4578, "step": 11929 }, { "epoch": 0.3664792799434768, "grad_norm": 0.3163631558418274, "learning_rate": 1.8409992862347567e-05, "loss": 0.5448, "step": 11930 }, { "epoch": 0.36650999907842596, "grad_norm": 0.3331263065338135, "learning_rate": 1.840973138197915e-05, "loss": 0.6215, "step": 11931 }, { "epoch": 0.36654071821337514, "grad_norm": 0.35286521911621094, "learning_rate": 1.8409469881969217e-05, "loss": 0.6529, "step": 11932 }, { "epoch": 0.36657143734832426, "grad_norm": 0.365863174200058, "learning_rate": 1.8409208362318382e-05, "loss": 0.5368, "step": 11933 }, { "epoch": 0.36660215648327343, "grad_norm": 0.3714585602283478, "learning_rate": 1.8408946823027253e-05, "loss": 0.6695, "step": 11934 }, { "epoch": 0.3666328756182226, "grad_norm": 0.33374840021133423, "learning_rate": 1.840868526409644e-05, "loss": 0.5222, "step": 11935 }, { "epoch": 0.36666359475317173, "grad_norm": 0.39832043647766113, "learning_rate": 1.8408423685526554e-05, "loss": 0.6443, "step": 11936 }, { "epoch": 0.3666943138881209, "grad_norm": 0.3028642237186432, "learning_rate": 1.840816208731821e-05, "loss": 0.4803, "step": 11937 }, { "epoch": 0.3667250330230701, "grad_norm": 0.3419438302516937, "learning_rate": 1.840790046947202e-05, "loss": 0.5777, "step": 11938 }, { "epoch": 0.36675575215801925, "grad_norm": 0.39224135875701904, "learning_rate": 1.8407638831988585e-05, "loss": 0.5276, "step": 11939 }, { "epoch": 0.3667864712929684, "grad_norm": 0.3401513397693634, "learning_rate": 1.8407377174868525e-05, "loss": 0.591, "step": 11940 }, { "epoch": 0.36681719042791755, "grad_norm": 0.3338383138179779, "learning_rate": 1.8407115498112446e-05, "loss": 0.5794, "step": 11941 }, { "epoch": 0.3668479095628667, "grad_norm": 0.3333677351474762, "learning_rate": 1.8406853801720963e-05, "loss": 0.5588, "step": 11942 }, { "epoch": 0.36687862869781585, "grad_norm": 0.34390443563461304, "learning_rate": 1.8406592085694688e-05, "loss": 0.5326, "step": 11943 }, { "epoch": 0.366909347832765, "grad_norm": 0.397388219833374, "learning_rate": 1.840633035003423e-05, "loss": 0.5649, "step": 11944 }, { "epoch": 0.3669400669677142, "grad_norm": 0.45031842589378357, "learning_rate": 1.8406068594740195e-05, "loss": 0.5734, "step": 11945 }, { "epoch": 0.36697078610266337, "grad_norm": 0.38525354862213135, "learning_rate": 1.8405806819813207e-05, "loss": 0.6441, "step": 11946 }, { "epoch": 0.3670015052376125, "grad_norm": 0.38057634234428406, "learning_rate": 1.8405545025253862e-05, "loss": 0.569, "step": 11947 }, { "epoch": 0.36703222437256167, "grad_norm": 0.36378416419029236, "learning_rate": 1.8405283211062787e-05, "loss": 0.6125, "step": 11948 }, { "epoch": 0.36706294350751084, "grad_norm": 0.34487470984458923, "learning_rate": 1.8405021377240585e-05, "loss": 0.585, "step": 11949 }, { "epoch": 0.36709366264245996, "grad_norm": 0.3296482264995575, "learning_rate": 1.8404759523787866e-05, "loss": 0.5667, "step": 11950 }, { "epoch": 0.36712438177740914, "grad_norm": 0.34236642718315125, "learning_rate": 1.8404497650705243e-05, "loss": 0.5921, "step": 11951 }, { "epoch": 0.3671551009123583, "grad_norm": 0.34392935037612915, "learning_rate": 1.8404235757993333e-05, "loss": 0.5941, "step": 11952 }, { "epoch": 0.3671858200473075, "grad_norm": 0.4271540343761444, "learning_rate": 1.840397384565274e-05, "loss": 0.5903, "step": 11953 }, { "epoch": 0.3672165391822566, "grad_norm": 0.3716713786125183, "learning_rate": 1.840371191368408e-05, "loss": 0.6328, "step": 11954 }, { "epoch": 0.3672472583172058, "grad_norm": 0.3413596749305725, "learning_rate": 1.8403449962087965e-05, "loss": 0.5658, "step": 11955 }, { "epoch": 0.36727797745215496, "grad_norm": 0.391269713640213, "learning_rate": 1.8403187990865005e-05, "loss": 0.5433, "step": 11956 }, { "epoch": 0.36730869658710413, "grad_norm": 0.35973289608955383, "learning_rate": 1.8402926000015813e-05, "loss": 0.6108, "step": 11957 }, { "epoch": 0.36733941572205325, "grad_norm": 0.34323906898498535, "learning_rate": 1.8402663989540997e-05, "loss": 0.5506, "step": 11958 }, { "epoch": 0.36737013485700243, "grad_norm": 0.380167692899704, "learning_rate": 1.8402401959441176e-05, "loss": 0.568, "step": 11959 }, { "epoch": 0.3674008539919516, "grad_norm": 0.33708688616752625, "learning_rate": 1.8402139909716958e-05, "loss": 0.5483, "step": 11960 }, { "epoch": 0.3674315731269007, "grad_norm": 0.34888654947280884, "learning_rate": 1.8401877840368957e-05, "loss": 0.6218, "step": 11961 }, { "epoch": 0.3674622922618499, "grad_norm": 0.33701956272125244, "learning_rate": 1.840161575139778e-05, "loss": 0.6179, "step": 11962 }, { "epoch": 0.3674930113967991, "grad_norm": 0.3968079388141632, "learning_rate": 1.840135364280404e-05, "loss": 0.5331, "step": 11963 }, { "epoch": 0.36752373053174825, "grad_norm": 0.5395948886871338, "learning_rate": 1.840109151458836e-05, "loss": 0.5712, "step": 11964 }, { "epoch": 0.36755444966669737, "grad_norm": 0.37396320700645447, "learning_rate": 1.8400829366751342e-05, "loss": 0.5465, "step": 11965 }, { "epoch": 0.36758516880164654, "grad_norm": 0.32626694440841675, "learning_rate": 1.8400567199293596e-05, "loss": 0.5008, "step": 11966 }, { "epoch": 0.3676158879365957, "grad_norm": 0.37863224744796753, "learning_rate": 1.840030501221574e-05, "loss": 0.5632, "step": 11967 }, { "epoch": 0.36764660707154484, "grad_norm": 0.3179282546043396, "learning_rate": 1.8400042805518387e-05, "loss": 0.5554, "step": 11968 }, { "epoch": 0.367677326206494, "grad_norm": 0.3494877219200134, "learning_rate": 1.8399780579202147e-05, "loss": 0.4554, "step": 11969 }, { "epoch": 0.3677080453414432, "grad_norm": 0.44056087732315063, "learning_rate": 1.8399518333267635e-05, "loss": 0.6037, "step": 11970 }, { "epoch": 0.36773876447639237, "grad_norm": 0.34568992257118225, "learning_rate": 1.8399256067715458e-05, "loss": 0.6008, "step": 11971 }, { "epoch": 0.3677694836113415, "grad_norm": 0.33962753415107727, "learning_rate": 1.8398993782546234e-05, "loss": 0.5965, "step": 11972 }, { "epoch": 0.36780020274629066, "grad_norm": 0.4204976558685303, "learning_rate": 1.839873147776057e-05, "loss": 0.5845, "step": 11973 }, { "epoch": 0.36783092188123984, "grad_norm": 0.2988635003566742, "learning_rate": 1.8398469153359085e-05, "loss": 0.4983, "step": 11974 }, { "epoch": 0.367861641016189, "grad_norm": 0.35543981194496155, "learning_rate": 1.8398206809342386e-05, "loss": 0.565, "step": 11975 }, { "epoch": 0.36789236015113813, "grad_norm": 0.34638211131095886, "learning_rate": 1.839794444571109e-05, "loss": 0.5649, "step": 11976 }, { "epoch": 0.3679230792860873, "grad_norm": 0.34254828095436096, "learning_rate": 1.839768206246581e-05, "loss": 0.6119, "step": 11977 }, { "epoch": 0.3679537984210365, "grad_norm": 0.467334508895874, "learning_rate": 1.8397419659607158e-05, "loss": 0.6525, "step": 11978 }, { "epoch": 0.3679845175559856, "grad_norm": 0.32235127687454224, "learning_rate": 1.839715723713574e-05, "loss": 0.5873, "step": 11979 }, { "epoch": 0.3680152366909348, "grad_norm": 0.3644038140773773, "learning_rate": 1.839689479505218e-05, "loss": 0.6374, "step": 11980 }, { "epoch": 0.36804595582588395, "grad_norm": 0.2973525822162628, "learning_rate": 1.8396632333357084e-05, "loss": 0.5347, "step": 11981 }, { "epoch": 0.36807667496083313, "grad_norm": 0.32197314500808716, "learning_rate": 1.8396369852051066e-05, "loss": 0.53, "step": 11982 }, { "epoch": 0.36810739409578225, "grad_norm": 0.3555222749710083, "learning_rate": 1.839610735113474e-05, "loss": 0.5819, "step": 11983 }, { "epoch": 0.3681381132307314, "grad_norm": 0.333900511264801, "learning_rate": 1.839584483060872e-05, "loss": 0.6135, "step": 11984 }, { "epoch": 0.3681688323656806, "grad_norm": 0.3176644742488861, "learning_rate": 1.839558229047362e-05, "loss": 0.5513, "step": 11985 }, { "epoch": 0.3681995515006297, "grad_norm": 0.32715582847595215, "learning_rate": 1.8395319730730047e-05, "loss": 0.6022, "step": 11986 }, { "epoch": 0.3682302706355789, "grad_norm": 0.38990506529808044, "learning_rate": 1.839505715137862e-05, "loss": 0.5066, "step": 11987 }, { "epoch": 0.36826098977052807, "grad_norm": 0.3567396104335785, "learning_rate": 1.839479455241995e-05, "loss": 0.5427, "step": 11988 }, { "epoch": 0.36829170890547724, "grad_norm": 0.361809104681015, "learning_rate": 1.8394531933854654e-05, "loss": 0.7245, "step": 11989 }, { "epoch": 0.36832242804042636, "grad_norm": 0.37100353837013245, "learning_rate": 1.839426929568334e-05, "loss": 0.6237, "step": 11990 }, { "epoch": 0.36835314717537554, "grad_norm": 0.3861450254917145, "learning_rate": 1.839400663790662e-05, "loss": 0.5637, "step": 11991 }, { "epoch": 0.3683838663103247, "grad_norm": 0.3342583477497101, "learning_rate": 1.839374396052512e-05, "loss": 0.5599, "step": 11992 }, { "epoch": 0.36841458544527383, "grad_norm": 0.32324764132499695, "learning_rate": 1.839348126353944e-05, "loss": 0.6011, "step": 11993 }, { "epoch": 0.368445304580223, "grad_norm": 0.33611488342285156, "learning_rate": 1.83932185469502e-05, "loss": 0.56, "step": 11994 }, { "epoch": 0.3684760237151722, "grad_norm": 0.32762402296066284, "learning_rate": 1.8392955810758008e-05, "loss": 0.5559, "step": 11995 }, { "epoch": 0.36850674285012136, "grad_norm": 1.0749499797821045, "learning_rate": 1.8392693054963485e-05, "loss": 0.5867, "step": 11996 }, { "epoch": 0.3685374619850705, "grad_norm": 0.33635398745536804, "learning_rate": 1.8392430279567238e-05, "loss": 0.5315, "step": 11997 }, { "epoch": 0.36856818112001966, "grad_norm": 0.34281662106513977, "learning_rate": 1.8392167484569886e-05, "loss": 0.6016, "step": 11998 }, { "epoch": 0.36859890025496883, "grad_norm": 0.35077184438705444, "learning_rate": 1.8391904669972043e-05, "loss": 0.6203, "step": 11999 }, { "epoch": 0.368629619389918, "grad_norm": 0.35605862736701965, "learning_rate": 1.8391641835774318e-05, "loss": 0.6185, "step": 12000 }, { "epoch": 0.3686603385248671, "grad_norm": 0.4318116307258606, "learning_rate": 1.839137898197733e-05, "loss": 0.5899, "step": 12001 }, { "epoch": 0.3686910576598163, "grad_norm": 0.4918050765991211, "learning_rate": 1.8391116108581687e-05, "loss": 0.6275, "step": 12002 }, { "epoch": 0.3687217767947655, "grad_norm": 0.3258839249610901, "learning_rate": 1.8390853215588007e-05, "loss": 0.5495, "step": 12003 }, { "epoch": 0.3687524959297146, "grad_norm": 0.5114021897315979, "learning_rate": 1.83905903029969e-05, "loss": 0.5244, "step": 12004 }, { "epoch": 0.36878321506466377, "grad_norm": 0.3305162787437439, "learning_rate": 1.839032737080899e-05, "loss": 0.5194, "step": 12005 }, { "epoch": 0.36881393419961295, "grad_norm": 0.3545305132865906, "learning_rate": 1.8390064419024882e-05, "loss": 0.6137, "step": 12006 }, { "epoch": 0.3688446533345621, "grad_norm": 0.32840293645858765, "learning_rate": 1.8389801447645194e-05, "loss": 0.5809, "step": 12007 }, { "epoch": 0.36887537246951124, "grad_norm": 0.5298738479614258, "learning_rate": 1.8389538456670534e-05, "loss": 0.5462, "step": 12008 }, { "epoch": 0.3689060916044604, "grad_norm": 0.37130317091941833, "learning_rate": 1.8389275446101528e-05, "loss": 0.5917, "step": 12009 }, { "epoch": 0.3689368107394096, "grad_norm": 0.3453938961029053, "learning_rate": 1.8389012415938777e-05, "loss": 0.6149, "step": 12010 }, { "epoch": 0.3689675298743587, "grad_norm": 0.4200533330440521, "learning_rate": 1.8388749366182904e-05, "loss": 0.5777, "step": 12011 }, { "epoch": 0.3689982490093079, "grad_norm": 0.44403308629989624, "learning_rate": 1.838848629683452e-05, "loss": 0.4966, "step": 12012 }, { "epoch": 0.36902896814425706, "grad_norm": 0.3414042890071869, "learning_rate": 1.8388223207894246e-05, "loss": 0.6445, "step": 12013 }, { "epoch": 0.36905968727920624, "grad_norm": 0.37806326150894165, "learning_rate": 1.8387960099362682e-05, "loss": 0.6239, "step": 12014 }, { "epoch": 0.36909040641415536, "grad_norm": 0.34608447551727295, "learning_rate": 1.8387696971240455e-05, "loss": 0.6066, "step": 12015 }, { "epoch": 0.36912112554910453, "grad_norm": 0.343478798866272, "learning_rate": 1.838743382352818e-05, "loss": 0.6067, "step": 12016 }, { "epoch": 0.3691518446840537, "grad_norm": 0.3560216724872589, "learning_rate": 1.8387170656226464e-05, "loss": 0.5338, "step": 12017 }, { "epoch": 0.36918256381900283, "grad_norm": 0.363333135843277, "learning_rate": 1.8386907469335926e-05, "loss": 0.5573, "step": 12018 }, { "epoch": 0.369213282953952, "grad_norm": 0.3826920986175537, "learning_rate": 1.8386644262857175e-05, "loss": 0.6275, "step": 12019 }, { "epoch": 0.3692440020889012, "grad_norm": 0.43314966559410095, "learning_rate": 1.8386381036790837e-05, "loss": 0.5778, "step": 12020 }, { "epoch": 0.36927472122385036, "grad_norm": 0.40674838423728943, "learning_rate": 1.8386117791137517e-05, "loss": 0.6079, "step": 12021 }, { "epoch": 0.3693054403587995, "grad_norm": 0.38697972893714905, "learning_rate": 1.8385854525897838e-05, "loss": 0.563, "step": 12022 }, { "epoch": 0.36933615949374865, "grad_norm": 0.9551025629043579, "learning_rate": 1.83855912410724e-05, "loss": 0.4944, "step": 12023 }, { "epoch": 0.3693668786286978, "grad_norm": 0.36853930354118347, "learning_rate": 1.838532793666184e-05, "loss": 0.4824, "step": 12024 }, { "epoch": 0.369397597763647, "grad_norm": 0.40153250098228455, "learning_rate": 1.8385064612666753e-05, "loss": 0.54, "step": 12025 }, { "epoch": 0.3694283168985961, "grad_norm": 0.31288498640060425, "learning_rate": 1.8384801269087768e-05, "loss": 0.5322, "step": 12026 }, { "epoch": 0.3694590360335453, "grad_norm": 0.3495669364929199, "learning_rate": 1.838453790592549e-05, "loss": 0.5502, "step": 12027 }, { "epoch": 0.36948975516849447, "grad_norm": 0.39413121342658997, "learning_rate": 1.8384274523180537e-05, "loss": 0.5501, "step": 12028 }, { "epoch": 0.3695204743034436, "grad_norm": 0.45310714840888977, "learning_rate": 1.8384011120853528e-05, "loss": 0.5683, "step": 12029 }, { "epoch": 0.36955119343839277, "grad_norm": 0.3582731783390045, "learning_rate": 1.8383747698945073e-05, "loss": 0.5006, "step": 12030 }, { "epoch": 0.36958191257334194, "grad_norm": 0.33134573698043823, "learning_rate": 1.8383484257455794e-05, "loss": 0.6329, "step": 12031 }, { "epoch": 0.3696126317082911, "grad_norm": 0.3443862497806549, "learning_rate": 1.8383220796386298e-05, "loss": 0.5999, "step": 12032 }, { "epoch": 0.36964335084324024, "grad_norm": 0.97184157371521, "learning_rate": 1.8382957315737205e-05, "loss": 0.5315, "step": 12033 }, { "epoch": 0.3696740699781894, "grad_norm": 0.32173559069633484, "learning_rate": 1.838269381550913e-05, "loss": 0.5395, "step": 12034 }, { "epoch": 0.3697047891131386, "grad_norm": 0.38673773407936096, "learning_rate": 1.838243029570269e-05, "loss": 0.6272, "step": 12035 }, { "epoch": 0.3697355082480877, "grad_norm": 0.3221803307533264, "learning_rate": 1.8382166756318492e-05, "loss": 0.6047, "step": 12036 }, { "epoch": 0.3697662273830369, "grad_norm": 0.33566808700561523, "learning_rate": 1.8381903197357168e-05, "loss": 0.5575, "step": 12037 }, { "epoch": 0.36979694651798606, "grad_norm": 0.3414601981639862, "learning_rate": 1.8381639618819317e-05, "loss": 0.6175, "step": 12038 }, { "epoch": 0.36982766565293523, "grad_norm": 0.36749696731567383, "learning_rate": 1.8381376020705562e-05, "loss": 0.6032, "step": 12039 }, { "epoch": 0.36985838478788435, "grad_norm": 0.3358899652957916, "learning_rate": 1.838111240301652e-05, "loss": 0.574, "step": 12040 }, { "epoch": 0.36988910392283353, "grad_norm": 0.35951241850852966, "learning_rate": 1.8380848765752804e-05, "loss": 0.6096, "step": 12041 }, { "epoch": 0.3699198230577827, "grad_norm": 0.3545386791229248, "learning_rate": 1.838058510891503e-05, "loss": 0.5044, "step": 12042 }, { "epoch": 0.3699505421927319, "grad_norm": 0.4061214029788971, "learning_rate": 1.8380321432503817e-05, "loss": 0.5446, "step": 12043 }, { "epoch": 0.369981261327681, "grad_norm": 0.3752491772174835, "learning_rate": 1.8380057736519772e-05, "loss": 0.6092, "step": 12044 }, { "epoch": 0.3700119804626302, "grad_norm": 0.4284396469593048, "learning_rate": 1.8379794020963523e-05, "loss": 0.5887, "step": 12045 }, { "epoch": 0.37004269959757935, "grad_norm": 0.32964760065078735, "learning_rate": 1.8379530285835677e-05, "loss": 0.5985, "step": 12046 }, { "epoch": 0.37007341873252847, "grad_norm": 0.3762403428554535, "learning_rate": 1.837926653113685e-05, "loss": 0.5995, "step": 12047 }, { "epoch": 0.37010413786747764, "grad_norm": 0.3639916777610779, "learning_rate": 1.8379002756867666e-05, "loss": 0.5983, "step": 12048 }, { "epoch": 0.3701348570024268, "grad_norm": 0.33791565895080566, "learning_rate": 1.8378738963028734e-05, "loss": 0.5314, "step": 12049 }, { "epoch": 0.370165576137376, "grad_norm": 0.3484160602092743, "learning_rate": 1.837847514962067e-05, "loss": 0.5704, "step": 12050 }, { "epoch": 0.3701962952723251, "grad_norm": 0.34674328565597534, "learning_rate": 1.8378211316644094e-05, "loss": 0.5477, "step": 12051 }, { "epoch": 0.3702270144072743, "grad_norm": 0.33115050196647644, "learning_rate": 1.8377947464099622e-05, "loss": 0.6283, "step": 12052 }, { "epoch": 0.37025773354222347, "grad_norm": 0.32590436935424805, "learning_rate": 1.8377683591987866e-05, "loss": 0.5039, "step": 12053 }, { "epoch": 0.3702884526771726, "grad_norm": 0.3400111496448517, "learning_rate": 1.8377419700309446e-05, "loss": 0.6171, "step": 12054 }, { "epoch": 0.37031917181212176, "grad_norm": 0.3587375581264496, "learning_rate": 1.8377155789064975e-05, "loss": 0.6204, "step": 12055 }, { "epoch": 0.37034989094707094, "grad_norm": 0.341569185256958, "learning_rate": 1.8376891858255072e-05, "loss": 0.6215, "step": 12056 }, { "epoch": 0.3703806100820201, "grad_norm": 0.3372836410999298, "learning_rate": 1.8376627907880355e-05, "loss": 0.4625, "step": 12057 }, { "epoch": 0.37041132921696923, "grad_norm": 0.32596638798713684, "learning_rate": 1.837636393794144e-05, "loss": 0.5592, "step": 12058 }, { "epoch": 0.3704420483519184, "grad_norm": 0.34708714485168457, "learning_rate": 1.8376099948438938e-05, "loss": 0.5813, "step": 12059 }, { "epoch": 0.3704727674868676, "grad_norm": 0.3296850621700287, "learning_rate": 1.8375835939373474e-05, "loss": 0.5515, "step": 12060 }, { "epoch": 0.3705034866218167, "grad_norm": 0.39669257402420044, "learning_rate": 1.8375571910745656e-05, "loss": 0.6139, "step": 12061 }, { "epoch": 0.3705342057567659, "grad_norm": 0.3574727773666382, "learning_rate": 1.8375307862556106e-05, "loss": 0.62, "step": 12062 }, { "epoch": 0.37056492489171505, "grad_norm": 0.30562087893486023, "learning_rate": 1.837504379480544e-05, "loss": 0.5216, "step": 12063 }, { "epoch": 0.37059564402666423, "grad_norm": 0.38799917697906494, "learning_rate": 1.8374779707494276e-05, "loss": 0.5348, "step": 12064 }, { "epoch": 0.37062636316161335, "grad_norm": 0.33592739701271057, "learning_rate": 1.8374515600623226e-05, "loss": 0.5987, "step": 12065 }, { "epoch": 0.3706570822965625, "grad_norm": 0.34963613748550415, "learning_rate": 1.8374251474192914e-05, "loss": 0.5602, "step": 12066 }, { "epoch": 0.3706878014315117, "grad_norm": 0.31579363346099854, "learning_rate": 1.837398732820395e-05, "loss": 0.5798, "step": 12067 }, { "epoch": 0.3707185205664609, "grad_norm": 0.4151420295238495, "learning_rate": 1.8373723162656953e-05, "loss": 0.6012, "step": 12068 }, { "epoch": 0.37074923970141, "grad_norm": 0.3305237293243408, "learning_rate": 1.8373458977552543e-05, "loss": 0.6396, "step": 12069 }, { "epoch": 0.37077995883635917, "grad_norm": 0.3695778548717499, "learning_rate": 1.8373194772891333e-05, "loss": 0.5591, "step": 12070 }, { "epoch": 0.37081067797130834, "grad_norm": 0.3590225279331207, "learning_rate": 1.837293054867394e-05, "loss": 0.5984, "step": 12071 }, { "epoch": 0.37084139710625746, "grad_norm": 0.3704279661178589, "learning_rate": 1.8372666304900986e-05, "loss": 0.573, "step": 12072 }, { "epoch": 0.37087211624120664, "grad_norm": 0.35881417989730835, "learning_rate": 1.8372402041573084e-05, "loss": 0.6496, "step": 12073 }, { "epoch": 0.3709028353761558, "grad_norm": 0.3786000609397888, "learning_rate": 1.8372137758690854e-05, "loss": 0.6485, "step": 12074 }, { "epoch": 0.370933554511105, "grad_norm": 0.3357364237308502, "learning_rate": 1.8371873456254913e-05, "loss": 0.5902, "step": 12075 }, { "epoch": 0.3709642736460541, "grad_norm": 0.35234999656677246, "learning_rate": 1.8371609134265873e-05, "loss": 0.6575, "step": 12076 }, { "epoch": 0.3709949927810033, "grad_norm": 0.5663955211639404, "learning_rate": 1.8371344792724354e-05, "loss": 0.6155, "step": 12077 }, { "epoch": 0.37102571191595246, "grad_norm": 0.37467503547668457, "learning_rate": 1.8371080431630978e-05, "loss": 0.6245, "step": 12078 }, { "epoch": 0.3710564310509016, "grad_norm": 0.3440595865249634, "learning_rate": 1.8370816050986356e-05, "loss": 0.6103, "step": 12079 }, { "epoch": 0.37108715018585076, "grad_norm": 0.3419538736343384, "learning_rate": 1.8370551650791113e-05, "loss": 0.5109, "step": 12080 }, { "epoch": 0.37111786932079993, "grad_norm": 0.3772508203983307, "learning_rate": 1.8370287231045862e-05, "loss": 0.5297, "step": 12081 }, { "epoch": 0.3711485884557491, "grad_norm": 0.3835662603378296, "learning_rate": 1.8370022791751215e-05, "loss": 0.6206, "step": 12082 }, { "epoch": 0.3711793075906982, "grad_norm": 0.41574904322624207, "learning_rate": 1.8369758332907797e-05, "loss": 0.6814, "step": 12083 }, { "epoch": 0.3712100267256474, "grad_norm": 0.3383312523365021, "learning_rate": 1.836949385451623e-05, "loss": 0.6115, "step": 12084 }, { "epoch": 0.3712407458605966, "grad_norm": 0.3310041129589081, "learning_rate": 1.836922935657712e-05, "loss": 0.4985, "step": 12085 }, { "epoch": 0.37127146499554575, "grad_norm": 0.348409503698349, "learning_rate": 1.836896483909109e-05, "loss": 0.6684, "step": 12086 }, { "epoch": 0.3713021841304949, "grad_norm": 0.36199697852134705, "learning_rate": 1.8368700302058758e-05, "loss": 0.5249, "step": 12087 }, { "epoch": 0.37133290326544405, "grad_norm": 0.33961331844329834, "learning_rate": 1.8368435745480745e-05, "loss": 0.5952, "step": 12088 }, { "epoch": 0.3713636224003932, "grad_norm": 0.3326971232891083, "learning_rate": 1.8368171169357664e-05, "loss": 0.5159, "step": 12089 }, { "epoch": 0.37139434153534234, "grad_norm": 0.31455743312835693, "learning_rate": 1.8367906573690134e-05, "loss": 0.4872, "step": 12090 }, { "epoch": 0.3714250606702915, "grad_norm": 0.31217148900032043, "learning_rate": 1.8367641958478775e-05, "loss": 0.5962, "step": 12091 }, { "epoch": 0.3714557798052407, "grad_norm": 0.36795228719711304, "learning_rate": 1.8367377323724205e-05, "loss": 0.5664, "step": 12092 }, { "epoch": 0.37148649894018987, "grad_norm": 0.39509469270706177, "learning_rate": 1.836711266942704e-05, "loss": 0.5785, "step": 12093 }, { "epoch": 0.371517218075139, "grad_norm": 0.3441612422466278, "learning_rate": 1.8366847995587897e-05, "loss": 0.5242, "step": 12094 }, { "epoch": 0.37154793721008816, "grad_norm": 0.3225632905960083, "learning_rate": 1.8366583302207398e-05, "loss": 0.643, "step": 12095 }, { "epoch": 0.37157865634503734, "grad_norm": 0.38300207257270813, "learning_rate": 1.8366318589286158e-05, "loss": 0.5196, "step": 12096 }, { "epoch": 0.37160937547998646, "grad_norm": 0.36712610721588135, "learning_rate": 1.83660538568248e-05, "loss": 0.5461, "step": 12097 }, { "epoch": 0.37164009461493563, "grad_norm": 0.34841737151145935, "learning_rate": 1.8365789104823935e-05, "loss": 0.5158, "step": 12098 }, { "epoch": 0.3716708137498848, "grad_norm": 0.3497195541858673, "learning_rate": 1.836552433328419e-05, "loss": 0.6449, "step": 12099 }, { "epoch": 0.371701532884834, "grad_norm": 0.35028520226478577, "learning_rate": 1.8365259542206172e-05, "loss": 0.559, "step": 12100 }, { "epoch": 0.3717322520197831, "grad_norm": 0.39131078124046326, "learning_rate": 1.836499473159051e-05, "loss": 0.5006, "step": 12101 }, { "epoch": 0.3717629711547323, "grad_norm": 0.36936435103416443, "learning_rate": 1.836472990143782e-05, "loss": 0.5908, "step": 12102 }, { "epoch": 0.37179369028968146, "grad_norm": 0.333901584148407, "learning_rate": 1.8364465051748713e-05, "loss": 0.6242, "step": 12103 }, { "epoch": 0.3718244094246306, "grad_norm": 0.31930941343307495, "learning_rate": 1.8364200182523822e-05, "loss": 0.527, "step": 12104 }, { "epoch": 0.37185512855957975, "grad_norm": 0.3666832149028778, "learning_rate": 1.8363935293763753e-05, "loss": 0.5751, "step": 12105 }, { "epoch": 0.3718858476945289, "grad_norm": 0.3177006244659424, "learning_rate": 1.836367038546913e-05, "loss": 0.573, "step": 12106 }, { "epoch": 0.3719165668294781, "grad_norm": 0.34941327571868896, "learning_rate": 1.836340545764057e-05, "loss": 0.5972, "step": 12107 }, { "epoch": 0.3719472859644272, "grad_norm": 0.3826427757740021, "learning_rate": 1.8363140510278692e-05, "loss": 0.6454, "step": 12108 }, { "epoch": 0.3719780050993764, "grad_norm": 0.3605712652206421, "learning_rate": 1.8362875543384116e-05, "loss": 0.544, "step": 12109 }, { "epoch": 0.37200872423432557, "grad_norm": 0.3211289048194885, "learning_rate": 1.8362610556957458e-05, "loss": 0.5274, "step": 12110 }, { "epoch": 0.37203944336927475, "grad_norm": 0.33158934116363525, "learning_rate": 1.8362345550999342e-05, "loss": 0.5928, "step": 12111 }, { "epoch": 0.37207016250422387, "grad_norm": 0.34741103649139404, "learning_rate": 1.8362080525510384e-05, "loss": 0.5559, "step": 12112 }, { "epoch": 0.37210088163917304, "grad_norm": 0.43731433153152466, "learning_rate": 1.83618154804912e-05, "loss": 0.6783, "step": 12113 }, { "epoch": 0.3721316007741222, "grad_norm": 0.3426723778247833, "learning_rate": 1.8361550415942414e-05, "loss": 0.6107, "step": 12114 }, { "epoch": 0.37216231990907134, "grad_norm": 0.3535725176334381, "learning_rate": 1.8361285331864645e-05, "loss": 0.6432, "step": 12115 }, { "epoch": 0.3721930390440205, "grad_norm": 0.3544923961162567, "learning_rate": 1.8361020228258507e-05, "loss": 0.5325, "step": 12116 }, { "epoch": 0.3722237581789697, "grad_norm": 0.3771418333053589, "learning_rate": 1.8360755105124625e-05, "loss": 0.6197, "step": 12117 }, { "epoch": 0.37225447731391886, "grad_norm": 0.3825949430465698, "learning_rate": 1.8360489962463614e-05, "loss": 0.5688, "step": 12118 }, { "epoch": 0.372285196448868, "grad_norm": 0.30727240443229675, "learning_rate": 1.836022480027609e-05, "loss": 0.5102, "step": 12119 }, { "epoch": 0.37231591558381716, "grad_norm": 0.32851701974868774, "learning_rate": 1.8359959618562688e-05, "loss": 0.6536, "step": 12120 }, { "epoch": 0.37234663471876633, "grad_norm": 0.36520758271217346, "learning_rate": 1.8359694417324005e-05, "loss": 0.5826, "step": 12121 }, { "epoch": 0.37237735385371545, "grad_norm": 0.35628077387809753, "learning_rate": 1.8359429196560682e-05, "loss": 0.4774, "step": 12122 }, { "epoch": 0.37240807298866463, "grad_norm": 0.36827102303504944, "learning_rate": 1.835916395627332e-05, "loss": 0.5758, "step": 12123 }, { "epoch": 0.3724387921236138, "grad_norm": 0.3304240107536316, "learning_rate": 1.835889869646255e-05, "loss": 0.5652, "step": 12124 }, { "epoch": 0.372469511258563, "grad_norm": 0.35485437512397766, "learning_rate": 1.835863341712899e-05, "loss": 0.569, "step": 12125 }, { "epoch": 0.3725002303935121, "grad_norm": 0.36710813641548157, "learning_rate": 1.8358368118273256e-05, "loss": 0.5787, "step": 12126 }, { "epoch": 0.3725309495284613, "grad_norm": 0.35038551688194275, "learning_rate": 1.835810279989597e-05, "loss": 0.5301, "step": 12127 }, { "epoch": 0.37256166866341045, "grad_norm": 0.32760778069496155, "learning_rate": 1.8357837461997753e-05, "loss": 0.6445, "step": 12128 }, { "epoch": 0.3725923877983596, "grad_norm": 0.34297558665275574, "learning_rate": 1.835757210457922e-05, "loss": 0.5798, "step": 12129 }, { "epoch": 0.37262310693330875, "grad_norm": 0.36310046911239624, "learning_rate": 1.8357306727640995e-05, "loss": 0.6175, "step": 12130 }, { "epoch": 0.3726538260682579, "grad_norm": 0.35917767882347107, "learning_rate": 1.8357041331183695e-05, "loss": 0.6458, "step": 12131 }, { "epoch": 0.3726845452032071, "grad_norm": 0.3614516258239746, "learning_rate": 1.8356775915207944e-05, "loss": 0.6375, "step": 12132 }, { "epoch": 0.3727152643381562, "grad_norm": 0.43099814653396606, "learning_rate": 1.8356510479714355e-05, "loss": 0.6258, "step": 12133 }, { "epoch": 0.3727459834731054, "grad_norm": 0.4001576006412506, "learning_rate": 1.8356245024703555e-05, "loss": 0.6313, "step": 12134 }, { "epoch": 0.37277670260805457, "grad_norm": 0.48225414752960205, "learning_rate": 1.835597955017616e-05, "loss": 0.5343, "step": 12135 }, { "epoch": 0.37280742174300374, "grad_norm": 0.3603494167327881, "learning_rate": 1.8355714056132795e-05, "loss": 0.598, "step": 12136 }, { "epoch": 0.37283814087795286, "grad_norm": 0.32561707496643066, "learning_rate": 1.835544854257407e-05, "loss": 0.5667, "step": 12137 }, { "epoch": 0.37286886001290204, "grad_norm": 0.3580394685268402, "learning_rate": 1.835518300950062e-05, "loss": 0.6682, "step": 12138 }, { "epoch": 0.3728995791478512, "grad_norm": 0.3784162700176239, "learning_rate": 1.8354917456913047e-05, "loss": 0.4936, "step": 12139 }, { "epoch": 0.37293029828280033, "grad_norm": 0.6742562055587769, "learning_rate": 1.8354651884811987e-05, "loss": 0.5912, "step": 12140 }, { "epoch": 0.3729610174177495, "grad_norm": 0.30308273434638977, "learning_rate": 1.8354386293198053e-05, "loss": 0.5175, "step": 12141 }, { "epoch": 0.3729917365526987, "grad_norm": 0.35824835300445557, "learning_rate": 1.8354120682071867e-05, "loss": 0.5945, "step": 12142 }, { "epoch": 0.37302245568764786, "grad_norm": 0.42936021089553833, "learning_rate": 1.8353855051434047e-05, "loss": 0.5476, "step": 12143 }, { "epoch": 0.373053174822597, "grad_norm": 0.49034327268600464, "learning_rate": 1.8353589401285213e-05, "loss": 0.5383, "step": 12144 }, { "epoch": 0.37308389395754615, "grad_norm": 0.36385759711265564, "learning_rate": 1.8353323731625988e-05, "loss": 0.6757, "step": 12145 }, { "epoch": 0.37311461309249533, "grad_norm": 0.32801076769828796, "learning_rate": 1.8353058042456995e-05, "loss": 0.5282, "step": 12146 }, { "epoch": 0.37314533222744445, "grad_norm": 0.347785085439682, "learning_rate": 1.8352792333778852e-05, "loss": 0.6478, "step": 12147 }, { "epoch": 0.3731760513623936, "grad_norm": 0.33968818187713623, "learning_rate": 1.8352526605592178e-05, "loss": 0.534, "step": 12148 }, { "epoch": 0.3732067704973428, "grad_norm": 0.30201780796051025, "learning_rate": 1.8352260857897596e-05, "loss": 0.5765, "step": 12149 }, { "epoch": 0.373237489632292, "grad_norm": 0.3151794970035553, "learning_rate": 1.8351995090695724e-05, "loss": 0.4809, "step": 12150 }, { "epoch": 0.3732682087672411, "grad_norm": 0.40291139483451843, "learning_rate": 1.8351729303987184e-05, "loss": 0.5356, "step": 12151 }, { "epoch": 0.37329892790219027, "grad_norm": 0.47956448793411255, "learning_rate": 1.8351463497772594e-05, "loss": 0.556, "step": 12152 }, { "epoch": 0.37332964703713944, "grad_norm": 0.32680588960647583, "learning_rate": 1.8351197672052583e-05, "loss": 0.5456, "step": 12153 }, { "epoch": 0.3733603661720886, "grad_norm": 0.32688695192337036, "learning_rate": 1.835093182682776e-05, "loss": 0.5388, "step": 12154 }, { "epoch": 0.37339108530703774, "grad_norm": 0.33203110098838806, "learning_rate": 1.8350665962098763e-05, "loss": 0.4935, "step": 12155 }, { "epoch": 0.3734218044419869, "grad_norm": 0.31248152256011963, "learning_rate": 1.8350400077866194e-05, "loss": 0.5707, "step": 12156 }, { "epoch": 0.3734525235769361, "grad_norm": 0.38916927576065063, "learning_rate": 1.8350134174130684e-05, "loss": 0.5909, "step": 12157 }, { "epoch": 0.3734832427118852, "grad_norm": 0.38977381587028503, "learning_rate": 1.8349868250892854e-05, "loss": 0.6137, "step": 12158 }, { "epoch": 0.3735139618468344, "grad_norm": 0.3384077548980713, "learning_rate": 1.8349602308153324e-05, "loss": 0.5917, "step": 12159 }, { "epoch": 0.37354468098178356, "grad_norm": 0.3522823452949524, "learning_rate": 1.834933634591271e-05, "loss": 0.5963, "step": 12160 }, { "epoch": 0.37357540011673274, "grad_norm": 0.5235410928726196, "learning_rate": 1.8349070364171643e-05, "loss": 0.5904, "step": 12161 }, { "epoch": 0.37360611925168186, "grad_norm": 0.3886731266975403, "learning_rate": 1.8348804362930735e-05, "loss": 0.5249, "step": 12162 }, { "epoch": 0.37363683838663103, "grad_norm": 0.3824404180049896, "learning_rate": 1.8348538342190617e-05, "loss": 0.5864, "step": 12163 }, { "epoch": 0.3736675575215802, "grad_norm": 0.33360669016838074, "learning_rate": 1.83482723019519e-05, "loss": 0.4577, "step": 12164 }, { "epoch": 0.3736982766565293, "grad_norm": 0.33303576707839966, "learning_rate": 1.834800624221521e-05, "loss": 0.5618, "step": 12165 }, { "epoch": 0.3737289957914785, "grad_norm": 0.317665159702301, "learning_rate": 1.834774016298117e-05, "loss": 0.5961, "step": 12166 }, { "epoch": 0.3737597149264277, "grad_norm": 0.3702441155910492, "learning_rate": 1.8347474064250402e-05, "loss": 0.6429, "step": 12167 }, { "epoch": 0.37379043406137685, "grad_norm": 0.36185023188591003, "learning_rate": 1.834720794602352e-05, "loss": 0.5761, "step": 12168 }, { "epoch": 0.373821153196326, "grad_norm": 0.36830878257751465, "learning_rate": 1.8346941808301154e-05, "loss": 0.589, "step": 12169 }, { "epoch": 0.37385187233127515, "grad_norm": 0.4464864134788513, "learning_rate": 1.8346675651083923e-05, "loss": 0.6566, "step": 12170 }, { "epoch": 0.3738825914662243, "grad_norm": 0.3220975995063782, "learning_rate": 1.834640947437245e-05, "loss": 0.5487, "step": 12171 }, { "epoch": 0.3739133106011735, "grad_norm": 0.3383484482765198, "learning_rate": 1.834614327816735e-05, "loss": 0.4977, "step": 12172 }, { "epoch": 0.3739440297361226, "grad_norm": 0.36472582817077637, "learning_rate": 1.834587706246925e-05, "loss": 0.668, "step": 12173 }, { "epoch": 0.3739747488710718, "grad_norm": 0.35959741473197937, "learning_rate": 1.8345610827278774e-05, "loss": 0.5763, "step": 12174 }, { "epoch": 0.37400546800602097, "grad_norm": 0.3130379915237427, "learning_rate": 1.834534457259654e-05, "loss": 0.5765, "step": 12175 }, { "epoch": 0.3740361871409701, "grad_norm": 0.33352312445640564, "learning_rate": 1.834507829842317e-05, "loss": 0.6405, "step": 12176 }, { "epoch": 0.37406690627591926, "grad_norm": 0.3498833179473877, "learning_rate": 1.834481200475929e-05, "loss": 0.553, "step": 12177 }, { "epoch": 0.37409762541086844, "grad_norm": 0.34850403666496277, "learning_rate": 1.8344545691605515e-05, "loss": 0.546, "step": 12178 }, { "epoch": 0.3741283445458176, "grad_norm": 0.4031921327114105, "learning_rate": 1.8344279358962472e-05, "loss": 0.6131, "step": 12179 }, { "epoch": 0.37415906368076673, "grad_norm": 0.3498925268650055, "learning_rate": 1.8344013006830783e-05, "loss": 0.6055, "step": 12180 }, { "epoch": 0.3741897828157159, "grad_norm": 0.30813440680503845, "learning_rate": 1.834374663521107e-05, "loss": 0.5787, "step": 12181 }, { "epoch": 0.3742205019506651, "grad_norm": 0.35841405391693115, "learning_rate": 1.8343480244103952e-05, "loss": 0.5454, "step": 12182 }, { "epoch": 0.3742512210856142, "grad_norm": 0.37997815012931824, "learning_rate": 1.834321383351005e-05, "loss": 0.5847, "step": 12183 }, { "epoch": 0.3742819402205634, "grad_norm": 0.3278006613254547, "learning_rate": 1.8342947403429994e-05, "loss": 0.5827, "step": 12184 }, { "epoch": 0.37431265935551256, "grad_norm": 0.35353365540504456, "learning_rate": 1.83426809538644e-05, "loss": 0.5689, "step": 12185 }, { "epoch": 0.37434337849046173, "grad_norm": 0.43014392256736755, "learning_rate": 1.834241448481389e-05, "loss": 0.6889, "step": 12186 }, { "epoch": 0.37437409762541085, "grad_norm": 0.35883089900016785, "learning_rate": 1.8342147996279092e-05, "loss": 0.5655, "step": 12187 }, { "epoch": 0.37440481676036, "grad_norm": 0.35833460092544556, "learning_rate": 1.8341881488260626e-05, "loss": 0.5541, "step": 12188 }, { "epoch": 0.3744355358953092, "grad_norm": 0.47719982266426086, "learning_rate": 1.8341614960759107e-05, "loss": 0.4914, "step": 12189 }, { "epoch": 0.3744662550302583, "grad_norm": 0.3193550705909729, "learning_rate": 1.834134841377517e-05, "loss": 0.6092, "step": 12190 }, { "epoch": 0.3744969741652075, "grad_norm": 0.3602188527584076, "learning_rate": 1.8341081847309427e-05, "loss": 0.6532, "step": 12191 }, { "epoch": 0.37452769330015667, "grad_norm": 0.3811357021331787, "learning_rate": 1.8340815261362506e-05, "loss": 0.5229, "step": 12192 }, { "epoch": 0.37455841243510585, "grad_norm": 0.3277207911014557, "learning_rate": 1.834054865593503e-05, "loss": 0.6291, "step": 12193 }, { "epoch": 0.37458913157005497, "grad_norm": 0.35268351435661316, "learning_rate": 1.834028203102762e-05, "loss": 0.6667, "step": 12194 }, { "epoch": 0.37461985070500414, "grad_norm": 0.37715986371040344, "learning_rate": 1.8340015386640896e-05, "loss": 0.5744, "step": 12195 }, { "epoch": 0.3746505698399533, "grad_norm": 0.37921687960624695, "learning_rate": 1.8339748722775485e-05, "loss": 0.5626, "step": 12196 }, { "epoch": 0.3746812889749025, "grad_norm": 0.3399159908294678, "learning_rate": 1.8339482039432006e-05, "loss": 0.5818, "step": 12197 }, { "epoch": 0.3747120081098516, "grad_norm": 0.35199815034866333, "learning_rate": 1.833921533661109e-05, "loss": 0.6163, "step": 12198 }, { "epoch": 0.3747427272448008, "grad_norm": 0.39986518025398254, "learning_rate": 1.8338948614313348e-05, "loss": 0.5911, "step": 12199 }, { "epoch": 0.37477344637974996, "grad_norm": 0.33342307806015015, "learning_rate": 1.8338681872539412e-05, "loss": 0.6115, "step": 12200 }, { "epoch": 0.3748041655146991, "grad_norm": 0.3572082221508026, "learning_rate": 1.8338415111289905e-05, "loss": 0.6001, "step": 12201 }, { "epoch": 0.37483488464964826, "grad_norm": 0.3955007791519165, "learning_rate": 1.833814833056544e-05, "loss": 0.5873, "step": 12202 }, { "epoch": 0.37486560378459743, "grad_norm": 0.3476649522781372, "learning_rate": 1.8337881530366655e-05, "loss": 0.5941, "step": 12203 }, { "epoch": 0.3748963229195466, "grad_norm": 0.33300742506980896, "learning_rate": 1.8337614710694162e-05, "loss": 0.5674, "step": 12204 }, { "epoch": 0.37492704205449573, "grad_norm": 0.3491385877132416, "learning_rate": 1.8337347871548588e-05, "loss": 0.518, "step": 12205 }, { "epoch": 0.3749577611894449, "grad_norm": 0.36407026648521423, "learning_rate": 1.8337081012930557e-05, "loss": 0.6407, "step": 12206 }, { "epoch": 0.3749884803243941, "grad_norm": 0.32493725419044495, "learning_rate": 1.8336814134840688e-05, "loss": 0.5763, "step": 12207 }, { "epoch": 0.3750191994593432, "grad_norm": 0.350605845451355, "learning_rate": 1.833654723727961e-05, "loss": 0.5448, "step": 12208 }, { "epoch": 0.3750499185942924, "grad_norm": 0.3420524299144745, "learning_rate": 1.8336280320247942e-05, "loss": 0.4771, "step": 12209 }, { "epoch": 0.37508063772924155, "grad_norm": 0.3285476565361023, "learning_rate": 1.833601338374631e-05, "loss": 0.5749, "step": 12210 }, { "epoch": 0.3751113568641907, "grad_norm": 0.34891006350517273, "learning_rate": 1.8335746427775335e-05, "loss": 0.5635, "step": 12211 }, { "epoch": 0.37514207599913985, "grad_norm": 0.3430192768573761, "learning_rate": 1.833547945233564e-05, "loss": 0.6914, "step": 12212 }, { "epoch": 0.375172795134089, "grad_norm": 0.3761206269264221, "learning_rate": 1.8335212457427854e-05, "loss": 0.546, "step": 12213 }, { "epoch": 0.3752035142690382, "grad_norm": 0.33406171202659607, "learning_rate": 1.83349454430526e-05, "loss": 0.5788, "step": 12214 }, { "epoch": 0.3752342334039873, "grad_norm": 0.36322659254074097, "learning_rate": 1.8334678409210496e-05, "loss": 0.5319, "step": 12215 }, { "epoch": 0.3752649525389365, "grad_norm": 0.320654034614563, "learning_rate": 1.8334411355902165e-05, "loss": 0.5831, "step": 12216 }, { "epoch": 0.37529567167388567, "grad_norm": 0.376638799905777, "learning_rate": 1.833414428312824e-05, "loss": 0.5615, "step": 12217 }, { "epoch": 0.37532639080883484, "grad_norm": 0.3476390838623047, "learning_rate": 1.8333877190889335e-05, "loss": 0.6579, "step": 12218 }, { "epoch": 0.37535710994378396, "grad_norm": 0.34455347061157227, "learning_rate": 1.8333610079186082e-05, "loss": 0.5141, "step": 12219 }, { "epoch": 0.37538782907873314, "grad_norm": 0.31734928488731384, "learning_rate": 1.8333342948019096e-05, "loss": 0.5177, "step": 12220 }, { "epoch": 0.3754185482136823, "grad_norm": 0.3605869710445404, "learning_rate": 1.8333075797389008e-05, "loss": 0.6287, "step": 12221 }, { "epoch": 0.3754492673486315, "grad_norm": 0.331885427236557, "learning_rate": 1.833280862729644e-05, "loss": 0.5015, "step": 12222 }, { "epoch": 0.3754799864835806, "grad_norm": 0.3089179992675781, "learning_rate": 1.8332541437742012e-05, "loss": 0.6035, "step": 12223 }, { "epoch": 0.3755107056185298, "grad_norm": 0.35665327310562134, "learning_rate": 1.8332274228726356e-05, "loss": 0.5682, "step": 12224 }, { "epoch": 0.37554142475347896, "grad_norm": 0.34247732162475586, "learning_rate": 1.833200700025009e-05, "loss": 0.6383, "step": 12225 }, { "epoch": 0.3755721438884281, "grad_norm": 0.34006834030151367, "learning_rate": 1.8331739752313836e-05, "loss": 0.5504, "step": 12226 }, { "epoch": 0.37560286302337725, "grad_norm": 0.34427472949028015, "learning_rate": 1.8331472484918226e-05, "loss": 0.5539, "step": 12227 }, { "epoch": 0.37563358215832643, "grad_norm": 0.3650262951850891, "learning_rate": 1.833120519806388e-05, "loss": 0.5047, "step": 12228 }, { "epoch": 0.3756643012932756, "grad_norm": 0.43642282485961914, "learning_rate": 1.8330937891751424e-05, "loss": 0.6229, "step": 12229 }, { "epoch": 0.3756950204282247, "grad_norm": 0.36465150117874146, "learning_rate": 1.8330670565981478e-05, "loss": 0.6171, "step": 12230 }, { "epoch": 0.3757257395631739, "grad_norm": 0.3769433796405792, "learning_rate": 1.833040322075467e-05, "loss": 0.5922, "step": 12231 }, { "epoch": 0.3757564586981231, "grad_norm": 0.34797239303588867, "learning_rate": 1.8330135856071624e-05, "loss": 0.5924, "step": 12232 }, { "epoch": 0.3757871778330722, "grad_norm": 0.32304349541664124, "learning_rate": 1.832986847193296e-05, "loss": 0.5858, "step": 12233 }, { "epoch": 0.37581789696802137, "grad_norm": 0.404876708984375, "learning_rate": 1.832960106833931e-05, "loss": 0.6114, "step": 12234 }, { "epoch": 0.37584861610297055, "grad_norm": 0.3882769048213959, "learning_rate": 1.8329333645291293e-05, "loss": 0.5448, "step": 12235 }, { "epoch": 0.3758793352379197, "grad_norm": 0.3431379795074463, "learning_rate": 1.832906620278954e-05, "loss": 0.5621, "step": 12236 }, { "epoch": 0.37591005437286884, "grad_norm": 0.3420567214488983, "learning_rate": 1.8328798740834668e-05, "loss": 0.6311, "step": 12237 }, { "epoch": 0.375940773507818, "grad_norm": 0.3867284655570984, "learning_rate": 1.8328531259427303e-05, "loss": 0.6216, "step": 12238 }, { "epoch": 0.3759714926427672, "grad_norm": 0.32543623447418213, "learning_rate": 1.8328263758568075e-05, "loss": 0.5391, "step": 12239 }, { "epoch": 0.37600221177771637, "grad_norm": 0.3603736162185669, "learning_rate": 1.8327996238257605e-05, "loss": 0.5528, "step": 12240 }, { "epoch": 0.3760329309126655, "grad_norm": 0.3343682885169983, "learning_rate": 1.832772869849652e-05, "loss": 0.4703, "step": 12241 }, { "epoch": 0.37606365004761466, "grad_norm": 0.3669746220111847, "learning_rate": 1.8327461139285437e-05, "loss": 0.5953, "step": 12242 }, { "epoch": 0.37609436918256384, "grad_norm": 0.3647971451282501, "learning_rate": 1.832719356062499e-05, "loss": 0.6329, "step": 12243 }, { "epoch": 0.37612508831751296, "grad_norm": 0.3334681987762451, "learning_rate": 1.83269259625158e-05, "loss": 0.4784, "step": 12244 }, { "epoch": 0.37615580745246213, "grad_norm": 0.35470861196517944, "learning_rate": 1.8326658344958495e-05, "loss": 0.5535, "step": 12245 }, { "epoch": 0.3761865265874113, "grad_norm": 0.3255474269390106, "learning_rate": 1.8326390707953695e-05, "loss": 0.5369, "step": 12246 }, { "epoch": 0.3762172457223605, "grad_norm": 0.36783820390701294, "learning_rate": 1.8326123051502033e-05, "loss": 0.5435, "step": 12247 }, { "epoch": 0.3762479648573096, "grad_norm": 0.28256264328956604, "learning_rate": 1.8325855375604123e-05, "loss": 0.5514, "step": 12248 }, { "epoch": 0.3762786839922588, "grad_norm": 0.3693925440311432, "learning_rate": 1.83255876802606e-05, "loss": 0.5731, "step": 12249 }, { "epoch": 0.37630940312720795, "grad_norm": 0.35602855682373047, "learning_rate": 1.8325319965472085e-05, "loss": 0.5093, "step": 12250 }, { "epoch": 0.3763401222621571, "grad_norm": 0.3890112638473511, "learning_rate": 1.8325052231239202e-05, "loss": 0.5474, "step": 12251 }, { "epoch": 0.37637084139710625, "grad_norm": 0.36882302165031433, "learning_rate": 1.832478447756258e-05, "loss": 0.5475, "step": 12252 }, { "epoch": 0.3764015605320554, "grad_norm": 0.37117791175842285, "learning_rate": 1.8324516704442846e-05, "loss": 0.5519, "step": 12253 }, { "epoch": 0.3764322796670046, "grad_norm": 0.31239303946495056, "learning_rate": 1.8324248911880615e-05, "loss": 0.5461, "step": 12254 }, { "epoch": 0.3764629988019537, "grad_norm": 0.3671448826789856, "learning_rate": 1.8323981099876525e-05, "loss": 0.5876, "step": 12255 }, { "epoch": 0.3764937179369029, "grad_norm": 0.33763256669044495, "learning_rate": 1.8323713268431193e-05, "loss": 0.5879, "step": 12256 }, { "epoch": 0.37652443707185207, "grad_norm": 0.3515906035900116, "learning_rate": 1.8323445417545245e-05, "loss": 0.5867, "step": 12257 }, { "epoch": 0.3765551562068012, "grad_norm": 0.31756818294525146, "learning_rate": 1.832317754721931e-05, "loss": 0.5532, "step": 12258 }, { "epoch": 0.37658587534175036, "grad_norm": 0.34266868233680725, "learning_rate": 1.8322909657454016e-05, "loss": 0.6109, "step": 12259 }, { "epoch": 0.37661659447669954, "grad_norm": 0.3945513367652893, "learning_rate": 1.832264174824998e-05, "loss": 0.6049, "step": 12260 }, { "epoch": 0.3766473136116487, "grad_norm": 0.3704911470413208, "learning_rate": 1.832237381960784e-05, "loss": 0.5637, "step": 12261 }, { "epoch": 0.37667803274659784, "grad_norm": 0.3803843557834625, "learning_rate": 1.832210587152821e-05, "loss": 0.551, "step": 12262 }, { "epoch": 0.376708751881547, "grad_norm": 0.3391750752925873, "learning_rate": 1.832183790401172e-05, "loss": 0.583, "step": 12263 }, { "epoch": 0.3767394710164962, "grad_norm": 0.3558330833911896, "learning_rate": 1.8321569917059e-05, "loss": 0.5817, "step": 12264 }, { "epoch": 0.37677019015144536, "grad_norm": 0.3763715624809265, "learning_rate": 1.832130191067067e-05, "loss": 0.5738, "step": 12265 }, { "epoch": 0.3768009092863945, "grad_norm": 0.3503153920173645, "learning_rate": 1.8321033884847357e-05, "loss": 0.5716, "step": 12266 }, { "epoch": 0.37683162842134366, "grad_norm": 0.34974613785743713, "learning_rate": 1.832076583958969e-05, "loss": 0.5238, "step": 12267 }, { "epoch": 0.37686234755629283, "grad_norm": 0.33951810002326965, "learning_rate": 1.832049777489829e-05, "loss": 0.5393, "step": 12268 }, { "epoch": 0.37689306669124195, "grad_norm": 0.32719266414642334, "learning_rate": 1.832022969077379e-05, "loss": 0.5787, "step": 12269 }, { "epoch": 0.3769237858261911, "grad_norm": 0.33814293146133423, "learning_rate": 1.8319961587216813e-05, "loss": 0.4609, "step": 12270 }, { "epoch": 0.3769545049611403, "grad_norm": 0.3077945411205292, "learning_rate": 1.8319693464227985e-05, "loss": 0.5449, "step": 12271 }, { "epoch": 0.3769852240960895, "grad_norm": 0.3228076100349426, "learning_rate": 1.8319425321807926e-05, "loss": 0.5372, "step": 12272 }, { "epoch": 0.3770159432310386, "grad_norm": 1.9420766830444336, "learning_rate": 1.8319157159957274e-05, "loss": 0.6256, "step": 12273 }, { "epoch": 0.3770466623659878, "grad_norm": 0.29438912868499756, "learning_rate": 1.831888897867665e-05, "loss": 0.5874, "step": 12274 }, { "epoch": 0.37707738150093695, "grad_norm": 0.35462504625320435, "learning_rate": 1.8318620777966676e-05, "loss": 0.5284, "step": 12275 }, { "epoch": 0.37710810063588607, "grad_norm": 0.33981436491012573, "learning_rate": 1.8318352557827984e-05, "loss": 0.6187, "step": 12276 }, { "epoch": 0.37713881977083524, "grad_norm": 0.3738057315349579, "learning_rate": 1.8318084318261195e-05, "loss": 0.5732, "step": 12277 }, { "epoch": 0.3771695389057844, "grad_norm": 0.34464284777641296, "learning_rate": 1.8317816059266943e-05, "loss": 0.6318, "step": 12278 }, { "epoch": 0.3772002580407336, "grad_norm": 0.38084378838539124, "learning_rate": 1.831754778084585e-05, "loss": 0.6736, "step": 12279 }, { "epoch": 0.3772309771756827, "grad_norm": 0.34842315316200256, "learning_rate": 1.8317279482998547e-05, "loss": 0.5591, "step": 12280 }, { "epoch": 0.3772616963106319, "grad_norm": 0.3528923988342285, "learning_rate": 1.831701116572565e-05, "loss": 0.5739, "step": 12281 }, { "epoch": 0.37729241544558106, "grad_norm": 0.31411728262901306, "learning_rate": 1.8316742829027797e-05, "loss": 0.5719, "step": 12282 }, { "epoch": 0.37732313458053024, "grad_norm": 0.3332708179950714, "learning_rate": 1.8316474472905608e-05, "loss": 0.5521, "step": 12283 }, { "epoch": 0.37735385371547936, "grad_norm": 0.32694488763809204, "learning_rate": 1.8316206097359714e-05, "loss": 0.54, "step": 12284 }, { "epoch": 0.37738457285042853, "grad_norm": 0.32313162088394165, "learning_rate": 1.8315937702390738e-05, "loss": 0.5221, "step": 12285 }, { "epoch": 0.3774152919853777, "grad_norm": 0.3670596778392792, "learning_rate": 1.831566928799931e-05, "loss": 0.5687, "step": 12286 }, { "epoch": 0.37744601112032683, "grad_norm": 0.3882518410682678, "learning_rate": 1.8315400854186057e-05, "loss": 0.5635, "step": 12287 }, { "epoch": 0.377476730255276, "grad_norm": 0.34982070326805115, "learning_rate": 1.8315132400951602e-05, "loss": 0.607, "step": 12288 }, { "epoch": 0.3775074493902252, "grad_norm": 0.3151768743991852, "learning_rate": 1.831486392829658e-05, "loss": 0.6262, "step": 12289 }, { "epoch": 0.37753816852517436, "grad_norm": 0.3885985016822815, "learning_rate": 1.8314595436221604e-05, "loss": 0.639, "step": 12290 }, { "epoch": 0.3775688876601235, "grad_norm": 0.34915247559547424, "learning_rate": 1.8314326924727315e-05, "loss": 0.5698, "step": 12291 }, { "epoch": 0.37759960679507265, "grad_norm": 0.328635036945343, "learning_rate": 1.8314058393814335e-05, "loss": 0.5221, "step": 12292 }, { "epoch": 0.3776303259300218, "grad_norm": 0.3257386088371277, "learning_rate": 1.8313789843483287e-05, "loss": 0.4915, "step": 12293 }, { "epoch": 0.37766104506497095, "grad_norm": 0.39388802647590637, "learning_rate": 1.831352127373481e-05, "loss": 0.5509, "step": 12294 }, { "epoch": 0.3776917641999201, "grad_norm": 0.361794650554657, "learning_rate": 1.8313252684569512e-05, "loss": 0.5246, "step": 12295 }, { "epoch": 0.3777224833348693, "grad_norm": 0.35145822167396545, "learning_rate": 1.831298407598804e-05, "loss": 0.5464, "step": 12296 }, { "epoch": 0.37775320246981847, "grad_norm": 0.569258451461792, "learning_rate": 1.8312715447991012e-05, "loss": 0.4746, "step": 12297 }, { "epoch": 0.3777839216047676, "grad_norm": 0.3518896996974945, "learning_rate": 1.8312446800579055e-05, "loss": 0.5619, "step": 12298 }, { "epoch": 0.37781464073971677, "grad_norm": 0.3691684901714325, "learning_rate": 1.8312178133752798e-05, "loss": 0.5389, "step": 12299 }, { "epoch": 0.37784535987466594, "grad_norm": 0.3202275037765503, "learning_rate": 1.831190944751287e-05, "loss": 0.489, "step": 12300 }, { "epoch": 0.37787607900961506, "grad_norm": 0.34359675645828247, "learning_rate": 1.8311640741859894e-05, "loss": 0.5976, "step": 12301 }, { "epoch": 0.37790679814456424, "grad_norm": 0.356679767370224, "learning_rate": 1.83113720167945e-05, "loss": 0.5898, "step": 12302 }, { "epoch": 0.3779375172795134, "grad_norm": 0.35042378306388855, "learning_rate": 1.8311103272317325e-05, "loss": 0.5827, "step": 12303 }, { "epoch": 0.3779682364144626, "grad_norm": 0.3414706289768219, "learning_rate": 1.831083450842898e-05, "loss": 0.6057, "step": 12304 }, { "epoch": 0.3779989555494117, "grad_norm": 0.3747488558292389, "learning_rate": 1.83105657251301e-05, "loss": 0.5883, "step": 12305 }, { "epoch": 0.3780296746843609, "grad_norm": 0.3635494112968445, "learning_rate": 1.8310296922421318e-05, "loss": 0.5527, "step": 12306 }, { "epoch": 0.37806039381931006, "grad_norm": 0.34734907746315, "learning_rate": 1.8310028100303255e-05, "loss": 0.5355, "step": 12307 }, { "epoch": 0.37809111295425923, "grad_norm": 0.3484237790107727, "learning_rate": 1.8309759258776538e-05, "loss": 0.6709, "step": 12308 }, { "epoch": 0.37812183208920835, "grad_norm": 0.42232149839401245, "learning_rate": 1.83094903978418e-05, "loss": 0.5363, "step": 12309 }, { "epoch": 0.37815255122415753, "grad_norm": 0.3293205201625824, "learning_rate": 1.830922151749967e-05, "loss": 0.5688, "step": 12310 }, { "epoch": 0.3781832703591067, "grad_norm": 0.3235052227973938, "learning_rate": 1.8308952617750767e-05, "loss": 0.5726, "step": 12311 }, { "epoch": 0.3782139894940558, "grad_norm": 0.3150380551815033, "learning_rate": 1.830868369859573e-05, "loss": 0.5779, "step": 12312 }, { "epoch": 0.378244708629005, "grad_norm": 0.4560126066207886, "learning_rate": 1.830841476003518e-05, "loss": 0.6344, "step": 12313 }, { "epoch": 0.3782754277639542, "grad_norm": 0.33381712436676025, "learning_rate": 1.8308145802069746e-05, "loss": 0.5632, "step": 12314 }, { "epoch": 0.37830614689890335, "grad_norm": 0.3752351999282837, "learning_rate": 1.830787682470006e-05, "loss": 0.6707, "step": 12315 }, { "epoch": 0.37833686603385247, "grad_norm": 0.37942636013031006, "learning_rate": 1.8307607827926747e-05, "loss": 0.5702, "step": 12316 }, { "epoch": 0.37836758516880165, "grad_norm": 0.6934341192245483, "learning_rate": 1.8307338811750434e-05, "loss": 0.6794, "step": 12317 }, { "epoch": 0.3783983043037508, "grad_norm": 0.35013946890830994, "learning_rate": 1.830706977617175e-05, "loss": 0.5833, "step": 12318 }, { "epoch": 0.37842902343869994, "grad_norm": 0.35414832830429077, "learning_rate": 1.8306800721191328e-05, "loss": 0.6106, "step": 12319 }, { "epoch": 0.3784597425736491, "grad_norm": 0.3600907325744629, "learning_rate": 1.830653164680979e-05, "loss": 0.6142, "step": 12320 }, { "epoch": 0.3784904617085983, "grad_norm": 0.3922841250896454, "learning_rate": 1.8306262553027768e-05, "loss": 0.5566, "step": 12321 }, { "epoch": 0.37852118084354747, "grad_norm": 0.3489941358566284, "learning_rate": 1.830599343984589e-05, "loss": 0.6065, "step": 12322 }, { "epoch": 0.3785518999784966, "grad_norm": 0.3441988527774811, "learning_rate": 1.8305724307264785e-05, "loss": 0.6007, "step": 12323 }, { "epoch": 0.37858261911344576, "grad_norm": 0.315552681684494, "learning_rate": 1.830545515528508e-05, "loss": 0.6127, "step": 12324 }, { "epoch": 0.37861333824839494, "grad_norm": 0.3315088748931885, "learning_rate": 1.8305185983907405e-05, "loss": 0.5901, "step": 12325 }, { "epoch": 0.3786440573833441, "grad_norm": 0.42870375514030457, "learning_rate": 1.8304916793132387e-05, "loss": 0.6313, "step": 12326 }, { "epoch": 0.37867477651829323, "grad_norm": 0.4204654097557068, "learning_rate": 1.8304647582960654e-05, "loss": 0.6295, "step": 12327 }, { "epoch": 0.3787054956532424, "grad_norm": 0.3144787847995758, "learning_rate": 1.830437835339284e-05, "loss": 0.5384, "step": 12328 }, { "epoch": 0.3787362147881916, "grad_norm": 0.40037837624549866, "learning_rate": 1.830410910442957e-05, "loss": 0.7496, "step": 12329 }, { "epoch": 0.3787669339231407, "grad_norm": 0.41805723309516907, "learning_rate": 1.8303839836071473e-05, "loss": 0.6229, "step": 12330 }, { "epoch": 0.3787976530580899, "grad_norm": 0.32610806822776794, "learning_rate": 1.8303570548319175e-05, "loss": 0.5828, "step": 12331 }, { "epoch": 0.37882837219303905, "grad_norm": 0.35919129848480225, "learning_rate": 1.8303301241173313e-05, "loss": 0.5577, "step": 12332 }, { "epoch": 0.37885909132798823, "grad_norm": 0.41950199007987976, "learning_rate": 1.8303031914634507e-05, "loss": 0.6215, "step": 12333 }, { "epoch": 0.37888981046293735, "grad_norm": 0.3801644444465637, "learning_rate": 1.8302762568703392e-05, "loss": 0.641, "step": 12334 }, { "epoch": 0.3789205295978865, "grad_norm": 0.42439576983451843, "learning_rate": 1.8302493203380597e-05, "loss": 0.5549, "step": 12335 }, { "epoch": 0.3789512487328357, "grad_norm": 0.3466308116912842, "learning_rate": 1.8302223818666744e-05, "loss": 0.5846, "step": 12336 }, { "epoch": 0.3789819678677848, "grad_norm": 0.34697139263153076, "learning_rate": 1.830195441456247e-05, "loss": 0.5192, "step": 12337 }, { "epoch": 0.379012687002734, "grad_norm": 0.3268202245235443, "learning_rate": 1.8301684991068404e-05, "loss": 0.564, "step": 12338 }, { "epoch": 0.37904340613768317, "grad_norm": 0.36887508630752563, "learning_rate": 1.830141554818517e-05, "loss": 0.6393, "step": 12339 }, { "epoch": 0.37907412527263235, "grad_norm": 0.3782278597354889, "learning_rate": 1.8301146085913402e-05, "loss": 0.6397, "step": 12340 }, { "epoch": 0.37910484440758146, "grad_norm": 0.338249146938324, "learning_rate": 1.830087660425373e-05, "loss": 0.5529, "step": 12341 }, { "epoch": 0.37913556354253064, "grad_norm": 0.3579395115375519, "learning_rate": 1.8300607103206775e-05, "loss": 0.5812, "step": 12342 }, { "epoch": 0.3791662826774798, "grad_norm": 0.3726062774658203, "learning_rate": 1.8300337582773175e-05, "loss": 0.5527, "step": 12343 }, { "epoch": 0.37919700181242894, "grad_norm": 0.3763701915740967, "learning_rate": 1.830006804295356e-05, "loss": 0.6381, "step": 12344 }, { "epoch": 0.3792277209473781, "grad_norm": 0.45430079102516174, "learning_rate": 1.829979848374855e-05, "loss": 0.5851, "step": 12345 }, { "epoch": 0.3792584400823273, "grad_norm": 0.32820263504981995, "learning_rate": 1.8299528905158784e-05, "loss": 0.5388, "step": 12346 }, { "epoch": 0.37928915921727646, "grad_norm": 0.3412542939186096, "learning_rate": 1.829925930718489e-05, "loss": 0.655, "step": 12347 }, { "epoch": 0.3793198783522256, "grad_norm": 0.3783092796802521, "learning_rate": 1.8298989689827497e-05, "loss": 0.6041, "step": 12348 }, { "epoch": 0.37935059748717476, "grad_norm": 0.360632985830307, "learning_rate": 1.8298720053087234e-05, "loss": 0.5895, "step": 12349 }, { "epoch": 0.37938131662212393, "grad_norm": 0.3429313600063324, "learning_rate": 1.8298450396964726e-05, "loss": 0.6101, "step": 12350 }, { "epoch": 0.3794120357570731, "grad_norm": 0.30949389934539795, "learning_rate": 1.8298180721460613e-05, "loss": 0.6204, "step": 12351 }, { "epoch": 0.3794427548920222, "grad_norm": 0.3796344995498657, "learning_rate": 1.8297911026575516e-05, "loss": 0.5681, "step": 12352 }, { "epoch": 0.3794734740269714, "grad_norm": 0.32479795813560486, "learning_rate": 1.829764131231007e-05, "loss": 0.5388, "step": 12353 }, { "epoch": 0.3795041931619206, "grad_norm": 0.32565009593963623, "learning_rate": 1.8297371578664903e-05, "loss": 0.561, "step": 12354 }, { "epoch": 0.3795349122968697, "grad_norm": 0.3914946913719177, "learning_rate": 1.8297101825640643e-05, "loss": 0.5644, "step": 12355 }, { "epoch": 0.3795656314318189, "grad_norm": 0.34023961424827576, "learning_rate": 1.8296832053237926e-05, "loss": 0.6234, "step": 12356 }, { "epoch": 0.37959635056676805, "grad_norm": 0.41376346349716187, "learning_rate": 1.8296562261457376e-05, "loss": 0.6247, "step": 12357 }, { "epoch": 0.3796270697017172, "grad_norm": 0.3933202922344208, "learning_rate": 1.8296292450299626e-05, "loss": 0.6511, "step": 12358 }, { "epoch": 0.37965778883666634, "grad_norm": 0.32662689685821533, "learning_rate": 1.8296022619765306e-05, "loss": 0.5613, "step": 12359 }, { "epoch": 0.3796885079716155, "grad_norm": 0.30137085914611816, "learning_rate": 1.8295752769855046e-05, "loss": 0.4673, "step": 12360 }, { "epoch": 0.3797192271065647, "grad_norm": 0.39787188172340393, "learning_rate": 1.8295482900569474e-05, "loss": 0.5249, "step": 12361 }, { "epoch": 0.3797499462415138, "grad_norm": 0.3400951623916626, "learning_rate": 1.8295213011909222e-05, "loss": 0.5861, "step": 12362 }, { "epoch": 0.379780665376463, "grad_norm": 0.4007326662540436, "learning_rate": 1.8294943103874924e-05, "loss": 0.5687, "step": 12363 }, { "epoch": 0.37981138451141216, "grad_norm": 0.45836809277534485, "learning_rate": 1.8294673176467204e-05, "loss": 0.4778, "step": 12364 }, { "epoch": 0.37984210364636134, "grad_norm": 0.3908834755420685, "learning_rate": 1.8294403229686696e-05, "loss": 0.5474, "step": 12365 }, { "epoch": 0.37987282278131046, "grad_norm": 0.3474871814250946, "learning_rate": 1.8294133263534032e-05, "loss": 0.5613, "step": 12366 }, { "epoch": 0.37990354191625963, "grad_norm": 0.4676002264022827, "learning_rate": 1.829386327800984e-05, "loss": 0.5845, "step": 12367 }, { "epoch": 0.3799342610512088, "grad_norm": 0.3514297306537628, "learning_rate": 1.829359327311475e-05, "loss": 0.6441, "step": 12368 }, { "epoch": 0.37996498018615793, "grad_norm": 0.3677128553390503, "learning_rate": 1.8293323248849396e-05, "loss": 0.5586, "step": 12369 }, { "epoch": 0.3799956993211071, "grad_norm": 0.3888852000236511, "learning_rate": 1.8293053205214403e-05, "loss": 0.5456, "step": 12370 }, { "epoch": 0.3800264184560563, "grad_norm": 0.341880202293396, "learning_rate": 1.8292783142210407e-05, "loss": 0.515, "step": 12371 }, { "epoch": 0.38005713759100546, "grad_norm": 0.3894100487232208, "learning_rate": 1.8292513059838036e-05, "loss": 0.636, "step": 12372 }, { "epoch": 0.3800878567259546, "grad_norm": 0.6547061800956726, "learning_rate": 1.8292242958097922e-05, "loss": 0.5806, "step": 12373 }, { "epoch": 0.38011857586090375, "grad_norm": 0.3474220633506775, "learning_rate": 1.8291972836990694e-05, "loss": 0.5733, "step": 12374 }, { "epoch": 0.3801492949958529, "grad_norm": 0.35453158617019653, "learning_rate": 1.8291702696516987e-05, "loss": 0.628, "step": 12375 }, { "epoch": 0.3801800141308021, "grad_norm": 0.3710530698299408, "learning_rate": 1.8291432536677426e-05, "loss": 0.6085, "step": 12376 }, { "epoch": 0.3802107332657512, "grad_norm": 0.3311328589916229, "learning_rate": 1.829116235747265e-05, "loss": 0.4868, "step": 12377 }, { "epoch": 0.3802414524007004, "grad_norm": 0.3517089784145355, "learning_rate": 1.8290892158903282e-05, "loss": 0.6586, "step": 12378 }, { "epoch": 0.3802721715356496, "grad_norm": 0.31486776471138, "learning_rate": 1.8290621940969956e-05, "loss": 0.5589, "step": 12379 }, { "epoch": 0.3803028906705987, "grad_norm": 0.3229255974292755, "learning_rate": 1.8290351703673305e-05, "loss": 0.5912, "step": 12380 }, { "epoch": 0.38033360980554787, "grad_norm": 0.3366328477859497, "learning_rate": 1.8290081447013958e-05, "loss": 0.6163, "step": 12381 }, { "epoch": 0.38036432894049704, "grad_norm": 0.32260873913764954, "learning_rate": 1.8289811170992546e-05, "loss": 0.579, "step": 12382 }, { "epoch": 0.3803950480754462, "grad_norm": 0.3857443928718567, "learning_rate": 1.8289540875609702e-05, "loss": 0.6517, "step": 12383 }, { "epoch": 0.38042576721039534, "grad_norm": 0.5314890742301941, "learning_rate": 1.8289270560866053e-05, "loss": 0.5289, "step": 12384 }, { "epoch": 0.3804564863453445, "grad_norm": 0.34102991223335266, "learning_rate": 1.828900022676224e-05, "loss": 0.5651, "step": 12385 }, { "epoch": 0.3804872054802937, "grad_norm": 0.38710248470306396, "learning_rate": 1.8288729873298883e-05, "loss": 0.489, "step": 12386 }, { "epoch": 0.3805179246152428, "grad_norm": 0.3744886815547943, "learning_rate": 1.8288459500476624e-05, "loss": 0.5346, "step": 12387 }, { "epoch": 0.380548643750192, "grad_norm": 0.3526829779148102, "learning_rate": 1.828818910829608e-05, "loss": 0.6013, "step": 12388 }, { "epoch": 0.38057936288514116, "grad_norm": 0.3706074059009552, "learning_rate": 1.82879186967579e-05, "loss": 0.5019, "step": 12389 }, { "epoch": 0.38061008202009033, "grad_norm": 0.34045615792274475, "learning_rate": 1.8287648265862702e-05, "loss": 0.6031, "step": 12390 }, { "epoch": 0.38064080115503945, "grad_norm": 0.3795492351055145, "learning_rate": 1.8287377815611126e-05, "loss": 0.517, "step": 12391 }, { "epoch": 0.38067152028998863, "grad_norm": 0.4329891502857208, "learning_rate": 1.82871073460038e-05, "loss": 0.5124, "step": 12392 }, { "epoch": 0.3807022394249378, "grad_norm": 0.3825784921646118, "learning_rate": 1.8286836857041354e-05, "loss": 0.659, "step": 12393 }, { "epoch": 0.380732958559887, "grad_norm": 0.3097206950187683, "learning_rate": 1.8286566348724422e-05, "loss": 0.5129, "step": 12394 }, { "epoch": 0.3807636776948361, "grad_norm": 0.3424071669578552, "learning_rate": 1.8286295821053638e-05, "loss": 0.5257, "step": 12395 }, { "epoch": 0.3807943968297853, "grad_norm": 0.3581220209598541, "learning_rate": 1.8286025274029628e-05, "loss": 0.6034, "step": 12396 }, { "epoch": 0.38082511596473445, "grad_norm": 0.2963148057460785, "learning_rate": 1.8285754707653026e-05, "loss": 0.4981, "step": 12397 }, { "epoch": 0.38085583509968357, "grad_norm": 0.3590690493583679, "learning_rate": 1.828548412192447e-05, "loss": 0.684, "step": 12398 }, { "epoch": 0.38088655423463275, "grad_norm": 0.3720987141132355, "learning_rate": 1.8285213516844584e-05, "loss": 0.5948, "step": 12399 }, { "epoch": 0.3809172733695819, "grad_norm": 0.345813512802124, "learning_rate": 1.8284942892414005e-05, "loss": 0.546, "step": 12400 }, { "epoch": 0.3809479925045311, "grad_norm": 0.3558904230594635, "learning_rate": 1.8284672248633364e-05, "loss": 0.5574, "step": 12401 }, { "epoch": 0.3809787116394802, "grad_norm": 0.33116671442985535, "learning_rate": 1.828440158550329e-05, "loss": 0.5963, "step": 12402 }, { "epoch": 0.3810094307744294, "grad_norm": 0.37790828943252563, "learning_rate": 1.828413090302442e-05, "loss": 0.5221, "step": 12403 }, { "epoch": 0.38104014990937857, "grad_norm": 0.33549293875694275, "learning_rate": 1.828386020119738e-05, "loss": 0.5376, "step": 12404 }, { "epoch": 0.3810708690443277, "grad_norm": 0.34645259380340576, "learning_rate": 1.828358948002281e-05, "loss": 0.5768, "step": 12405 }, { "epoch": 0.38110158817927686, "grad_norm": 0.31862109899520874, "learning_rate": 1.8283318739501336e-05, "loss": 0.5494, "step": 12406 }, { "epoch": 0.38113230731422604, "grad_norm": 0.3225826919078827, "learning_rate": 1.8283047979633593e-05, "loss": 0.5675, "step": 12407 }, { "epoch": 0.3811630264491752, "grad_norm": 0.3359171450138092, "learning_rate": 1.8282777200420214e-05, "loss": 0.5471, "step": 12408 }, { "epoch": 0.38119374558412433, "grad_norm": 1.259356141090393, "learning_rate": 1.8282506401861832e-05, "loss": 0.6143, "step": 12409 }, { "epoch": 0.3812244647190735, "grad_norm": 0.35441625118255615, "learning_rate": 1.8282235583959074e-05, "loss": 0.5481, "step": 12410 }, { "epoch": 0.3812551838540227, "grad_norm": 0.34135913848876953, "learning_rate": 1.8281964746712578e-05, "loss": 0.5165, "step": 12411 }, { "epoch": 0.3812859029889718, "grad_norm": 0.36771783232688904, "learning_rate": 1.8281693890122973e-05, "loss": 0.5723, "step": 12412 }, { "epoch": 0.381316622123921, "grad_norm": 0.3559032082557678, "learning_rate": 1.8281423014190896e-05, "loss": 0.6204, "step": 12413 }, { "epoch": 0.38134734125887015, "grad_norm": 0.35515573620796204, "learning_rate": 1.8281152118916977e-05, "loss": 0.582, "step": 12414 }, { "epoch": 0.38137806039381933, "grad_norm": 0.35190266370773315, "learning_rate": 1.8280881204301848e-05, "loss": 0.5082, "step": 12415 }, { "epoch": 0.38140877952876845, "grad_norm": 0.4026683270931244, "learning_rate": 1.8280610270346144e-05, "loss": 0.5873, "step": 12416 }, { "epoch": 0.3814394986637176, "grad_norm": 0.3289724588394165, "learning_rate": 1.8280339317050492e-05, "loss": 0.4993, "step": 12417 }, { "epoch": 0.3814702177986668, "grad_norm": 0.3902246654033661, "learning_rate": 1.8280068344415533e-05, "loss": 0.5838, "step": 12418 }, { "epoch": 0.381500936933616, "grad_norm": 0.3594312071800232, "learning_rate": 1.8279797352441898e-05, "loss": 0.6356, "step": 12419 }, { "epoch": 0.3815316560685651, "grad_norm": 0.33250874280929565, "learning_rate": 1.8279526341130215e-05, "loss": 0.5111, "step": 12420 }, { "epoch": 0.38156237520351427, "grad_norm": 0.31848347187042236, "learning_rate": 1.827925531048112e-05, "loss": 0.548, "step": 12421 }, { "epoch": 0.38159309433846345, "grad_norm": 0.49604910612106323, "learning_rate": 1.8278984260495247e-05, "loss": 0.632, "step": 12422 }, { "epoch": 0.38162381347341257, "grad_norm": 0.33975714445114136, "learning_rate": 1.8278713191173226e-05, "loss": 0.6094, "step": 12423 }, { "epoch": 0.38165453260836174, "grad_norm": 0.3154648542404175, "learning_rate": 1.8278442102515696e-05, "loss": 0.5726, "step": 12424 }, { "epoch": 0.3816852517433109, "grad_norm": 0.41615650057792664, "learning_rate": 1.8278170994523283e-05, "loss": 0.6467, "step": 12425 }, { "epoch": 0.3817159708782601, "grad_norm": 0.3693624436855316, "learning_rate": 1.8277899867196625e-05, "loss": 0.5881, "step": 12426 }, { "epoch": 0.3817466900132092, "grad_norm": 0.3295789361000061, "learning_rate": 1.8277628720536355e-05, "loss": 0.5597, "step": 12427 }, { "epoch": 0.3817774091481584, "grad_norm": 0.3298072814941406, "learning_rate": 1.82773575545431e-05, "loss": 0.5785, "step": 12428 }, { "epoch": 0.38180812828310756, "grad_norm": 0.3639981746673584, "learning_rate": 1.82770863692175e-05, "loss": 0.596, "step": 12429 }, { "epoch": 0.3818388474180567, "grad_norm": 0.3479153513908386, "learning_rate": 1.827681516456019e-05, "loss": 0.5113, "step": 12430 }, { "epoch": 0.38186956655300586, "grad_norm": 0.3758121132850647, "learning_rate": 1.82765439405718e-05, "loss": 0.557, "step": 12431 }, { "epoch": 0.38190028568795503, "grad_norm": 0.38112229108810425, "learning_rate": 1.827627269725296e-05, "loss": 0.6299, "step": 12432 }, { "epoch": 0.3819310048229042, "grad_norm": 0.37707117199897766, "learning_rate": 1.827600143460431e-05, "loss": 0.5969, "step": 12433 }, { "epoch": 0.3819617239578533, "grad_norm": 0.40666666626930237, "learning_rate": 1.8275730152626485e-05, "loss": 0.4996, "step": 12434 }, { "epoch": 0.3819924430928025, "grad_norm": 0.3341124653816223, "learning_rate": 1.8275458851320106e-05, "loss": 0.592, "step": 12435 }, { "epoch": 0.3820231622277517, "grad_norm": 0.35151275992393494, "learning_rate": 1.8275187530685818e-05, "loss": 0.5838, "step": 12436 }, { "epoch": 0.38205388136270085, "grad_norm": 0.3511669933795929, "learning_rate": 1.827491619072425e-05, "loss": 0.5396, "step": 12437 }, { "epoch": 0.38208460049765, "grad_norm": 0.42465558648109436, "learning_rate": 1.827464483143604e-05, "loss": 0.6208, "step": 12438 }, { "epoch": 0.38211531963259915, "grad_norm": 0.3420940935611725, "learning_rate": 1.827437345282182e-05, "loss": 0.5498, "step": 12439 }, { "epoch": 0.3821460387675483, "grad_norm": 0.3813416659832001, "learning_rate": 1.827410205488222e-05, "loss": 0.5688, "step": 12440 }, { "epoch": 0.38217675790249744, "grad_norm": 0.3514685332775116, "learning_rate": 1.8273830637617878e-05, "loss": 0.5483, "step": 12441 }, { "epoch": 0.3822074770374466, "grad_norm": 0.36479029059410095, "learning_rate": 1.8273559201029428e-05, "loss": 0.5737, "step": 12442 }, { "epoch": 0.3822381961723958, "grad_norm": 0.3082675635814667, "learning_rate": 1.82732877451175e-05, "loss": 0.5001, "step": 12443 }, { "epoch": 0.38226891530734497, "grad_norm": 0.6483739614486694, "learning_rate": 1.8273016269882732e-05, "loss": 0.6368, "step": 12444 }, { "epoch": 0.3822996344422941, "grad_norm": 0.3641391098499298, "learning_rate": 1.8272744775325756e-05, "loss": 0.6137, "step": 12445 }, { "epoch": 0.38233035357724326, "grad_norm": 0.3451610803604126, "learning_rate": 1.8272473261447207e-05, "loss": 0.5424, "step": 12446 }, { "epoch": 0.38236107271219244, "grad_norm": 0.3365713059902191, "learning_rate": 1.827220172824772e-05, "loss": 0.5607, "step": 12447 }, { "epoch": 0.38239179184714156, "grad_norm": 0.3374975323677063, "learning_rate": 1.827193017572793e-05, "loss": 0.6426, "step": 12448 }, { "epoch": 0.38242251098209074, "grad_norm": 0.38479918241500854, "learning_rate": 1.8271658603888465e-05, "loss": 0.5797, "step": 12449 }, { "epoch": 0.3824532301170399, "grad_norm": 0.33594679832458496, "learning_rate": 1.8271387012729967e-05, "loss": 0.5914, "step": 12450 }, { "epoch": 0.3824839492519891, "grad_norm": 0.3578900396823883, "learning_rate": 1.8271115402253063e-05, "loss": 0.6092, "step": 12451 }, { "epoch": 0.3825146683869382, "grad_norm": 0.38242432475090027, "learning_rate": 1.8270843772458397e-05, "loss": 0.5227, "step": 12452 }, { "epoch": 0.3825453875218874, "grad_norm": 0.3277205526828766, "learning_rate": 1.8270572123346592e-05, "loss": 0.6124, "step": 12453 }, { "epoch": 0.38257610665683656, "grad_norm": 0.35765892267227173, "learning_rate": 1.8270300454918293e-05, "loss": 0.6041, "step": 12454 }, { "epoch": 0.3826068257917857, "grad_norm": 0.37759754061698914, "learning_rate": 1.8270028767174125e-05, "loss": 0.5749, "step": 12455 }, { "epoch": 0.38263754492673485, "grad_norm": 0.39725714921951294, "learning_rate": 1.826975706011473e-05, "loss": 0.6548, "step": 12456 }, { "epoch": 0.382668264061684, "grad_norm": 0.33941659331321716, "learning_rate": 1.8269485333740743e-05, "loss": 0.5315, "step": 12457 }, { "epoch": 0.3826989831966332, "grad_norm": 0.3304234743118286, "learning_rate": 1.826921358805279e-05, "loss": 0.5332, "step": 12458 }, { "epoch": 0.3827297023315823, "grad_norm": 0.36464959383010864, "learning_rate": 1.8268941823051513e-05, "loss": 0.5335, "step": 12459 }, { "epoch": 0.3827604214665315, "grad_norm": 0.37228673696517944, "learning_rate": 1.8268670038737542e-05, "loss": 0.559, "step": 12460 }, { "epoch": 0.3827911406014807, "grad_norm": 0.366207480430603, "learning_rate": 1.826839823511152e-05, "loss": 0.612, "step": 12461 }, { "epoch": 0.38282185973642985, "grad_norm": 1.8842573165893555, "learning_rate": 1.8268126412174076e-05, "loss": 0.5912, "step": 12462 }, { "epoch": 0.38285257887137897, "grad_norm": 0.34351053833961487, "learning_rate": 1.8267854569925843e-05, "loss": 0.6122, "step": 12463 }, { "epoch": 0.38288329800632814, "grad_norm": 0.35536447167396545, "learning_rate": 1.826758270836746e-05, "loss": 0.5453, "step": 12464 }, { "epoch": 0.3829140171412773, "grad_norm": 0.34506046772003174, "learning_rate": 1.8267310827499555e-05, "loss": 0.5737, "step": 12465 }, { "epoch": 0.38294473627622644, "grad_norm": 0.37127652764320374, "learning_rate": 1.8267038927322774e-05, "loss": 0.5221, "step": 12466 }, { "epoch": 0.3829754554111756, "grad_norm": 0.40456151962280273, "learning_rate": 1.8266767007837744e-05, "loss": 0.534, "step": 12467 }, { "epoch": 0.3830061745461248, "grad_norm": 0.34588170051574707, "learning_rate": 1.8266495069045103e-05, "loss": 0.5955, "step": 12468 }, { "epoch": 0.38303689368107396, "grad_norm": 0.3151595890522003, "learning_rate": 1.8266223110945484e-05, "loss": 0.5168, "step": 12469 }, { "epoch": 0.3830676128160231, "grad_norm": 0.3256661593914032, "learning_rate": 1.8265951133539525e-05, "loss": 0.5534, "step": 12470 }, { "epoch": 0.38309833195097226, "grad_norm": 0.328845739364624, "learning_rate": 1.826567913682786e-05, "loss": 0.545, "step": 12471 }, { "epoch": 0.38312905108592143, "grad_norm": 0.3939554691314697, "learning_rate": 1.8265407120811126e-05, "loss": 0.6589, "step": 12472 }, { "epoch": 0.38315977022087055, "grad_norm": 0.36683887243270874, "learning_rate": 1.8265135085489953e-05, "loss": 0.5687, "step": 12473 }, { "epoch": 0.38319048935581973, "grad_norm": 0.3698396384716034, "learning_rate": 1.826486303086498e-05, "loss": 0.5398, "step": 12474 }, { "epoch": 0.3832212084907689, "grad_norm": 0.42733725905418396, "learning_rate": 1.826459095693684e-05, "loss": 0.6386, "step": 12475 }, { "epoch": 0.3832519276257181, "grad_norm": 0.3789157569408417, "learning_rate": 1.8264318863706177e-05, "loss": 0.5649, "step": 12476 }, { "epoch": 0.3832826467606672, "grad_norm": 0.3699917197227478, "learning_rate": 1.8264046751173618e-05, "loss": 0.5957, "step": 12477 }, { "epoch": 0.3833133658956164, "grad_norm": 0.37079936265945435, "learning_rate": 1.8263774619339796e-05, "loss": 0.6074, "step": 12478 }, { "epoch": 0.38334408503056555, "grad_norm": 0.36780497431755066, "learning_rate": 1.8263502468205357e-05, "loss": 0.5369, "step": 12479 }, { "epoch": 0.3833748041655147, "grad_norm": 0.35342663526535034, "learning_rate": 1.826323029777093e-05, "loss": 0.6175, "step": 12480 }, { "epoch": 0.38340552330046385, "grad_norm": 0.7856476306915283, "learning_rate": 1.8262958108037148e-05, "loss": 0.6333, "step": 12481 }, { "epoch": 0.383436242435413, "grad_norm": 0.381039023399353, "learning_rate": 1.8262685899004652e-05, "loss": 0.6376, "step": 12482 }, { "epoch": 0.3834669615703622, "grad_norm": 0.3668991029262543, "learning_rate": 1.8262413670674076e-05, "loss": 0.5678, "step": 12483 }, { "epoch": 0.3834976807053113, "grad_norm": 0.3295508623123169, "learning_rate": 1.8262141423046057e-05, "loss": 0.6732, "step": 12484 }, { "epoch": 0.3835283998402605, "grad_norm": 0.3315221667289734, "learning_rate": 1.826186915612123e-05, "loss": 0.6211, "step": 12485 }, { "epoch": 0.38355911897520967, "grad_norm": 0.3829135298728943, "learning_rate": 1.826159686990023e-05, "loss": 0.7124, "step": 12486 }, { "epoch": 0.38358983811015884, "grad_norm": 0.34257301688194275, "learning_rate": 1.826132456438369e-05, "loss": 0.5864, "step": 12487 }, { "epoch": 0.38362055724510796, "grad_norm": 0.34599801898002625, "learning_rate": 1.8261052239572253e-05, "loss": 0.5316, "step": 12488 }, { "epoch": 0.38365127638005714, "grad_norm": 0.3911677300930023, "learning_rate": 1.826077989546655e-05, "loss": 0.6485, "step": 12489 }, { "epoch": 0.3836819955150063, "grad_norm": 0.4145345985889435, "learning_rate": 1.8260507532067224e-05, "loss": 0.569, "step": 12490 }, { "epoch": 0.38371271464995543, "grad_norm": 0.3638071119785309, "learning_rate": 1.8260235149374898e-05, "loss": 0.6113, "step": 12491 }, { "epoch": 0.3837434337849046, "grad_norm": 0.3615020215511322, "learning_rate": 1.825996274739022e-05, "loss": 0.5831, "step": 12492 }, { "epoch": 0.3837741529198538, "grad_norm": 0.37993040680885315, "learning_rate": 1.8259690326113824e-05, "loss": 0.5772, "step": 12493 }, { "epoch": 0.38380487205480296, "grad_norm": 0.3406311869621277, "learning_rate": 1.825941788554634e-05, "loss": 0.5729, "step": 12494 }, { "epoch": 0.3838355911897521, "grad_norm": 0.32828986644744873, "learning_rate": 1.825914542568841e-05, "loss": 0.5849, "step": 12495 }, { "epoch": 0.38386631032470125, "grad_norm": 0.33082377910614014, "learning_rate": 1.825887294654067e-05, "loss": 0.6066, "step": 12496 }, { "epoch": 0.38389702945965043, "grad_norm": 0.35999399423599243, "learning_rate": 1.8258600448103755e-05, "loss": 0.6451, "step": 12497 }, { "epoch": 0.38392774859459955, "grad_norm": 0.34958627820014954, "learning_rate": 1.82583279303783e-05, "loss": 0.5788, "step": 12498 }, { "epoch": 0.3839584677295487, "grad_norm": 0.33796072006225586, "learning_rate": 1.8258055393364948e-05, "loss": 0.6101, "step": 12499 }, { "epoch": 0.3839891868644979, "grad_norm": 0.35148563981056213, "learning_rate": 1.825778283706433e-05, "loss": 0.6054, "step": 12500 }, { "epoch": 0.3840199059994471, "grad_norm": 0.3636520206928253, "learning_rate": 1.8257510261477082e-05, "loss": 0.6673, "step": 12501 }, { "epoch": 0.3840506251343962, "grad_norm": 0.3795004189014435, "learning_rate": 1.8257237666603837e-05, "loss": 0.5938, "step": 12502 }, { "epoch": 0.38408134426934537, "grad_norm": 0.36040982604026794, "learning_rate": 1.8256965052445243e-05, "loss": 0.6078, "step": 12503 }, { "epoch": 0.38411206340429455, "grad_norm": 0.3564394414424896, "learning_rate": 1.825669241900193e-05, "loss": 0.5765, "step": 12504 }, { "epoch": 0.3841427825392437, "grad_norm": 0.350304514169693, "learning_rate": 1.8256419766274535e-05, "loss": 0.5475, "step": 12505 }, { "epoch": 0.38417350167419284, "grad_norm": 0.3423313796520233, "learning_rate": 1.8256147094263693e-05, "loss": 0.5995, "step": 12506 }, { "epoch": 0.384204220809142, "grad_norm": 0.35372355580329895, "learning_rate": 1.8255874402970044e-05, "loss": 0.608, "step": 12507 }, { "epoch": 0.3842349399440912, "grad_norm": 0.35223588347435, "learning_rate": 1.8255601692394226e-05, "loss": 0.643, "step": 12508 }, { "epoch": 0.3842656590790403, "grad_norm": 0.3667507469654083, "learning_rate": 1.8255328962536868e-05, "loss": 0.5396, "step": 12509 }, { "epoch": 0.3842963782139895, "grad_norm": 0.32116207480430603, "learning_rate": 1.8255056213398615e-05, "loss": 0.6022, "step": 12510 }, { "epoch": 0.38432709734893866, "grad_norm": 0.34404775500297546, "learning_rate": 1.8254783444980106e-05, "loss": 0.5023, "step": 12511 }, { "epoch": 0.38435781648388784, "grad_norm": 0.3752647638320923, "learning_rate": 1.8254510657281968e-05, "loss": 0.6055, "step": 12512 }, { "epoch": 0.38438853561883696, "grad_norm": 0.36894550919532776, "learning_rate": 1.825423785030485e-05, "loss": 0.6737, "step": 12513 }, { "epoch": 0.38441925475378613, "grad_norm": 0.5027132034301758, "learning_rate": 1.825396502404938e-05, "loss": 0.5753, "step": 12514 }, { "epoch": 0.3844499738887353, "grad_norm": 0.38310930132865906, "learning_rate": 1.8253692178516194e-05, "loss": 0.5908, "step": 12515 }, { "epoch": 0.38448069302368443, "grad_norm": 0.4100963771343231, "learning_rate": 1.825341931370594e-05, "loss": 0.5544, "step": 12516 }, { "epoch": 0.3845114121586336, "grad_norm": 0.48629865050315857, "learning_rate": 1.8253146429619243e-05, "loss": 0.5398, "step": 12517 }, { "epoch": 0.3845421312935828, "grad_norm": 0.3458899259567261, "learning_rate": 1.825287352625675e-05, "loss": 0.602, "step": 12518 }, { "epoch": 0.38457285042853195, "grad_norm": 0.3250167667865753, "learning_rate": 1.8252600603619096e-05, "loss": 0.6394, "step": 12519 }, { "epoch": 0.3846035695634811, "grad_norm": 0.3800830841064453, "learning_rate": 1.825232766170691e-05, "loss": 0.6271, "step": 12520 }, { "epoch": 0.38463428869843025, "grad_norm": 0.29448482394218445, "learning_rate": 1.8252054700520844e-05, "loss": 0.4642, "step": 12521 }, { "epoch": 0.3846650078333794, "grad_norm": 0.3643339276313782, "learning_rate": 1.8251781720061524e-05, "loss": 0.5223, "step": 12522 }, { "epoch": 0.3846957269683286, "grad_norm": 0.3806588351726532, "learning_rate": 1.8251508720329593e-05, "loss": 0.5914, "step": 12523 }, { "epoch": 0.3847264461032777, "grad_norm": 0.35798561573028564, "learning_rate": 1.8251235701325687e-05, "loss": 0.5536, "step": 12524 }, { "epoch": 0.3847571652382269, "grad_norm": 0.352384477853775, "learning_rate": 1.8250962663050443e-05, "loss": 0.507, "step": 12525 }, { "epoch": 0.38478788437317607, "grad_norm": 0.34223881363868713, "learning_rate": 1.82506896055045e-05, "loss": 0.5982, "step": 12526 }, { "epoch": 0.3848186035081252, "grad_norm": 0.35657501220703125, "learning_rate": 1.8250416528688493e-05, "loss": 0.4805, "step": 12527 }, { "epoch": 0.38484932264307437, "grad_norm": 0.3812239468097687, "learning_rate": 1.8250143432603065e-05, "loss": 0.5144, "step": 12528 }, { "epoch": 0.38488004177802354, "grad_norm": 0.4678243100643158, "learning_rate": 1.824987031724885e-05, "loss": 0.5898, "step": 12529 }, { "epoch": 0.3849107609129727, "grad_norm": 0.3616114556789398, "learning_rate": 1.8249597182626487e-05, "loss": 0.5854, "step": 12530 }, { "epoch": 0.38494148004792184, "grad_norm": 0.37845513224601746, "learning_rate": 1.8249324028736613e-05, "loss": 0.6666, "step": 12531 }, { "epoch": 0.384972199182871, "grad_norm": 0.34575942158699036, "learning_rate": 1.8249050855579867e-05, "loss": 0.5913, "step": 12532 }, { "epoch": 0.3850029183178202, "grad_norm": 0.3278485834598541, "learning_rate": 1.8248777663156888e-05, "loss": 0.6036, "step": 12533 }, { "epoch": 0.3850336374527693, "grad_norm": 0.3435664772987366, "learning_rate": 1.824850445146831e-05, "loss": 0.5456, "step": 12534 }, { "epoch": 0.3850643565877185, "grad_norm": 0.3955530822277069, "learning_rate": 1.8248231220514773e-05, "loss": 0.6106, "step": 12535 }, { "epoch": 0.38509507572266766, "grad_norm": 0.3794417679309845, "learning_rate": 1.824795797029692e-05, "loss": 0.5403, "step": 12536 }, { "epoch": 0.38512579485761683, "grad_norm": 0.38597509264945984, "learning_rate": 1.824768470081538e-05, "loss": 0.5876, "step": 12537 }, { "epoch": 0.38515651399256595, "grad_norm": 0.3223658502101898, "learning_rate": 1.82474114120708e-05, "loss": 0.4958, "step": 12538 }, { "epoch": 0.3851872331275151, "grad_norm": 0.320223867893219, "learning_rate": 1.8247138104063812e-05, "loss": 0.5557, "step": 12539 }, { "epoch": 0.3852179522624643, "grad_norm": 0.36716386675834656, "learning_rate": 1.8246864776795062e-05, "loss": 0.6315, "step": 12540 }, { "epoch": 0.3852486713974134, "grad_norm": 0.3415415585041046, "learning_rate": 1.824659143026518e-05, "loss": 0.533, "step": 12541 }, { "epoch": 0.3852793905323626, "grad_norm": 0.3268398940563202, "learning_rate": 1.8246318064474807e-05, "loss": 0.5729, "step": 12542 }, { "epoch": 0.3853101096673118, "grad_norm": 0.32457059621810913, "learning_rate": 1.8246044679424583e-05, "loss": 0.586, "step": 12543 }, { "epoch": 0.38534082880226095, "grad_norm": 0.37338271737098694, "learning_rate": 1.8245771275115147e-05, "loss": 0.6591, "step": 12544 }, { "epoch": 0.38537154793721007, "grad_norm": 0.3164200186729431, "learning_rate": 1.8245497851547137e-05, "loss": 0.5579, "step": 12545 }, { "epoch": 0.38540226707215924, "grad_norm": 0.3411218822002411, "learning_rate": 1.8245224408721188e-05, "loss": 0.5335, "step": 12546 }, { "epoch": 0.3854329862071084, "grad_norm": 0.3222665786743164, "learning_rate": 1.8244950946637942e-05, "loss": 0.6107, "step": 12547 }, { "epoch": 0.3854637053420576, "grad_norm": 0.37485384941101074, "learning_rate": 1.8244677465298036e-05, "loss": 0.5286, "step": 12548 }, { "epoch": 0.3854944244770067, "grad_norm": 0.41882604360580444, "learning_rate": 1.8244403964702116e-05, "loss": 0.5547, "step": 12549 }, { "epoch": 0.3855251436119559, "grad_norm": 0.3479151129722595, "learning_rate": 1.824413044485081e-05, "loss": 0.6554, "step": 12550 }, { "epoch": 0.38555586274690506, "grad_norm": 0.31809598207473755, "learning_rate": 1.824385690574476e-05, "loss": 0.5974, "step": 12551 }, { "epoch": 0.3855865818818542, "grad_norm": 0.3832273483276367, "learning_rate": 1.824358334738461e-05, "loss": 0.5875, "step": 12552 }, { "epoch": 0.38561730101680336, "grad_norm": 0.354560524225235, "learning_rate": 1.8243309769770993e-05, "loss": 0.519, "step": 12553 }, { "epoch": 0.38564802015175254, "grad_norm": 0.31683942675590515, "learning_rate": 1.8243036172904554e-05, "loss": 0.5443, "step": 12554 }, { "epoch": 0.3856787392867017, "grad_norm": 0.3471749722957611, "learning_rate": 1.8242762556785924e-05, "loss": 0.6028, "step": 12555 }, { "epoch": 0.38570945842165083, "grad_norm": 0.3327770531177521, "learning_rate": 1.824248892141575e-05, "loss": 0.5966, "step": 12556 }, { "epoch": 0.3857401775566, "grad_norm": 0.44045311212539673, "learning_rate": 1.8242215266794664e-05, "loss": 0.5093, "step": 12557 }, { "epoch": 0.3857708966915492, "grad_norm": 0.37221333384513855, "learning_rate": 1.8241941592923308e-05, "loss": 0.5908, "step": 12558 }, { "epoch": 0.3858016158264983, "grad_norm": 1.5145132541656494, "learning_rate": 1.8241667899802326e-05, "loss": 0.5676, "step": 12559 }, { "epoch": 0.3858323349614475, "grad_norm": 0.34485527873039246, "learning_rate": 1.824139418743235e-05, "loss": 0.5665, "step": 12560 }, { "epoch": 0.38586305409639665, "grad_norm": 0.3353272080421448, "learning_rate": 1.8241120455814025e-05, "loss": 0.587, "step": 12561 }, { "epoch": 0.3858937732313458, "grad_norm": 0.34558624029159546, "learning_rate": 1.8240846704947984e-05, "loss": 0.6153, "step": 12562 }, { "epoch": 0.38592449236629495, "grad_norm": 0.31225183606147766, "learning_rate": 1.824057293483487e-05, "loss": 0.5887, "step": 12563 }, { "epoch": 0.3859552115012441, "grad_norm": 0.35261857509613037, "learning_rate": 1.824029914547533e-05, "loss": 0.5642, "step": 12564 }, { "epoch": 0.3859859306361933, "grad_norm": 0.32524487376213074, "learning_rate": 1.8240025336869987e-05, "loss": 0.5713, "step": 12565 }, { "epoch": 0.3860166497711424, "grad_norm": 0.4254744350910187, "learning_rate": 1.8239751509019493e-05, "loss": 0.6143, "step": 12566 }, { "epoch": 0.3860473689060916, "grad_norm": 0.3446838855743408, "learning_rate": 1.8239477661924485e-05, "loss": 0.62, "step": 12567 }, { "epoch": 0.38607808804104077, "grad_norm": 0.3490721881389618, "learning_rate": 1.82392037955856e-05, "loss": 0.6338, "step": 12568 }, { "epoch": 0.38610880717598994, "grad_norm": 0.3527669608592987, "learning_rate": 1.823892991000348e-05, "loss": 0.5526, "step": 12569 }, { "epoch": 0.38613952631093906, "grad_norm": 0.3690575063228607, "learning_rate": 1.823865600517876e-05, "loss": 0.6621, "step": 12570 }, { "epoch": 0.38617024544588824, "grad_norm": 0.34031978249549866, "learning_rate": 1.8238382081112087e-05, "loss": 0.5835, "step": 12571 }, { "epoch": 0.3862009645808374, "grad_norm": 0.336273193359375, "learning_rate": 1.8238108137804098e-05, "loss": 0.5697, "step": 12572 }, { "epoch": 0.3862316837157866, "grad_norm": 0.3537152409553528, "learning_rate": 1.823783417525543e-05, "loss": 0.6215, "step": 12573 }, { "epoch": 0.3862624028507357, "grad_norm": 0.31179773807525635, "learning_rate": 1.8237560193466727e-05, "loss": 0.5675, "step": 12574 }, { "epoch": 0.3862931219856849, "grad_norm": 0.3109518587589264, "learning_rate": 1.8237286192438626e-05, "loss": 0.6076, "step": 12575 }, { "epoch": 0.38632384112063406, "grad_norm": 0.3008643388748169, "learning_rate": 1.8237012172171767e-05, "loss": 0.5632, "step": 12576 }, { "epoch": 0.3863545602555832, "grad_norm": 0.43880778551101685, "learning_rate": 1.8236738132666788e-05, "loss": 0.5864, "step": 12577 }, { "epoch": 0.38638527939053235, "grad_norm": 0.35843607783317566, "learning_rate": 1.823646407392434e-05, "loss": 0.5756, "step": 12578 }, { "epoch": 0.38641599852548153, "grad_norm": 0.387682169675827, "learning_rate": 1.823618999594505e-05, "loss": 0.5441, "step": 12579 }, { "epoch": 0.3864467176604307, "grad_norm": 0.34428897500038147, "learning_rate": 1.823591589872956e-05, "loss": 0.6873, "step": 12580 }, { "epoch": 0.3864774367953798, "grad_norm": 0.3445190191268921, "learning_rate": 1.8235641782278516e-05, "loss": 0.6076, "step": 12581 }, { "epoch": 0.386508155930329, "grad_norm": 0.35933130979537964, "learning_rate": 1.8235367646592556e-05, "loss": 0.5999, "step": 12582 }, { "epoch": 0.3865388750652782, "grad_norm": 0.35630905628204346, "learning_rate": 1.823509349167232e-05, "loss": 0.6546, "step": 12583 }, { "epoch": 0.3865695942002273, "grad_norm": 0.327798455953598, "learning_rate": 1.8234819317518448e-05, "loss": 0.7173, "step": 12584 }, { "epoch": 0.38660031333517647, "grad_norm": 0.35514917969703674, "learning_rate": 1.823454512413158e-05, "loss": 0.6475, "step": 12585 }, { "epoch": 0.38663103247012565, "grad_norm": 0.481374591588974, "learning_rate": 1.823427091151236e-05, "loss": 0.52, "step": 12586 }, { "epoch": 0.3866617516050748, "grad_norm": 0.34858188033103943, "learning_rate": 1.823399667966142e-05, "loss": 0.5959, "step": 12587 }, { "epoch": 0.38669247074002394, "grad_norm": 0.35031187534332275, "learning_rate": 1.8233722428579404e-05, "loss": 0.5532, "step": 12588 }, { "epoch": 0.3867231898749731, "grad_norm": 0.3554961383342743, "learning_rate": 1.823344815826696e-05, "loss": 0.6438, "step": 12589 }, { "epoch": 0.3867539090099223, "grad_norm": 0.35448774695396423, "learning_rate": 1.823317386872472e-05, "loss": 0.5836, "step": 12590 }, { "epoch": 0.38678462814487147, "grad_norm": 0.3457255959510803, "learning_rate": 1.823289955995333e-05, "loss": 0.5888, "step": 12591 }, { "epoch": 0.3868153472798206, "grad_norm": 0.35012921690940857, "learning_rate": 1.8232625231953424e-05, "loss": 0.5594, "step": 12592 }, { "epoch": 0.38684606641476976, "grad_norm": 0.33400118350982666, "learning_rate": 1.823235088472565e-05, "loss": 0.5469, "step": 12593 }, { "epoch": 0.38687678554971894, "grad_norm": 0.38483765721321106, "learning_rate": 1.8232076518270647e-05, "loss": 0.5661, "step": 12594 }, { "epoch": 0.38690750468466806, "grad_norm": 0.3188033401966095, "learning_rate": 1.8231802132589048e-05, "loss": 0.4892, "step": 12595 }, { "epoch": 0.38693822381961723, "grad_norm": 0.3661021292209625, "learning_rate": 1.8231527727681503e-05, "loss": 0.5735, "step": 12596 }, { "epoch": 0.3869689429545664, "grad_norm": 0.332582026720047, "learning_rate": 1.8231253303548654e-05, "loss": 0.5923, "step": 12597 }, { "epoch": 0.3869996620895156, "grad_norm": 0.3867836594581604, "learning_rate": 1.8230978860191135e-05, "loss": 0.5717, "step": 12598 }, { "epoch": 0.3870303812244647, "grad_norm": 0.388803094625473, "learning_rate": 1.8230704397609587e-05, "loss": 0.5266, "step": 12599 }, { "epoch": 0.3870611003594139, "grad_norm": 0.39410752058029175, "learning_rate": 1.823042991580466e-05, "loss": 0.5644, "step": 12600 }, { "epoch": 0.38709181949436305, "grad_norm": 1.0480247735977173, "learning_rate": 1.8230155414776988e-05, "loss": 0.5318, "step": 12601 }, { "epoch": 0.3871225386293122, "grad_norm": 0.47047552466392517, "learning_rate": 1.822988089452721e-05, "loss": 0.6461, "step": 12602 }, { "epoch": 0.38715325776426135, "grad_norm": 0.4002799391746521, "learning_rate": 1.822960635505597e-05, "loss": 0.6364, "step": 12603 }, { "epoch": 0.3871839768992105, "grad_norm": 0.3314950466156006, "learning_rate": 1.822933179636391e-05, "loss": 0.5238, "step": 12604 }, { "epoch": 0.3872146960341597, "grad_norm": 0.3827502429485321, "learning_rate": 1.8229057218451675e-05, "loss": 0.6534, "step": 12605 }, { "epoch": 0.3872454151691088, "grad_norm": 0.330681174993515, "learning_rate": 1.8228782621319902e-05, "loss": 0.5655, "step": 12606 }, { "epoch": 0.387276134304058, "grad_norm": 0.3372170925140381, "learning_rate": 1.822850800496923e-05, "loss": 0.6349, "step": 12607 }, { "epoch": 0.38730685343900717, "grad_norm": 0.32247716188430786, "learning_rate": 1.82282333694003e-05, "loss": 0.4862, "step": 12608 }, { "epoch": 0.3873375725739563, "grad_norm": 0.35790520906448364, "learning_rate": 1.822795871461376e-05, "loss": 0.6222, "step": 12609 }, { "epoch": 0.38736829170890547, "grad_norm": 0.3275488615036011, "learning_rate": 1.8227684040610247e-05, "loss": 0.5709, "step": 12610 }, { "epoch": 0.38739901084385464, "grad_norm": 0.37938183546066284, "learning_rate": 1.8227409347390406e-05, "loss": 0.6276, "step": 12611 }, { "epoch": 0.3874297299788038, "grad_norm": 0.3475094735622406, "learning_rate": 1.8227134634954874e-05, "loss": 0.6084, "step": 12612 }, { "epoch": 0.38746044911375294, "grad_norm": 0.5625912547111511, "learning_rate": 1.822685990330429e-05, "loss": 0.5804, "step": 12613 }, { "epoch": 0.3874911682487021, "grad_norm": 0.3835003674030304, "learning_rate": 1.8226585152439308e-05, "loss": 0.5929, "step": 12614 }, { "epoch": 0.3875218873836513, "grad_norm": 0.3280009329319, "learning_rate": 1.8226310382360558e-05, "loss": 0.5503, "step": 12615 }, { "epoch": 0.38755260651860046, "grad_norm": 0.3386102318763733, "learning_rate": 1.8226035593068682e-05, "loss": 0.6189, "step": 12616 }, { "epoch": 0.3875833256535496, "grad_norm": 0.3466804325580597, "learning_rate": 1.8225760784564326e-05, "loss": 0.5311, "step": 12617 }, { "epoch": 0.38761404478849876, "grad_norm": 0.3681892454624176, "learning_rate": 1.822548595684814e-05, "loss": 0.5629, "step": 12618 }, { "epoch": 0.38764476392344793, "grad_norm": 0.3753035366535187, "learning_rate": 1.822521110992075e-05, "loss": 0.6091, "step": 12619 }, { "epoch": 0.38767548305839705, "grad_norm": 0.3478451073169708, "learning_rate": 1.8224936243782805e-05, "loss": 0.6083, "step": 12620 }, { "epoch": 0.3877062021933462, "grad_norm": 0.3551974594593048, "learning_rate": 1.8224661358434946e-05, "loss": 0.5753, "step": 12621 }, { "epoch": 0.3877369213282954, "grad_norm": 0.3498266637325287, "learning_rate": 1.822438645387782e-05, "loss": 0.5313, "step": 12622 }, { "epoch": 0.3877676404632446, "grad_norm": 0.3587892949581146, "learning_rate": 1.8224111530112064e-05, "loss": 0.59, "step": 12623 }, { "epoch": 0.3877983595981937, "grad_norm": 0.3525114357471466, "learning_rate": 1.8223836587138318e-05, "loss": 0.5994, "step": 12624 }, { "epoch": 0.3878290787331429, "grad_norm": 0.36976346373558044, "learning_rate": 1.8223561624957232e-05, "loss": 0.5768, "step": 12625 }, { "epoch": 0.38785979786809205, "grad_norm": 0.34399881958961487, "learning_rate": 1.822328664356944e-05, "loss": 0.5306, "step": 12626 }, { "epoch": 0.38789051700304117, "grad_norm": 0.3104521930217743, "learning_rate": 1.8223011642975587e-05, "loss": 0.5502, "step": 12627 }, { "epoch": 0.38792123613799034, "grad_norm": 0.4279651343822479, "learning_rate": 1.822273662317632e-05, "loss": 0.5645, "step": 12628 }, { "epoch": 0.3879519552729395, "grad_norm": 0.3398714065551758, "learning_rate": 1.8222461584172275e-05, "loss": 0.5795, "step": 12629 }, { "epoch": 0.3879826744078887, "grad_norm": 0.38412904739379883, "learning_rate": 1.8222186525964097e-05, "loss": 0.6275, "step": 12630 }, { "epoch": 0.3880133935428378, "grad_norm": 0.3591623902320862, "learning_rate": 1.822191144855243e-05, "loss": 0.6308, "step": 12631 }, { "epoch": 0.388044112677787, "grad_norm": 0.3525063395500183, "learning_rate": 1.822163635193791e-05, "loss": 0.5869, "step": 12632 }, { "epoch": 0.38807483181273617, "grad_norm": 0.33502039313316345, "learning_rate": 1.822136123612119e-05, "loss": 0.5427, "step": 12633 }, { "epoch": 0.38810555094768534, "grad_norm": 0.4441884756088257, "learning_rate": 1.8221086101102902e-05, "loss": 0.5386, "step": 12634 }, { "epoch": 0.38813627008263446, "grad_norm": 0.3710276484489441, "learning_rate": 1.8220810946883692e-05, "loss": 0.5722, "step": 12635 }, { "epoch": 0.38816698921758364, "grad_norm": 0.3922441899776459, "learning_rate": 1.8220535773464207e-05, "loss": 0.6631, "step": 12636 }, { "epoch": 0.3881977083525328, "grad_norm": 0.3683168590068817, "learning_rate": 1.8220260580845088e-05, "loss": 0.5638, "step": 12637 }, { "epoch": 0.38822842748748193, "grad_norm": 0.3635037839412689, "learning_rate": 1.8219985369026974e-05, "loss": 0.5169, "step": 12638 }, { "epoch": 0.3882591466224311, "grad_norm": 0.3238923251628876, "learning_rate": 1.821971013801051e-05, "loss": 0.5533, "step": 12639 }, { "epoch": 0.3882898657573803, "grad_norm": 0.3708910644054413, "learning_rate": 1.821943488779634e-05, "loss": 0.5889, "step": 12640 }, { "epoch": 0.38832058489232946, "grad_norm": 0.34931379556655884, "learning_rate": 1.8219159618385105e-05, "loss": 0.5781, "step": 12641 }, { "epoch": 0.3883513040272786, "grad_norm": 0.46294674277305603, "learning_rate": 1.8218884329777447e-05, "loss": 0.5903, "step": 12642 }, { "epoch": 0.38838202316222775, "grad_norm": 0.3967902958393097, "learning_rate": 1.8218609021974013e-05, "loss": 0.5951, "step": 12643 }, { "epoch": 0.3884127422971769, "grad_norm": 0.36157310009002686, "learning_rate": 1.8218333694975444e-05, "loss": 0.6217, "step": 12644 }, { "epoch": 0.38844346143212605, "grad_norm": 0.319477379322052, "learning_rate": 1.821805834878238e-05, "loss": 0.5267, "step": 12645 }, { "epoch": 0.3884741805670752, "grad_norm": 0.29840320348739624, "learning_rate": 1.821778298339547e-05, "loss": 0.4383, "step": 12646 }, { "epoch": 0.3885048997020244, "grad_norm": 0.33364129066467285, "learning_rate": 1.8217507598815355e-05, "loss": 0.5822, "step": 12647 }, { "epoch": 0.3885356188369736, "grad_norm": 0.43391337990760803, "learning_rate": 1.8217232195042675e-05, "loss": 0.5327, "step": 12648 }, { "epoch": 0.3885663379719227, "grad_norm": 0.34249475598335266, "learning_rate": 1.8216956772078075e-05, "loss": 0.6602, "step": 12649 }, { "epoch": 0.38859705710687187, "grad_norm": 0.3727380931377411, "learning_rate": 1.8216681329922195e-05, "loss": 0.5645, "step": 12650 }, { "epoch": 0.38862777624182104, "grad_norm": 0.31060948967933655, "learning_rate": 1.821640586857569e-05, "loss": 0.6005, "step": 12651 }, { "epoch": 0.38865849537677016, "grad_norm": 0.6430467367172241, "learning_rate": 1.821613038803919e-05, "loss": 0.5118, "step": 12652 }, { "epoch": 0.38868921451171934, "grad_norm": 0.3430403769016266, "learning_rate": 1.8215854888313342e-05, "loss": 0.6108, "step": 12653 }, { "epoch": 0.3887199336466685, "grad_norm": 0.36365172266960144, "learning_rate": 1.8215579369398795e-05, "loss": 0.6208, "step": 12654 }, { "epoch": 0.3887506527816177, "grad_norm": 0.3203742504119873, "learning_rate": 1.8215303831296186e-05, "loss": 0.5474, "step": 12655 }, { "epoch": 0.3887813719165668, "grad_norm": 0.38130345940589905, "learning_rate": 1.8215028274006164e-05, "loss": 0.5796, "step": 12656 }, { "epoch": 0.388812091051516, "grad_norm": 0.3599478006362915, "learning_rate": 1.8214752697529365e-05, "loss": 0.5189, "step": 12657 }, { "epoch": 0.38884281018646516, "grad_norm": 0.3450779616832733, "learning_rate": 1.8214477101866442e-05, "loss": 0.5428, "step": 12658 }, { "epoch": 0.38887352932141434, "grad_norm": 0.37221449613571167, "learning_rate": 1.8214201487018034e-05, "loss": 0.5446, "step": 12659 }, { "epoch": 0.38890424845636345, "grad_norm": 0.3363869786262512, "learning_rate": 1.821392585298478e-05, "loss": 0.53, "step": 12660 }, { "epoch": 0.38893496759131263, "grad_norm": 0.32831862568855286, "learning_rate": 1.8213650199767335e-05, "loss": 0.5952, "step": 12661 }, { "epoch": 0.3889656867262618, "grad_norm": 0.3702086806297302, "learning_rate": 1.821337452736633e-05, "loss": 0.5675, "step": 12662 }, { "epoch": 0.3889964058612109, "grad_norm": 0.3556568920612335, "learning_rate": 1.821309883578242e-05, "loss": 0.5848, "step": 12663 }, { "epoch": 0.3890271249961601, "grad_norm": 0.42274361848831177, "learning_rate": 1.8212823125016245e-05, "loss": 0.5553, "step": 12664 }, { "epoch": 0.3890578441311093, "grad_norm": 0.3151712715625763, "learning_rate": 1.821254739506844e-05, "loss": 0.5322, "step": 12665 }, { "epoch": 0.38908856326605845, "grad_norm": 0.3515087366104126, "learning_rate": 1.8212271645939664e-05, "loss": 0.5441, "step": 12666 }, { "epoch": 0.38911928240100757, "grad_norm": 0.37466102838516235, "learning_rate": 1.8211995877630553e-05, "loss": 0.4664, "step": 12667 }, { "epoch": 0.38915000153595675, "grad_norm": 0.280686616897583, "learning_rate": 1.821172009014175e-05, "loss": 0.4895, "step": 12668 }, { "epoch": 0.3891807206709059, "grad_norm": 0.35638198256492615, "learning_rate": 1.8211444283473904e-05, "loss": 0.6151, "step": 12669 }, { "epoch": 0.38921143980585504, "grad_norm": 0.3691403865814209, "learning_rate": 1.8211168457627658e-05, "loss": 0.6106, "step": 12670 }, { "epoch": 0.3892421589408042, "grad_norm": 0.3384265601634979, "learning_rate": 1.8210892612603647e-05, "loss": 0.6251, "step": 12671 }, { "epoch": 0.3892728780757534, "grad_norm": 0.33641737699508667, "learning_rate": 1.821061674840253e-05, "loss": 0.5209, "step": 12672 }, { "epoch": 0.38930359721070257, "grad_norm": 0.33943793177604675, "learning_rate": 1.8210340865024944e-05, "loss": 0.5374, "step": 12673 }, { "epoch": 0.3893343163456517, "grad_norm": 0.32559552788734436, "learning_rate": 1.821006496247153e-05, "loss": 0.5673, "step": 12674 }, { "epoch": 0.38936503548060086, "grad_norm": 0.3333324193954468, "learning_rate": 1.8209789040742935e-05, "loss": 0.6223, "step": 12675 }, { "epoch": 0.38939575461555004, "grad_norm": 0.3348560035228729, "learning_rate": 1.820951309983981e-05, "loss": 0.555, "step": 12676 }, { "epoch": 0.3894264737504992, "grad_norm": 0.3359452784061432, "learning_rate": 1.8209237139762787e-05, "loss": 0.5636, "step": 12677 }, { "epoch": 0.38945719288544833, "grad_norm": 0.3419310748577118, "learning_rate": 1.820896116051252e-05, "loss": 0.5992, "step": 12678 }, { "epoch": 0.3894879120203975, "grad_norm": 0.44843193888664246, "learning_rate": 1.8208685162089654e-05, "loss": 0.4806, "step": 12679 }, { "epoch": 0.3895186311553467, "grad_norm": 0.3336940407752991, "learning_rate": 1.8208409144494827e-05, "loss": 0.5459, "step": 12680 }, { "epoch": 0.3895493502902958, "grad_norm": 0.3577467203140259, "learning_rate": 1.8208133107728686e-05, "loss": 0.5985, "step": 12681 }, { "epoch": 0.389580069425245, "grad_norm": 0.36254003643989563, "learning_rate": 1.820785705179188e-05, "loss": 0.5833, "step": 12682 }, { "epoch": 0.38961078856019415, "grad_norm": 0.3472042679786682, "learning_rate": 1.8207580976685052e-05, "loss": 0.4822, "step": 12683 }, { "epoch": 0.38964150769514333, "grad_norm": 0.38242363929748535, "learning_rate": 1.8207304882408843e-05, "loss": 0.5576, "step": 12684 }, { "epoch": 0.38967222683009245, "grad_norm": 0.36178305745124817, "learning_rate": 1.8207028768963903e-05, "loss": 0.6008, "step": 12685 }, { "epoch": 0.3897029459650416, "grad_norm": 0.40467602014541626, "learning_rate": 1.8206752636350868e-05, "loss": 0.6016, "step": 12686 }, { "epoch": 0.3897336650999908, "grad_norm": 0.39130914211273193, "learning_rate": 1.8206476484570395e-05, "loss": 0.5561, "step": 12687 }, { "epoch": 0.3897643842349399, "grad_norm": 0.3460814356803894, "learning_rate": 1.820620031362312e-05, "loss": 0.5792, "step": 12688 }, { "epoch": 0.3897951033698891, "grad_norm": 0.4139222800731659, "learning_rate": 1.820592412350969e-05, "loss": 0.6563, "step": 12689 }, { "epoch": 0.38982582250483827, "grad_norm": 0.36775991320610046, "learning_rate": 1.8205647914230754e-05, "loss": 0.6707, "step": 12690 }, { "epoch": 0.38985654163978745, "grad_norm": 0.3293814957141876, "learning_rate": 1.820537168578695e-05, "loss": 0.5259, "step": 12691 }, { "epoch": 0.38988726077473657, "grad_norm": 0.31520891189575195, "learning_rate": 1.820509543817893e-05, "loss": 0.5712, "step": 12692 }, { "epoch": 0.38991797990968574, "grad_norm": 0.3852525055408478, "learning_rate": 1.820481917140734e-05, "loss": 0.5564, "step": 12693 }, { "epoch": 0.3899486990446349, "grad_norm": 0.3378998041152954, "learning_rate": 1.8204542885472817e-05, "loss": 0.639, "step": 12694 }, { "epoch": 0.38997941817958404, "grad_norm": 0.38546907901763916, "learning_rate": 1.8204266580376013e-05, "loss": 0.5849, "step": 12695 }, { "epoch": 0.3900101373145332, "grad_norm": 0.3532363176345825, "learning_rate": 1.8203990256117572e-05, "loss": 0.6712, "step": 12696 }, { "epoch": 0.3900408564494824, "grad_norm": 0.34957534074783325, "learning_rate": 1.820371391269814e-05, "loss": 0.561, "step": 12697 }, { "epoch": 0.39007157558443156, "grad_norm": 0.33232951164245605, "learning_rate": 1.820343755011836e-05, "loss": 0.5989, "step": 12698 }, { "epoch": 0.3901022947193807, "grad_norm": 0.37336111068725586, "learning_rate": 1.8203161168378876e-05, "loss": 0.5462, "step": 12699 }, { "epoch": 0.39013301385432986, "grad_norm": 0.3531326353549957, "learning_rate": 1.820288476748034e-05, "loss": 0.5156, "step": 12700 }, { "epoch": 0.39016373298927903, "grad_norm": 0.3059805929660797, "learning_rate": 1.820260834742339e-05, "loss": 0.5214, "step": 12701 }, { "epoch": 0.3901944521242282, "grad_norm": 0.5658020377159119, "learning_rate": 1.8202331908208677e-05, "loss": 0.5368, "step": 12702 }, { "epoch": 0.39022517125917733, "grad_norm": 0.310506135225296, "learning_rate": 1.8202055449836847e-05, "loss": 0.4948, "step": 12703 }, { "epoch": 0.3902558903941265, "grad_norm": 0.33393508195877075, "learning_rate": 1.820177897230854e-05, "loss": 0.5965, "step": 12704 }, { "epoch": 0.3902866095290757, "grad_norm": 0.3950824737548828, "learning_rate": 1.820150247562441e-05, "loss": 0.6352, "step": 12705 }, { "epoch": 0.3903173286640248, "grad_norm": 0.36537185311317444, "learning_rate": 1.8201225959785094e-05, "loss": 0.5616, "step": 12706 }, { "epoch": 0.390348047798974, "grad_norm": 0.41239434480667114, "learning_rate": 1.8200949424791243e-05, "loss": 0.5785, "step": 12707 }, { "epoch": 0.39037876693392315, "grad_norm": 0.32685980200767517, "learning_rate": 1.82006728706435e-05, "loss": 0.5821, "step": 12708 }, { "epoch": 0.3904094860688723, "grad_norm": 0.35476619005203247, "learning_rate": 1.8200396297342515e-05, "loss": 0.5998, "step": 12709 }, { "epoch": 0.39044020520382144, "grad_norm": 0.39550358057022095, "learning_rate": 1.8200119704888934e-05, "loss": 0.6401, "step": 12710 }, { "epoch": 0.3904709243387706, "grad_norm": 0.32102224230766296, "learning_rate": 1.81998430932834e-05, "loss": 0.4706, "step": 12711 }, { "epoch": 0.3905016434737198, "grad_norm": 0.35520726442337036, "learning_rate": 1.8199566462526557e-05, "loss": 0.5761, "step": 12712 }, { "epoch": 0.3905323626086689, "grad_norm": 0.3713115155696869, "learning_rate": 1.8199289812619053e-05, "loss": 0.5066, "step": 12713 }, { "epoch": 0.3905630817436181, "grad_norm": 0.3835351765155792, "learning_rate": 1.819901314356154e-05, "loss": 0.5583, "step": 12714 }, { "epoch": 0.39059380087856727, "grad_norm": 0.32299384474754333, "learning_rate": 1.8198736455354654e-05, "loss": 0.4925, "step": 12715 }, { "epoch": 0.39062452001351644, "grad_norm": 0.36355462670326233, "learning_rate": 1.8198459747999048e-05, "loss": 0.5966, "step": 12716 }, { "epoch": 0.39065523914846556, "grad_norm": 0.3784339427947998, "learning_rate": 1.8198183021495367e-05, "loss": 0.5937, "step": 12717 }, { "epoch": 0.39068595828341474, "grad_norm": 0.3324376940727234, "learning_rate": 1.8197906275844262e-05, "loss": 0.5457, "step": 12718 }, { "epoch": 0.3907166774183639, "grad_norm": 0.34337347745895386, "learning_rate": 1.8197629511046367e-05, "loss": 0.5747, "step": 12719 }, { "epoch": 0.39074739655331303, "grad_norm": 0.398963987827301, "learning_rate": 1.819735272710234e-05, "loss": 0.5742, "step": 12720 }, { "epoch": 0.3907781156882622, "grad_norm": 0.3766856789588928, "learning_rate": 1.819707592401282e-05, "loss": 0.6441, "step": 12721 }, { "epoch": 0.3908088348232114, "grad_norm": 0.3609825670719147, "learning_rate": 1.8196799101778458e-05, "loss": 0.5527, "step": 12722 }, { "epoch": 0.39083955395816056, "grad_norm": 0.37928688526153564, "learning_rate": 1.81965222603999e-05, "loss": 0.6831, "step": 12723 }, { "epoch": 0.3908702730931097, "grad_norm": 0.3854934871196747, "learning_rate": 1.8196245399877793e-05, "loss": 0.5154, "step": 12724 }, { "epoch": 0.39090099222805885, "grad_norm": 0.40799617767333984, "learning_rate": 1.8195968520212785e-05, "loss": 0.6198, "step": 12725 }, { "epoch": 0.390931711363008, "grad_norm": 0.3382120430469513, "learning_rate": 1.8195691621405513e-05, "loss": 0.4915, "step": 12726 }, { "epoch": 0.3909624304979572, "grad_norm": 0.3693837523460388, "learning_rate": 1.8195414703456635e-05, "loss": 0.5754, "step": 12727 }, { "epoch": 0.3909931496329063, "grad_norm": 0.4385649263858795, "learning_rate": 1.8195137766366794e-05, "loss": 0.4908, "step": 12728 }, { "epoch": 0.3910238687678555, "grad_norm": 0.3332604765892029, "learning_rate": 1.8194860810136634e-05, "loss": 0.5262, "step": 12729 }, { "epoch": 0.3910545879028047, "grad_norm": 0.3272441327571869, "learning_rate": 1.8194583834766803e-05, "loss": 0.501, "step": 12730 }, { "epoch": 0.3910853070377538, "grad_norm": 0.32544565200805664, "learning_rate": 1.8194306840257954e-05, "loss": 0.5124, "step": 12731 }, { "epoch": 0.39111602617270297, "grad_norm": 0.3259119987487793, "learning_rate": 1.8194029826610727e-05, "loss": 0.5403, "step": 12732 }, { "epoch": 0.39114674530765214, "grad_norm": 0.3998487889766693, "learning_rate": 1.819375279382577e-05, "loss": 0.6223, "step": 12733 }, { "epoch": 0.3911774644426013, "grad_norm": 0.3864685893058777, "learning_rate": 1.8193475741903734e-05, "loss": 0.5657, "step": 12734 }, { "epoch": 0.39120818357755044, "grad_norm": 0.44996100664138794, "learning_rate": 1.8193198670845258e-05, "loss": 0.5494, "step": 12735 }, { "epoch": 0.3912389027124996, "grad_norm": 0.37049680948257446, "learning_rate": 1.8192921580650996e-05, "loss": 0.5465, "step": 12736 }, { "epoch": 0.3912696218474488, "grad_norm": 0.3206457197666168, "learning_rate": 1.8192644471321597e-05, "loss": 0.5586, "step": 12737 }, { "epoch": 0.3913003409823979, "grad_norm": 0.34707027673721313, "learning_rate": 1.8192367342857702e-05, "loss": 0.5771, "step": 12738 }, { "epoch": 0.3913310601173471, "grad_norm": 0.3418354094028473, "learning_rate": 1.819209019525996e-05, "loss": 0.678, "step": 12739 }, { "epoch": 0.39136177925229626, "grad_norm": 0.39108923077583313, "learning_rate": 1.819181302852902e-05, "loss": 0.5458, "step": 12740 }, { "epoch": 0.39139249838724544, "grad_norm": 0.4312012493610382, "learning_rate": 1.819153584266553e-05, "loss": 0.537, "step": 12741 }, { "epoch": 0.39142321752219456, "grad_norm": 0.34393349289894104, "learning_rate": 1.8191258637670135e-05, "loss": 0.5602, "step": 12742 }, { "epoch": 0.39145393665714373, "grad_norm": 0.3418145179748535, "learning_rate": 1.819098141354348e-05, "loss": 0.5741, "step": 12743 }, { "epoch": 0.3914846557920929, "grad_norm": 0.32313355803489685, "learning_rate": 1.8190704170286222e-05, "loss": 0.4797, "step": 12744 }, { "epoch": 0.3915153749270421, "grad_norm": 0.518947184085846, "learning_rate": 1.8190426907899002e-05, "loss": 0.4963, "step": 12745 }, { "epoch": 0.3915460940619912, "grad_norm": 0.3358088731765747, "learning_rate": 1.819014962638246e-05, "loss": 0.5919, "step": 12746 }, { "epoch": 0.3915768131969404, "grad_norm": 0.31682321429252625, "learning_rate": 1.818987232573726e-05, "loss": 0.53, "step": 12747 }, { "epoch": 0.39160753233188955, "grad_norm": 0.3594593405723572, "learning_rate": 1.818959500596404e-05, "loss": 0.5066, "step": 12748 }, { "epoch": 0.39163825146683867, "grad_norm": 0.3678840100765228, "learning_rate": 1.8189317667063446e-05, "loss": 0.603, "step": 12749 }, { "epoch": 0.39166897060178785, "grad_norm": 0.3709924817085266, "learning_rate": 1.818904030903613e-05, "loss": 0.6268, "step": 12750 }, { "epoch": 0.391699689736737, "grad_norm": 0.32755231857299805, "learning_rate": 1.818876293188274e-05, "loss": 0.5134, "step": 12751 }, { "epoch": 0.3917304088716862, "grad_norm": 0.3151247203350067, "learning_rate": 1.818848553560392e-05, "loss": 0.5864, "step": 12752 }, { "epoch": 0.3917611280066353, "grad_norm": 0.34476086497306824, "learning_rate": 1.818820812020032e-05, "loss": 0.5775, "step": 12753 }, { "epoch": 0.3917918471415845, "grad_norm": 0.3425072133541107, "learning_rate": 1.818793068567259e-05, "loss": 0.6099, "step": 12754 }, { "epoch": 0.39182256627653367, "grad_norm": 0.44688543677330017, "learning_rate": 1.8187653232021372e-05, "loss": 0.5141, "step": 12755 }, { "epoch": 0.3918532854114828, "grad_norm": 0.38633593916893005, "learning_rate": 1.8187375759247323e-05, "loss": 0.5836, "step": 12756 }, { "epoch": 0.39188400454643196, "grad_norm": 0.3630231022834778, "learning_rate": 1.8187098267351085e-05, "loss": 0.5679, "step": 12757 }, { "epoch": 0.39191472368138114, "grad_norm": 0.33725807070732117, "learning_rate": 1.8186820756333304e-05, "loss": 0.5209, "step": 12758 }, { "epoch": 0.3919454428163303, "grad_norm": 0.3213243782520294, "learning_rate": 1.8186543226194636e-05, "loss": 0.5229, "step": 12759 }, { "epoch": 0.39197616195127943, "grad_norm": 0.32613253593444824, "learning_rate": 1.818626567693572e-05, "loss": 0.5251, "step": 12760 }, { "epoch": 0.3920068810862286, "grad_norm": 0.37759166955947876, "learning_rate": 1.8185988108557212e-05, "loss": 0.5723, "step": 12761 }, { "epoch": 0.3920376002211778, "grad_norm": 0.3797167241573334, "learning_rate": 1.8185710521059756e-05, "loss": 0.592, "step": 12762 }, { "epoch": 0.3920683193561269, "grad_norm": 0.2890707552433014, "learning_rate": 1.8185432914444005e-05, "loss": 0.5347, "step": 12763 }, { "epoch": 0.3920990384910761, "grad_norm": 0.34908726811408997, "learning_rate": 1.8185155288710598e-05, "loss": 0.5438, "step": 12764 }, { "epoch": 0.39212975762602525, "grad_norm": 0.31697121262550354, "learning_rate": 1.818487764386019e-05, "loss": 0.5825, "step": 12765 }, { "epoch": 0.39216047676097443, "grad_norm": 0.34345513582229614, "learning_rate": 1.8184599979893432e-05, "loss": 0.5913, "step": 12766 }, { "epoch": 0.39219119589592355, "grad_norm": 0.3325057029724121, "learning_rate": 1.8184322296810965e-05, "loss": 0.5837, "step": 12767 }, { "epoch": 0.3922219150308727, "grad_norm": 0.330489844083786, "learning_rate": 1.8184044594613444e-05, "loss": 0.6159, "step": 12768 }, { "epoch": 0.3922526341658219, "grad_norm": 0.35868802666664124, "learning_rate": 1.8183766873301516e-05, "loss": 0.5653, "step": 12769 }, { "epoch": 0.3922833533007711, "grad_norm": 0.32376763224601746, "learning_rate": 1.818348913287583e-05, "loss": 0.5002, "step": 12770 }, { "epoch": 0.3923140724357202, "grad_norm": 0.3798137605190277, "learning_rate": 1.8183211373337028e-05, "loss": 0.6331, "step": 12771 }, { "epoch": 0.39234479157066937, "grad_norm": 0.3545227348804474, "learning_rate": 1.818293359468577e-05, "loss": 0.5668, "step": 12772 }, { "epoch": 0.39237551070561855, "grad_norm": 0.3300997018814087, "learning_rate": 1.8182655796922696e-05, "loss": 0.5462, "step": 12773 }, { "epoch": 0.39240622984056767, "grad_norm": 0.31555670499801636, "learning_rate": 1.818237798004846e-05, "loss": 0.5162, "step": 12774 }, { "epoch": 0.39243694897551684, "grad_norm": 0.356912761926651, "learning_rate": 1.818210014406371e-05, "loss": 0.5584, "step": 12775 }, { "epoch": 0.392467668110466, "grad_norm": 0.4127078056335449, "learning_rate": 1.8181822288969088e-05, "loss": 0.5959, "step": 12776 }, { "epoch": 0.3924983872454152, "grad_norm": 0.31994518637657166, "learning_rate": 1.8181544414765253e-05, "loss": 0.58, "step": 12777 }, { "epoch": 0.3925291063803643, "grad_norm": 0.34047064185142517, "learning_rate": 1.818126652145285e-05, "loss": 0.6357, "step": 12778 }, { "epoch": 0.3925598255153135, "grad_norm": 0.35640543699264526, "learning_rate": 1.8180988609032523e-05, "loss": 0.6334, "step": 12779 }, { "epoch": 0.39259054465026266, "grad_norm": 0.820229172706604, "learning_rate": 1.818071067750493e-05, "loss": 0.621, "step": 12780 }, { "epoch": 0.3926212637852118, "grad_norm": 0.32395482063293457, "learning_rate": 1.8180432726870716e-05, "loss": 0.5785, "step": 12781 }, { "epoch": 0.39265198292016096, "grad_norm": 0.3598460257053375, "learning_rate": 1.818015475713053e-05, "loss": 0.5449, "step": 12782 }, { "epoch": 0.39268270205511013, "grad_norm": 0.36322563886642456, "learning_rate": 1.817987676828502e-05, "loss": 0.5021, "step": 12783 }, { "epoch": 0.3927134211900593, "grad_norm": 0.32324206829071045, "learning_rate": 1.8179598760334838e-05, "loss": 0.4786, "step": 12784 }, { "epoch": 0.39274414032500843, "grad_norm": 0.38048267364501953, "learning_rate": 1.817932073328063e-05, "loss": 0.6279, "step": 12785 }, { "epoch": 0.3927748594599576, "grad_norm": 0.3299476206302643, "learning_rate": 1.817904268712305e-05, "loss": 0.5258, "step": 12786 }, { "epoch": 0.3928055785949068, "grad_norm": 0.3179298937320709, "learning_rate": 1.8178764621862738e-05, "loss": 0.5131, "step": 12787 }, { "epoch": 0.39283629772985595, "grad_norm": 0.33918771147727966, "learning_rate": 1.8178486537500356e-05, "loss": 0.5305, "step": 12788 }, { "epoch": 0.3928670168648051, "grad_norm": 0.37182340025901794, "learning_rate": 1.8178208434036545e-05, "loss": 0.5377, "step": 12789 }, { "epoch": 0.39289773599975425, "grad_norm": 0.3483932614326477, "learning_rate": 1.817793031147196e-05, "loss": 0.5729, "step": 12790 }, { "epoch": 0.3929284551347034, "grad_norm": 0.3446057140827179, "learning_rate": 1.8177652169807245e-05, "loss": 0.6093, "step": 12791 }, { "epoch": 0.39295917426965254, "grad_norm": 0.3477858603000641, "learning_rate": 1.817737400904305e-05, "loss": 0.6666, "step": 12792 }, { "epoch": 0.3929898934046017, "grad_norm": 0.32864198088645935, "learning_rate": 1.8177095829180033e-05, "loss": 0.6043, "step": 12793 }, { "epoch": 0.3930206125395509, "grad_norm": 0.32323840260505676, "learning_rate": 1.8176817630218833e-05, "loss": 0.5216, "step": 12794 }, { "epoch": 0.39305133167450007, "grad_norm": 0.3462378978729248, "learning_rate": 1.8176539412160104e-05, "loss": 0.5652, "step": 12795 }, { "epoch": 0.3930820508094492, "grad_norm": 0.3513031303882599, "learning_rate": 1.8176261175004502e-05, "loss": 0.5571, "step": 12796 }, { "epoch": 0.39311276994439837, "grad_norm": 0.3849570155143738, "learning_rate": 1.8175982918752664e-05, "loss": 0.5354, "step": 12797 }, { "epoch": 0.39314348907934754, "grad_norm": 0.34383028745651245, "learning_rate": 1.817570464340525e-05, "loss": 0.5594, "step": 12798 }, { "epoch": 0.39317420821429666, "grad_norm": 0.32246312499046326, "learning_rate": 1.8175426348962907e-05, "loss": 0.5994, "step": 12799 }, { "epoch": 0.39320492734924584, "grad_norm": 0.3417467176914215, "learning_rate": 1.8175148035426285e-05, "loss": 0.5983, "step": 12800 }, { "epoch": 0.393235646484195, "grad_norm": 0.3139697313308716, "learning_rate": 1.8174869702796033e-05, "loss": 0.5056, "step": 12801 }, { "epoch": 0.3932663656191442, "grad_norm": 0.3582741320133209, "learning_rate": 1.8174591351072804e-05, "loss": 0.5483, "step": 12802 }, { "epoch": 0.3932970847540933, "grad_norm": 0.35488972067832947, "learning_rate": 1.8174312980257243e-05, "loss": 0.5782, "step": 12803 }, { "epoch": 0.3933278038890425, "grad_norm": 0.5101213455200195, "learning_rate": 1.8174034590350006e-05, "loss": 0.6004, "step": 12804 }, { "epoch": 0.39335852302399166, "grad_norm": 0.4329844117164612, "learning_rate": 1.8173756181351734e-05, "loss": 0.5745, "step": 12805 }, { "epoch": 0.3933892421589408, "grad_norm": 0.37905266880989075, "learning_rate": 1.8173477753263095e-05, "loss": 0.5228, "step": 12806 }, { "epoch": 0.39341996129388995, "grad_norm": 0.3561650216579437, "learning_rate": 1.8173199306084718e-05, "loss": 0.6219, "step": 12807 }, { "epoch": 0.39345068042883913, "grad_norm": 0.35827162861824036, "learning_rate": 1.8172920839817268e-05, "loss": 0.586, "step": 12808 }, { "epoch": 0.3934813995637883, "grad_norm": 0.5302042365074158, "learning_rate": 1.817264235446139e-05, "loss": 0.6714, "step": 12809 }, { "epoch": 0.3935121186987374, "grad_norm": 0.34511616826057434, "learning_rate": 1.817236385001773e-05, "loss": 0.5356, "step": 12810 }, { "epoch": 0.3935428378336866, "grad_norm": 0.32790037989616394, "learning_rate": 1.817208532648695e-05, "loss": 0.586, "step": 12811 }, { "epoch": 0.3935735569686358, "grad_norm": 0.3022743761539459, "learning_rate": 1.817180678386969e-05, "loss": 0.5883, "step": 12812 }, { "epoch": 0.39360427610358495, "grad_norm": 0.3409270942211151, "learning_rate": 1.8171528222166605e-05, "loss": 0.5757, "step": 12813 }, { "epoch": 0.39363499523853407, "grad_norm": 0.34028932452201843, "learning_rate": 1.817124964137835e-05, "loss": 0.547, "step": 12814 }, { "epoch": 0.39366571437348324, "grad_norm": 0.3532288372516632, "learning_rate": 1.8170971041505563e-05, "loss": 0.5544, "step": 12815 }, { "epoch": 0.3936964335084324, "grad_norm": 0.3446268141269684, "learning_rate": 1.8170692422548908e-05, "loss": 0.5593, "step": 12816 }, { "epoch": 0.39372715264338154, "grad_norm": 0.3573243021965027, "learning_rate": 1.8170413784509028e-05, "loss": 0.5803, "step": 12817 }, { "epoch": 0.3937578717783307, "grad_norm": 0.3697608709335327, "learning_rate": 1.8170135127386576e-05, "loss": 0.6204, "step": 12818 }, { "epoch": 0.3937885909132799, "grad_norm": 0.3816118538379669, "learning_rate": 1.8169856451182203e-05, "loss": 0.6476, "step": 12819 }, { "epoch": 0.39381931004822907, "grad_norm": 0.35307908058166504, "learning_rate": 1.8169577755896563e-05, "loss": 0.5093, "step": 12820 }, { "epoch": 0.3938500291831782, "grad_norm": 0.33663007616996765, "learning_rate": 1.81692990415303e-05, "loss": 0.5727, "step": 12821 }, { "epoch": 0.39388074831812736, "grad_norm": 0.33210235834121704, "learning_rate": 1.816902030808407e-05, "loss": 0.5897, "step": 12822 }, { "epoch": 0.39391146745307654, "grad_norm": 0.3522430956363678, "learning_rate": 1.816874155555852e-05, "loss": 0.5285, "step": 12823 }, { "epoch": 0.39394218658802566, "grad_norm": 0.35440826416015625, "learning_rate": 1.8168462783954305e-05, "loss": 0.6236, "step": 12824 }, { "epoch": 0.39397290572297483, "grad_norm": 0.33583495020866394, "learning_rate": 1.8168183993272074e-05, "loss": 0.573, "step": 12825 }, { "epoch": 0.394003624857924, "grad_norm": 0.33631300926208496, "learning_rate": 1.816790518351248e-05, "loss": 0.5664, "step": 12826 }, { "epoch": 0.3940343439928732, "grad_norm": 0.3489297926425934, "learning_rate": 1.8167626354676175e-05, "loss": 0.5994, "step": 12827 }, { "epoch": 0.3940650631278223, "grad_norm": 0.36632487177848816, "learning_rate": 1.8167347506763807e-05, "loss": 0.6102, "step": 12828 }, { "epoch": 0.3940957822627715, "grad_norm": 0.36638349294662476, "learning_rate": 1.8167068639776023e-05, "loss": 0.5882, "step": 12829 }, { "epoch": 0.39412650139772065, "grad_norm": 0.35900527238845825, "learning_rate": 1.8166789753713488e-05, "loss": 0.5919, "step": 12830 }, { "epoch": 0.3941572205326698, "grad_norm": 0.34766098856925964, "learning_rate": 1.8166510848576838e-05, "loss": 0.6314, "step": 12831 }, { "epoch": 0.39418793966761895, "grad_norm": 0.3395267426967621, "learning_rate": 1.8166231924366737e-05, "loss": 0.5284, "step": 12832 }, { "epoch": 0.3942186588025681, "grad_norm": 0.33809995651245117, "learning_rate": 1.816595298108383e-05, "loss": 0.6269, "step": 12833 }, { "epoch": 0.3942493779375173, "grad_norm": 0.33063483238220215, "learning_rate": 1.816567401872877e-05, "loss": 0.5149, "step": 12834 }, { "epoch": 0.3942800970724664, "grad_norm": 0.3420949876308441, "learning_rate": 1.8165395037302206e-05, "loss": 0.6006, "step": 12835 }, { "epoch": 0.3943108162074156, "grad_norm": 0.29784148931503296, "learning_rate": 1.816511603680479e-05, "loss": 0.5505, "step": 12836 }, { "epoch": 0.39434153534236477, "grad_norm": 0.34664586186408997, "learning_rate": 1.816483701723718e-05, "loss": 0.5986, "step": 12837 }, { "epoch": 0.39437225447731394, "grad_norm": 0.3633562922477722, "learning_rate": 1.8164557978600018e-05, "loss": 0.5881, "step": 12838 }, { "epoch": 0.39440297361226306, "grad_norm": 0.3667909502983093, "learning_rate": 1.8164278920893964e-05, "loss": 0.5754, "step": 12839 }, { "epoch": 0.39443369274721224, "grad_norm": 0.3740542232990265, "learning_rate": 1.8163999844119667e-05, "loss": 0.6192, "step": 12840 }, { "epoch": 0.3944644118821614, "grad_norm": 0.34725409746170044, "learning_rate": 1.8163720748277774e-05, "loss": 0.5522, "step": 12841 }, { "epoch": 0.39449513101711053, "grad_norm": 0.3728778660297394, "learning_rate": 1.8163441633368946e-05, "loss": 0.5428, "step": 12842 }, { "epoch": 0.3945258501520597, "grad_norm": 0.34048062562942505, "learning_rate": 1.8163162499393826e-05, "loss": 0.6324, "step": 12843 }, { "epoch": 0.3945565692870089, "grad_norm": 0.34225714206695557, "learning_rate": 1.8162883346353072e-05, "loss": 0.5711, "step": 12844 }, { "epoch": 0.39458728842195806, "grad_norm": 0.3218097984790802, "learning_rate": 1.8162604174247334e-05, "loss": 0.5685, "step": 12845 }, { "epoch": 0.3946180075569072, "grad_norm": 0.4195527136325836, "learning_rate": 1.8162324983077263e-05, "loss": 0.5393, "step": 12846 }, { "epoch": 0.39464872669185636, "grad_norm": 0.39020389318466187, "learning_rate": 1.8162045772843513e-05, "loss": 0.639, "step": 12847 }, { "epoch": 0.39467944582680553, "grad_norm": 0.3705294728279114, "learning_rate": 1.8161766543546734e-05, "loss": 0.5228, "step": 12848 }, { "epoch": 0.39471016496175465, "grad_norm": 0.41043511033058167, "learning_rate": 1.816148729518758e-05, "loss": 0.621, "step": 12849 }, { "epoch": 0.3947408840967038, "grad_norm": 0.33246028423309326, "learning_rate": 1.8161208027766702e-05, "loss": 0.587, "step": 12850 }, { "epoch": 0.394771603231653, "grad_norm": 0.44218236207962036, "learning_rate": 1.8160928741284754e-05, "loss": 0.5573, "step": 12851 }, { "epoch": 0.3948023223666022, "grad_norm": 0.3847273290157318, "learning_rate": 1.8160649435742382e-05, "loss": 0.5746, "step": 12852 }, { "epoch": 0.3948330415015513, "grad_norm": 0.36323902010917664, "learning_rate": 1.816037011114025e-05, "loss": 0.5719, "step": 12853 }, { "epoch": 0.39486376063650047, "grad_norm": 0.32781165838241577, "learning_rate": 1.8160090767478998e-05, "loss": 0.5866, "step": 12854 }, { "epoch": 0.39489447977144965, "grad_norm": 0.3807957172393799, "learning_rate": 1.815981140475929e-05, "loss": 0.6243, "step": 12855 }, { "epoch": 0.3949251989063988, "grad_norm": 0.3364047110080719, "learning_rate": 1.815953202298177e-05, "loss": 0.5713, "step": 12856 }, { "epoch": 0.39495591804134794, "grad_norm": 0.346954882144928, "learning_rate": 1.8159252622147092e-05, "loss": 0.5691, "step": 12857 }, { "epoch": 0.3949866371762971, "grad_norm": 0.37489521503448486, "learning_rate": 1.8158973202255906e-05, "loss": 0.6934, "step": 12858 }, { "epoch": 0.3950173563112463, "grad_norm": 0.33791032433509827, "learning_rate": 1.8158693763308877e-05, "loss": 0.5436, "step": 12859 }, { "epoch": 0.3950480754461954, "grad_norm": 0.31310659646987915, "learning_rate": 1.8158414305306643e-05, "loss": 0.5959, "step": 12860 }, { "epoch": 0.3950787945811446, "grad_norm": 0.35715073347091675, "learning_rate": 1.8158134828249864e-05, "loss": 0.5547, "step": 12861 }, { "epoch": 0.39510951371609376, "grad_norm": 0.3391798734664917, "learning_rate": 1.815785533213919e-05, "loss": 0.535, "step": 12862 }, { "epoch": 0.39514023285104294, "grad_norm": 0.47078198194503784, "learning_rate": 1.8157575816975277e-05, "loss": 0.6161, "step": 12863 }, { "epoch": 0.39517095198599206, "grad_norm": 0.30335533618927, "learning_rate": 1.8157296282758773e-05, "loss": 0.5496, "step": 12864 }, { "epoch": 0.39520167112094123, "grad_norm": 0.4036659300327301, "learning_rate": 1.8157016729490337e-05, "loss": 0.5343, "step": 12865 }, { "epoch": 0.3952323902558904, "grad_norm": 0.34383928775787354, "learning_rate": 1.8156737157170622e-05, "loss": 0.5497, "step": 12866 }, { "epoch": 0.39526310939083953, "grad_norm": 0.3330143988132477, "learning_rate": 1.8156457565800272e-05, "loss": 0.5975, "step": 12867 }, { "epoch": 0.3952938285257887, "grad_norm": 0.4451829791069031, "learning_rate": 1.8156177955379948e-05, "loss": 0.6473, "step": 12868 }, { "epoch": 0.3953245476607379, "grad_norm": 0.40778782963752747, "learning_rate": 1.81558983259103e-05, "loss": 0.5485, "step": 12869 }, { "epoch": 0.39535526679568705, "grad_norm": 0.36082887649536133, "learning_rate": 1.8155618677391982e-05, "loss": 0.6561, "step": 12870 }, { "epoch": 0.3953859859306362, "grad_norm": 0.4530898332595825, "learning_rate": 1.8155339009825645e-05, "loss": 0.5971, "step": 12871 }, { "epoch": 0.39541670506558535, "grad_norm": 0.32219329476356506, "learning_rate": 1.8155059323211946e-05, "loss": 0.6365, "step": 12872 }, { "epoch": 0.3954474242005345, "grad_norm": 0.34637799859046936, "learning_rate": 1.815477961755154e-05, "loss": 0.613, "step": 12873 }, { "epoch": 0.39547814333548365, "grad_norm": 0.3677693009376526, "learning_rate": 1.815449989284507e-05, "loss": 0.6246, "step": 12874 }, { "epoch": 0.3955088624704328, "grad_norm": 0.3725658357143402, "learning_rate": 1.81542201490932e-05, "loss": 0.6072, "step": 12875 }, { "epoch": 0.395539581605382, "grad_norm": 0.3616657555103302, "learning_rate": 1.8153940386296577e-05, "loss": 0.516, "step": 12876 }, { "epoch": 0.39557030074033117, "grad_norm": 0.40663278102874756, "learning_rate": 1.815366060445586e-05, "loss": 0.6543, "step": 12877 }, { "epoch": 0.3956010198752803, "grad_norm": 0.33798497915267944, "learning_rate": 1.8153380803571697e-05, "loss": 0.5473, "step": 12878 }, { "epoch": 0.39563173901022947, "grad_norm": 0.3357340395450592, "learning_rate": 1.8153100983644746e-05, "loss": 0.6258, "step": 12879 }, { "epoch": 0.39566245814517864, "grad_norm": 0.3790649473667145, "learning_rate": 1.8152821144675658e-05, "loss": 0.6035, "step": 12880 }, { "epoch": 0.3956931772801278, "grad_norm": 0.3645251989364624, "learning_rate": 1.8152541286665082e-05, "loss": 0.588, "step": 12881 }, { "epoch": 0.39572389641507694, "grad_norm": 0.35480886697769165, "learning_rate": 1.815226140961368e-05, "loss": 0.5451, "step": 12882 }, { "epoch": 0.3957546155500261, "grad_norm": 0.38754212856292725, "learning_rate": 1.8151981513522103e-05, "loss": 0.5949, "step": 12883 }, { "epoch": 0.3957853346849753, "grad_norm": 0.35579872131347656, "learning_rate": 1.8151701598391003e-05, "loss": 0.6425, "step": 12884 }, { "epoch": 0.3958160538199244, "grad_norm": 0.3870236575603485, "learning_rate": 1.8151421664221033e-05, "loss": 0.5924, "step": 12885 }, { "epoch": 0.3958467729548736, "grad_norm": 0.36626720428466797, "learning_rate": 1.815114171101285e-05, "loss": 0.5537, "step": 12886 }, { "epoch": 0.39587749208982276, "grad_norm": 0.3443355858325958, "learning_rate": 1.815086173876711e-05, "loss": 0.6449, "step": 12887 }, { "epoch": 0.39590821122477193, "grad_norm": 0.3456527590751648, "learning_rate": 1.8150581747484458e-05, "loss": 0.5743, "step": 12888 }, { "epoch": 0.39593893035972105, "grad_norm": 0.42168915271759033, "learning_rate": 1.815030173716555e-05, "loss": 0.5891, "step": 12889 }, { "epoch": 0.39596964949467023, "grad_norm": 0.346948504447937, "learning_rate": 1.8150021707811053e-05, "loss": 0.6291, "step": 12890 }, { "epoch": 0.3960003686296194, "grad_norm": 0.3424777686595917, "learning_rate": 1.8149741659421603e-05, "loss": 0.5888, "step": 12891 }, { "epoch": 0.3960310877645685, "grad_norm": 0.33735302090644836, "learning_rate": 1.8149461591997862e-05, "loss": 0.5812, "step": 12892 }, { "epoch": 0.3960618068995177, "grad_norm": 0.39985591173171997, "learning_rate": 1.8149181505540488e-05, "loss": 0.503, "step": 12893 }, { "epoch": 0.3960925260344669, "grad_norm": 0.3446309566497803, "learning_rate": 1.814890140005013e-05, "loss": 0.6132, "step": 12894 }, { "epoch": 0.39612324516941605, "grad_norm": 0.3279944062232971, "learning_rate": 1.8148621275527446e-05, "loss": 0.5081, "step": 12895 }, { "epoch": 0.39615396430436517, "grad_norm": 0.37307870388031006, "learning_rate": 1.814834113197309e-05, "loss": 0.5897, "step": 12896 }, { "epoch": 0.39618468343931434, "grad_norm": 0.3299659192562103, "learning_rate": 1.8148060969387707e-05, "loss": 0.5808, "step": 12897 }, { "epoch": 0.3962154025742635, "grad_norm": 0.3435394763946533, "learning_rate": 1.8147780787771962e-05, "loss": 0.6662, "step": 12898 }, { "epoch": 0.3962461217092127, "grad_norm": 0.3639402389526367, "learning_rate": 1.8147500587126507e-05, "loss": 0.5665, "step": 12899 }, { "epoch": 0.3962768408441618, "grad_norm": 0.40050485730171204, "learning_rate": 1.8147220367451992e-05, "loss": 0.5963, "step": 12900 }, { "epoch": 0.396307559979111, "grad_norm": 0.3918745815753937, "learning_rate": 1.814694012874908e-05, "loss": 0.5457, "step": 12901 }, { "epoch": 0.39633827911406017, "grad_norm": 0.3358967900276184, "learning_rate": 1.8146659871018415e-05, "loss": 0.6053, "step": 12902 }, { "epoch": 0.3963689982490093, "grad_norm": 0.34041520953178406, "learning_rate": 1.8146379594260662e-05, "loss": 0.5844, "step": 12903 }, { "epoch": 0.39639971738395846, "grad_norm": 0.3292492628097534, "learning_rate": 1.8146099298476467e-05, "loss": 0.5626, "step": 12904 }, { "epoch": 0.39643043651890764, "grad_norm": 0.3562423884868622, "learning_rate": 1.8145818983666488e-05, "loss": 0.5742, "step": 12905 }, { "epoch": 0.3964611556538568, "grad_norm": 0.37773001194000244, "learning_rate": 1.814553864983138e-05, "loss": 0.5169, "step": 12906 }, { "epoch": 0.39649187478880593, "grad_norm": 0.3469577431678772, "learning_rate": 1.81452582969718e-05, "loss": 0.5393, "step": 12907 }, { "epoch": 0.3965225939237551, "grad_norm": 0.42746174335479736, "learning_rate": 1.81449779250884e-05, "loss": 0.5372, "step": 12908 }, { "epoch": 0.3965533130587043, "grad_norm": 0.34618517756462097, "learning_rate": 1.814469753418183e-05, "loss": 0.5973, "step": 12909 }, { "epoch": 0.3965840321936534, "grad_norm": 0.3625010848045349, "learning_rate": 1.8144417124252756e-05, "loss": 0.6264, "step": 12910 }, { "epoch": 0.3966147513286026, "grad_norm": 0.35739457607269287, "learning_rate": 1.8144136695301824e-05, "loss": 0.5551, "step": 12911 }, { "epoch": 0.39664547046355175, "grad_norm": 0.33814677596092224, "learning_rate": 1.8143856247329694e-05, "loss": 0.5772, "step": 12912 }, { "epoch": 0.39667618959850093, "grad_norm": 0.363502562046051, "learning_rate": 1.8143575780337016e-05, "loss": 0.5652, "step": 12913 }, { "epoch": 0.39670690873345005, "grad_norm": 0.4577622413635254, "learning_rate": 1.8143295294324453e-05, "loss": 0.6406, "step": 12914 }, { "epoch": 0.3967376278683992, "grad_norm": 0.31255558133125305, "learning_rate": 1.8143014789292653e-05, "loss": 0.5962, "step": 12915 }, { "epoch": 0.3967683470033484, "grad_norm": 0.38728344440460205, "learning_rate": 1.8142734265242273e-05, "loss": 0.5985, "step": 12916 }, { "epoch": 0.3967990661382975, "grad_norm": 0.44451674818992615, "learning_rate": 1.8142453722173964e-05, "loss": 0.5792, "step": 12917 }, { "epoch": 0.3968297852732467, "grad_norm": 0.3660999834537506, "learning_rate": 1.814217316008839e-05, "loss": 0.6376, "step": 12918 }, { "epoch": 0.39686050440819587, "grad_norm": 0.3361862301826477, "learning_rate": 1.81418925789862e-05, "loss": 0.5774, "step": 12919 }, { "epoch": 0.39689122354314504, "grad_norm": 0.37523892521858215, "learning_rate": 1.8141611978868058e-05, "loss": 0.546, "step": 12920 }, { "epoch": 0.39692194267809416, "grad_norm": 0.32210540771484375, "learning_rate": 1.8141331359734605e-05, "loss": 0.4972, "step": 12921 }, { "epoch": 0.39695266181304334, "grad_norm": 0.42029455304145813, "learning_rate": 1.8141050721586505e-05, "loss": 0.7231, "step": 12922 }, { "epoch": 0.3969833809479925, "grad_norm": 0.37349945306777954, "learning_rate": 1.8140770064424417e-05, "loss": 0.5858, "step": 12923 }, { "epoch": 0.3970141000829417, "grad_norm": 0.37552574276924133, "learning_rate": 1.8140489388248986e-05, "loss": 0.6078, "step": 12924 }, { "epoch": 0.3970448192178908, "grad_norm": 0.35673609375953674, "learning_rate": 1.814020869306088e-05, "loss": 0.5942, "step": 12925 }, { "epoch": 0.39707553835284, "grad_norm": 0.3872948884963989, "learning_rate": 1.813992797886074e-05, "loss": 0.5885, "step": 12926 }, { "epoch": 0.39710625748778916, "grad_norm": 0.542278528213501, "learning_rate": 1.8139647245649238e-05, "loss": 0.5257, "step": 12927 }, { "epoch": 0.3971369766227383, "grad_norm": 0.4214458465576172, "learning_rate": 1.8139366493427014e-05, "loss": 0.6455, "step": 12928 }, { "epoch": 0.39716769575768746, "grad_norm": 0.36166608333587646, "learning_rate": 1.8139085722194737e-05, "loss": 0.595, "step": 12929 }, { "epoch": 0.39719841489263663, "grad_norm": 0.347302109003067, "learning_rate": 1.8138804931953053e-05, "loss": 0.6402, "step": 12930 }, { "epoch": 0.3972291340275858, "grad_norm": 0.3700729310512543, "learning_rate": 1.8138524122702624e-05, "loss": 0.6183, "step": 12931 }, { "epoch": 0.3972598531625349, "grad_norm": 0.33718588948249817, "learning_rate": 1.8138243294444105e-05, "loss": 0.6764, "step": 12932 }, { "epoch": 0.3972905722974841, "grad_norm": 0.3395189344882965, "learning_rate": 1.8137962447178147e-05, "loss": 0.4761, "step": 12933 }, { "epoch": 0.3973212914324333, "grad_norm": 0.3912227153778076, "learning_rate": 1.813768158090541e-05, "loss": 0.6083, "step": 12934 }, { "epoch": 0.3973520105673824, "grad_norm": 0.3656686544418335, "learning_rate": 1.813740069562655e-05, "loss": 0.5115, "step": 12935 }, { "epoch": 0.39738272970233157, "grad_norm": 0.3566579222679138, "learning_rate": 1.813711979134222e-05, "loss": 0.6267, "step": 12936 }, { "epoch": 0.39741344883728075, "grad_norm": 0.3447688817977905, "learning_rate": 1.8136838868053082e-05, "loss": 0.5729, "step": 12937 }, { "epoch": 0.3974441679722299, "grad_norm": 0.363421231508255, "learning_rate": 1.813655792575979e-05, "loss": 0.5107, "step": 12938 }, { "epoch": 0.39747488710717904, "grad_norm": 0.3633897602558136, "learning_rate": 1.8136276964462992e-05, "loss": 0.5343, "step": 12939 }, { "epoch": 0.3975056062421282, "grad_norm": 0.3049306273460388, "learning_rate": 1.8135995984163357e-05, "loss": 0.4934, "step": 12940 }, { "epoch": 0.3975363253770774, "grad_norm": 0.3804556131362915, "learning_rate": 1.8135714984861533e-05, "loss": 0.5812, "step": 12941 }, { "epoch": 0.39756704451202657, "grad_norm": 0.32398805022239685, "learning_rate": 1.813543396655818e-05, "loss": 0.5237, "step": 12942 }, { "epoch": 0.3975977636469757, "grad_norm": 0.3800413906574249, "learning_rate": 1.813515292925395e-05, "loss": 0.5722, "step": 12943 }, { "epoch": 0.39762848278192486, "grad_norm": 0.4092594385147095, "learning_rate": 1.81348718729495e-05, "loss": 0.5407, "step": 12944 }, { "epoch": 0.39765920191687404, "grad_norm": 0.33835622668266296, "learning_rate": 1.8134590797645495e-05, "loss": 0.6099, "step": 12945 }, { "epoch": 0.39768992105182316, "grad_norm": 0.3506392240524292, "learning_rate": 1.8134309703342584e-05, "loss": 0.5724, "step": 12946 }, { "epoch": 0.39772064018677233, "grad_norm": 0.36245647072792053, "learning_rate": 1.8134028590041423e-05, "loss": 0.6049, "step": 12947 }, { "epoch": 0.3977513593217215, "grad_norm": 0.3553200960159302, "learning_rate": 1.8133747457742667e-05, "loss": 0.5835, "step": 12948 }, { "epoch": 0.3977820784566707, "grad_norm": 0.33084797859191895, "learning_rate": 1.813346630644698e-05, "loss": 0.5713, "step": 12949 }, { "epoch": 0.3978127975916198, "grad_norm": 0.3255881369113922, "learning_rate": 1.813318513615501e-05, "loss": 0.6323, "step": 12950 }, { "epoch": 0.397843516726569, "grad_norm": 0.3158280551433563, "learning_rate": 1.8132903946867424e-05, "loss": 0.5413, "step": 12951 }, { "epoch": 0.39787423586151816, "grad_norm": 0.33184319734573364, "learning_rate": 1.813262273858487e-05, "loss": 0.516, "step": 12952 }, { "epoch": 0.3979049549964673, "grad_norm": 0.3346373438835144, "learning_rate": 1.8132341511308008e-05, "loss": 0.5777, "step": 12953 }, { "epoch": 0.39793567413141645, "grad_norm": 0.336779922246933, "learning_rate": 1.8132060265037493e-05, "loss": 0.564, "step": 12954 }, { "epoch": 0.3979663932663656, "grad_norm": 0.381418913602829, "learning_rate": 1.8131778999773985e-05, "loss": 0.5736, "step": 12955 }, { "epoch": 0.3979971124013148, "grad_norm": 0.34984034299850464, "learning_rate": 1.813149771551814e-05, "loss": 0.5311, "step": 12956 }, { "epoch": 0.3980278315362639, "grad_norm": 0.33968305587768555, "learning_rate": 1.813121641227061e-05, "loss": 0.5717, "step": 12957 }, { "epoch": 0.3980585506712131, "grad_norm": 0.3671490550041199, "learning_rate": 1.813093509003206e-05, "loss": 0.6126, "step": 12958 }, { "epoch": 0.39808926980616227, "grad_norm": 1.060914158821106, "learning_rate": 1.8130653748803143e-05, "loss": 0.5419, "step": 12959 }, { "epoch": 0.3981199889411114, "grad_norm": 0.3911992013454437, "learning_rate": 1.8130372388584515e-05, "loss": 0.5598, "step": 12960 }, { "epoch": 0.39815070807606057, "grad_norm": 0.34417155385017395, "learning_rate": 1.8130091009376834e-05, "loss": 0.5563, "step": 12961 }, { "epoch": 0.39818142721100974, "grad_norm": 0.33973246812820435, "learning_rate": 1.812980961118076e-05, "loss": 0.6055, "step": 12962 }, { "epoch": 0.3982121463459589, "grad_norm": 0.34577494859695435, "learning_rate": 1.8129528193996945e-05, "loss": 0.5788, "step": 12963 }, { "epoch": 0.39824286548090804, "grad_norm": 0.365262895822525, "learning_rate": 1.8129246757826055e-05, "loss": 0.5954, "step": 12964 }, { "epoch": 0.3982735846158572, "grad_norm": 0.3435237407684326, "learning_rate": 1.8128965302668734e-05, "loss": 0.6537, "step": 12965 }, { "epoch": 0.3983043037508064, "grad_norm": 0.31961965560913086, "learning_rate": 1.812868382852565e-05, "loss": 0.5742, "step": 12966 }, { "epoch": 0.39833502288575556, "grad_norm": 0.35584908723831177, "learning_rate": 1.8128402335397457e-05, "loss": 0.6003, "step": 12967 }, { "epoch": 0.3983657420207047, "grad_norm": 0.3422880172729492, "learning_rate": 1.8128120823284815e-05, "loss": 0.6184, "step": 12968 }, { "epoch": 0.39839646115565386, "grad_norm": 0.44394099712371826, "learning_rate": 1.8127839292188375e-05, "loss": 0.5651, "step": 12969 }, { "epoch": 0.39842718029060303, "grad_norm": 0.4173397719860077, "learning_rate": 1.8127557742108802e-05, "loss": 0.5659, "step": 12970 }, { "epoch": 0.39845789942555215, "grad_norm": 0.3696073889732361, "learning_rate": 1.8127276173046747e-05, "loss": 0.6117, "step": 12971 }, { "epoch": 0.39848861856050133, "grad_norm": 0.48497119545936584, "learning_rate": 1.8126994585002873e-05, "loss": 0.5606, "step": 12972 }, { "epoch": 0.3985193376954505, "grad_norm": 0.34101182222366333, "learning_rate": 1.8126712977977837e-05, "loss": 0.6356, "step": 12973 }, { "epoch": 0.3985500568303997, "grad_norm": 0.3321332037448883, "learning_rate": 1.8126431351972294e-05, "loss": 0.5802, "step": 12974 }, { "epoch": 0.3985807759653488, "grad_norm": 0.3778130114078522, "learning_rate": 1.81261497069869e-05, "loss": 0.5595, "step": 12975 }, { "epoch": 0.398611495100298, "grad_norm": 0.32816267013549805, "learning_rate": 1.8125868043022322e-05, "loss": 0.578, "step": 12976 }, { "epoch": 0.39864221423524715, "grad_norm": 0.3848414421081543, "learning_rate": 1.8125586360079208e-05, "loss": 0.5779, "step": 12977 }, { "epoch": 0.39867293337019627, "grad_norm": 0.326799213886261, "learning_rate": 1.812530465815822e-05, "loss": 0.5103, "step": 12978 }, { "epoch": 0.39870365250514544, "grad_norm": 0.3200840950012207, "learning_rate": 1.8125022937260012e-05, "loss": 0.5084, "step": 12979 }, { "epoch": 0.3987343716400946, "grad_norm": 0.35580867528915405, "learning_rate": 1.8124741197385248e-05, "loss": 0.5687, "step": 12980 }, { "epoch": 0.3987650907750438, "grad_norm": 0.35510948300361633, "learning_rate": 1.8124459438534585e-05, "loss": 0.5602, "step": 12981 }, { "epoch": 0.3987958099099929, "grad_norm": 0.36603158712387085, "learning_rate": 1.8124177660708683e-05, "loss": 0.6027, "step": 12982 }, { "epoch": 0.3988265290449421, "grad_norm": 0.391293466091156, "learning_rate": 1.8123895863908192e-05, "loss": 0.5766, "step": 12983 }, { "epoch": 0.39885724817989127, "grad_norm": 0.3561176061630249, "learning_rate": 1.8123614048133774e-05, "loss": 0.5856, "step": 12984 }, { "epoch": 0.39888796731484044, "grad_norm": 0.3451213836669922, "learning_rate": 1.812333221338609e-05, "loss": 0.5784, "step": 12985 }, { "epoch": 0.39891868644978956, "grad_norm": 0.3281084895133972, "learning_rate": 1.8123050359665794e-05, "loss": 0.5726, "step": 12986 }, { "epoch": 0.39894940558473874, "grad_norm": 0.328919380903244, "learning_rate": 1.812276848697355e-05, "loss": 0.6129, "step": 12987 }, { "epoch": 0.3989801247196879, "grad_norm": 0.3612845838069916, "learning_rate": 1.812248659531001e-05, "loss": 0.5469, "step": 12988 }, { "epoch": 0.39901084385463703, "grad_norm": 0.3562816381454468, "learning_rate": 1.812220468467584e-05, "loss": 0.5638, "step": 12989 }, { "epoch": 0.3990415629895862, "grad_norm": 0.3571195602416992, "learning_rate": 1.812192275507169e-05, "loss": 0.5827, "step": 12990 }, { "epoch": 0.3990722821245354, "grad_norm": 0.45717182755470276, "learning_rate": 1.8121640806498218e-05, "loss": 0.5398, "step": 12991 }, { "epoch": 0.39910300125948456, "grad_norm": 0.3124993145465851, "learning_rate": 1.8121358838956092e-05, "loss": 0.5303, "step": 12992 }, { "epoch": 0.3991337203944337, "grad_norm": 0.3760831356048584, "learning_rate": 1.8121076852445965e-05, "loss": 0.499, "step": 12993 }, { "epoch": 0.39916443952938285, "grad_norm": 0.35389119386672974, "learning_rate": 1.8120794846968496e-05, "loss": 0.6036, "step": 12994 }, { "epoch": 0.39919515866433203, "grad_norm": 0.34164535999298096, "learning_rate": 1.8120512822524344e-05, "loss": 0.5769, "step": 12995 }, { "epoch": 0.39922587779928115, "grad_norm": 0.366719126701355, "learning_rate": 1.8120230779114164e-05, "loss": 0.5296, "step": 12996 }, { "epoch": 0.3992565969342303, "grad_norm": 0.3573869466781616, "learning_rate": 1.811994871673862e-05, "loss": 0.5846, "step": 12997 }, { "epoch": 0.3992873160691795, "grad_norm": 0.4543416500091553, "learning_rate": 1.811966663539837e-05, "loss": 0.4933, "step": 12998 }, { "epoch": 0.3993180352041287, "grad_norm": 0.3431367874145508, "learning_rate": 1.8119384535094068e-05, "loss": 0.5689, "step": 12999 }, { "epoch": 0.3993487543390778, "grad_norm": 0.4150281548500061, "learning_rate": 1.8119102415826376e-05, "loss": 0.4885, "step": 13000 }, { "epoch": 0.39937947347402697, "grad_norm": 0.33836331963539124, "learning_rate": 1.8118820277595958e-05, "loss": 0.6382, "step": 13001 }, { "epoch": 0.39941019260897614, "grad_norm": 0.39145365357398987, "learning_rate": 1.8118538120403467e-05, "loss": 0.5356, "step": 13002 }, { "epoch": 0.39944091174392526, "grad_norm": 0.379238486289978, "learning_rate": 1.811825594424956e-05, "loss": 0.5709, "step": 13003 }, { "epoch": 0.39947163087887444, "grad_norm": 0.3587002456188202, "learning_rate": 1.8117973749134903e-05, "loss": 0.6635, "step": 13004 }, { "epoch": 0.3995023500138236, "grad_norm": 0.44223320484161377, "learning_rate": 1.811769153506015e-05, "loss": 0.5841, "step": 13005 }, { "epoch": 0.3995330691487728, "grad_norm": 0.34275391697883606, "learning_rate": 1.811740930202596e-05, "loss": 0.6025, "step": 13006 }, { "epoch": 0.3995637882837219, "grad_norm": 0.3236602246761322, "learning_rate": 1.8117127050032993e-05, "loss": 0.5707, "step": 13007 }, { "epoch": 0.3995945074186711, "grad_norm": 0.3650473952293396, "learning_rate": 1.8116844779081914e-05, "loss": 0.6648, "step": 13008 }, { "epoch": 0.39962522655362026, "grad_norm": 0.35549506545066833, "learning_rate": 1.8116562489173373e-05, "loss": 0.6596, "step": 13009 }, { "epoch": 0.39965594568856944, "grad_norm": 0.4076179563999176, "learning_rate": 1.8116280180308036e-05, "loss": 0.6282, "step": 13010 }, { "epoch": 0.39968666482351856, "grad_norm": 0.35554876923561096, "learning_rate": 1.8115997852486558e-05, "loss": 0.5583, "step": 13011 }, { "epoch": 0.39971738395846773, "grad_norm": 0.37644362449645996, "learning_rate": 1.8115715505709597e-05, "loss": 0.6106, "step": 13012 }, { "epoch": 0.3997481030934169, "grad_norm": 0.33263808488845825, "learning_rate": 1.8115433139977822e-05, "loss": 0.5097, "step": 13013 }, { "epoch": 0.399778822228366, "grad_norm": 0.40367427468299866, "learning_rate": 1.811515075529188e-05, "loss": 0.6519, "step": 13014 }, { "epoch": 0.3998095413633152, "grad_norm": 0.35062751173973083, "learning_rate": 1.811486835165244e-05, "loss": 0.5069, "step": 13015 }, { "epoch": 0.3998402604982644, "grad_norm": 0.3653094470500946, "learning_rate": 1.811458592906016e-05, "loss": 0.6169, "step": 13016 }, { "epoch": 0.39987097963321355, "grad_norm": 0.4211580753326416, "learning_rate": 1.8114303487515697e-05, "loss": 0.5862, "step": 13017 }, { "epoch": 0.3999016987681627, "grad_norm": 0.348274827003479, "learning_rate": 1.8114021027019708e-05, "loss": 0.6253, "step": 13018 }, { "epoch": 0.39993241790311185, "grad_norm": 0.33451783657073975, "learning_rate": 1.8113738547572858e-05, "loss": 0.5177, "step": 13019 }, { "epoch": 0.399963137038061, "grad_norm": 0.36390310525894165, "learning_rate": 1.8113456049175807e-05, "loss": 0.6111, "step": 13020 }, { "epoch": 0.39999385617301014, "grad_norm": 0.3350619077682495, "learning_rate": 1.811317353182921e-05, "loss": 0.5928, "step": 13021 }, { "epoch": 0.4000245753079593, "grad_norm": 0.3923126757144928, "learning_rate": 1.811289099553373e-05, "loss": 0.5497, "step": 13022 }, { "epoch": 0.4000552944429085, "grad_norm": 0.34330451488494873, "learning_rate": 1.811260844029003e-05, "loss": 0.5161, "step": 13023 }, { "epoch": 0.40008601357785767, "grad_norm": 0.3216344118118286, "learning_rate": 1.8112325866098762e-05, "loss": 0.5008, "step": 13024 }, { "epoch": 0.4001167327128068, "grad_norm": 0.3841245174407959, "learning_rate": 1.8112043272960592e-05, "loss": 0.6009, "step": 13025 }, { "epoch": 0.40014745184775596, "grad_norm": 0.3680126667022705, "learning_rate": 1.811176066087618e-05, "loss": 0.5813, "step": 13026 }, { "epoch": 0.40017817098270514, "grad_norm": 0.32905346155166626, "learning_rate": 1.8111478029846182e-05, "loss": 0.453, "step": 13027 }, { "epoch": 0.4002088901176543, "grad_norm": 0.38026511669158936, "learning_rate": 1.811119537987126e-05, "loss": 0.6061, "step": 13028 }, { "epoch": 0.40023960925260343, "grad_norm": 0.3490017354488373, "learning_rate": 1.811091271095208e-05, "loss": 0.5985, "step": 13029 }, { "epoch": 0.4002703283875526, "grad_norm": 0.37698471546173096, "learning_rate": 1.811063002308929e-05, "loss": 0.5882, "step": 13030 }, { "epoch": 0.4003010475225018, "grad_norm": 0.3300102651119232, "learning_rate": 1.811034731628356e-05, "loss": 0.5263, "step": 13031 }, { "epoch": 0.4003317666574509, "grad_norm": 0.34968727827072144, "learning_rate": 1.8110064590535548e-05, "loss": 0.5895, "step": 13032 }, { "epoch": 0.4003624857924001, "grad_norm": 0.36806586384773254, "learning_rate": 1.8109781845845914e-05, "loss": 0.5727, "step": 13033 }, { "epoch": 0.40039320492734926, "grad_norm": 0.37422826886177063, "learning_rate": 1.810949908221532e-05, "loss": 0.6813, "step": 13034 }, { "epoch": 0.40042392406229843, "grad_norm": 0.3682180643081665, "learning_rate": 1.810921629964442e-05, "loss": 0.5142, "step": 13035 }, { "epoch": 0.40045464319724755, "grad_norm": 0.3092144727706909, "learning_rate": 1.8108933498133882e-05, "loss": 0.5226, "step": 13036 }, { "epoch": 0.4004853623321967, "grad_norm": 0.3458704352378845, "learning_rate": 1.810865067768436e-05, "loss": 0.5385, "step": 13037 }, { "epoch": 0.4005160814671459, "grad_norm": 0.3322073221206665, "learning_rate": 1.810836783829652e-05, "loss": 0.5132, "step": 13038 }, { "epoch": 0.400546800602095, "grad_norm": 0.35289859771728516, "learning_rate": 1.8108084979971024e-05, "loss": 0.6306, "step": 13039 }, { "epoch": 0.4005775197370442, "grad_norm": 0.317409873008728, "learning_rate": 1.8107802102708527e-05, "loss": 0.4718, "step": 13040 }, { "epoch": 0.40060823887199337, "grad_norm": 0.3947815001010895, "learning_rate": 1.8107519206509693e-05, "loss": 0.6558, "step": 13041 }, { "epoch": 0.40063895800694255, "grad_norm": 0.3502882719039917, "learning_rate": 1.8107236291375177e-05, "loss": 0.5903, "step": 13042 }, { "epoch": 0.40066967714189167, "grad_norm": 0.3418857157230377, "learning_rate": 1.810695335730565e-05, "loss": 0.5836, "step": 13043 }, { "epoch": 0.40070039627684084, "grad_norm": 0.3470628559589386, "learning_rate": 1.8106670404301764e-05, "loss": 0.5701, "step": 13044 }, { "epoch": 0.40073111541179, "grad_norm": 0.3345760405063629, "learning_rate": 1.8106387432364184e-05, "loss": 0.5742, "step": 13045 }, { "epoch": 0.40076183454673914, "grad_norm": 0.3489665687084198, "learning_rate": 1.810610444149357e-05, "loss": 0.5245, "step": 13046 }, { "epoch": 0.4007925536816883, "grad_norm": 0.3301171064376831, "learning_rate": 1.810582143169058e-05, "loss": 0.6005, "step": 13047 }, { "epoch": 0.4008232728166375, "grad_norm": 0.3442883789539337, "learning_rate": 1.8105538402955886e-05, "loss": 0.5649, "step": 13048 }, { "epoch": 0.40085399195158666, "grad_norm": 0.42283299565315247, "learning_rate": 1.8105255355290136e-05, "loss": 0.5352, "step": 13049 }, { "epoch": 0.4008847110865358, "grad_norm": 0.359520822763443, "learning_rate": 1.8104972288693992e-05, "loss": 0.5711, "step": 13050 }, { "epoch": 0.40091543022148496, "grad_norm": 0.3355616331100464, "learning_rate": 1.8104689203168123e-05, "loss": 0.6387, "step": 13051 }, { "epoch": 0.40094614935643413, "grad_norm": 0.3579041063785553, "learning_rate": 1.8104406098713188e-05, "loss": 0.599, "step": 13052 }, { "epoch": 0.4009768684913833, "grad_norm": 0.3391491174697876, "learning_rate": 1.8104122975329845e-05, "loss": 0.57, "step": 13053 }, { "epoch": 0.40100758762633243, "grad_norm": 0.37575003504753113, "learning_rate": 1.8103839833018754e-05, "loss": 0.5682, "step": 13054 }, { "epoch": 0.4010383067612816, "grad_norm": 0.3149512708187103, "learning_rate": 1.8103556671780584e-05, "loss": 0.4729, "step": 13055 }, { "epoch": 0.4010690258962308, "grad_norm": 0.38076773285865784, "learning_rate": 1.810327349161599e-05, "loss": 0.6419, "step": 13056 }, { "epoch": 0.4010997450311799, "grad_norm": 0.364455908536911, "learning_rate": 1.8102990292525632e-05, "loss": 0.6077, "step": 13057 }, { "epoch": 0.4011304641661291, "grad_norm": 0.3282911479473114, "learning_rate": 1.8102707074510176e-05, "loss": 0.5812, "step": 13058 }, { "epoch": 0.40116118330107825, "grad_norm": 0.3234592080116272, "learning_rate": 1.8102423837570286e-05, "loss": 0.53, "step": 13059 }, { "epoch": 0.4011919024360274, "grad_norm": 0.3652111291885376, "learning_rate": 1.8102140581706612e-05, "loss": 0.5384, "step": 13060 }, { "epoch": 0.40122262157097655, "grad_norm": 0.3186188042163849, "learning_rate": 1.8101857306919826e-05, "loss": 0.5428, "step": 13061 }, { "epoch": 0.4012533407059257, "grad_norm": 0.34593990445137024, "learning_rate": 1.810157401321059e-05, "loss": 0.5753, "step": 13062 }, { "epoch": 0.4012840598408749, "grad_norm": 0.3296295702457428, "learning_rate": 1.8101290700579556e-05, "loss": 0.5742, "step": 13063 }, { "epoch": 0.401314778975824, "grad_norm": 0.4416889548301697, "learning_rate": 1.8101007369027397e-05, "loss": 0.7115, "step": 13064 }, { "epoch": 0.4013454981107732, "grad_norm": 0.3627316951751709, "learning_rate": 1.8100724018554765e-05, "loss": 0.508, "step": 13065 }, { "epoch": 0.40137621724572237, "grad_norm": 0.3624752163887024, "learning_rate": 1.810044064916233e-05, "loss": 0.5349, "step": 13066 }, { "epoch": 0.40140693638067154, "grad_norm": 0.41776421666145325, "learning_rate": 1.8100157260850748e-05, "loss": 0.5973, "step": 13067 }, { "epoch": 0.40143765551562066, "grad_norm": 0.35225096344947815, "learning_rate": 1.809987385362068e-05, "loss": 0.5706, "step": 13068 }, { "epoch": 0.40146837465056984, "grad_norm": 0.36620867252349854, "learning_rate": 1.8099590427472796e-05, "loss": 0.5726, "step": 13069 }, { "epoch": 0.401499093785519, "grad_norm": 0.3512287735939026, "learning_rate": 1.8099306982407753e-05, "loss": 0.504, "step": 13070 }, { "epoch": 0.40152981292046813, "grad_norm": 0.3456054925918579, "learning_rate": 1.809902351842621e-05, "loss": 0.6203, "step": 13071 }, { "epoch": 0.4015605320554173, "grad_norm": 0.34405654668807983, "learning_rate": 1.8098740035528834e-05, "loss": 0.5068, "step": 13072 }, { "epoch": 0.4015912511903665, "grad_norm": 0.3305802047252655, "learning_rate": 1.8098456533716283e-05, "loss": 0.6143, "step": 13073 }, { "epoch": 0.40162197032531566, "grad_norm": 0.692613422870636, "learning_rate": 1.8098173012989225e-05, "loss": 0.549, "step": 13074 }, { "epoch": 0.4016526894602648, "grad_norm": 0.384886234998703, "learning_rate": 1.8097889473348315e-05, "loss": 0.5992, "step": 13075 }, { "epoch": 0.40168340859521395, "grad_norm": 0.31406688690185547, "learning_rate": 1.809760591479422e-05, "loss": 0.628, "step": 13076 }, { "epoch": 0.40171412773016313, "grad_norm": 0.346515953540802, "learning_rate": 1.80973223373276e-05, "loss": 0.5939, "step": 13077 }, { "epoch": 0.4017448468651123, "grad_norm": 0.34516391158103943, "learning_rate": 1.8097038740949118e-05, "loss": 0.5423, "step": 13078 }, { "epoch": 0.4017755660000614, "grad_norm": 0.47109633684158325, "learning_rate": 1.8096755125659434e-05, "loss": 0.5872, "step": 13079 }, { "epoch": 0.4018062851350106, "grad_norm": 0.3405253291130066, "learning_rate": 1.809647149145922e-05, "loss": 0.6094, "step": 13080 }, { "epoch": 0.4018370042699598, "grad_norm": 0.343245267868042, "learning_rate": 1.8096187838349127e-05, "loss": 0.6193, "step": 13081 }, { "epoch": 0.4018677234049089, "grad_norm": 0.33914852142333984, "learning_rate": 1.8095904166329822e-05, "loss": 0.6095, "step": 13082 }, { "epoch": 0.40189844253985807, "grad_norm": 0.35435816645622253, "learning_rate": 1.8095620475401966e-05, "loss": 0.6413, "step": 13083 }, { "epoch": 0.40192916167480724, "grad_norm": 0.3293165862560272, "learning_rate": 1.8095336765566224e-05, "loss": 0.5686, "step": 13084 }, { "epoch": 0.4019598808097564, "grad_norm": 0.36999669671058655, "learning_rate": 1.8095053036823257e-05, "loss": 0.5861, "step": 13085 }, { "epoch": 0.40199059994470554, "grad_norm": 0.30673325061798096, "learning_rate": 1.809476928917373e-05, "loss": 0.5587, "step": 13086 }, { "epoch": 0.4020213190796547, "grad_norm": 0.3096969425678253, "learning_rate": 1.8094485522618305e-05, "loss": 0.5659, "step": 13087 }, { "epoch": 0.4020520382146039, "grad_norm": 0.36580365896224976, "learning_rate": 1.8094201737157642e-05, "loss": 0.5569, "step": 13088 }, { "epoch": 0.402082757349553, "grad_norm": 0.46562474966049194, "learning_rate": 1.8093917932792403e-05, "loss": 0.6061, "step": 13089 }, { "epoch": 0.4021134764845022, "grad_norm": 0.3400559723377228, "learning_rate": 1.809363410952326e-05, "loss": 0.5647, "step": 13090 }, { "epoch": 0.40214419561945136, "grad_norm": 0.3582642674446106, "learning_rate": 1.809335026735086e-05, "loss": 0.6268, "step": 13091 }, { "epoch": 0.40217491475440054, "grad_norm": 0.344508558511734, "learning_rate": 1.8093066406275882e-05, "loss": 0.4923, "step": 13092 }, { "epoch": 0.40220563388934966, "grad_norm": 0.33267149329185486, "learning_rate": 1.8092782526298983e-05, "loss": 0.4997, "step": 13093 }, { "epoch": 0.40223635302429883, "grad_norm": 0.3526279032230377, "learning_rate": 1.8092498627420822e-05, "loss": 0.6235, "step": 13094 }, { "epoch": 0.402267072159248, "grad_norm": 0.34682610630989075, "learning_rate": 1.8092214709642066e-05, "loss": 0.4992, "step": 13095 }, { "epoch": 0.4022977912941972, "grad_norm": 1.0744670629501343, "learning_rate": 1.809193077296338e-05, "loss": 0.5784, "step": 13096 }, { "epoch": 0.4023285104291463, "grad_norm": 0.32341837882995605, "learning_rate": 1.8091646817385422e-05, "loss": 0.5039, "step": 13097 }, { "epoch": 0.4023592295640955, "grad_norm": 0.36430105566978455, "learning_rate": 1.8091362842908856e-05, "loss": 0.5389, "step": 13098 }, { "epoch": 0.40238994869904465, "grad_norm": 0.35660237073898315, "learning_rate": 1.809107884953435e-05, "loss": 0.5998, "step": 13099 }, { "epoch": 0.4024206678339938, "grad_norm": 0.3508598506450653, "learning_rate": 1.809079483726256e-05, "loss": 0.5567, "step": 13100 }, { "epoch": 0.40245138696894295, "grad_norm": 0.3341028094291687, "learning_rate": 1.809051080609416e-05, "loss": 0.5633, "step": 13101 }, { "epoch": 0.4024821061038921, "grad_norm": 0.41753479838371277, "learning_rate": 1.8090226756029804e-05, "loss": 0.5252, "step": 13102 }, { "epoch": 0.4025128252388413, "grad_norm": 0.3819064497947693, "learning_rate": 1.8089942687070158e-05, "loss": 0.6748, "step": 13103 }, { "epoch": 0.4025435443737904, "grad_norm": 0.5630823969841003, "learning_rate": 1.8089658599215883e-05, "loss": 0.5255, "step": 13104 }, { "epoch": 0.4025742635087396, "grad_norm": 0.39077112078666687, "learning_rate": 1.808937449246765e-05, "loss": 0.6823, "step": 13105 }, { "epoch": 0.40260498264368877, "grad_norm": 0.342196524143219, "learning_rate": 1.8089090366826116e-05, "loss": 0.6251, "step": 13106 }, { "epoch": 0.4026357017786379, "grad_norm": 0.3390013873577118, "learning_rate": 1.8088806222291947e-05, "loss": 0.4902, "step": 13107 }, { "epoch": 0.40266642091358706, "grad_norm": 0.3660273253917694, "learning_rate": 1.8088522058865806e-05, "loss": 0.5683, "step": 13108 }, { "epoch": 0.40269714004853624, "grad_norm": 0.3340429961681366, "learning_rate": 1.8088237876548354e-05, "loss": 0.6137, "step": 13109 }, { "epoch": 0.4027278591834854, "grad_norm": 0.3554539382457733, "learning_rate": 1.8087953675340265e-05, "loss": 0.6379, "step": 13110 }, { "epoch": 0.40275857831843453, "grad_norm": 0.3949281871318817, "learning_rate": 1.808766945524219e-05, "loss": 0.4897, "step": 13111 }, { "epoch": 0.4027892974533837, "grad_norm": 0.3407707214355469, "learning_rate": 1.8087385216254797e-05, "loss": 0.6516, "step": 13112 }, { "epoch": 0.4028200165883329, "grad_norm": 0.3528645634651184, "learning_rate": 1.8087100958378754e-05, "loss": 0.6398, "step": 13113 }, { "epoch": 0.402850735723282, "grad_norm": 0.3908250033855438, "learning_rate": 1.808681668161472e-05, "loss": 0.6355, "step": 13114 }, { "epoch": 0.4028814548582312, "grad_norm": 0.34136149287223816, "learning_rate": 1.8086532385963364e-05, "loss": 0.4587, "step": 13115 }, { "epoch": 0.40291217399318036, "grad_norm": 0.352906733751297, "learning_rate": 1.8086248071425343e-05, "loss": 0.5613, "step": 13116 }, { "epoch": 0.40294289312812953, "grad_norm": 0.4742562174797058, "learning_rate": 1.8085963738001326e-05, "loss": 0.6108, "step": 13117 }, { "epoch": 0.40297361226307865, "grad_norm": 0.3818645775318146, "learning_rate": 1.8085679385691978e-05, "loss": 0.6136, "step": 13118 }, { "epoch": 0.4030043313980278, "grad_norm": 0.38420817255973816, "learning_rate": 1.8085395014497956e-05, "loss": 0.6033, "step": 13119 }, { "epoch": 0.403035050532977, "grad_norm": 0.7938624024391174, "learning_rate": 1.8085110624419934e-05, "loss": 0.5879, "step": 13120 }, { "epoch": 0.4030657696679262, "grad_norm": 0.3185769021511078, "learning_rate": 1.808482621545857e-05, "loss": 0.493, "step": 13121 }, { "epoch": 0.4030964888028753, "grad_norm": 0.3845197558403015, "learning_rate": 1.8084541787614533e-05, "loss": 0.5477, "step": 13122 }, { "epoch": 0.40312720793782447, "grad_norm": 0.3443424105644226, "learning_rate": 1.808425734088848e-05, "loss": 0.5691, "step": 13123 }, { "epoch": 0.40315792707277365, "grad_norm": 0.31963735818862915, "learning_rate": 1.808397287528108e-05, "loss": 0.5041, "step": 13124 }, { "epoch": 0.40318864620772277, "grad_norm": 0.3439933955669403, "learning_rate": 1.8083688390792997e-05, "loss": 0.5301, "step": 13125 }, { "epoch": 0.40321936534267194, "grad_norm": 0.3794308304786682, "learning_rate": 1.8083403887424894e-05, "loss": 0.5228, "step": 13126 }, { "epoch": 0.4032500844776211, "grad_norm": 0.3233562707901001, "learning_rate": 1.8083119365177437e-05, "loss": 0.5732, "step": 13127 }, { "epoch": 0.4032808036125703, "grad_norm": 0.40932291746139526, "learning_rate": 1.8082834824051292e-05, "loss": 0.5267, "step": 13128 }, { "epoch": 0.4033115227475194, "grad_norm": 0.3801185190677643, "learning_rate": 1.8082550264047124e-05, "loss": 0.5715, "step": 13129 }, { "epoch": 0.4033422418824686, "grad_norm": 0.30947035551071167, "learning_rate": 1.808226568516559e-05, "loss": 0.5258, "step": 13130 }, { "epoch": 0.40337296101741776, "grad_norm": 0.35159894824028015, "learning_rate": 1.8081981087407365e-05, "loss": 0.5422, "step": 13131 }, { "epoch": 0.4034036801523669, "grad_norm": 0.3544352352619171, "learning_rate": 1.8081696470773105e-05, "loss": 0.5649, "step": 13132 }, { "epoch": 0.40343439928731606, "grad_norm": 0.3628195524215698, "learning_rate": 1.808141183526348e-05, "loss": 0.5882, "step": 13133 }, { "epoch": 0.40346511842226523, "grad_norm": 0.3386306166648865, "learning_rate": 1.8081127180879152e-05, "loss": 0.5189, "step": 13134 }, { "epoch": 0.4034958375572144, "grad_norm": 0.31472495198249817, "learning_rate": 1.8080842507620787e-05, "loss": 0.5085, "step": 13135 }, { "epoch": 0.40352655669216353, "grad_norm": 0.3493591248989105, "learning_rate": 1.8080557815489054e-05, "loss": 0.5232, "step": 13136 }, { "epoch": 0.4035572758271127, "grad_norm": 0.29173317551612854, "learning_rate": 1.808027310448461e-05, "loss": 0.4888, "step": 13137 }, { "epoch": 0.4035879949620619, "grad_norm": 0.3375170826911926, "learning_rate": 1.8079988374608124e-05, "loss": 0.6249, "step": 13138 }, { "epoch": 0.40361871409701106, "grad_norm": 0.33546194434165955, "learning_rate": 1.807970362586026e-05, "loss": 0.531, "step": 13139 }, { "epoch": 0.4036494332319602, "grad_norm": 0.38059037923812866, "learning_rate": 1.8079418858241688e-05, "loss": 0.5872, "step": 13140 }, { "epoch": 0.40368015236690935, "grad_norm": 0.3443724811077118, "learning_rate": 1.8079134071753063e-05, "loss": 0.5038, "step": 13141 }, { "epoch": 0.4037108715018585, "grad_norm": 0.3287619650363922, "learning_rate": 1.807884926639506e-05, "loss": 0.5912, "step": 13142 }, { "epoch": 0.40374159063680765, "grad_norm": 0.33632469177246094, "learning_rate": 1.807856444216834e-05, "loss": 0.527, "step": 13143 }, { "epoch": 0.4037723097717568, "grad_norm": 0.3910737931728363, "learning_rate": 1.8078279599073568e-05, "loss": 0.6193, "step": 13144 }, { "epoch": 0.403803028906706, "grad_norm": 0.3915809690952301, "learning_rate": 1.8077994737111414e-05, "loss": 0.5735, "step": 13145 }, { "epoch": 0.40383374804165517, "grad_norm": 0.36712706089019775, "learning_rate": 1.8077709856282533e-05, "loss": 0.5165, "step": 13146 }, { "epoch": 0.4038644671766043, "grad_norm": 0.36657610535621643, "learning_rate": 1.8077424956587595e-05, "loss": 0.5609, "step": 13147 }, { "epoch": 0.40389518631155347, "grad_norm": 0.3505792021751404, "learning_rate": 1.8077140038027276e-05, "loss": 0.6915, "step": 13148 }, { "epoch": 0.40392590544650264, "grad_norm": 0.3357135057449341, "learning_rate": 1.8076855100602225e-05, "loss": 0.535, "step": 13149 }, { "epoch": 0.40395662458145176, "grad_norm": 0.3678404986858368, "learning_rate": 1.8076570144313117e-05, "loss": 0.5422, "step": 13150 }, { "epoch": 0.40398734371640094, "grad_norm": 0.3309127390384674, "learning_rate": 1.8076285169160615e-05, "loss": 0.5758, "step": 13151 }, { "epoch": 0.4040180628513501, "grad_norm": 0.6444262266159058, "learning_rate": 1.8076000175145388e-05, "loss": 0.6048, "step": 13152 }, { "epoch": 0.4040487819862993, "grad_norm": 0.34198930859565735, "learning_rate": 1.8075715162268092e-05, "loss": 0.5817, "step": 13153 }, { "epoch": 0.4040795011212484, "grad_norm": 0.41887524724006653, "learning_rate": 1.8075430130529405e-05, "loss": 0.5956, "step": 13154 }, { "epoch": 0.4041102202561976, "grad_norm": 0.34593990445137024, "learning_rate": 1.8075145079929982e-05, "loss": 0.6437, "step": 13155 }, { "epoch": 0.40414093939114676, "grad_norm": 0.3388037383556366, "learning_rate": 1.80748600104705e-05, "loss": 0.5456, "step": 13156 }, { "epoch": 0.4041716585260959, "grad_norm": 0.4230838119983673, "learning_rate": 1.8074574922151614e-05, "loss": 0.5796, "step": 13157 }, { "epoch": 0.40420237766104505, "grad_norm": 0.3585130274295807, "learning_rate": 1.8074289814973994e-05, "loss": 0.5915, "step": 13158 }, { "epoch": 0.40423309679599423, "grad_norm": 0.32813316583633423, "learning_rate": 1.8074004688938308e-05, "loss": 0.4987, "step": 13159 }, { "epoch": 0.4042638159309434, "grad_norm": 0.3809431195259094, "learning_rate": 1.807371954404522e-05, "loss": 0.6116, "step": 13160 }, { "epoch": 0.4042945350658925, "grad_norm": 0.38213789463043213, "learning_rate": 1.80734343802954e-05, "loss": 0.5793, "step": 13161 }, { "epoch": 0.4043252542008417, "grad_norm": 0.42334070801734924, "learning_rate": 1.8073149197689503e-05, "loss": 0.5628, "step": 13162 }, { "epoch": 0.4043559733357909, "grad_norm": 0.3533456027507782, "learning_rate": 1.8072863996228206e-05, "loss": 0.5361, "step": 13163 }, { "epoch": 0.40438669247074005, "grad_norm": 0.3332694470882416, "learning_rate": 1.807257877591217e-05, "loss": 0.5325, "step": 13164 }, { "epoch": 0.40441741160568917, "grad_norm": 0.35352230072021484, "learning_rate": 1.8072293536742065e-05, "loss": 0.5829, "step": 13165 }, { "epoch": 0.40444813074063835, "grad_norm": 0.35988345742225647, "learning_rate": 1.807200827871855e-05, "loss": 0.5713, "step": 13166 }, { "epoch": 0.4044788498755875, "grad_norm": 0.3468203842639923, "learning_rate": 1.80717230018423e-05, "loss": 0.5332, "step": 13167 }, { "epoch": 0.40450956901053664, "grad_norm": 0.3855845034122467, "learning_rate": 1.8071437706113976e-05, "loss": 0.5687, "step": 13168 }, { "epoch": 0.4045402881454858, "grad_norm": 0.36016398668289185, "learning_rate": 1.8071152391534247e-05, "loss": 0.5293, "step": 13169 }, { "epoch": 0.404571007280435, "grad_norm": 0.34679076075553894, "learning_rate": 1.8070867058103777e-05, "loss": 0.6441, "step": 13170 }, { "epoch": 0.40460172641538417, "grad_norm": 0.33859869837760925, "learning_rate": 1.807058170582323e-05, "loss": 0.6504, "step": 13171 }, { "epoch": 0.4046324455503333, "grad_norm": 0.33596837520599365, "learning_rate": 1.807029633469328e-05, "loss": 0.6154, "step": 13172 }, { "epoch": 0.40466316468528246, "grad_norm": 0.3812282085418701, "learning_rate": 1.8070010944714588e-05, "loss": 0.6115, "step": 13173 }, { "epoch": 0.40469388382023164, "grad_norm": 0.48419860005378723, "learning_rate": 1.806972553588782e-05, "loss": 0.603, "step": 13174 }, { "epoch": 0.40472460295518076, "grad_norm": 0.329866498708725, "learning_rate": 1.806944010821365e-05, "loss": 0.5379, "step": 13175 }, { "epoch": 0.40475532209012993, "grad_norm": 0.3842249810695648, "learning_rate": 1.8069154661692734e-05, "loss": 0.5509, "step": 13176 }, { "epoch": 0.4047860412250791, "grad_norm": 0.3235860764980316, "learning_rate": 1.8068869196325745e-05, "loss": 0.5332, "step": 13177 }, { "epoch": 0.4048167603600283, "grad_norm": 0.34518513083457947, "learning_rate": 1.8068583712113347e-05, "loss": 0.5758, "step": 13178 }, { "epoch": 0.4048474794949774, "grad_norm": 0.3528963625431061, "learning_rate": 1.806829820905621e-05, "loss": 0.6104, "step": 13179 }, { "epoch": 0.4048781986299266, "grad_norm": 0.40236401557922363, "learning_rate": 1.8068012687154997e-05, "loss": 0.5414, "step": 13180 }, { "epoch": 0.40490891776487575, "grad_norm": 0.33451542258262634, "learning_rate": 1.806772714641038e-05, "loss": 0.5945, "step": 13181 }, { "epoch": 0.40493963689982493, "grad_norm": 0.3600684106349945, "learning_rate": 1.806744158682302e-05, "loss": 0.5468, "step": 13182 }, { "epoch": 0.40497035603477405, "grad_norm": 0.6209030151367188, "learning_rate": 1.806715600839359e-05, "loss": 0.6892, "step": 13183 }, { "epoch": 0.4050010751697232, "grad_norm": 0.3815654516220093, "learning_rate": 1.806687041112275e-05, "loss": 0.5629, "step": 13184 }, { "epoch": 0.4050317943046724, "grad_norm": 0.3354444205760956, "learning_rate": 1.8066584795011176e-05, "loss": 0.5661, "step": 13185 }, { "epoch": 0.4050625134396215, "grad_norm": 0.33996134996414185, "learning_rate": 1.8066299160059527e-05, "loss": 0.5429, "step": 13186 }, { "epoch": 0.4050932325745707, "grad_norm": 0.3763646185398102, "learning_rate": 1.806601350626847e-05, "loss": 0.5809, "step": 13187 }, { "epoch": 0.40512395170951987, "grad_norm": 0.3652627170085907, "learning_rate": 1.806572783363868e-05, "loss": 0.5475, "step": 13188 }, { "epoch": 0.40515467084446904, "grad_norm": 0.3331427276134491, "learning_rate": 1.8065442142170817e-05, "loss": 0.5889, "step": 13189 }, { "epoch": 0.40518538997941816, "grad_norm": 0.38037094473838806, "learning_rate": 1.806515643186555e-05, "loss": 0.5615, "step": 13190 }, { "epoch": 0.40521610911436734, "grad_norm": 0.33297911286354065, "learning_rate": 1.8064870702723546e-05, "loss": 0.6163, "step": 13191 }, { "epoch": 0.4052468282493165, "grad_norm": 0.3599946200847626, "learning_rate": 1.8064584954745478e-05, "loss": 0.6525, "step": 13192 }, { "epoch": 0.40527754738426564, "grad_norm": 0.3462601602077484, "learning_rate": 1.8064299187932003e-05, "loss": 0.5379, "step": 13193 }, { "epoch": 0.4053082665192148, "grad_norm": 0.3725699186325073, "learning_rate": 1.8064013402283794e-05, "loss": 0.5853, "step": 13194 }, { "epoch": 0.405338985654164, "grad_norm": 0.42260104417800903, "learning_rate": 1.8063727597801524e-05, "loss": 0.5819, "step": 13195 }, { "epoch": 0.40536970478911316, "grad_norm": 0.33267802000045776, "learning_rate": 1.8063441774485852e-05, "loss": 0.5583, "step": 13196 }, { "epoch": 0.4054004239240623, "grad_norm": 0.3613864481449127, "learning_rate": 1.806315593233745e-05, "loss": 0.6477, "step": 13197 }, { "epoch": 0.40543114305901146, "grad_norm": 0.32758575677871704, "learning_rate": 1.806287007135698e-05, "loss": 0.5787, "step": 13198 }, { "epoch": 0.40546186219396063, "grad_norm": 0.3378181457519531, "learning_rate": 1.806258419154512e-05, "loss": 0.533, "step": 13199 }, { "epoch": 0.40549258132890975, "grad_norm": 0.38123273849487305, "learning_rate": 1.8062298292902525e-05, "loss": 0.5713, "step": 13200 }, { "epoch": 0.4055233004638589, "grad_norm": 0.35432228446006775, "learning_rate": 1.8062012375429875e-05, "loss": 0.5416, "step": 13201 }, { "epoch": 0.4055540195988081, "grad_norm": 0.40621235966682434, "learning_rate": 1.8061726439127828e-05, "loss": 0.6668, "step": 13202 }, { "epoch": 0.4055847387337573, "grad_norm": 0.39514318108558655, "learning_rate": 1.8061440483997058e-05, "loss": 0.6575, "step": 13203 }, { "epoch": 0.4056154578687064, "grad_norm": 0.3433992564678192, "learning_rate": 1.806115451003823e-05, "loss": 0.5665, "step": 13204 }, { "epoch": 0.4056461770036556, "grad_norm": 0.40002697706222534, "learning_rate": 1.8060868517252014e-05, "loss": 0.5396, "step": 13205 }, { "epoch": 0.40567689613860475, "grad_norm": 0.37661364674568176, "learning_rate": 1.806058250563907e-05, "loss": 0.5882, "step": 13206 }, { "epoch": 0.4057076152735539, "grad_norm": 0.35503414273262024, "learning_rate": 1.8060296475200082e-05, "loss": 0.5415, "step": 13207 }, { "epoch": 0.40573833440850304, "grad_norm": 0.3804701268672943, "learning_rate": 1.8060010425935702e-05, "loss": 0.6116, "step": 13208 }, { "epoch": 0.4057690535434522, "grad_norm": 0.3633003234863281, "learning_rate": 1.8059724357846607e-05, "loss": 0.6884, "step": 13209 }, { "epoch": 0.4057997726784014, "grad_norm": 0.3466537892818451, "learning_rate": 1.8059438270933465e-05, "loss": 0.537, "step": 13210 }, { "epoch": 0.4058304918133505, "grad_norm": 0.3750211000442505, "learning_rate": 1.805915216519694e-05, "loss": 0.5972, "step": 13211 }, { "epoch": 0.4058612109482997, "grad_norm": 0.3553035259246826, "learning_rate": 1.8058866040637704e-05, "loss": 0.5616, "step": 13212 }, { "epoch": 0.40589193008324886, "grad_norm": 0.33633631467819214, "learning_rate": 1.805857989725642e-05, "loss": 0.5278, "step": 13213 }, { "epoch": 0.40592264921819804, "grad_norm": 0.34713441133499146, "learning_rate": 1.8058293735053762e-05, "loss": 0.5607, "step": 13214 }, { "epoch": 0.40595336835314716, "grad_norm": 0.3524875342845917, "learning_rate": 1.8058007554030398e-05, "loss": 0.5122, "step": 13215 }, { "epoch": 0.40598408748809633, "grad_norm": 0.3367280662059784, "learning_rate": 1.8057721354186992e-05, "loss": 0.6163, "step": 13216 }, { "epoch": 0.4060148066230455, "grad_norm": 0.4530256688594818, "learning_rate": 1.8057435135524214e-05, "loss": 0.5602, "step": 13217 }, { "epoch": 0.40604552575799463, "grad_norm": 0.42189085483551025, "learning_rate": 1.8057148898042737e-05, "loss": 0.5712, "step": 13218 }, { "epoch": 0.4060762448929438, "grad_norm": 0.3343675136566162, "learning_rate": 1.8056862641743222e-05, "loss": 0.6297, "step": 13219 }, { "epoch": 0.406106964027893, "grad_norm": 0.3363408148288727, "learning_rate": 1.8056576366626343e-05, "loss": 0.5987, "step": 13220 }, { "epoch": 0.40613768316284216, "grad_norm": 0.3495791256427765, "learning_rate": 1.8056290072692772e-05, "loss": 0.6063, "step": 13221 }, { "epoch": 0.4061684022977913, "grad_norm": 0.33670029044151306, "learning_rate": 1.8056003759943167e-05, "loss": 0.5258, "step": 13222 }, { "epoch": 0.40619912143274045, "grad_norm": 0.3376031517982483, "learning_rate": 1.8055717428378203e-05, "loss": 0.6161, "step": 13223 }, { "epoch": 0.4062298405676896, "grad_norm": 0.3932039737701416, "learning_rate": 1.805543107799855e-05, "loss": 0.5609, "step": 13224 }, { "epoch": 0.40626055970263875, "grad_norm": 0.37044891715049744, "learning_rate": 1.8055144708804878e-05, "loss": 0.5876, "step": 13225 }, { "epoch": 0.4062912788375879, "grad_norm": 0.3538510501384735, "learning_rate": 1.805485832079785e-05, "loss": 0.6034, "step": 13226 }, { "epoch": 0.4063219979725371, "grad_norm": 0.32308897376060486, "learning_rate": 1.8054571913978138e-05, "loss": 0.5295, "step": 13227 }, { "epoch": 0.40635271710748627, "grad_norm": 0.34119364619255066, "learning_rate": 1.805428548834641e-05, "loss": 0.6046, "step": 13228 }, { "epoch": 0.4063834362424354, "grad_norm": 0.44343268871307373, "learning_rate": 1.8053999043903335e-05, "loss": 0.6128, "step": 13229 }, { "epoch": 0.40641415537738457, "grad_norm": 0.37357407808303833, "learning_rate": 1.8053712580649585e-05, "loss": 0.6185, "step": 13230 }, { "epoch": 0.40644487451233374, "grad_norm": 0.33704420924186707, "learning_rate": 1.8053426098585826e-05, "loss": 0.6191, "step": 13231 }, { "epoch": 0.4064755936472829, "grad_norm": 0.41252601146698, "learning_rate": 1.8053139597712728e-05, "loss": 0.635, "step": 13232 }, { "epoch": 0.40650631278223204, "grad_norm": 0.36471667885780334, "learning_rate": 1.805285307803096e-05, "loss": 0.5965, "step": 13233 }, { "epoch": 0.4065370319171812, "grad_norm": 0.35035908222198486, "learning_rate": 1.805256653954119e-05, "loss": 0.5594, "step": 13234 }, { "epoch": 0.4065677510521304, "grad_norm": 0.3217068016529083, "learning_rate": 1.805227998224409e-05, "loss": 0.5772, "step": 13235 }, { "epoch": 0.4065984701870795, "grad_norm": 0.6090555787086487, "learning_rate": 1.8051993406140328e-05, "loss": 0.6203, "step": 13236 }, { "epoch": 0.4066291893220287, "grad_norm": 0.3153019845485687, "learning_rate": 1.805170681123057e-05, "loss": 0.571, "step": 13237 }, { "epoch": 0.40665990845697786, "grad_norm": 0.3324664235115051, "learning_rate": 1.8051420197515496e-05, "loss": 0.5393, "step": 13238 }, { "epoch": 0.40669062759192703, "grad_norm": 0.3292078673839569, "learning_rate": 1.805113356499576e-05, "loss": 0.6137, "step": 13239 }, { "epoch": 0.40672134672687615, "grad_norm": 0.3431033790111542, "learning_rate": 1.805084691367204e-05, "loss": 0.5802, "step": 13240 }, { "epoch": 0.40675206586182533, "grad_norm": 0.35035669803619385, "learning_rate": 1.805056024354501e-05, "loss": 0.4901, "step": 13241 }, { "epoch": 0.4067827849967745, "grad_norm": 0.3236664831638336, "learning_rate": 1.805027355461533e-05, "loss": 0.4737, "step": 13242 }, { "epoch": 0.4068135041317236, "grad_norm": 0.39731061458587646, "learning_rate": 1.8049986846883673e-05, "loss": 0.5591, "step": 13243 }, { "epoch": 0.4068442232666728, "grad_norm": 0.3950156569480896, "learning_rate": 1.804970012035071e-05, "loss": 0.5975, "step": 13244 }, { "epoch": 0.406874942401622, "grad_norm": 0.3723045885562897, "learning_rate": 1.8049413375017115e-05, "loss": 0.626, "step": 13245 }, { "epoch": 0.40690566153657115, "grad_norm": 0.3053271770477295, "learning_rate": 1.8049126610883547e-05, "loss": 0.558, "step": 13246 }, { "epoch": 0.40693638067152027, "grad_norm": 0.34489768743515015, "learning_rate": 1.8048839827950687e-05, "loss": 0.6384, "step": 13247 }, { "epoch": 0.40696709980646945, "grad_norm": 0.3326824903488159, "learning_rate": 1.8048553026219197e-05, "loss": 0.6656, "step": 13248 }, { "epoch": 0.4069978189414186, "grad_norm": 0.3346917927265167, "learning_rate": 1.804826620568975e-05, "loss": 0.5154, "step": 13249 }, { "epoch": 0.4070285380763678, "grad_norm": 0.3566778898239136, "learning_rate": 1.804797936636301e-05, "loss": 0.4848, "step": 13250 }, { "epoch": 0.4070592572113169, "grad_norm": 0.37307843565940857, "learning_rate": 1.8047692508239658e-05, "loss": 0.6664, "step": 13251 }, { "epoch": 0.4070899763462661, "grad_norm": 0.3397893011569977, "learning_rate": 1.8047405631320357e-05, "loss": 0.656, "step": 13252 }, { "epoch": 0.40712069548121527, "grad_norm": 0.34122607111930847, "learning_rate": 1.8047118735605775e-05, "loss": 0.5509, "step": 13253 }, { "epoch": 0.4071514146161644, "grad_norm": 0.3375268876552582, "learning_rate": 1.8046831821096588e-05, "loss": 0.5689, "step": 13254 }, { "epoch": 0.40718213375111356, "grad_norm": 0.335317999124527, "learning_rate": 1.804654488779346e-05, "loss": 0.5572, "step": 13255 }, { "epoch": 0.40721285288606274, "grad_norm": 0.31614312529563904, "learning_rate": 1.8046257935697072e-05, "loss": 0.5791, "step": 13256 }, { "epoch": 0.4072435720210119, "grad_norm": 0.37284180521965027, "learning_rate": 1.8045970964808082e-05, "loss": 0.5955, "step": 13257 }, { "epoch": 0.40727429115596103, "grad_norm": 0.3438921272754669, "learning_rate": 1.8045683975127164e-05, "loss": 0.5316, "step": 13258 }, { "epoch": 0.4073050102909102, "grad_norm": 0.30290886759757996, "learning_rate": 1.804539696665499e-05, "loss": 0.4196, "step": 13259 }, { "epoch": 0.4073357294258594, "grad_norm": 0.33215951919555664, "learning_rate": 1.8045109939392233e-05, "loss": 0.5516, "step": 13260 }, { "epoch": 0.4073664485608085, "grad_norm": 0.32777801156044006, "learning_rate": 1.8044822893339555e-05, "loss": 0.5778, "step": 13261 }, { "epoch": 0.4073971676957577, "grad_norm": 0.41430842876434326, "learning_rate": 1.8044535828497632e-05, "loss": 0.5779, "step": 13262 }, { "epoch": 0.40742788683070685, "grad_norm": 0.34821897745132446, "learning_rate": 1.8044248744867134e-05, "loss": 0.5796, "step": 13263 }, { "epoch": 0.40745860596565603, "grad_norm": 0.3759816288948059, "learning_rate": 1.804396164244873e-05, "loss": 0.6099, "step": 13264 }, { "epoch": 0.40748932510060515, "grad_norm": 0.33857786655426025, "learning_rate": 1.8043674521243097e-05, "loss": 0.532, "step": 13265 }, { "epoch": 0.4075200442355543, "grad_norm": 0.37014079093933105, "learning_rate": 1.8043387381250893e-05, "loss": 0.6462, "step": 13266 }, { "epoch": 0.4075507633705035, "grad_norm": 0.40906351804733276, "learning_rate": 1.8043100222472804e-05, "loss": 0.5497, "step": 13267 }, { "epoch": 0.4075814825054526, "grad_norm": 0.3239123523235321, "learning_rate": 1.8042813044909485e-05, "loss": 0.5066, "step": 13268 }, { "epoch": 0.4076122016404018, "grad_norm": 0.38072434067726135, "learning_rate": 1.804252584856162e-05, "loss": 0.5961, "step": 13269 }, { "epoch": 0.40764292077535097, "grad_norm": 0.3282376229763031, "learning_rate": 1.804223863342987e-05, "loss": 0.5276, "step": 13270 }, { "epoch": 0.40767363991030015, "grad_norm": 0.31671142578125, "learning_rate": 1.8041951399514915e-05, "loss": 0.6033, "step": 13271 }, { "epoch": 0.40770435904524926, "grad_norm": 0.356025755405426, "learning_rate": 1.8041664146817415e-05, "loss": 0.5688, "step": 13272 }, { "epoch": 0.40773507818019844, "grad_norm": 0.3766225576400757, "learning_rate": 1.804137687533805e-05, "loss": 0.5852, "step": 13273 }, { "epoch": 0.4077657973151476, "grad_norm": 0.33130133152008057, "learning_rate": 1.804108958507749e-05, "loss": 0.5311, "step": 13274 }, { "epoch": 0.4077965164500968, "grad_norm": 0.34380489587783813, "learning_rate": 1.80408022760364e-05, "loss": 0.5969, "step": 13275 }, { "epoch": 0.4078272355850459, "grad_norm": 0.3910300135612488, "learning_rate": 1.8040514948215457e-05, "loss": 0.5807, "step": 13276 }, { "epoch": 0.4078579547199951, "grad_norm": 0.34354084730148315, "learning_rate": 1.8040227601615326e-05, "loss": 0.5621, "step": 13277 }, { "epoch": 0.40788867385494426, "grad_norm": 0.36970123648643494, "learning_rate": 1.803994023623669e-05, "loss": 0.7063, "step": 13278 }, { "epoch": 0.4079193929898934, "grad_norm": 0.36374717950820923, "learning_rate": 1.80396528520802e-05, "loss": 0.584, "step": 13279 }, { "epoch": 0.40795011212484256, "grad_norm": 0.35005778074264526, "learning_rate": 1.8039365449146552e-05, "loss": 0.5467, "step": 13280 }, { "epoch": 0.40798083125979173, "grad_norm": 0.41552814841270447, "learning_rate": 1.8039078027436398e-05, "loss": 0.5856, "step": 13281 }, { "epoch": 0.4080115503947409, "grad_norm": 0.3755143880844116, "learning_rate": 1.8038790586950417e-05, "loss": 0.4838, "step": 13282 }, { "epoch": 0.40804226952969, "grad_norm": 0.3313836455345154, "learning_rate": 1.8038503127689277e-05, "loss": 0.5163, "step": 13283 }, { "epoch": 0.4080729886646392, "grad_norm": 0.4111133813858032, "learning_rate": 1.8038215649653654e-05, "loss": 0.5678, "step": 13284 }, { "epoch": 0.4081037077995884, "grad_norm": 0.35680222511291504, "learning_rate": 1.8037928152844214e-05, "loss": 0.5383, "step": 13285 }, { "epoch": 0.4081344269345375, "grad_norm": 0.3705424666404724, "learning_rate": 1.8037640637261634e-05, "loss": 0.6008, "step": 13286 }, { "epoch": 0.4081651460694867, "grad_norm": 0.3411761224269867, "learning_rate": 1.8037353102906584e-05, "loss": 0.5767, "step": 13287 }, { "epoch": 0.40819586520443585, "grad_norm": 0.3526240587234497, "learning_rate": 1.8037065549779733e-05, "loss": 0.5595, "step": 13288 }, { "epoch": 0.408226584339385, "grad_norm": 0.3399559557437897, "learning_rate": 1.8036777977881754e-05, "loss": 0.5718, "step": 13289 }, { "epoch": 0.40825730347433414, "grad_norm": 0.32839441299438477, "learning_rate": 1.8036490387213318e-05, "loss": 0.5579, "step": 13290 }, { "epoch": 0.4082880226092833, "grad_norm": 0.3611079156398773, "learning_rate": 1.80362027777751e-05, "loss": 0.5418, "step": 13291 }, { "epoch": 0.4083187417442325, "grad_norm": 0.34611862897872925, "learning_rate": 1.8035915149567766e-05, "loss": 0.5117, "step": 13292 }, { "epoch": 0.40834946087918167, "grad_norm": 0.3515903651714325, "learning_rate": 1.803562750259199e-05, "loss": 0.5453, "step": 13293 }, { "epoch": 0.4083801800141308, "grad_norm": 0.3842042088508606, "learning_rate": 1.803533983684845e-05, "loss": 0.6688, "step": 13294 }, { "epoch": 0.40841089914907996, "grad_norm": 0.36423954367637634, "learning_rate": 1.803505215233781e-05, "loss": 0.5455, "step": 13295 }, { "epoch": 0.40844161828402914, "grad_norm": 0.3952051103115082, "learning_rate": 1.8034764449060745e-05, "loss": 0.5482, "step": 13296 }, { "epoch": 0.40847233741897826, "grad_norm": 0.5053848624229431, "learning_rate": 1.8034476727017926e-05, "loss": 0.5751, "step": 13297 }, { "epoch": 0.40850305655392743, "grad_norm": 0.33920490741729736, "learning_rate": 1.8034188986210023e-05, "loss": 0.5279, "step": 13298 }, { "epoch": 0.4085337756888766, "grad_norm": 0.38294529914855957, "learning_rate": 1.8033901226637715e-05, "loss": 0.4926, "step": 13299 }, { "epoch": 0.4085644948238258, "grad_norm": 0.34102192521095276, "learning_rate": 1.803361344830167e-05, "loss": 0.6414, "step": 13300 }, { "epoch": 0.4085952139587749, "grad_norm": 0.3494352102279663, "learning_rate": 1.8033325651202552e-05, "loss": 0.6115, "step": 13301 }, { "epoch": 0.4086259330937241, "grad_norm": 1.5121268033981323, "learning_rate": 1.803303783534105e-05, "loss": 0.5572, "step": 13302 }, { "epoch": 0.40865665222867326, "grad_norm": 0.42827755212783813, "learning_rate": 1.803275000071782e-05, "loss": 0.5825, "step": 13303 }, { "epoch": 0.4086873713636224, "grad_norm": 0.3109184205532074, "learning_rate": 1.8032462147333544e-05, "loss": 0.5441, "step": 13304 }, { "epoch": 0.40871809049857155, "grad_norm": 0.34212058782577515, "learning_rate": 1.8032174275188894e-05, "loss": 0.5435, "step": 13305 }, { "epoch": 0.4087488096335207, "grad_norm": 0.3654639720916748, "learning_rate": 1.8031886384284538e-05, "loss": 0.6191, "step": 13306 }, { "epoch": 0.4087795287684699, "grad_norm": 0.3273680508136749, "learning_rate": 1.8031598474621153e-05, "loss": 0.575, "step": 13307 }, { "epoch": 0.408810247903419, "grad_norm": 0.3898259699344635, "learning_rate": 1.8031310546199406e-05, "loss": 0.8062, "step": 13308 }, { "epoch": 0.4088409670383682, "grad_norm": 0.6514219641685486, "learning_rate": 1.8031022599019973e-05, "loss": 0.6056, "step": 13309 }, { "epoch": 0.4088716861733174, "grad_norm": 0.35000869631767273, "learning_rate": 1.8030734633083524e-05, "loss": 0.5175, "step": 13310 }, { "epoch": 0.4089024053082665, "grad_norm": 0.3438626229763031, "learning_rate": 1.8030446648390736e-05, "loss": 0.5504, "step": 13311 }, { "epoch": 0.40893312444321567, "grad_norm": 0.35130393505096436, "learning_rate": 1.8030158644942278e-05, "loss": 0.6448, "step": 13312 }, { "epoch": 0.40896384357816484, "grad_norm": 0.6816607713699341, "learning_rate": 1.8029870622738822e-05, "loss": 0.5428, "step": 13313 }, { "epoch": 0.408994562713114, "grad_norm": 0.7303506135940552, "learning_rate": 1.8029582581781048e-05, "loss": 0.5898, "step": 13314 }, { "epoch": 0.40902528184806314, "grad_norm": 0.3346390128135681, "learning_rate": 1.8029294522069615e-05, "loss": 0.5541, "step": 13315 }, { "epoch": 0.4090560009830123, "grad_norm": 0.33808669447898865, "learning_rate": 1.802900644360521e-05, "loss": 0.6048, "step": 13316 }, { "epoch": 0.4090867201179615, "grad_norm": 0.4064679741859436, "learning_rate": 1.8028718346388498e-05, "loss": 0.6017, "step": 13317 }, { "epoch": 0.40911743925291066, "grad_norm": 0.863000214099884, "learning_rate": 1.8028430230420152e-05, "loss": 0.6029, "step": 13318 }, { "epoch": 0.4091481583878598, "grad_norm": 0.42155924439430237, "learning_rate": 1.8028142095700848e-05, "loss": 0.6161, "step": 13319 }, { "epoch": 0.40917887752280896, "grad_norm": 0.3434848487377167, "learning_rate": 1.802785394223126e-05, "loss": 0.5664, "step": 13320 }, { "epoch": 0.40920959665775813, "grad_norm": 0.3420056700706482, "learning_rate": 1.8027565770012056e-05, "loss": 0.5686, "step": 13321 }, { "epoch": 0.40924031579270725, "grad_norm": 0.3669591546058655, "learning_rate": 1.802727757904391e-05, "loss": 0.5722, "step": 13322 }, { "epoch": 0.40927103492765643, "grad_norm": 0.3542930781841278, "learning_rate": 1.8026989369327495e-05, "loss": 0.6528, "step": 13323 }, { "epoch": 0.4093017540626056, "grad_norm": 0.3773088753223419, "learning_rate": 1.802670114086349e-05, "loss": 0.5931, "step": 13324 }, { "epoch": 0.4093324731975548, "grad_norm": 0.34135720133781433, "learning_rate": 1.8026412893652563e-05, "loss": 0.5468, "step": 13325 }, { "epoch": 0.4093631923325039, "grad_norm": 0.4171135425567627, "learning_rate": 1.8026124627695388e-05, "loss": 0.6527, "step": 13326 }, { "epoch": 0.4093939114674531, "grad_norm": 0.31659573316574097, "learning_rate": 1.8025836342992637e-05, "loss": 0.5751, "step": 13327 }, { "epoch": 0.40942463060240225, "grad_norm": 0.3256969153881073, "learning_rate": 1.8025548039544987e-05, "loss": 0.5755, "step": 13328 }, { "epoch": 0.40945534973735137, "grad_norm": 0.35562482476234436, "learning_rate": 1.8025259717353107e-05, "loss": 0.5561, "step": 13329 }, { "epoch": 0.40948606887230055, "grad_norm": 0.34955406188964844, "learning_rate": 1.8024971376417673e-05, "loss": 0.6456, "step": 13330 }, { "epoch": 0.4095167880072497, "grad_norm": 0.34488391876220703, "learning_rate": 1.802468301673936e-05, "loss": 0.5838, "step": 13331 }, { "epoch": 0.4095475071421989, "grad_norm": 0.3290219306945801, "learning_rate": 1.8024394638318835e-05, "loss": 0.5632, "step": 13332 }, { "epoch": 0.409578226277148, "grad_norm": 0.32421875, "learning_rate": 1.802410624115678e-05, "loss": 0.5589, "step": 13333 }, { "epoch": 0.4096089454120972, "grad_norm": 0.34710243344306946, "learning_rate": 1.8023817825253865e-05, "loss": 0.6044, "step": 13334 }, { "epoch": 0.40963966454704637, "grad_norm": 0.3590364456176758, "learning_rate": 1.8023529390610762e-05, "loss": 0.6611, "step": 13335 }, { "epoch": 0.40967038368199554, "grad_norm": 0.36385223269462585, "learning_rate": 1.8023240937228143e-05, "loss": 0.5473, "step": 13336 }, { "epoch": 0.40970110281694466, "grad_norm": 0.3431869149208069, "learning_rate": 1.8022952465106685e-05, "loss": 0.5697, "step": 13337 }, { "epoch": 0.40973182195189384, "grad_norm": 0.3227713406085968, "learning_rate": 1.8022663974247062e-05, "loss": 0.5742, "step": 13338 }, { "epoch": 0.409762541086843, "grad_norm": 0.3400222957134247, "learning_rate": 1.802237546464995e-05, "loss": 0.5843, "step": 13339 }, { "epoch": 0.40979326022179213, "grad_norm": 0.3274899423122406, "learning_rate": 1.8022086936316013e-05, "loss": 0.6178, "step": 13340 }, { "epoch": 0.4098239793567413, "grad_norm": 0.3146752119064331, "learning_rate": 1.8021798389245936e-05, "loss": 0.5623, "step": 13341 }, { "epoch": 0.4098546984916905, "grad_norm": 0.35686931014060974, "learning_rate": 1.8021509823440394e-05, "loss": 0.5595, "step": 13342 }, { "epoch": 0.40988541762663966, "grad_norm": 0.406803697347641, "learning_rate": 1.802122123890005e-05, "loss": 0.6071, "step": 13343 }, { "epoch": 0.4099161367615888, "grad_norm": 0.44267651438713074, "learning_rate": 1.8020932635625582e-05, "loss": 0.6204, "step": 13344 }, { "epoch": 0.40994685589653795, "grad_norm": 0.35030630230903625, "learning_rate": 1.8020644013617666e-05, "loss": 0.5992, "step": 13345 }, { "epoch": 0.40997757503148713, "grad_norm": 0.5320367217063904, "learning_rate": 1.8020355372876976e-05, "loss": 0.5301, "step": 13346 }, { "epoch": 0.41000829416643625, "grad_norm": 0.38668292760849, "learning_rate": 1.802006671340419e-05, "loss": 0.5461, "step": 13347 }, { "epoch": 0.4100390133013854, "grad_norm": 0.35920238494873047, "learning_rate": 1.801977803519997e-05, "loss": 0.552, "step": 13348 }, { "epoch": 0.4100697324363346, "grad_norm": 0.33744069933891296, "learning_rate": 1.8019489338265005e-05, "loss": 0.5512, "step": 13349 }, { "epoch": 0.4101004515712838, "grad_norm": 0.4004199504852295, "learning_rate": 1.8019200622599965e-05, "loss": 0.6286, "step": 13350 }, { "epoch": 0.4101311707062329, "grad_norm": 0.4518113136291504, "learning_rate": 1.8018911888205516e-05, "loss": 0.4756, "step": 13351 }, { "epoch": 0.41016188984118207, "grad_norm": 0.36341455578804016, "learning_rate": 1.801862313508234e-05, "loss": 0.6375, "step": 13352 }, { "epoch": 0.41019260897613125, "grad_norm": 0.3458767235279083, "learning_rate": 1.801833436323111e-05, "loss": 0.5817, "step": 13353 }, { "epoch": 0.41022332811108037, "grad_norm": 0.3540935218334198, "learning_rate": 1.80180455726525e-05, "loss": 0.6826, "step": 13354 }, { "epoch": 0.41025404724602954, "grad_norm": 0.36973273754119873, "learning_rate": 1.801775676334718e-05, "loss": 0.5811, "step": 13355 }, { "epoch": 0.4102847663809787, "grad_norm": 0.34720316529273987, "learning_rate": 1.801746793531584e-05, "loss": 0.5031, "step": 13356 }, { "epoch": 0.4103154855159279, "grad_norm": 0.35198694467544556, "learning_rate": 1.8017179088559135e-05, "loss": 0.6159, "step": 13357 }, { "epoch": 0.410346204650877, "grad_norm": 0.35134315490722656, "learning_rate": 1.801689022307775e-05, "loss": 0.6007, "step": 13358 }, { "epoch": 0.4103769237858262, "grad_norm": 0.3622816205024719, "learning_rate": 1.801660133887236e-05, "loss": 0.5915, "step": 13359 }, { "epoch": 0.41040764292077536, "grad_norm": 0.3385726809501648, "learning_rate": 1.8016312435943638e-05, "loss": 0.633, "step": 13360 }, { "epoch": 0.41043836205572454, "grad_norm": 0.3584575653076172, "learning_rate": 1.8016023514292257e-05, "loss": 0.5789, "step": 13361 }, { "epoch": 0.41046908119067366, "grad_norm": 0.3935995101928711, "learning_rate": 1.8015734573918894e-05, "loss": 0.6129, "step": 13362 }, { "epoch": 0.41049980032562283, "grad_norm": 0.3636106550693512, "learning_rate": 1.8015445614824218e-05, "loss": 0.6219, "step": 13363 }, { "epoch": 0.410530519460572, "grad_norm": 0.3081815540790558, "learning_rate": 1.8015156637008915e-05, "loss": 0.5358, "step": 13364 }, { "epoch": 0.4105612385955211, "grad_norm": 0.3335350453853607, "learning_rate": 1.8014867640473652e-05, "loss": 0.4897, "step": 13365 }, { "epoch": 0.4105919577304703, "grad_norm": 0.378714382648468, "learning_rate": 1.8014578625219106e-05, "loss": 0.5607, "step": 13366 }, { "epoch": 0.4106226768654195, "grad_norm": 0.37269750237464905, "learning_rate": 1.801428959124595e-05, "loss": 0.5511, "step": 13367 }, { "epoch": 0.41065339600036865, "grad_norm": 0.3774247169494629, "learning_rate": 1.8014000538554865e-05, "loss": 0.5782, "step": 13368 }, { "epoch": 0.4106841151353178, "grad_norm": 0.33441925048828125, "learning_rate": 1.801371146714652e-05, "loss": 0.5417, "step": 13369 }, { "epoch": 0.41071483427026695, "grad_norm": 0.3476141393184662, "learning_rate": 1.801342237702159e-05, "loss": 0.5688, "step": 13370 }, { "epoch": 0.4107455534052161, "grad_norm": 0.41763246059417725, "learning_rate": 1.8013133268180757e-05, "loss": 0.6408, "step": 13371 }, { "epoch": 0.41077627254016524, "grad_norm": 0.33434662222862244, "learning_rate": 1.801284414062469e-05, "loss": 0.555, "step": 13372 }, { "epoch": 0.4108069916751144, "grad_norm": 0.9210301041603088, "learning_rate": 1.8012554994354062e-05, "loss": 0.7642, "step": 13373 }, { "epoch": 0.4108377108100636, "grad_norm": 0.34701699018478394, "learning_rate": 1.8012265829369554e-05, "loss": 0.6015, "step": 13374 }, { "epoch": 0.41086842994501277, "grad_norm": 0.3850666880607605, "learning_rate": 1.8011976645671844e-05, "loss": 0.5254, "step": 13375 }, { "epoch": 0.4108991490799619, "grad_norm": 0.35702842473983765, "learning_rate": 1.8011687443261598e-05, "loss": 0.5641, "step": 13376 }, { "epoch": 0.41092986821491106, "grad_norm": 0.35126301646232605, "learning_rate": 1.80113982221395e-05, "loss": 0.5878, "step": 13377 }, { "epoch": 0.41096058734986024, "grad_norm": 0.36517664790153503, "learning_rate": 1.801110898230622e-05, "loss": 0.5665, "step": 13378 }, { "epoch": 0.4109913064848094, "grad_norm": 0.5095042586326599, "learning_rate": 1.8010819723762432e-05, "loss": 0.6057, "step": 13379 }, { "epoch": 0.41102202561975854, "grad_norm": 0.3582266867160797, "learning_rate": 1.801053044650882e-05, "loss": 0.5922, "step": 13380 }, { "epoch": 0.4110527447547077, "grad_norm": 0.3832608163356781, "learning_rate": 1.8010241150546054e-05, "loss": 0.5406, "step": 13381 }, { "epoch": 0.4110834638896569, "grad_norm": 0.3821364939212799, "learning_rate": 1.8009951835874807e-05, "loss": 0.5684, "step": 13382 }, { "epoch": 0.411114183024606, "grad_norm": 0.5626112222671509, "learning_rate": 1.800966250249576e-05, "loss": 0.6063, "step": 13383 }, { "epoch": 0.4111449021595552, "grad_norm": 0.354559063911438, "learning_rate": 1.800937315040959e-05, "loss": 0.5979, "step": 13384 }, { "epoch": 0.41117562129450436, "grad_norm": 0.3662196695804596, "learning_rate": 1.800908377961697e-05, "loss": 0.5996, "step": 13385 }, { "epoch": 0.41120634042945353, "grad_norm": 0.4314398169517517, "learning_rate": 1.800879439011857e-05, "loss": 0.6215, "step": 13386 }, { "epoch": 0.41123705956440265, "grad_norm": 0.3626413941383362, "learning_rate": 1.8008504981915073e-05, "loss": 0.5881, "step": 13387 }, { "epoch": 0.4112677786993518, "grad_norm": 0.3779001235961914, "learning_rate": 1.8008215555007157e-05, "loss": 0.5951, "step": 13388 }, { "epoch": 0.411298497834301, "grad_norm": 0.3261342942714691, "learning_rate": 1.8007926109395493e-05, "loss": 0.5862, "step": 13389 }, { "epoch": 0.4113292169692501, "grad_norm": 0.4207363724708557, "learning_rate": 1.8007636645080755e-05, "loss": 0.594, "step": 13390 }, { "epoch": 0.4113599361041993, "grad_norm": 0.3282261788845062, "learning_rate": 1.8007347162063625e-05, "loss": 0.5531, "step": 13391 }, { "epoch": 0.4113906552391485, "grad_norm": 0.31070226430892944, "learning_rate": 1.8007057660344776e-05, "loss": 0.5048, "step": 13392 }, { "epoch": 0.41142137437409765, "grad_norm": 0.3748149275779724, "learning_rate": 1.8006768139924884e-05, "loss": 0.587, "step": 13393 }, { "epoch": 0.41145209350904677, "grad_norm": 0.3548424541950226, "learning_rate": 1.8006478600804628e-05, "loss": 0.5898, "step": 13394 }, { "epoch": 0.41148281264399594, "grad_norm": 0.3877282738685608, "learning_rate": 1.800618904298468e-05, "loss": 0.5201, "step": 13395 }, { "epoch": 0.4115135317789451, "grad_norm": 0.35842517018318176, "learning_rate": 1.8005899466465717e-05, "loss": 0.6257, "step": 13396 }, { "epoch": 0.41154425091389424, "grad_norm": 0.34570077061653137, "learning_rate": 1.800560987124842e-05, "loss": 0.5621, "step": 13397 }, { "epoch": 0.4115749700488434, "grad_norm": 0.35815566778182983, "learning_rate": 1.8005320257333458e-05, "loss": 0.5763, "step": 13398 }, { "epoch": 0.4116056891837926, "grad_norm": 0.401619553565979, "learning_rate": 1.8005030624721517e-05, "loss": 0.6326, "step": 13399 }, { "epoch": 0.41163640831874176, "grad_norm": 0.366493821144104, "learning_rate": 1.800474097341326e-05, "loss": 0.5188, "step": 13400 }, { "epoch": 0.4116671274536909, "grad_norm": 0.32717084884643555, "learning_rate": 1.800445130340938e-05, "loss": 0.5047, "step": 13401 }, { "epoch": 0.41169784658864006, "grad_norm": 0.34303492307662964, "learning_rate": 1.800416161471054e-05, "loss": 0.5046, "step": 13402 }, { "epoch": 0.41172856572358923, "grad_norm": 0.38064152002334595, "learning_rate": 1.8003871907317423e-05, "loss": 0.5602, "step": 13403 }, { "epoch": 0.4117592848585384, "grad_norm": 0.34689226746559143, "learning_rate": 1.8003582181230704e-05, "loss": 0.5749, "step": 13404 }, { "epoch": 0.41179000399348753, "grad_norm": 0.38675081729888916, "learning_rate": 1.8003292436451062e-05, "loss": 0.568, "step": 13405 }, { "epoch": 0.4118207231284367, "grad_norm": 0.32017284631729126, "learning_rate": 1.8003002672979165e-05, "loss": 0.5632, "step": 13406 }, { "epoch": 0.4118514422633859, "grad_norm": 0.36230817437171936, "learning_rate": 1.8002712890815703e-05, "loss": 0.6522, "step": 13407 }, { "epoch": 0.411882161398335, "grad_norm": 0.3519146740436554, "learning_rate": 1.8002423089961342e-05, "loss": 0.5931, "step": 13408 }, { "epoch": 0.4119128805332842, "grad_norm": 0.37563830614089966, "learning_rate": 1.8002133270416765e-05, "loss": 0.6461, "step": 13409 }, { "epoch": 0.41194359966823335, "grad_norm": 0.3450593054294586, "learning_rate": 1.8001843432182646e-05, "loss": 0.4857, "step": 13410 }, { "epoch": 0.4119743188031825, "grad_norm": 0.4909997880458832, "learning_rate": 1.8001553575259663e-05, "loss": 0.571, "step": 13411 }, { "epoch": 0.41200503793813165, "grad_norm": 0.44551292061805725, "learning_rate": 1.8001263699648495e-05, "loss": 0.6264, "step": 13412 }, { "epoch": 0.4120357570730808, "grad_norm": 0.3315099775791168, "learning_rate": 1.8000973805349814e-05, "loss": 0.5051, "step": 13413 }, { "epoch": 0.41206647620803, "grad_norm": 0.3375314772129059, "learning_rate": 1.8000683892364303e-05, "loss": 0.5464, "step": 13414 }, { "epoch": 0.4120971953429791, "grad_norm": 0.35366058349609375, "learning_rate": 1.800039396069263e-05, "loss": 0.5322, "step": 13415 }, { "epoch": 0.4121279144779283, "grad_norm": 0.3705359101295471, "learning_rate": 1.8000104010335487e-05, "loss": 0.6015, "step": 13416 }, { "epoch": 0.41215863361287747, "grad_norm": 0.348046213388443, "learning_rate": 1.7999814041293535e-05, "loss": 0.6326, "step": 13417 }, { "epoch": 0.41218935274782664, "grad_norm": 0.3741365969181061, "learning_rate": 1.7999524053567463e-05, "loss": 0.5641, "step": 13418 }, { "epoch": 0.41222007188277576, "grad_norm": 0.3592749238014221, "learning_rate": 1.799923404715794e-05, "loss": 0.6061, "step": 13419 }, { "epoch": 0.41225079101772494, "grad_norm": 0.3400217294692993, "learning_rate": 1.799894402206565e-05, "loss": 0.6096, "step": 13420 }, { "epoch": 0.4122815101526741, "grad_norm": 0.3532443940639496, "learning_rate": 1.7998653978291266e-05, "loss": 0.5705, "step": 13421 }, { "epoch": 0.41231222928762323, "grad_norm": 0.3382752239704132, "learning_rate": 1.799836391583547e-05, "loss": 0.5834, "step": 13422 }, { "epoch": 0.4123429484225724, "grad_norm": 0.3300535976886749, "learning_rate": 1.7998073834698934e-05, "loss": 0.5238, "step": 13423 }, { "epoch": 0.4123736675575216, "grad_norm": 0.3445984721183777, "learning_rate": 1.799778373488234e-05, "loss": 0.4673, "step": 13424 }, { "epoch": 0.41240438669247076, "grad_norm": 0.35489359498023987, "learning_rate": 1.799749361638636e-05, "loss": 0.6336, "step": 13425 }, { "epoch": 0.4124351058274199, "grad_norm": 0.34575605392456055, "learning_rate": 1.7997203479211678e-05, "loss": 0.5109, "step": 13426 }, { "epoch": 0.41246582496236905, "grad_norm": 0.3383218050003052, "learning_rate": 1.7996913323358967e-05, "loss": 0.6087, "step": 13427 }, { "epoch": 0.41249654409731823, "grad_norm": 0.33458301424980164, "learning_rate": 1.7996623148828906e-05, "loss": 0.5998, "step": 13428 }, { "epoch": 0.4125272632322674, "grad_norm": 0.37094035744667053, "learning_rate": 1.7996332955622175e-05, "loss": 0.5889, "step": 13429 }, { "epoch": 0.4125579823672165, "grad_norm": 0.3577995002269745, "learning_rate": 1.799604274373945e-05, "loss": 0.5978, "step": 13430 }, { "epoch": 0.4125887015021657, "grad_norm": 0.3698597252368927, "learning_rate": 1.7995752513181406e-05, "loss": 0.5251, "step": 13431 }, { "epoch": 0.4126194206371149, "grad_norm": 0.3546057343482971, "learning_rate": 1.7995462263948728e-05, "loss": 0.5888, "step": 13432 }, { "epoch": 0.412650139772064, "grad_norm": 0.44069668650627136, "learning_rate": 1.7995171996042086e-05, "loss": 0.541, "step": 13433 }, { "epoch": 0.41268085890701317, "grad_norm": 0.3702760934829712, "learning_rate": 1.7994881709462163e-05, "loss": 0.5884, "step": 13434 }, { "epoch": 0.41271157804196235, "grad_norm": 0.3380815386772156, "learning_rate": 1.7994591404209632e-05, "loss": 0.5646, "step": 13435 }, { "epoch": 0.4127422971769115, "grad_norm": 0.3412424325942993, "learning_rate": 1.799430108028518e-05, "loss": 0.5543, "step": 13436 }, { "epoch": 0.41277301631186064, "grad_norm": 0.3341761827468872, "learning_rate": 1.7994010737689476e-05, "loss": 0.6314, "step": 13437 }, { "epoch": 0.4128037354468098, "grad_norm": 0.40015873312950134, "learning_rate": 1.7993720376423202e-05, "loss": 0.6146, "step": 13438 }, { "epoch": 0.412834454581759, "grad_norm": 0.36581161618232727, "learning_rate": 1.7993429996487038e-05, "loss": 0.5966, "step": 13439 }, { "epoch": 0.4128651737167081, "grad_norm": 0.3178241550922394, "learning_rate": 1.7993139597881657e-05, "loss": 0.5407, "step": 13440 }, { "epoch": 0.4128958928516573, "grad_norm": 0.3412364423274994, "learning_rate": 1.7992849180607742e-05, "loss": 0.6614, "step": 13441 }, { "epoch": 0.41292661198660646, "grad_norm": 0.3445812165737152, "learning_rate": 1.799255874466597e-05, "loss": 0.4987, "step": 13442 }, { "epoch": 0.41295733112155564, "grad_norm": 0.38411086797714233, "learning_rate": 1.7992268290057017e-05, "loss": 0.5462, "step": 13443 }, { "epoch": 0.41298805025650476, "grad_norm": 0.37098950147628784, "learning_rate": 1.7991977816781562e-05, "loss": 0.5813, "step": 13444 }, { "epoch": 0.41301876939145393, "grad_norm": 0.33148059248924255, "learning_rate": 1.7991687324840287e-05, "loss": 0.5671, "step": 13445 }, { "epoch": 0.4130494885264031, "grad_norm": 0.3676229417324066, "learning_rate": 1.799139681423387e-05, "loss": 0.5351, "step": 13446 }, { "epoch": 0.4130802076613523, "grad_norm": 0.37273865938186646, "learning_rate": 1.7991106284962986e-05, "loss": 0.558, "step": 13447 }, { "epoch": 0.4131109267963014, "grad_norm": 0.3445177972316742, "learning_rate": 1.7990815737028312e-05, "loss": 0.61, "step": 13448 }, { "epoch": 0.4131416459312506, "grad_norm": 0.34974682331085205, "learning_rate": 1.7990525170430535e-05, "loss": 0.5742, "step": 13449 }, { "epoch": 0.41317236506619975, "grad_norm": 0.3356606662273407, "learning_rate": 1.7990234585170325e-05, "loss": 0.5263, "step": 13450 }, { "epoch": 0.4132030842011489, "grad_norm": 0.39067623019218445, "learning_rate": 1.7989943981248363e-05, "loss": 0.6215, "step": 13451 }, { "epoch": 0.41323380333609805, "grad_norm": 0.4276455044746399, "learning_rate": 1.7989653358665334e-05, "loss": 0.5612, "step": 13452 }, { "epoch": 0.4132645224710472, "grad_norm": 0.42444127798080444, "learning_rate": 1.7989362717421904e-05, "loss": 0.537, "step": 13453 }, { "epoch": 0.4132952416059964, "grad_norm": 0.31514284014701843, "learning_rate": 1.7989072057518765e-05, "loss": 0.5642, "step": 13454 }, { "epoch": 0.4133259607409455, "grad_norm": 0.33914822340011597, "learning_rate": 1.798878137895659e-05, "loss": 0.5104, "step": 13455 }, { "epoch": 0.4133566798758947, "grad_norm": 0.3853336274623871, "learning_rate": 1.7988490681736058e-05, "loss": 0.6215, "step": 13456 }, { "epoch": 0.41338739901084387, "grad_norm": 0.3371196389198303, "learning_rate": 1.7988199965857845e-05, "loss": 0.5803, "step": 13457 }, { "epoch": 0.413418118145793, "grad_norm": 0.33220669627189636, "learning_rate": 1.7987909231322637e-05, "loss": 0.563, "step": 13458 }, { "epoch": 0.41344883728074217, "grad_norm": 0.309817910194397, "learning_rate": 1.7987618478131108e-05, "loss": 0.5207, "step": 13459 }, { "epoch": 0.41347955641569134, "grad_norm": 0.38921141624450684, "learning_rate": 1.7987327706283936e-05, "loss": 0.4965, "step": 13460 }, { "epoch": 0.4135102755506405, "grad_norm": 0.35291197896003723, "learning_rate": 1.7987036915781805e-05, "loss": 0.711, "step": 13461 }, { "epoch": 0.41354099468558964, "grad_norm": 0.6277258992195129, "learning_rate": 1.798674610662539e-05, "loss": 0.6558, "step": 13462 }, { "epoch": 0.4135717138205388, "grad_norm": 0.3130023181438446, "learning_rate": 1.7986455278815374e-05, "loss": 0.5429, "step": 13463 }, { "epoch": 0.413602432955488, "grad_norm": 0.42266830801963806, "learning_rate": 1.7986164432352432e-05, "loss": 0.5708, "step": 13464 }, { "epoch": 0.4136331520904371, "grad_norm": 0.3828681409358978, "learning_rate": 1.7985873567237246e-05, "loss": 0.5414, "step": 13465 }, { "epoch": 0.4136638712253863, "grad_norm": 0.3780165910720825, "learning_rate": 1.7985582683470492e-05, "loss": 0.5613, "step": 13466 }, { "epoch": 0.41369459036033546, "grad_norm": 0.3308892548084259, "learning_rate": 1.7985291781052852e-05, "loss": 0.6101, "step": 13467 }, { "epoch": 0.41372530949528463, "grad_norm": 0.3487844467163086, "learning_rate": 1.7985000859985007e-05, "loss": 0.605, "step": 13468 }, { "epoch": 0.41375602863023375, "grad_norm": 0.3884526491165161, "learning_rate": 1.7984709920267635e-05, "loss": 0.5381, "step": 13469 }, { "epoch": 0.4137867477651829, "grad_norm": 0.3837454319000244, "learning_rate": 1.798441896190142e-05, "loss": 0.645, "step": 13470 }, { "epoch": 0.4138174669001321, "grad_norm": 0.3347581624984741, "learning_rate": 1.798412798488703e-05, "loss": 0.6185, "step": 13471 }, { "epoch": 0.4138481860350813, "grad_norm": 0.33069300651550293, "learning_rate": 1.7983836989225155e-05, "loss": 0.5933, "step": 13472 }, { "epoch": 0.4138789051700304, "grad_norm": 0.33278778195381165, "learning_rate": 1.798354597491647e-05, "loss": 0.5312, "step": 13473 }, { "epoch": 0.4139096243049796, "grad_norm": 0.3195437490940094, "learning_rate": 1.7983254941961653e-05, "loss": 0.6161, "step": 13474 }, { "epoch": 0.41394034343992875, "grad_norm": 0.3487446904182434, "learning_rate": 1.7982963890361392e-05, "loss": 0.5015, "step": 13475 }, { "epoch": 0.41397106257487787, "grad_norm": 0.3408828377723694, "learning_rate": 1.7982672820116354e-05, "loss": 0.5428, "step": 13476 }, { "epoch": 0.41400178170982704, "grad_norm": 0.3633253872394562, "learning_rate": 1.7982381731227233e-05, "loss": 0.5982, "step": 13477 }, { "epoch": 0.4140325008447762, "grad_norm": 0.37097978591918945, "learning_rate": 1.79820906236947e-05, "loss": 0.5949, "step": 13478 }, { "epoch": 0.4140632199797254, "grad_norm": 0.36620303988456726, "learning_rate": 1.7981799497519437e-05, "loss": 0.5652, "step": 13479 }, { "epoch": 0.4140939391146745, "grad_norm": 0.31966632604599, "learning_rate": 1.7981508352702123e-05, "loss": 0.5522, "step": 13480 }, { "epoch": 0.4141246582496237, "grad_norm": 0.377055287361145, "learning_rate": 1.7981217189243437e-05, "loss": 0.6299, "step": 13481 }, { "epoch": 0.41415537738457286, "grad_norm": 0.2899676263332367, "learning_rate": 1.7980926007144063e-05, "loss": 0.4797, "step": 13482 }, { "epoch": 0.414186096519522, "grad_norm": 0.32399389147758484, "learning_rate": 1.798063480640468e-05, "loss": 0.5897, "step": 13483 }, { "epoch": 0.41421681565447116, "grad_norm": 0.33974161744117737, "learning_rate": 1.7980343587025964e-05, "loss": 0.5951, "step": 13484 }, { "epoch": 0.41424753478942034, "grad_norm": 0.32968106865882874, "learning_rate": 1.7980052349008602e-05, "loss": 0.5818, "step": 13485 }, { "epoch": 0.4142782539243695, "grad_norm": 0.37111541628837585, "learning_rate": 1.797976109235327e-05, "loss": 0.6293, "step": 13486 }, { "epoch": 0.41430897305931863, "grad_norm": 0.3460269868373871, "learning_rate": 1.7979469817060647e-05, "loss": 0.5263, "step": 13487 }, { "epoch": 0.4143396921942678, "grad_norm": 0.3377259075641632, "learning_rate": 1.7979178523131416e-05, "loss": 0.5952, "step": 13488 }, { "epoch": 0.414370411329217, "grad_norm": 0.3228611648082733, "learning_rate": 1.7978887210566258e-05, "loss": 0.5664, "step": 13489 }, { "epoch": 0.41440113046416616, "grad_norm": 0.3422519266605377, "learning_rate": 1.7978595879365845e-05, "loss": 0.5747, "step": 13490 }, { "epoch": 0.4144318495991153, "grad_norm": 0.31227001547813416, "learning_rate": 1.797830452953087e-05, "loss": 0.5432, "step": 13491 }, { "epoch": 0.41446256873406445, "grad_norm": 0.3392178416252136, "learning_rate": 1.7978013161062006e-05, "loss": 0.5684, "step": 13492 }, { "epoch": 0.4144932878690136, "grad_norm": 0.33658236265182495, "learning_rate": 1.7977721773959933e-05, "loss": 0.6458, "step": 13493 }, { "epoch": 0.41452400700396275, "grad_norm": 0.3575741946697235, "learning_rate": 1.7977430368225338e-05, "loss": 0.66, "step": 13494 }, { "epoch": 0.4145547261389119, "grad_norm": 0.3159225583076477, "learning_rate": 1.7977138943858895e-05, "loss": 0.4922, "step": 13495 }, { "epoch": 0.4145854452738611, "grad_norm": 0.3650193512439728, "learning_rate": 1.7976847500861284e-05, "loss": 0.5475, "step": 13496 }, { "epoch": 0.4146161644088103, "grad_norm": 0.3506503701210022, "learning_rate": 1.7976556039233194e-05, "loss": 0.6262, "step": 13497 }, { "epoch": 0.4146468835437594, "grad_norm": 0.3729051947593689, "learning_rate": 1.7976264558975295e-05, "loss": 0.5932, "step": 13498 }, { "epoch": 0.41467760267870857, "grad_norm": 0.38548117876052856, "learning_rate": 1.7975973060088275e-05, "loss": 0.6515, "step": 13499 }, { "epoch": 0.41470832181365774, "grad_norm": 0.34316256642341614, "learning_rate": 1.7975681542572815e-05, "loss": 0.602, "step": 13500 }, { "epoch": 0.41473904094860686, "grad_norm": 0.38326403498649597, "learning_rate": 1.797539000642959e-05, "loss": 0.6084, "step": 13501 }, { "epoch": 0.41476976008355604, "grad_norm": 0.3802562355995178, "learning_rate": 1.7975098451659287e-05, "loss": 0.5411, "step": 13502 }, { "epoch": 0.4148004792185052, "grad_norm": 0.35803282260894775, "learning_rate": 1.7974806878262584e-05, "loss": 0.578, "step": 13503 }, { "epoch": 0.4148311983534544, "grad_norm": 0.34899741411209106, "learning_rate": 1.7974515286240165e-05, "loss": 0.4997, "step": 13504 }, { "epoch": 0.4148619174884035, "grad_norm": 0.3400305211544037, "learning_rate": 1.7974223675592707e-05, "loss": 0.5757, "step": 13505 }, { "epoch": 0.4148926366233527, "grad_norm": 0.3617069721221924, "learning_rate": 1.7973932046320893e-05, "loss": 0.5044, "step": 13506 }, { "epoch": 0.41492335575830186, "grad_norm": 0.31433746218681335, "learning_rate": 1.7973640398425402e-05, "loss": 0.5134, "step": 13507 }, { "epoch": 0.414954074893251, "grad_norm": 0.31475383043289185, "learning_rate": 1.797334873190692e-05, "loss": 0.474, "step": 13508 }, { "epoch": 0.41498479402820015, "grad_norm": 0.6247244477272034, "learning_rate": 1.7973057046766124e-05, "loss": 0.4803, "step": 13509 }, { "epoch": 0.41501551316314933, "grad_norm": 0.3455808758735657, "learning_rate": 1.7972765343003694e-05, "loss": 0.5525, "step": 13510 }, { "epoch": 0.4150462322980985, "grad_norm": 0.32530826330184937, "learning_rate": 1.7972473620620316e-05, "loss": 0.552, "step": 13511 }, { "epoch": 0.4150769514330476, "grad_norm": 0.3750620186328888, "learning_rate": 1.7972181879616667e-05, "loss": 0.5045, "step": 13512 }, { "epoch": 0.4151076705679968, "grad_norm": 0.34242960810661316, "learning_rate": 1.7971890119993434e-05, "loss": 0.5449, "step": 13513 }, { "epoch": 0.415138389702946, "grad_norm": 0.36084040999412537, "learning_rate": 1.7971598341751293e-05, "loss": 0.593, "step": 13514 }, { "epoch": 0.41516910883789515, "grad_norm": 0.33685389161109924, "learning_rate": 1.7971306544890926e-05, "loss": 0.5122, "step": 13515 }, { "epoch": 0.41519982797284427, "grad_norm": 0.44419145584106445, "learning_rate": 1.797101472941302e-05, "loss": 0.5394, "step": 13516 }, { "epoch": 0.41523054710779345, "grad_norm": 0.43860355019569397, "learning_rate": 1.7970722895318248e-05, "loss": 0.5701, "step": 13517 }, { "epoch": 0.4152612662427426, "grad_norm": 0.33094528317451477, "learning_rate": 1.7970431042607297e-05, "loss": 0.4797, "step": 13518 }, { "epoch": 0.41529198537769174, "grad_norm": 0.35538312792778015, "learning_rate": 1.797013917128085e-05, "loss": 0.5483, "step": 13519 }, { "epoch": 0.4153227045126409, "grad_norm": 0.39709800481796265, "learning_rate": 1.796984728133958e-05, "loss": 0.5626, "step": 13520 }, { "epoch": 0.4153534236475901, "grad_norm": 0.34902703762054443, "learning_rate": 1.7969555372784182e-05, "loss": 0.5128, "step": 13521 }, { "epoch": 0.41538414278253927, "grad_norm": 0.4764990508556366, "learning_rate": 1.796926344561533e-05, "loss": 0.5446, "step": 13522 }, { "epoch": 0.4154148619174884, "grad_norm": 0.37706974148750305, "learning_rate": 1.7968971499833703e-05, "loss": 0.5934, "step": 13523 }, { "epoch": 0.41544558105243756, "grad_norm": 0.337767630815506, "learning_rate": 1.7968679535439987e-05, "loss": 0.4944, "step": 13524 }, { "epoch": 0.41547630018738674, "grad_norm": 0.39806538820266724, "learning_rate": 1.7968387552434868e-05, "loss": 0.5633, "step": 13525 }, { "epoch": 0.41550701932233586, "grad_norm": 0.3465985953807831, "learning_rate": 1.7968095550819022e-05, "loss": 0.5908, "step": 13526 }, { "epoch": 0.41553773845728503, "grad_norm": 0.36597657203674316, "learning_rate": 1.796780353059313e-05, "loss": 0.6231, "step": 13527 }, { "epoch": 0.4155684575922342, "grad_norm": 0.310036838054657, "learning_rate": 1.7967511491757877e-05, "loss": 0.6672, "step": 13528 }, { "epoch": 0.4155991767271834, "grad_norm": 0.321611225605011, "learning_rate": 1.7967219434313943e-05, "loss": 0.5916, "step": 13529 }, { "epoch": 0.4156298958621325, "grad_norm": 0.34464773535728455, "learning_rate": 1.7966927358262014e-05, "loss": 0.5873, "step": 13530 }, { "epoch": 0.4156606149970817, "grad_norm": 0.3694591522216797, "learning_rate": 1.796663526360277e-05, "loss": 0.5928, "step": 13531 }, { "epoch": 0.41569133413203085, "grad_norm": 0.40424680709838867, "learning_rate": 1.796634315033689e-05, "loss": 0.5985, "step": 13532 }, { "epoch": 0.41572205326698003, "grad_norm": 0.7059298753738403, "learning_rate": 1.796605101846506e-05, "loss": 0.6104, "step": 13533 }, { "epoch": 0.41575277240192915, "grad_norm": 0.3214164674282074, "learning_rate": 1.7965758867987965e-05, "loss": 0.5547, "step": 13534 }, { "epoch": 0.4157834915368783, "grad_norm": 0.3449247479438782, "learning_rate": 1.796546669890628e-05, "loss": 0.5493, "step": 13535 }, { "epoch": 0.4158142106718275, "grad_norm": 0.31372910737991333, "learning_rate": 1.7965174511220692e-05, "loss": 0.5489, "step": 13536 }, { "epoch": 0.4158449298067766, "grad_norm": 0.36073338985443115, "learning_rate": 1.7964882304931887e-05, "loss": 0.6611, "step": 13537 }, { "epoch": 0.4158756489417258, "grad_norm": 0.35527417063713074, "learning_rate": 1.7964590080040534e-05, "loss": 0.5621, "step": 13538 }, { "epoch": 0.41590636807667497, "grad_norm": 0.41904664039611816, "learning_rate": 1.7964297836547332e-05, "loss": 0.5974, "step": 13539 }, { "epoch": 0.41593708721162415, "grad_norm": 0.3420105278491974, "learning_rate": 1.7964005574452957e-05, "loss": 0.5853, "step": 13540 }, { "epoch": 0.41596780634657327, "grad_norm": 0.3496705889701843, "learning_rate": 1.7963713293758088e-05, "loss": 0.6053, "step": 13541 }, { "epoch": 0.41599852548152244, "grad_norm": 0.3533177375793457, "learning_rate": 1.796342099446341e-05, "loss": 0.6432, "step": 13542 }, { "epoch": 0.4160292446164716, "grad_norm": 0.37544897198677063, "learning_rate": 1.7963128676569607e-05, "loss": 0.6036, "step": 13543 }, { "epoch": 0.41605996375142074, "grad_norm": 0.32714641094207764, "learning_rate": 1.7962836340077357e-05, "loss": 0.5422, "step": 13544 }, { "epoch": 0.4160906828863699, "grad_norm": 0.3831726908683777, "learning_rate": 1.7962543984987354e-05, "loss": 0.5831, "step": 13545 }, { "epoch": 0.4161214020213191, "grad_norm": 0.4031693935394287, "learning_rate": 1.7962251611300268e-05, "loss": 0.5046, "step": 13546 }, { "epoch": 0.41615212115626826, "grad_norm": 0.3283339738845825, "learning_rate": 1.7961959219016787e-05, "loss": 0.5789, "step": 13547 }, { "epoch": 0.4161828402912174, "grad_norm": 0.3421998918056488, "learning_rate": 1.7961666808137596e-05, "loss": 0.5778, "step": 13548 }, { "epoch": 0.41621355942616656, "grad_norm": 0.34212225675582886, "learning_rate": 1.7961374378663378e-05, "loss": 0.6241, "step": 13549 }, { "epoch": 0.41624427856111573, "grad_norm": 0.37136203050613403, "learning_rate": 1.7961081930594812e-05, "loss": 0.5787, "step": 13550 }, { "epoch": 0.41627499769606485, "grad_norm": 0.35251137614250183, "learning_rate": 1.7960789463932582e-05, "loss": 0.6317, "step": 13551 }, { "epoch": 0.416305716831014, "grad_norm": 0.3322385847568512, "learning_rate": 1.7960496978677376e-05, "loss": 0.5743, "step": 13552 }, { "epoch": 0.4163364359659632, "grad_norm": 0.37494999170303345, "learning_rate": 1.796020447482987e-05, "loss": 0.5574, "step": 13553 }, { "epoch": 0.4163671551009124, "grad_norm": 0.3462819755077362, "learning_rate": 1.795991195239075e-05, "loss": 0.5526, "step": 13554 }, { "epoch": 0.4163978742358615, "grad_norm": 0.3660653829574585, "learning_rate": 1.7959619411360702e-05, "loss": 0.6614, "step": 13555 }, { "epoch": 0.4164285933708107, "grad_norm": 0.33216071128845215, "learning_rate": 1.7959326851740404e-05, "loss": 0.5356, "step": 13556 }, { "epoch": 0.41645931250575985, "grad_norm": 0.36338868737220764, "learning_rate": 1.7959034273530545e-05, "loss": 0.4419, "step": 13557 }, { "epoch": 0.416490031640709, "grad_norm": 0.35737144947052, "learning_rate": 1.7958741676731806e-05, "loss": 0.5619, "step": 13558 }, { "epoch": 0.41652075077565814, "grad_norm": 0.35395118594169617, "learning_rate": 1.795844906134487e-05, "loss": 0.6009, "step": 13559 }, { "epoch": 0.4165514699106073, "grad_norm": 0.3729795515537262, "learning_rate": 1.7958156427370416e-05, "loss": 0.6318, "step": 13560 }, { "epoch": 0.4165821890455565, "grad_norm": 0.32066863775253296, "learning_rate": 1.795786377480914e-05, "loss": 0.3948, "step": 13561 }, { "epoch": 0.4166129081805056, "grad_norm": 0.338638573884964, "learning_rate": 1.795757110366171e-05, "loss": 0.4851, "step": 13562 }, { "epoch": 0.4166436273154548, "grad_norm": 0.36284223198890686, "learning_rate": 1.795727841392882e-05, "loss": 0.6327, "step": 13563 }, { "epoch": 0.41667434645040397, "grad_norm": 0.3716714680194855, "learning_rate": 1.795698570561115e-05, "loss": 0.6808, "step": 13564 }, { "epoch": 0.41670506558535314, "grad_norm": 0.32993242144584656, "learning_rate": 1.7956692978709384e-05, "loss": 0.5534, "step": 13565 }, { "epoch": 0.41673578472030226, "grad_norm": 0.33987265825271606, "learning_rate": 1.7956400233224206e-05, "loss": 0.5773, "step": 13566 }, { "epoch": 0.41676650385525144, "grad_norm": 0.3732895851135254, "learning_rate": 1.7956107469156298e-05, "loss": 0.6399, "step": 13567 }, { "epoch": 0.4167972229902006, "grad_norm": 0.35777533054351807, "learning_rate": 1.7955814686506346e-05, "loss": 0.5717, "step": 13568 }, { "epoch": 0.41682794212514973, "grad_norm": 1.4428119659423828, "learning_rate": 1.7955521885275037e-05, "loss": 0.523, "step": 13569 }, { "epoch": 0.4168586612600989, "grad_norm": 0.3417584300041199, "learning_rate": 1.795522906546305e-05, "loss": 0.5962, "step": 13570 }, { "epoch": 0.4168893803950481, "grad_norm": 0.37208259105682373, "learning_rate": 1.7954936227071066e-05, "loss": 0.6072, "step": 13571 }, { "epoch": 0.41692009952999726, "grad_norm": 0.35172322392463684, "learning_rate": 1.7954643370099778e-05, "loss": 0.6008, "step": 13572 }, { "epoch": 0.4169508186649464, "grad_norm": 0.35454341769218445, "learning_rate": 1.7954350494549858e-05, "loss": 0.588, "step": 13573 }, { "epoch": 0.41698153779989555, "grad_norm": 0.482441246509552, "learning_rate": 1.7954057600422002e-05, "loss": 0.5889, "step": 13574 }, { "epoch": 0.4170122569348447, "grad_norm": 0.35916849970817566, "learning_rate": 1.7953764687716887e-05, "loss": 0.6162, "step": 13575 }, { "epoch": 0.41704297606979385, "grad_norm": 0.3558606207370758, "learning_rate": 1.7953471756435202e-05, "loss": 0.6089, "step": 13576 }, { "epoch": 0.417073695204743, "grad_norm": 0.3777182996273041, "learning_rate": 1.7953178806577626e-05, "loss": 0.6223, "step": 13577 }, { "epoch": 0.4171044143396922, "grad_norm": 0.3695099651813507, "learning_rate": 1.7952885838144845e-05, "loss": 0.5704, "step": 13578 }, { "epoch": 0.4171351334746414, "grad_norm": 0.3485008478164673, "learning_rate": 1.7952592851137543e-05, "loss": 0.4994, "step": 13579 }, { "epoch": 0.4171658526095905, "grad_norm": 0.3708656132221222, "learning_rate": 1.7952299845556408e-05, "loss": 0.5228, "step": 13580 }, { "epoch": 0.41719657174453967, "grad_norm": 0.34526345133781433, "learning_rate": 1.795200682140212e-05, "loss": 0.5411, "step": 13581 }, { "epoch": 0.41722729087948884, "grad_norm": 0.32994213700294495, "learning_rate": 1.7951713778675364e-05, "loss": 0.6141, "step": 13582 }, { "epoch": 0.417258010014438, "grad_norm": 0.35332897305488586, "learning_rate": 1.7951420717376825e-05, "loss": 0.4986, "step": 13583 }, { "epoch": 0.41728872914938714, "grad_norm": 0.3248617947101593, "learning_rate": 1.795112763750719e-05, "loss": 0.4671, "step": 13584 }, { "epoch": 0.4173194482843363, "grad_norm": 0.34700703620910645, "learning_rate": 1.7950834539067137e-05, "loss": 0.6358, "step": 13585 }, { "epoch": 0.4173501674192855, "grad_norm": 0.33503881096839905, "learning_rate": 1.7950541422057357e-05, "loss": 0.5616, "step": 13586 }, { "epoch": 0.4173808865542346, "grad_norm": 0.4057067036628723, "learning_rate": 1.7950248286478534e-05, "loss": 0.5768, "step": 13587 }, { "epoch": 0.4174116056891838, "grad_norm": 0.37507200241088867, "learning_rate": 1.794995513233135e-05, "loss": 0.5647, "step": 13588 }, { "epoch": 0.41744232482413296, "grad_norm": 0.3448200821876526, "learning_rate": 1.794966195961649e-05, "loss": 0.5901, "step": 13589 }, { "epoch": 0.41747304395908214, "grad_norm": 0.3658061623573303, "learning_rate": 1.7949368768334635e-05, "loss": 0.5476, "step": 13590 }, { "epoch": 0.41750376309403125, "grad_norm": 0.3874819874763489, "learning_rate": 1.794907555848648e-05, "loss": 0.5931, "step": 13591 }, { "epoch": 0.41753448222898043, "grad_norm": 0.35941797494888306, "learning_rate": 1.79487823300727e-05, "loss": 0.574, "step": 13592 }, { "epoch": 0.4175652013639296, "grad_norm": 0.3364095389842987, "learning_rate": 1.7948489083093986e-05, "loss": 0.5898, "step": 13593 }, { "epoch": 0.4175959204988787, "grad_norm": 0.36332714557647705, "learning_rate": 1.794819581755102e-05, "loss": 0.5447, "step": 13594 }, { "epoch": 0.4176266396338279, "grad_norm": 0.33720535039901733, "learning_rate": 1.7947902533444488e-05, "loss": 0.5736, "step": 13595 }, { "epoch": 0.4176573587687771, "grad_norm": 0.3926407992839813, "learning_rate": 1.7947609230775074e-05, "loss": 0.5658, "step": 13596 }, { "epoch": 0.41768807790372625, "grad_norm": 0.36118948459625244, "learning_rate": 1.7947315909543465e-05, "loss": 0.5158, "step": 13597 }, { "epoch": 0.41771879703867537, "grad_norm": 0.32338815927505493, "learning_rate": 1.7947022569750342e-05, "loss": 0.6306, "step": 13598 }, { "epoch": 0.41774951617362455, "grad_norm": 0.37669607996940613, "learning_rate": 1.7946729211396396e-05, "loss": 0.5429, "step": 13599 }, { "epoch": 0.4177802353085737, "grad_norm": 0.32195034623146057, "learning_rate": 1.7946435834482305e-05, "loss": 0.5912, "step": 13600 }, { "epoch": 0.4178109544435229, "grad_norm": 0.40909096598625183, "learning_rate": 1.7946142439008763e-05, "loss": 0.6446, "step": 13601 }, { "epoch": 0.417841673578472, "grad_norm": 0.3417479693889618, "learning_rate": 1.7945849024976444e-05, "loss": 0.551, "step": 13602 }, { "epoch": 0.4178723927134212, "grad_norm": 0.32102376222610474, "learning_rate": 1.7945555592386044e-05, "loss": 0.5504, "step": 13603 }, { "epoch": 0.41790311184837037, "grad_norm": 0.38472631573677063, "learning_rate": 1.7945262141238243e-05, "loss": 0.6013, "step": 13604 }, { "epoch": 0.4179338309833195, "grad_norm": 0.376674085855484, "learning_rate": 1.794496867153373e-05, "loss": 0.5286, "step": 13605 }, { "epoch": 0.41796455011826866, "grad_norm": 0.35689863562583923, "learning_rate": 1.7944675183273186e-05, "loss": 0.5944, "step": 13606 }, { "epoch": 0.41799526925321784, "grad_norm": 0.32555097341537476, "learning_rate": 1.79443816764573e-05, "loss": 0.594, "step": 13607 }, { "epoch": 0.418025988388167, "grad_norm": 0.3898604214191437, "learning_rate": 1.794408815108675e-05, "loss": 0.6333, "step": 13608 }, { "epoch": 0.41805670752311613, "grad_norm": 0.34341856837272644, "learning_rate": 1.7943794607162232e-05, "loss": 0.6153, "step": 13609 }, { "epoch": 0.4180874266580653, "grad_norm": 0.5007764101028442, "learning_rate": 1.7943501044684425e-05, "loss": 0.6583, "step": 13610 }, { "epoch": 0.4181181457930145, "grad_norm": 0.3256283104419708, "learning_rate": 1.7943207463654015e-05, "loss": 0.5324, "step": 13611 }, { "epoch": 0.4181488649279636, "grad_norm": 0.3469979763031006, "learning_rate": 1.7942913864071692e-05, "loss": 0.5369, "step": 13612 }, { "epoch": 0.4181795840629128, "grad_norm": 0.42277806997299194, "learning_rate": 1.7942620245938142e-05, "loss": 0.5726, "step": 13613 }, { "epoch": 0.41821030319786195, "grad_norm": 0.3402706980705261, "learning_rate": 1.7942326609254045e-05, "loss": 0.5892, "step": 13614 }, { "epoch": 0.41824102233281113, "grad_norm": 0.5751505494117737, "learning_rate": 1.794203295402009e-05, "loss": 0.5407, "step": 13615 }, { "epoch": 0.41827174146776025, "grad_norm": 0.3614290654659271, "learning_rate": 1.794173928023696e-05, "loss": 0.6417, "step": 13616 }, { "epoch": 0.4183024606027094, "grad_norm": 0.32762113213539124, "learning_rate": 1.7941445587905342e-05, "loss": 0.5223, "step": 13617 }, { "epoch": 0.4183331797376586, "grad_norm": 0.33942264318466187, "learning_rate": 1.7941151877025926e-05, "loss": 0.5183, "step": 13618 }, { "epoch": 0.4183638988726077, "grad_norm": 0.3226116895675659, "learning_rate": 1.7940858147599396e-05, "loss": 0.6253, "step": 13619 }, { "epoch": 0.4183946180075569, "grad_norm": 0.3390662968158722, "learning_rate": 1.794056439962643e-05, "loss": 0.5788, "step": 13620 }, { "epoch": 0.41842533714250607, "grad_norm": 0.5941289663314819, "learning_rate": 1.794027063310773e-05, "loss": 0.5576, "step": 13621 }, { "epoch": 0.41845605627745525, "grad_norm": 0.34264469146728516, "learning_rate": 1.793997684804397e-05, "loss": 0.5428, "step": 13622 }, { "epoch": 0.41848677541240437, "grad_norm": 0.3591174781322479, "learning_rate": 1.7939683044435844e-05, "loss": 0.5643, "step": 13623 }, { "epoch": 0.41851749454735354, "grad_norm": 0.3255631625652313, "learning_rate": 1.793938922228403e-05, "loss": 0.6051, "step": 13624 }, { "epoch": 0.4185482136823027, "grad_norm": 0.3723742365837097, "learning_rate": 1.793909538158922e-05, "loss": 0.609, "step": 13625 }, { "epoch": 0.4185789328172519, "grad_norm": 0.3429974615573883, "learning_rate": 1.793880152235209e-05, "loss": 0.5792, "step": 13626 }, { "epoch": 0.418609651952201, "grad_norm": 0.37104251980781555, "learning_rate": 1.7938507644573344e-05, "loss": 0.6058, "step": 13627 }, { "epoch": 0.4186403710871502, "grad_norm": 0.353971928358078, "learning_rate": 1.7938213748253655e-05, "loss": 0.5561, "step": 13628 }, { "epoch": 0.41867109022209936, "grad_norm": 0.31816691160202026, "learning_rate": 1.7937919833393716e-05, "loss": 0.6292, "step": 13629 }, { "epoch": 0.4187018093570485, "grad_norm": 0.34023699164390564, "learning_rate": 1.7937625899994205e-05, "loss": 0.5482, "step": 13630 }, { "epoch": 0.41873252849199766, "grad_norm": 0.353531152009964, "learning_rate": 1.793733194805582e-05, "loss": 0.6116, "step": 13631 }, { "epoch": 0.41876324762694683, "grad_norm": 0.38691550493240356, "learning_rate": 1.7937037977579243e-05, "loss": 0.5999, "step": 13632 }, { "epoch": 0.418793966761896, "grad_norm": 0.3443091809749603, "learning_rate": 1.7936743988565154e-05, "loss": 0.6196, "step": 13633 }, { "epoch": 0.41882468589684513, "grad_norm": 0.3432387709617615, "learning_rate": 1.7936449981014248e-05, "loss": 0.5823, "step": 13634 }, { "epoch": 0.4188554050317943, "grad_norm": 0.3404443562030792, "learning_rate": 1.793615595492721e-05, "loss": 0.5339, "step": 13635 }, { "epoch": 0.4188861241667435, "grad_norm": 0.3803689479827881, "learning_rate": 1.7935861910304725e-05, "loss": 0.5305, "step": 13636 }, { "epoch": 0.4189168433016926, "grad_norm": 0.3441116213798523, "learning_rate": 1.7935567847147478e-05, "loss": 0.5908, "step": 13637 }, { "epoch": 0.4189475624366418, "grad_norm": 0.3414524793624878, "learning_rate": 1.7935273765456157e-05, "loss": 0.5477, "step": 13638 }, { "epoch": 0.41897828157159095, "grad_norm": 0.31987109780311584, "learning_rate": 1.7934979665231456e-05, "loss": 0.6083, "step": 13639 }, { "epoch": 0.4190090007065401, "grad_norm": 0.32376378774642944, "learning_rate": 1.793468554647405e-05, "loss": 0.5406, "step": 13640 }, { "epoch": 0.41903971984148924, "grad_norm": 0.3340388834476471, "learning_rate": 1.7934391409184636e-05, "loss": 0.5935, "step": 13641 }, { "epoch": 0.4190704389764384, "grad_norm": 0.41424229741096497, "learning_rate": 1.7934097253363892e-05, "loss": 0.5941, "step": 13642 }, { "epoch": 0.4191011581113876, "grad_norm": 0.7332278490066528, "learning_rate": 1.7933803079012517e-05, "loss": 0.6073, "step": 13643 }, { "epoch": 0.41913187724633677, "grad_norm": 0.35454562306404114, "learning_rate": 1.7933508886131187e-05, "loss": 0.6386, "step": 13644 }, { "epoch": 0.4191625963812859, "grad_norm": 0.35108429193496704, "learning_rate": 1.7933214674720592e-05, "loss": 0.5677, "step": 13645 }, { "epoch": 0.41919331551623507, "grad_norm": 0.32668834924697876, "learning_rate": 1.793292044478142e-05, "loss": 0.5938, "step": 13646 }, { "epoch": 0.41922403465118424, "grad_norm": 0.3612866699695587, "learning_rate": 1.7932626196314356e-05, "loss": 0.5778, "step": 13647 }, { "epoch": 0.41925475378613336, "grad_norm": 0.36457982659339905, "learning_rate": 1.7932331929320093e-05, "loss": 0.5553, "step": 13648 }, { "epoch": 0.41928547292108254, "grad_norm": 0.3209291994571686, "learning_rate": 1.7932037643799313e-05, "loss": 0.5526, "step": 13649 }, { "epoch": 0.4193161920560317, "grad_norm": 0.34314507246017456, "learning_rate": 1.793174333975271e-05, "loss": 0.4912, "step": 13650 }, { "epoch": 0.4193469111909809, "grad_norm": 0.327249675989151, "learning_rate": 1.793144901718096e-05, "loss": 0.5638, "step": 13651 }, { "epoch": 0.41937763032593, "grad_norm": 0.3582034409046173, "learning_rate": 1.7931154676084757e-05, "loss": 0.6339, "step": 13652 }, { "epoch": 0.4194083494608792, "grad_norm": 0.4138329029083252, "learning_rate": 1.7930860316464793e-05, "loss": 0.6428, "step": 13653 }, { "epoch": 0.41943906859582836, "grad_norm": 0.32892557978630066, "learning_rate": 1.793056593832175e-05, "loss": 0.5238, "step": 13654 }, { "epoch": 0.4194697877307775, "grad_norm": 0.3319656252861023, "learning_rate": 1.7930271541656313e-05, "loss": 0.575, "step": 13655 }, { "epoch": 0.41950050686572665, "grad_norm": 0.3682383894920349, "learning_rate": 1.7929977126469175e-05, "loss": 0.5883, "step": 13656 }, { "epoch": 0.4195312260006758, "grad_norm": 0.3587525486946106, "learning_rate": 1.792968269276102e-05, "loss": 0.6299, "step": 13657 }, { "epoch": 0.419561945135625, "grad_norm": 0.3337033689022064, "learning_rate": 1.792938824053254e-05, "loss": 0.5149, "step": 13658 }, { "epoch": 0.4195926642705741, "grad_norm": 0.34074416756629944, "learning_rate": 1.7929093769784422e-05, "loss": 0.5373, "step": 13659 }, { "epoch": 0.4196233834055233, "grad_norm": 0.3872559070587158, "learning_rate": 1.7928799280517347e-05, "loss": 0.5729, "step": 13660 }, { "epoch": 0.4196541025404725, "grad_norm": 0.3326333463191986, "learning_rate": 1.7928504772732007e-05, "loss": 0.5145, "step": 13661 }, { "epoch": 0.4196848216754216, "grad_norm": 0.4156367778778076, "learning_rate": 1.7928210246429094e-05, "loss": 0.536, "step": 13662 }, { "epoch": 0.41971554081037077, "grad_norm": 0.36305302381515503, "learning_rate": 1.7927915701609292e-05, "loss": 0.5531, "step": 13663 }, { "epoch": 0.41974625994531994, "grad_norm": 0.37955349683761597, "learning_rate": 1.792762113827329e-05, "loss": 0.5388, "step": 13664 }, { "epoch": 0.4197769790802691, "grad_norm": 0.3581618666648865, "learning_rate": 1.7927326556421772e-05, "loss": 0.5982, "step": 13665 }, { "epoch": 0.41980769821521824, "grad_norm": 0.36039426922798157, "learning_rate": 1.792703195605543e-05, "loss": 0.5587, "step": 13666 }, { "epoch": 0.4198384173501674, "grad_norm": 0.3172575831413269, "learning_rate": 1.792673733717495e-05, "loss": 0.5558, "step": 13667 }, { "epoch": 0.4198691364851166, "grad_norm": 0.34876832365989685, "learning_rate": 1.7926442699781023e-05, "loss": 0.5615, "step": 13668 }, { "epoch": 0.41989985562006577, "grad_norm": 0.36051225662231445, "learning_rate": 1.7926148043874337e-05, "loss": 0.6303, "step": 13669 }, { "epoch": 0.4199305747550149, "grad_norm": 0.3494899272918701, "learning_rate": 1.7925853369455577e-05, "loss": 0.5273, "step": 13670 }, { "epoch": 0.41996129388996406, "grad_norm": 0.3630965054035187, "learning_rate": 1.7925558676525434e-05, "loss": 0.5454, "step": 13671 }, { "epoch": 0.41999201302491324, "grad_norm": 0.389550119638443, "learning_rate": 1.792526396508459e-05, "loss": 0.5881, "step": 13672 }, { "epoch": 0.42002273215986236, "grad_norm": 0.3366812765598297, "learning_rate": 1.7924969235133745e-05, "loss": 0.5479, "step": 13673 }, { "epoch": 0.42005345129481153, "grad_norm": 0.3666175901889801, "learning_rate": 1.7924674486673578e-05, "loss": 0.5763, "step": 13674 }, { "epoch": 0.4200841704297607, "grad_norm": 0.41260164976119995, "learning_rate": 1.792437971970478e-05, "loss": 0.6065, "step": 13675 }, { "epoch": 0.4201148895647099, "grad_norm": 0.3445298373699188, "learning_rate": 1.792408493422804e-05, "loss": 0.5096, "step": 13676 }, { "epoch": 0.420145608699659, "grad_norm": 0.36334577202796936, "learning_rate": 1.7923790130244047e-05, "loss": 0.5815, "step": 13677 }, { "epoch": 0.4201763278346082, "grad_norm": 0.32935258746147156, "learning_rate": 1.792349530775349e-05, "loss": 0.5225, "step": 13678 }, { "epoch": 0.42020704696955735, "grad_norm": 0.5830272436141968, "learning_rate": 1.7923200466757056e-05, "loss": 0.6324, "step": 13679 }, { "epoch": 0.42023776610450647, "grad_norm": 0.42957785725593567, "learning_rate": 1.792290560725543e-05, "loss": 0.6082, "step": 13680 }, { "epoch": 0.42026848523945565, "grad_norm": 0.33713382482528687, "learning_rate": 1.792261072924931e-05, "loss": 0.4777, "step": 13681 }, { "epoch": 0.4202992043744048, "grad_norm": 0.4984355568885803, "learning_rate": 1.7922315832739375e-05, "loss": 0.533, "step": 13682 }, { "epoch": 0.420329923509354, "grad_norm": 0.3821815848350525, "learning_rate": 1.792202091772632e-05, "loss": 0.5236, "step": 13683 }, { "epoch": 0.4203606426443031, "grad_norm": 0.34014126658439636, "learning_rate": 1.7921725984210834e-05, "loss": 0.5947, "step": 13684 }, { "epoch": 0.4203913617792523, "grad_norm": 0.3237806558609009, "learning_rate": 1.79214310321936e-05, "loss": 0.5233, "step": 13685 }, { "epoch": 0.42042208091420147, "grad_norm": 0.47360095381736755, "learning_rate": 1.7921136061675314e-05, "loss": 0.6246, "step": 13686 }, { "epoch": 0.42045280004915064, "grad_norm": 0.34132441878318787, "learning_rate": 1.792084107265666e-05, "loss": 0.5752, "step": 13687 }, { "epoch": 0.42048351918409976, "grad_norm": 0.3260282278060913, "learning_rate": 1.792054606513833e-05, "loss": 0.6344, "step": 13688 }, { "epoch": 0.42051423831904894, "grad_norm": 0.5223171710968018, "learning_rate": 1.7920251039121007e-05, "loss": 0.6555, "step": 13689 }, { "epoch": 0.4205449574539981, "grad_norm": 0.33805039525032043, "learning_rate": 1.7919955994605387e-05, "loss": 0.5556, "step": 13690 }, { "epoch": 0.42057567658894723, "grad_norm": 4.592628479003906, "learning_rate": 1.7919660931592156e-05, "loss": 0.6508, "step": 13691 }, { "epoch": 0.4206063957238964, "grad_norm": 0.36188623309135437, "learning_rate": 1.7919365850082006e-05, "loss": 0.557, "step": 13692 }, { "epoch": 0.4206371148588456, "grad_norm": 0.32571834325790405, "learning_rate": 1.791907075007562e-05, "loss": 0.5247, "step": 13693 }, { "epoch": 0.42066783399379476, "grad_norm": 0.3364171087741852, "learning_rate": 1.79187756315737e-05, "loss": 0.5377, "step": 13694 }, { "epoch": 0.4206985531287439, "grad_norm": 0.40245988965034485, "learning_rate": 1.7918480494576917e-05, "loss": 0.5398, "step": 13695 }, { "epoch": 0.42072927226369305, "grad_norm": 0.3849131166934967, "learning_rate": 1.791818533908597e-05, "loss": 0.6926, "step": 13696 }, { "epoch": 0.42075999139864223, "grad_norm": 0.33267951011657715, "learning_rate": 1.7917890165101554e-05, "loss": 0.6074, "step": 13697 }, { "epoch": 0.42079071053359135, "grad_norm": 0.34417226910591125, "learning_rate": 1.791759497262435e-05, "loss": 0.598, "step": 13698 }, { "epoch": 0.4208214296685405, "grad_norm": 0.34117186069488525, "learning_rate": 1.7917299761655048e-05, "loss": 0.586, "step": 13699 }, { "epoch": 0.4208521488034897, "grad_norm": 0.34811827540397644, "learning_rate": 1.791700453219434e-05, "loss": 0.5981, "step": 13700 }, { "epoch": 0.4208828679384389, "grad_norm": 0.41498133540153503, "learning_rate": 1.791670928424292e-05, "loss": 0.5933, "step": 13701 }, { "epoch": 0.420913587073388, "grad_norm": 0.3517075479030609, "learning_rate": 1.791641401780147e-05, "loss": 0.5674, "step": 13702 }, { "epoch": 0.42094430620833717, "grad_norm": 0.38625842332839966, "learning_rate": 1.791611873287068e-05, "loss": 0.6397, "step": 13703 }, { "epoch": 0.42097502534328635, "grad_norm": 0.41680026054382324, "learning_rate": 1.7915823429451242e-05, "loss": 0.6019, "step": 13704 }, { "epoch": 0.42100574447823547, "grad_norm": 0.3878123462200165, "learning_rate": 1.7915528107543845e-05, "loss": 0.5023, "step": 13705 }, { "epoch": 0.42103646361318464, "grad_norm": 0.37448152899742126, "learning_rate": 1.7915232767149184e-05, "loss": 0.5887, "step": 13706 }, { "epoch": 0.4210671827481338, "grad_norm": 0.32517001032829285, "learning_rate": 1.7914937408267937e-05, "loss": 0.5848, "step": 13707 }, { "epoch": 0.421097901883083, "grad_norm": 0.4219764471054077, "learning_rate": 1.7914642030900807e-05, "loss": 0.6808, "step": 13708 }, { "epoch": 0.4211286210180321, "grad_norm": 0.3752448856830597, "learning_rate": 1.7914346635048473e-05, "loss": 0.6385, "step": 13709 }, { "epoch": 0.4211593401529813, "grad_norm": 0.33976268768310547, "learning_rate": 1.7914051220711634e-05, "loss": 0.6001, "step": 13710 }, { "epoch": 0.42119005928793046, "grad_norm": 0.33098405599594116, "learning_rate": 1.7913755787890974e-05, "loss": 0.5613, "step": 13711 }, { "epoch": 0.42122077842287964, "grad_norm": 0.3375484347343445, "learning_rate": 1.7913460336587184e-05, "loss": 0.5124, "step": 13712 }, { "epoch": 0.42125149755782876, "grad_norm": 0.3898363411426544, "learning_rate": 1.7913164866800956e-05, "loss": 0.6343, "step": 13713 }, { "epoch": 0.42128221669277793, "grad_norm": 0.34963560104370117, "learning_rate": 1.7912869378532977e-05, "loss": 0.575, "step": 13714 }, { "epoch": 0.4213129358277271, "grad_norm": 0.3601008951663971, "learning_rate": 1.791257387178394e-05, "loss": 0.4188, "step": 13715 }, { "epoch": 0.42134365496267623, "grad_norm": 0.3715578019618988, "learning_rate": 1.7912278346554536e-05, "loss": 0.5887, "step": 13716 }, { "epoch": 0.4213743740976254, "grad_norm": 0.3555755913257599, "learning_rate": 1.7911982802845452e-05, "loss": 0.6467, "step": 13717 }, { "epoch": 0.4214050932325746, "grad_norm": 0.3835761249065399, "learning_rate": 1.7911687240657376e-05, "loss": 0.5794, "step": 13718 }, { "epoch": 0.42143581236752375, "grad_norm": 0.4941348433494568, "learning_rate": 1.7911391659991008e-05, "loss": 0.4622, "step": 13719 }, { "epoch": 0.4214665315024729, "grad_norm": 0.3184669017791748, "learning_rate": 1.7911096060847026e-05, "loss": 0.523, "step": 13720 }, { "epoch": 0.42149725063742205, "grad_norm": 0.3106752634048462, "learning_rate": 1.7910800443226132e-05, "loss": 0.58, "step": 13721 }, { "epoch": 0.4215279697723712, "grad_norm": 0.3704081177711487, "learning_rate": 1.7910504807129007e-05, "loss": 0.4773, "step": 13722 }, { "epoch": 0.42155868890732034, "grad_norm": 0.33200156688690186, "learning_rate": 1.7910209152556348e-05, "loss": 0.538, "step": 13723 }, { "epoch": 0.4215894080422695, "grad_norm": 0.3723933696746826, "learning_rate": 1.7909913479508843e-05, "loss": 0.6219, "step": 13724 }, { "epoch": 0.4216201271772187, "grad_norm": 0.3450648784637451, "learning_rate": 1.790961778798718e-05, "loss": 0.5886, "step": 13725 }, { "epoch": 0.42165084631216787, "grad_norm": 0.34337374567985535, "learning_rate": 1.7909322077992058e-05, "loss": 0.5239, "step": 13726 }, { "epoch": 0.421681565447117, "grad_norm": 0.34349292516708374, "learning_rate": 1.7909026349524156e-05, "loss": 0.5812, "step": 13727 }, { "epoch": 0.42171228458206617, "grad_norm": 0.3317430317401886, "learning_rate": 1.7908730602584174e-05, "loss": 0.5533, "step": 13728 }, { "epoch": 0.42174300371701534, "grad_norm": 0.3458785116672516, "learning_rate": 1.79084348371728e-05, "loss": 0.6051, "step": 13729 }, { "epoch": 0.42177372285196446, "grad_norm": 0.33519697189331055, "learning_rate": 1.7908139053290718e-05, "loss": 0.585, "step": 13730 }, { "epoch": 0.42180444198691364, "grad_norm": 0.33924561738967896, "learning_rate": 1.790784325093863e-05, "loss": 0.5019, "step": 13731 }, { "epoch": 0.4218351611218628, "grad_norm": 0.339887410402298, "learning_rate": 1.790754743011722e-05, "loss": 0.6072, "step": 13732 }, { "epoch": 0.421865880256812, "grad_norm": 0.3477778434753418, "learning_rate": 1.7907251590827185e-05, "loss": 0.5948, "step": 13733 }, { "epoch": 0.4218965993917611, "grad_norm": 0.35875001549720764, "learning_rate": 1.7906955733069208e-05, "loss": 0.5566, "step": 13734 }, { "epoch": 0.4219273185267103, "grad_norm": 0.3578559160232544, "learning_rate": 1.7906659856843985e-05, "loss": 0.5084, "step": 13735 }, { "epoch": 0.42195803766165946, "grad_norm": 0.4151288568973541, "learning_rate": 1.7906363962152203e-05, "loss": 0.685, "step": 13736 }, { "epoch": 0.42198875679660863, "grad_norm": 0.7790361046791077, "learning_rate": 1.7906068048994558e-05, "loss": 0.5286, "step": 13737 }, { "epoch": 0.42201947593155775, "grad_norm": 0.33154648542404175, "learning_rate": 1.790577211737174e-05, "loss": 0.5743, "step": 13738 }, { "epoch": 0.42205019506650693, "grad_norm": 0.3580038249492645, "learning_rate": 1.7905476167284437e-05, "loss": 0.5479, "step": 13739 }, { "epoch": 0.4220809142014561, "grad_norm": 0.37286561727523804, "learning_rate": 1.790518019873334e-05, "loss": 0.5785, "step": 13740 }, { "epoch": 0.4221116333364052, "grad_norm": 0.4099390506744385, "learning_rate": 1.7904884211719146e-05, "loss": 0.6801, "step": 13741 }, { "epoch": 0.4221423524713544, "grad_norm": 0.6702728867530823, "learning_rate": 1.7904588206242546e-05, "loss": 0.72, "step": 13742 }, { "epoch": 0.4221730716063036, "grad_norm": 0.30737969279289246, "learning_rate": 1.7904292182304223e-05, "loss": 0.5612, "step": 13743 }, { "epoch": 0.42220379074125275, "grad_norm": 0.48112356662750244, "learning_rate": 1.7903996139904875e-05, "loss": 0.5503, "step": 13744 }, { "epoch": 0.42223450987620187, "grad_norm": 0.3694402873516083, "learning_rate": 1.7903700079045192e-05, "loss": 0.5531, "step": 13745 }, { "epoch": 0.42226522901115104, "grad_norm": 0.33446431159973145, "learning_rate": 1.790340399972587e-05, "loss": 0.6088, "step": 13746 }, { "epoch": 0.4222959481461002, "grad_norm": 0.3637501001358032, "learning_rate": 1.7903107901947588e-05, "loss": 0.5618, "step": 13747 }, { "epoch": 0.42232666728104934, "grad_norm": 0.32040029764175415, "learning_rate": 1.790281178571105e-05, "loss": 0.5255, "step": 13748 }, { "epoch": 0.4223573864159985, "grad_norm": 0.35006412863731384, "learning_rate": 1.790251565101694e-05, "loss": 0.5808, "step": 13749 }, { "epoch": 0.4223881055509477, "grad_norm": 0.35866519808769226, "learning_rate": 1.7902219497865954e-05, "loss": 0.5727, "step": 13750 }, { "epoch": 0.42241882468589687, "grad_norm": 0.4016134738922119, "learning_rate": 1.7901923326258787e-05, "loss": 0.5764, "step": 13751 }, { "epoch": 0.422449543820846, "grad_norm": 0.32082512974739075, "learning_rate": 1.7901627136196124e-05, "loss": 0.527, "step": 13752 }, { "epoch": 0.42248026295579516, "grad_norm": 0.3548704981803894, "learning_rate": 1.7901330927678656e-05, "loss": 0.5864, "step": 13753 }, { "epoch": 0.42251098209074434, "grad_norm": 0.4126437306404114, "learning_rate": 1.790103470070708e-05, "loss": 0.5662, "step": 13754 }, { "epoch": 0.4225417012256935, "grad_norm": 0.37354904413223267, "learning_rate": 1.7900738455282086e-05, "loss": 0.5818, "step": 13755 }, { "epoch": 0.42257242036064263, "grad_norm": 0.3807099759578705, "learning_rate": 1.7900442191404364e-05, "loss": 0.5424, "step": 13756 }, { "epoch": 0.4226031394955918, "grad_norm": 0.32012489438056946, "learning_rate": 1.7900145909074605e-05, "loss": 0.5058, "step": 13757 }, { "epoch": 0.422633858630541, "grad_norm": 0.35646292567253113, "learning_rate": 1.7899849608293508e-05, "loss": 0.5515, "step": 13758 }, { "epoch": 0.4226645777654901, "grad_norm": 0.33858591318130493, "learning_rate": 1.7899553289061762e-05, "loss": 0.5376, "step": 13759 }, { "epoch": 0.4226952969004393, "grad_norm": 0.3398784399032593, "learning_rate": 1.7899256951380054e-05, "loss": 0.5179, "step": 13760 }, { "epoch": 0.42272601603538845, "grad_norm": 0.3417968153953552, "learning_rate": 1.789896059524908e-05, "loss": 0.5265, "step": 13761 }, { "epoch": 0.4227567351703376, "grad_norm": 0.36332687735557556, "learning_rate": 1.7898664220669535e-05, "loss": 0.5535, "step": 13762 }, { "epoch": 0.42278745430528675, "grad_norm": 0.35556676983833313, "learning_rate": 1.78983678276421e-05, "loss": 0.5196, "step": 13763 }, { "epoch": 0.4228181734402359, "grad_norm": 0.3212848901748657, "learning_rate": 1.7898071416167485e-05, "loss": 0.5644, "step": 13764 }, { "epoch": 0.4228488925751851, "grad_norm": 0.3373847007751465, "learning_rate": 1.789777498624637e-05, "loss": 0.6212, "step": 13765 }, { "epoch": 0.4228796117101342, "grad_norm": 0.39276811480522156, "learning_rate": 1.7897478537879446e-05, "loss": 0.6439, "step": 13766 }, { "epoch": 0.4229103308450834, "grad_norm": 0.32682663202285767, "learning_rate": 1.7897182071067412e-05, "loss": 0.6086, "step": 13767 }, { "epoch": 0.42294104998003257, "grad_norm": 0.3236887753009796, "learning_rate": 1.7896885585810958e-05, "loss": 0.5627, "step": 13768 }, { "epoch": 0.42297176911498174, "grad_norm": 0.32789716124534607, "learning_rate": 1.7896589082110775e-05, "loss": 0.5789, "step": 13769 }, { "epoch": 0.42300248824993086, "grad_norm": 0.3284471333026886, "learning_rate": 1.7896292559967557e-05, "loss": 0.4842, "step": 13770 }, { "epoch": 0.42303320738488004, "grad_norm": 0.3239450752735138, "learning_rate": 1.7895996019382e-05, "loss": 0.539, "step": 13771 }, { "epoch": 0.4230639265198292, "grad_norm": 0.3479504883289337, "learning_rate": 1.7895699460354786e-05, "loss": 0.6007, "step": 13772 }, { "epoch": 0.42309464565477833, "grad_norm": 0.4570083022117615, "learning_rate": 1.7895402882886622e-05, "loss": 0.6111, "step": 13773 }, { "epoch": 0.4231253647897275, "grad_norm": 0.36839213967323303, "learning_rate": 1.789510628697819e-05, "loss": 0.5951, "step": 13774 }, { "epoch": 0.4231560839246767, "grad_norm": 0.34120622277259827, "learning_rate": 1.7894809672630183e-05, "loss": 0.5515, "step": 13775 }, { "epoch": 0.42318680305962586, "grad_norm": 0.32537826895713806, "learning_rate": 1.7894513039843297e-05, "loss": 0.6204, "step": 13776 }, { "epoch": 0.423217522194575, "grad_norm": 0.34834304451942444, "learning_rate": 1.7894216388618228e-05, "loss": 0.5699, "step": 13777 }, { "epoch": 0.42324824132952416, "grad_norm": 0.3385746479034424, "learning_rate": 1.789391971895566e-05, "loss": 0.5187, "step": 13778 }, { "epoch": 0.42327896046447333, "grad_norm": 0.34506675601005554, "learning_rate": 1.7893623030856296e-05, "loss": 0.6329, "step": 13779 }, { "epoch": 0.4233096795994225, "grad_norm": 0.46552422642707825, "learning_rate": 1.7893326324320823e-05, "loss": 0.6198, "step": 13780 }, { "epoch": 0.4233403987343716, "grad_norm": 0.38379573822021484, "learning_rate": 1.7893029599349935e-05, "loss": 0.6119, "step": 13781 }, { "epoch": 0.4233711178693208, "grad_norm": 0.3566911518573761, "learning_rate": 1.7892732855944327e-05, "loss": 0.6218, "step": 13782 }, { "epoch": 0.42340183700427, "grad_norm": 0.3546522855758667, "learning_rate": 1.789243609410469e-05, "loss": 0.6074, "step": 13783 }, { "epoch": 0.4234325561392191, "grad_norm": 0.3452991545200348, "learning_rate": 1.7892139313831712e-05, "loss": 0.5498, "step": 13784 }, { "epoch": 0.42346327527416827, "grad_norm": 0.38666659593582153, "learning_rate": 1.7891842515126097e-05, "loss": 0.5796, "step": 13785 }, { "epoch": 0.42349399440911745, "grad_norm": 0.3420117199420929, "learning_rate": 1.7891545697988528e-05, "loss": 0.5806, "step": 13786 }, { "epoch": 0.4235247135440666, "grad_norm": 0.34679973125457764, "learning_rate": 1.7891248862419704e-05, "loss": 0.4715, "step": 13787 }, { "epoch": 0.42355543267901574, "grad_norm": 0.34394583106040955, "learning_rate": 1.789095200842032e-05, "loss": 0.547, "step": 13788 }, { "epoch": 0.4235861518139649, "grad_norm": 0.4138399362564087, "learning_rate": 1.7890655135991067e-05, "loss": 0.5666, "step": 13789 }, { "epoch": 0.4236168709489141, "grad_norm": 0.39537495374679565, "learning_rate": 1.7890358245132635e-05, "loss": 0.5847, "step": 13790 }, { "epoch": 0.4236475900838632, "grad_norm": 0.3495256304740906, "learning_rate": 1.789006133584572e-05, "loss": 0.5249, "step": 13791 }, { "epoch": 0.4236783092188124, "grad_norm": 0.3657526969909668, "learning_rate": 1.788976440813102e-05, "loss": 0.5574, "step": 13792 }, { "epoch": 0.42370902835376156, "grad_norm": 0.36745157837867737, "learning_rate": 1.788946746198922e-05, "loss": 0.5079, "step": 13793 }, { "epoch": 0.42373974748871074, "grad_norm": 0.37793049216270447, "learning_rate": 1.788917049742102e-05, "loss": 0.5867, "step": 13794 }, { "epoch": 0.42377046662365986, "grad_norm": 0.3281875252723694, "learning_rate": 1.788887351442711e-05, "loss": 0.5308, "step": 13795 }, { "epoch": 0.42380118575860903, "grad_norm": 0.31238216161727905, "learning_rate": 1.7888576513008186e-05, "loss": 0.5438, "step": 13796 }, { "epoch": 0.4238319048935582, "grad_norm": 0.3507313132286072, "learning_rate": 1.788827949316494e-05, "loss": 0.5928, "step": 13797 }, { "epoch": 0.4238626240285074, "grad_norm": 0.35526421666145325, "learning_rate": 1.788798245489806e-05, "loss": 0.6649, "step": 13798 }, { "epoch": 0.4238933431634565, "grad_norm": 0.42792749404907227, "learning_rate": 1.7887685398208258e-05, "loss": 0.5833, "step": 13799 }, { "epoch": 0.4239240622984057, "grad_norm": 0.36928436160087585, "learning_rate": 1.788738832309621e-05, "loss": 0.5722, "step": 13800 }, { "epoch": 0.42395478143335485, "grad_norm": 0.33646953105926514, "learning_rate": 1.7887091229562614e-05, "loss": 0.637, "step": 13801 }, { "epoch": 0.423985500568304, "grad_norm": 0.3504921793937683, "learning_rate": 1.788679411760817e-05, "loss": 0.4832, "step": 13802 }, { "epoch": 0.42401621970325315, "grad_norm": 0.3138941526412964, "learning_rate": 1.788649698723356e-05, "loss": 0.5838, "step": 13803 }, { "epoch": 0.4240469388382023, "grad_norm": 0.35928037762641907, "learning_rate": 1.7886199838439492e-05, "loss": 0.5626, "step": 13804 }, { "epoch": 0.4240776579731515, "grad_norm": 0.33818739652633667, "learning_rate": 1.788590267122665e-05, "loss": 0.6512, "step": 13805 }, { "epoch": 0.4241083771081006, "grad_norm": 0.36346733570098877, "learning_rate": 1.7885605485595736e-05, "loss": 0.5408, "step": 13806 }, { "epoch": 0.4241390962430498, "grad_norm": 0.38370373845100403, "learning_rate": 1.7885308281547438e-05, "loss": 0.6361, "step": 13807 }, { "epoch": 0.42416981537799897, "grad_norm": 0.3630296289920807, "learning_rate": 1.788501105908245e-05, "loss": 0.6639, "step": 13808 }, { "epoch": 0.4242005345129481, "grad_norm": 0.45626309514045715, "learning_rate": 1.788471381820147e-05, "loss": 0.58, "step": 13809 }, { "epoch": 0.42423125364789727, "grad_norm": 0.3506886661052704, "learning_rate": 1.788441655890519e-05, "loss": 0.5829, "step": 13810 }, { "epoch": 0.42426197278284644, "grad_norm": 0.34192296862602234, "learning_rate": 1.7884119281194304e-05, "loss": 0.5756, "step": 13811 }, { "epoch": 0.4242926919177956, "grad_norm": 0.8025415539741516, "learning_rate": 1.7883821985069506e-05, "loss": 0.6448, "step": 13812 }, { "epoch": 0.42432341105274474, "grad_norm": 0.35355284810066223, "learning_rate": 1.788352467053149e-05, "loss": 0.5391, "step": 13813 }, { "epoch": 0.4243541301876939, "grad_norm": 0.4945549964904785, "learning_rate": 1.788322733758095e-05, "loss": 0.6177, "step": 13814 }, { "epoch": 0.4243848493226431, "grad_norm": 0.36303314566612244, "learning_rate": 1.7882929986218587e-05, "loss": 0.5845, "step": 13815 }, { "epoch": 0.4244155684575922, "grad_norm": 0.4005300998687744, "learning_rate": 1.788263261644509e-05, "loss": 0.5893, "step": 13816 }, { "epoch": 0.4244462875925414, "grad_norm": 0.46209731698036194, "learning_rate": 1.7882335228261155e-05, "loss": 0.5431, "step": 13817 }, { "epoch": 0.42447700672749056, "grad_norm": 0.3015206754207611, "learning_rate": 1.7882037821667472e-05, "loss": 0.5566, "step": 13818 }, { "epoch": 0.42450772586243973, "grad_norm": 0.3406355679035187, "learning_rate": 1.788174039666474e-05, "loss": 0.616, "step": 13819 }, { "epoch": 0.42453844499738885, "grad_norm": 0.37871766090393066, "learning_rate": 1.7881442953253653e-05, "loss": 0.6068, "step": 13820 }, { "epoch": 0.42456916413233803, "grad_norm": 0.35765329003334045, "learning_rate": 1.7881145491434906e-05, "loss": 0.6055, "step": 13821 }, { "epoch": 0.4245998832672872, "grad_norm": 0.7031141519546509, "learning_rate": 1.788084801120919e-05, "loss": 0.6109, "step": 13822 }, { "epoch": 0.4246306024022364, "grad_norm": 0.3503846824169159, "learning_rate": 1.7880550512577207e-05, "loss": 0.6006, "step": 13823 }, { "epoch": 0.4246613215371855, "grad_norm": 0.33871448040008545, "learning_rate": 1.788025299553965e-05, "loss": 0.5532, "step": 13824 }, { "epoch": 0.4246920406721347, "grad_norm": 0.3547748327255249, "learning_rate": 1.787995546009721e-05, "loss": 0.5721, "step": 13825 }, { "epoch": 0.42472275980708385, "grad_norm": 0.41906583309173584, "learning_rate": 1.7879657906250582e-05, "loss": 0.5833, "step": 13826 }, { "epoch": 0.42475347894203297, "grad_norm": 0.3963031768798828, "learning_rate": 1.787936033400046e-05, "loss": 0.6165, "step": 13827 }, { "epoch": 0.42478419807698214, "grad_norm": 0.37570592761039734, "learning_rate": 1.787906274334755e-05, "loss": 0.5633, "step": 13828 }, { "epoch": 0.4248149172119313, "grad_norm": 0.4359897971153259, "learning_rate": 1.7878765134292534e-05, "loss": 0.6408, "step": 13829 }, { "epoch": 0.4248456363468805, "grad_norm": 0.4180382788181305, "learning_rate": 1.787846750683611e-05, "loss": 0.5975, "step": 13830 }, { "epoch": 0.4248763554818296, "grad_norm": 0.5160223245620728, "learning_rate": 1.7878169860978975e-05, "loss": 0.5835, "step": 13831 }, { "epoch": 0.4249070746167788, "grad_norm": 0.34342968463897705, "learning_rate": 1.7877872196721827e-05, "loss": 0.5107, "step": 13832 }, { "epoch": 0.42493779375172797, "grad_norm": 0.344064861536026, "learning_rate": 1.7877574514065355e-05, "loss": 0.5219, "step": 13833 }, { "epoch": 0.4249685128866771, "grad_norm": 0.3588239550590515, "learning_rate": 1.787727681301026e-05, "loss": 0.577, "step": 13834 }, { "epoch": 0.42499923202162626, "grad_norm": 0.3423170745372772, "learning_rate": 1.7876979093557233e-05, "loss": 0.5615, "step": 13835 }, { "epoch": 0.42502995115657544, "grad_norm": 0.4581453800201416, "learning_rate": 1.7876681355706972e-05, "loss": 0.5396, "step": 13836 }, { "epoch": 0.4250606702915246, "grad_norm": 0.33521413803100586, "learning_rate": 1.7876383599460173e-05, "loss": 0.5637, "step": 13837 }, { "epoch": 0.42509138942647373, "grad_norm": 0.6350281238555908, "learning_rate": 1.787608582481753e-05, "loss": 0.533, "step": 13838 }, { "epoch": 0.4251221085614229, "grad_norm": 0.35194724798202515, "learning_rate": 1.7875788031779734e-05, "loss": 0.5942, "step": 13839 }, { "epoch": 0.4251528276963721, "grad_norm": 0.37574461102485657, "learning_rate": 1.787549022034749e-05, "loss": 0.6013, "step": 13840 }, { "epoch": 0.42518354683132126, "grad_norm": 0.3320836126804352, "learning_rate": 1.7875192390521487e-05, "loss": 0.5144, "step": 13841 }, { "epoch": 0.4252142659662704, "grad_norm": 0.3225395679473877, "learning_rate": 1.7874894542302423e-05, "loss": 0.5597, "step": 13842 }, { "epoch": 0.42524498510121955, "grad_norm": 0.3692372441291809, "learning_rate": 1.7874596675690995e-05, "loss": 0.5598, "step": 13843 }, { "epoch": 0.42527570423616873, "grad_norm": 0.3461536169052124, "learning_rate": 1.787429879068789e-05, "loss": 0.6576, "step": 13844 }, { "epoch": 0.42530642337111785, "grad_norm": 0.36904603242874146, "learning_rate": 1.7874000887293814e-05, "loss": 0.6715, "step": 13845 }, { "epoch": 0.425337142506067, "grad_norm": 0.3753088116645813, "learning_rate": 1.7873702965509458e-05, "loss": 0.4214, "step": 13846 }, { "epoch": 0.4253678616410162, "grad_norm": 0.33823350071907043, "learning_rate": 1.7873405025335522e-05, "loss": 0.6334, "step": 13847 }, { "epoch": 0.4253985807759654, "grad_norm": 0.3643438220024109, "learning_rate": 1.7873107066772694e-05, "loss": 0.5443, "step": 13848 }, { "epoch": 0.4254292999109145, "grad_norm": 0.38709792494773865, "learning_rate": 1.787280908982168e-05, "loss": 0.6528, "step": 13849 }, { "epoch": 0.42546001904586367, "grad_norm": 0.41475656628608704, "learning_rate": 1.7872511094483166e-05, "loss": 0.6021, "step": 13850 }, { "epoch": 0.42549073818081284, "grad_norm": 0.36581259965896606, "learning_rate": 1.7872213080757853e-05, "loss": 0.6114, "step": 13851 }, { "epoch": 0.42552145731576196, "grad_norm": 0.3382721245288849, "learning_rate": 1.787191504864644e-05, "loss": 0.6362, "step": 13852 }, { "epoch": 0.42555217645071114, "grad_norm": 0.43712174892425537, "learning_rate": 1.7871616998149616e-05, "loss": 0.5322, "step": 13853 }, { "epoch": 0.4255828955856603, "grad_norm": 0.36466825008392334, "learning_rate": 1.787131892926808e-05, "loss": 0.6119, "step": 13854 }, { "epoch": 0.4256136147206095, "grad_norm": 0.36450842022895813, "learning_rate": 1.787102084200253e-05, "loss": 0.6173, "step": 13855 }, { "epoch": 0.4256443338555586, "grad_norm": 0.33186736702919006, "learning_rate": 1.7870722736353664e-05, "loss": 0.5676, "step": 13856 }, { "epoch": 0.4256750529905078, "grad_norm": 0.3334033191204071, "learning_rate": 1.7870424612322174e-05, "loss": 0.5547, "step": 13857 }, { "epoch": 0.42570577212545696, "grad_norm": 0.3444192111492157, "learning_rate": 1.7870126469908754e-05, "loss": 0.6417, "step": 13858 }, { "epoch": 0.4257364912604061, "grad_norm": 0.3518643081188202, "learning_rate": 1.786982830911411e-05, "loss": 0.6009, "step": 13859 }, { "epoch": 0.42576721039535526, "grad_norm": 0.32228201627731323, "learning_rate": 1.7869530129938928e-05, "loss": 0.5404, "step": 13860 }, { "epoch": 0.42579792953030443, "grad_norm": 0.7894916534423828, "learning_rate": 1.786923193238391e-05, "loss": 0.5982, "step": 13861 }, { "epoch": 0.4258286486652536, "grad_norm": 0.4025440216064453, "learning_rate": 1.786893371644975e-05, "loss": 0.5749, "step": 13862 }, { "epoch": 0.4258593678002027, "grad_norm": 0.3339258134365082, "learning_rate": 1.786863548213715e-05, "loss": 0.5354, "step": 13863 }, { "epoch": 0.4258900869351519, "grad_norm": 0.3260866701602936, "learning_rate": 1.78683372294468e-05, "loss": 0.5923, "step": 13864 }, { "epoch": 0.4259208060701011, "grad_norm": 0.34091079235076904, "learning_rate": 1.7868038958379395e-05, "loss": 0.5953, "step": 13865 }, { "epoch": 0.42595152520505025, "grad_norm": 0.46686244010925293, "learning_rate": 1.786774066893564e-05, "loss": 0.6154, "step": 13866 }, { "epoch": 0.42598224433999937, "grad_norm": 0.3384016752243042, "learning_rate": 1.7867442361116227e-05, "loss": 0.5214, "step": 13867 }, { "epoch": 0.42601296347494855, "grad_norm": 0.32860657572746277, "learning_rate": 1.786714403492185e-05, "loss": 0.5949, "step": 13868 }, { "epoch": 0.4260436826098977, "grad_norm": 0.32303205132484436, "learning_rate": 1.786684569035321e-05, "loss": 0.5627, "step": 13869 }, { "epoch": 0.42607440174484684, "grad_norm": 0.33524858951568604, "learning_rate": 1.7866547327411005e-05, "loss": 0.6097, "step": 13870 }, { "epoch": 0.426105120879796, "grad_norm": 0.4093991219997406, "learning_rate": 1.7866248946095927e-05, "loss": 0.504, "step": 13871 }, { "epoch": 0.4261358400147452, "grad_norm": 0.34276220202445984, "learning_rate": 1.7865950546408677e-05, "loss": 0.5617, "step": 13872 }, { "epoch": 0.42616655914969437, "grad_norm": 0.3908942639827728, "learning_rate": 1.7865652128349946e-05, "loss": 0.5855, "step": 13873 }, { "epoch": 0.4261972782846435, "grad_norm": 0.4023450016975403, "learning_rate": 1.786535369192044e-05, "loss": 0.5331, "step": 13874 }, { "epoch": 0.42622799741959266, "grad_norm": 0.36015960574150085, "learning_rate": 1.786505523712085e-05, "loss": 0.5556, "step": 13875 }, { "epoch": 0.42625871655454184, "grad_norm": 0.3130751848220825, "learning_rate": 1.7864756763951868e-05, "loss": 0.5191, "step": 13876 }, { "epoch": 0.42628943568949096, "grad_norm": 0.34965598583221436, "learning_rate": 1.7864458272414205e-05, "loss": 0.6517, "step": 13877 }, { "epoch": 0.42632015482444013, "grad_norm": 0.3311425745487213, "learning_rate": 1.7864159762508548e-05, "loss": 0.5197, "step": 13878 }, { "epoch": 0.4263508739593893, "grad_norm": 0.48655128479003906, "learning_rate": 1.7863861234235598e-05, "loss": 0.6654, "step": 13879 }, { "epoch": 0.4263815930943385, "grad_norm": 0.33261197805404663, "learning_rate": 1.786356268759605e-05, "loss": 0.5531, "step": 13880 }, { "epoch": 0.4264123122292876, "grad_norm": 0.342721551656723, "learning_rate": 1.78632641225906e-05, "loss": 0.6044, "step": 13881 }, { "epoch": 0.4264430313642368, "grad_norm": 0.38254448771476746, "learning_rate": 1.786296553921995e-05, "loss": 0.5286, "step": 13882 }, { "epoch": 0.42647375049918596, "grad_norm": 0.3640389144420624, "learning_rate": 1.7862666937484795e-05, "loss": 0.5128, "step": 13883 }, { "epoch": 0.42650446963413513, "grad_norm": 0.3872791528701782, "learning_rate": 1.786236831738583e-05, "loss": 0.5975, "step": 13884 }, { "epoch": 0.42653518876908425, "grad_norm": 0.3666490614414215, "learning_rate": 1.7862069678923756e-05, "loss": 0.6286, "step": 13885 }, { "epoch": 0.4265659079040334, "grad_norm": 0.32976633310317993, "learning_rate": 1.786177102209927e-05, "loss": 0.5523, "step": 13886 }, { "epoch": 0.4265966270389826, "grad_norm": 0.3494909107685089, "learning_rate": 1.7861472346913065e-05, "loss": 0.5962, "step": 13887 }, { "epoch": 0.4266273461739317, "grad_norm": 0.3578147292137146, "learning_rate": 1.7861173653365847e-05, "loss": 0.5257, "step": 13888 }, { "epoch": 0.4266580653088809, "grad_norm": 0.7444906234741211, "learning_rate": 1.7860874941458306e-05, "loss": 0.5738, "step": 13889 }, { "epoch": 0.42668878444383007, "grad_norm": 0.3363628089427948, "learning_rate": 1.7860576211191146e-05, "loss": 0.5111, "step": 13890 }, { "epoch": 0.42671950357877925, "grad_norm": 0.3665574789047241, "learning_rate": 1.7860277462565058e-05, "loss": 0.6165, "step": 13891 }, { "epoch": 0.42675022271372837, "grad_norm": 0.3505662679672241, "learning_rate": 1.785997869558074e-05, "loss": 0.5262, "step": 13892 }, { "epoch": 0.42678094184867754, "grad_norm": 0.3557291328907013, "learning_rate": 1.7859679910238902e-05, "loss": 0.5836, "step": 13893 }, { "epoch": 0.4268116609836267, "grad_norm": 0.36337944865226746, "learning_rate": 1.7859381106540223e-05, "loss": 0.5963, "step": 13894 }, { "epoch": 0.42684238011857584, "grad_norm": 0.32715341448783875, "learning_rate": 1.7859082284485415e-05, "loss": 0.5439, "step": 13895 }, { "epoch": 0.426873099253525, "grad_norm": 0.3451557755470276, "learning_rate": 1.7858783444075172e-05, "loss": 0.5141, "step": 13896 }, { "epoch": 0.4269038183884742, "grad_norm": 0.35858291387557983, "learning_rate": 1.7858484585310193e-05, "loss": 0.6078, "step": 13897 }, { "epoch": 0.42693453752342336, "grad_norm": 0.3377326428890228, "learning_rate": 1.7858185708191173e-05, "loss": 0.5615, "step": 13898 }, { "epoch": 0.4269652566583725, "grad_norm": 0.3551117777824402, "learning_rate": 1.785788681271881e-05, "loss": 0.5614, "step": 13899 }, { "epoch": 0.42699597579332166, "grad_norm": 0.34750765562057495, "learning_rate": 1.78575878988938e-05, "loss": 0.5422, "step": 13900 }, { "epoch": 0.42702669492827083, "grad_norm": 0.3729635179042816, "learning_rate": 1.7857288966716853e-05, "loss": 0.5549, "step": 13901 }, { "epoch": 0.42705741406321995, "grad_norm": 0.3750109374523163, "learning_rate": 1.7856990016188654e-05, "loss": 0.6019, "step": 13902 }, { "epoch": 0.42708813319816913, "grad_norm": 0.314452201128006, "learning_rate": 1.7856691047309907e-05, "loss": 0.6033, "step": 13903 }, { "epoch": 0.4271188523331183, "grad_norm": 0.33650660514831543, "learning_rate": 1.785639206008131e-05, "loss": 0.6413, "step": 13904 }, { "epoch": 0.4271495714680675, "grad_norm": 0.3828151226043701, "learning_rate": 1.785609305450356e-05, "loss": 0.5187, "step": 13905 }, { "epoch": 0.4271802906030166, "grad_norm": 0.40795302391052246, "learning_rate": 1.7855794030577357e-05, "loss": 0.5258, "step": 13906 }, { "epoch": 0.4272110097379658, "grad_norm": 0.32419028878211975, "learning_rate": 1.7855494988303395e-05, "loss": 0.606, "step": 13907 }, { "epoch": 0.42724172887291495, "grad_norm": 0.6918889284133911, "learning_rate": 1.7855195927682377e-05, "loss": 0.6443, "step": 13908 }, { "epoch": 0.4272724480078641, "grad_norm": 0.3365534245967865, "learning_rate": 1.7854896848715002e-05, "loss": 0.564, "step": 13909 }, { "epoch": 0.42730316714281324, "grad_norm": 0.3904309868812561, "learning_rate": 1.7854597751401964e-05, "loss": 0.6227, "step": 13910 }, { "epoch": 0.4273338862777624, "grad_norm": 0.4170694351196289, "learning_rate": 1.785429863574397e-05, "loss": 0.6137, "step": 13911 }, { "epoch": 0.4273646054127116, "grad_norm": 0.35710933804512024, "learning_rate": 1.785399950174171e-05, "loss": 0.5698, "step": 13912 }, { "epoch": 0.4273953245476607, "grad_norm": 0.3696695566177368, "learning_rate": 1.7853700349395885e-05, "loss": 0.5886, "step": 13913 }, { "epoch": 0.4274260436826099, "grad_norm": 0.3560411334037781, "learning_rate": 1.785340117870719e-05, "loss": 0.5281, "step": 13914 }, { "epoch": 0.42745676281755907, "grad_norm": 0.36582276225090027, "learning_rate": 1.7853101989676334e-05, "loss": 0.6458, "step": 13915 }, { "epoch": 0.42748748195250824, "grad_norm": 0.3169812262058258, "learning_rate": 1.785280278230401e-05, "loss": 0.5259, "step": 13916 }, { "epoch": 0.42751820108745736, "grad_norm": 0.36424052715301514, "learning_rate": 1.7852503556590917e-05, "loss": 0.6169, "step": 13917 }, { "epoch": 0.42754892022240654, "grad_norm": 0.3553009331226349, "learning_rate": 1.7852204312537752e-05, "loss": 0.5784, "step": 13918 }, { "epoch": 0.4275796393573557, "grad_norm": 0.3858945667743683, "learning_rate": 1.7851905050145216e-05, "loss": 0.6102, "step": 13919 }, { "epoch": 0.42761035849230483, "grad_norm": 0.33275553584098816, "learning_rate": 1.7851605769414003e-05, "loss": 0.5952, "step": 13920 }, { "epoch": 0.427641077627254, "grad_norm": 0.3383229374885559, "learning_rate": 1.785130647034482e-05, "loss": 0.5442, "step": 13921 }, { "epoch": 0.4276717967622032, "grad_norm": 0.3414721190929413, "learning_rate": 1.7851007152938363e-05, "loss": 0.5651, "step": 13922 }, { "epoch": 0.42770251589715236, "grad_norm": 0.3289460837841034, "learning_rate": 1.7850707817195328e-05, "loss": 0.6455, "step": 13923 }, { "epoch": 0.4277332350321015, "grad_norm": 0.3715989887714386, "learning_rate": 1.7850408463116422e-05, "loss": 0.5413, "step": 13924 }, { "epoch": 0.42776395416705065, "grad_norm": 0.3330579102039337, "learning_rate": 1.7850109090702333e-05, "loss": 0.5225, "step": 13925 }, { "epoch": 0.42779467330199983, "grad_norm": 0.350081205368042, "learning_rate": 1.784980969995377e-05, "loss": 0.54, "step": 13926 }, { "epoch": 0.42782539243694895, "grad_norm": 0.3623183071613312, "learning_rate": 1.7849510290871426e-05, "loss": 0.7043, "step": 13927 }, { "epoch": 0.4278561115718981, "grad_norm": 0.3611922860145569, "learning_rate": 1.7849210863456004e-05, "loss": 0.5613, "step": 13928 }, { "epoch": 0.4278868307068473, "grad_norm": 0.3615553379058838, "learning_rate": 1.78489114177082e-05, "loss": 0.6552, "step": 13929 }, { "epoch": 0.4279175498417965, "grad_norm": 0.33187663555145264, "learning_rate": 1.7848611953628717e-05, "loss": 0.5787, "step": 13930 }, { "epoch": 0.4279482689767456, "grad_norm": 0.37295347452163696, "learning_rate": 1.784831247121825e-05, "loss": 0.5773, "step": 13931 }, { "epoch": 0.42797898811169477, "grad_norm": 0.37032562494277954, "learning_rate": 1.7848012970477505e-05, "loss": 0.5985, "step": 13932 }, { "epoch": 0.42800970724664394, "grad_norm": 0.3726099729537964, "learning_rate": 1.7847713451407172e-05, "loss": 0.6176, "step": 13933 }, { "epoch": 0.4280404263815931, "grad_norm": 0.32461023330688477, "learning_rate": 1.784741391400796e-05, "loss": 0.5649, "step": 13934 }, { "epoch": 0.42807114551654224, "grad_norm": 0.34349778294563293, "learning_rate": 1.7847114358280567e-05, "loss": 0.6081, "step": 13935 }, { "epoch": 0.4281018646514914, "grad_norm": 0.33179178833961487, "learning_rate": 1.7846814784225686e-05, "loss": 0.5552, "step": 13936 }, { "epoch": 0.4281325837864406, "grad_norm": 0.3529262840747833, "learning_rate": 1.784651519184402e-05, "loss": 0.6003, "step": 13937 }, { "epoch": 0.4281633029213897, "grad_norm": 0.35609281063079834, "learning_rate": 1.7846215581136274e-05, "loss": 0.5081, "step": 13938 }, { "epoch": 0.4281940220563389, "grad_norm": 0.3329264223575592, "learning_rate": 1.7845915952103145e-05, "loss": 0.5957, "step": 13939 }, { "epoch": 0.42822474119128806, "grad_norm": 0.3495156168937683, "learning_rate": 1.784561630474533e-05, "loss": 0.5797, "step": 13940 }, { "epoch": 0.42825546032623724, "grad_norm": 0.40278106927871704, "learning_rate": 1.7845316639063527e-05, "loss": 0.6065, "step": 13941 }, { "epoch": 0.42828617946118636, "grad_norm": 0.3306826055049896, "learning_rate": 1.7845016955058437e-05, "loss": 0.5288, "step": 13942 }, { "epoch": 0.42831689859613553, "grad_norm": 0.3385336697101593, "learning_rate": 1.7844717252730767e-05, "loss": 0.5607, "step": 13943 }, { "epoch": 0.4283476177310847, "grad_norm": 0.36988621950149536, "learning_rate": 1.784441753208121e-05, "loss": 0.6185, "step": 13944 }, { "epoch": 0.4283783368660338, "grad_norm": 0.37286874651908875, "learning_rate": 1.7844117793110468e-05, "loss": 0.65, "step": 13945 }, { "epoch": 0.428409056000983, "grad_norm": 0.3061349093914032, "learning_rate": 1.7843818035819244e-05, "loss": 0.5427, "step": 13946 }, { "epoch": 0.4284397751359322, "grad_norm": 0.3115340769290924, "learning_rate": 1.7843518260208232e-05, "loss": 0.6125, "step": 13947 }, { "epoch": 0.42847049427088135, "grad_norm": 0.33794912695884705, "learning_rate": 1.7843218466278137e-05, "loss": 0.6479, "step": 13948 }, { "epoch": 0.4285012134058305, "grad_norm": 0.35065948963165283, "learning_rate": 1.7842918654029654e-05, "loss": 0.6292, "step": 13949 }, { "epoch": 0.42853193254077965, "grad_norm": 0.383851021528244, "learning_rate": 1.784261882346349e-05, "loss": 0.5666, "step": 13950 }, { "epoch": 0.4285626516757288, "grad_norm": 0.376465380191803, "learning_rate": 1.7842318974580344e-05, "loss": 0.6093, "step": 13951 }, { "epoch": 0.428593370810678, "grad_norm": 0.34331902861595154, "learning_rate": 1.784201910738091e-05, "loss": 0.6231, "step": 13952 }, { "epoch": 0.4286240899456271, "grad_norm": 0.3490898311138153, "learning_rate": 1.7841719221865895e-05, "loss": 0.5111, "step": 13953 }, { "epoch": 0.4286548090805763, "grad_norm": 0.36792081594467163, "learning_rate": 1.7841419318035996e-05, "loss": 0.5455, "step": 13954 }, { "epoch": 0.42868552821552547, "grad_norm": 0.34153249859809875, "learning_rate": 1.7841119395891917e-05, "loss": 0.5317, "step": 13955 }, { "epoch": 0.4287162473504746, "grad_norm": 0.35032087564468384, "learning_rate": 1.784081945543435e-05, "loss": 0.5625, "step": 13956 }, { "epoch": 0.42874696648542376, "grad_norm": 0.3295805752277374, "learning_rate": 1.7840519496664008e-05, "loss": 0.5313, "step": 13957 }, { "epoch": 0.42877768562037294, "grad_norm": 0.4071832299232483, "learning_rate": 1.784021951958158e-05, "loss": 0.6667, "step": 13958 }, { "epoch": 0.4288084047553221, "grad_norm": 0.42279133200645447, "learning_rate": 1.7839919524187776e-05, "loss": 0.6256, "step": 13959 }, { "epoch": 0.42883912389027123, "grad_norm": 0.3264189064502716, "learning_rate": 1.7839619510483294e-05, "loss": 0.4911, "step": 13960 }, { "epoch": 0.4288698430252204, "grad_norm": 0.3696635365486145, "learning_rate": 1.783931947846883e-05, "loss": 0.5474, "step": 13961 }, { "epoch": 0.4289005621601696, "grad_norm": 0.3753335773944855, "learning_rate": 1.7839019428145087e-05, "loss": 0.5745, "step": 13962 }, { "epoch": 0.4289312812951187, "grad_norm": 0.654650866985321, "learning_rate": 1.7838719359512768e-05, "loss": 0.5674, "step": 13963 }, { "epoch": 0.4289620004300679, "grad_norm": 0.3590349555015564, "learning_rate": 1.783841927257257e-05, "loss": 0.6742, "step": 13964 }, { "epoch": 0.42899271956501706, "grad_norm": 0.3972882032394409, "learning_rate": 1.7838119167325202e-05, "loss": 0.6364, "step": 13965 }, { "epoch": 0.42902343869996623, "grad_norm": 0.34892600774765015, "learning_rate": 1.7837819043771355e-05, "loss": 0.5852, "step": 13966 }, { "epoch": 0.42905415783491535, "grad_norm": 0.30990341305732727, "learning_rate": 1.7837518901911733e-05, "loss": 0.5306, "step": 13967 }, { "epoch": 0.4290848769698645, "grad_norm": 0.3437008857727051, "learning_rate": 1.7837218741747042e-05, "loss": 0.6164, "step": 13968 }, { "epoch": 0.4291155961048137, "grad_norm": 0.33658692240715027, "learning_rate": 1.783691856327798e-05, "loss": 0.5304, "step": 13969 }, { "epoch": 0.4291463152397628, "grad_norm": 0.40659305453300476, "learning_rate": 1.783661836650524e-05, "loss": 0.5619, "step": 13970 }, { "epoch": 0.429177034374712, "grad_norm": 0.337517648935318, "learning_rate": 1.7836318151429538e-05, "loss": 0.5341, "step": 13971 }, { "epoch": 0.42920775350966117, "grad_norm": 0.3652278482913971, "learning_rate": 1.7836017918051564e-05, "loss": 0.4903, "step": 13972 }, { "epoch": 0.42923847264461035, "grad_norm": 0.39821022748947144, "learning_rate": 1.7835717666372027e-05, "loss": 0.457, "step": 13973 }, { "epoch": 0.42926919177955947, "grad_norm": 0.3859250247478485, "learning_rate": 1.7835417396391618e-05, "loss": 0.6715, "step": 13974 }, { "epoch": 0.42929991091450864, "grad_norm": 0.44490766525268555, "learning_rate": 1.7835117108111048e-05, "loss": 0.6299, "step": 13975 }, { "epoch": 0.4293306300494578, "grad_norm": 0.3350333273410797, "learning_rate": 1.7834816801531013e-05, "loss": 0.5278, "step": 13976 }, { "epoch": 0.429361349184407, "grad_norm": 0.5774491429328918, "learning_rate": 1.783451647665222e-05, "loss": 0.5214, "step": 13977 }, { "epoch": 0.4293920683193561, "grad_norm": 0.34992581605911255, "learning_rate": 1.7834216133475364e-05, "loss": 0.5964, "step": 13978 }, { "epoch": 0.4294227874543053, "grad_norm": 0.36204203963279724, "learning_rate": 1.783391577200115e-05, "loss": 0.5945, "step": 13979 }, { "epoch": 0.42945350658925446, "grad_norm": 0.3479914367198944, "learning_rate": 1.7833615392230278e-05, "loss": 0.6297, "step": 13980 }, { "epoch": 0.4294842257242036, "grad_norm": 0.3648708462715149, "learning_rate": 1.783331499416345e-05, "loss": 0.542, "step": 13981 }, { "epoch": 0.42951494485915276, "grad_norm": 0.3821769952774048, "learning_rate": 1.7833014577801365e-05, "loss": 0.6162, "step": 13982 }, { "epoch": 0.42954566399410193, "grad_norm": 0.33578070998191833, "learning_rate": 1.783271414314473e-05, "loss": 0.5741, "step": 13983 }, { "epoch": 0.4295763831290511, "grad_norm": 0.37099674344062805, "learning_rate": 1.7832413690194248e-05, "loss": 0.446, "step": 13984 }, { "epoch": 0.42960710226400023, "grad_norm": 0.6847054958343506, "learning_rate": 1.783211321895061e-05, "loss": 0.4707, "step": 13985 }, { "epoch": 0.4296378213989494, "grad_norm": 0.36954277753829956, "learning_rate": 1.783181272941453e-05, "loss": 0.5655, "step": 13986 }, { "epoch": 0.4296685405338986, "grad_norm": 0.35731831192970276, "learning_rate": 1.78315122215867e-05, "loss": 0.628, "step": 13987 }, { "epoch": 0.4296992596688477, "grad_norm": 0.3683944046497345, "learning_rate": 1.783121169546783e-05, "loss": 0.556, "step": 13988 }, { "epoch": 0.4297299788037969, "grad_norm": 0.332460880279541, "learning_rate": 1.7830911151058615e-05, "loss": 0.5305, "step": 13989 }, { "epoch": 0.42976069793874605, "grad_norm": 0.3102289140224457, "learning_rate": 1.7830610588359764e-05, "loss": 0.491, "step": 13990 }, { "epoch": 0.4297914170736952, "grad_norm": 0.3946370482444763, "learning_rate": 1.783031000737197e-05, "loss": 0.5545, "step": 13991 }, { "epoch": 0.42982213620864435, "grad_norm": 0.368053138256073, "learning_rate": 1.7830009408095942e-05, "loss": 0.5299, "step": 13992 }, { "epoch": 0.4298528553435935, "grad_norm": 0.357835054397583, "learning_rate": 1.7829708790532382e-05, "loss": 0.5666, "step": 13993 }, { "epoch": 0.4298835744785427, "grad_norm": 0.342983603477478, "learning_rate": 1.782940815468199e-05, "loss": 0.5049, "step": 13994 }, { "epoch": 0.42991429361349187, "grad_norm": 0.4189978837966919, "learning_rate": 1.7829107500545468e-05, "loss": 0.6186, "step": 13995 }, { "epoch": 0.429945012748441, "grad_norm": 0.3367140591144562, "learning_rate": 1.7828806828123517e-05, "loss": 0.5704, "step": 13996 }, { "epoch": 0.42997573188339017, "grad_norm": 0.35388699173927307, "learning_rate": 1.7828506137416842e-05, "loss": 0.4919, "step": 13997 }, { "epoch": 0.43000645101833934, "grad_norm": 0.3542553782463074, "learning_rate": 1.7828205428426148e-05, "loss": 0.52, "step": 13998 }, { "epoch": 0.43003717015328846, "grad_norm": 0.3440512716770172, "learning_rate": 1.782790470115213e-05, "loss": 0.5698, "step": 13999 }, { "epoch": 0.43006788928823764, "grad_norm": 0.36703255772590637, "learning_rate": 1.7827603955595493e-05, "loss": 0.5762, "step": 14000 }, { "epoch": 0.4300986084231868, "grad_norm": 0.3736492693424225, "learning_rate": 1.782730319175694e-05, "loss": 0.5963, "step": 14001 }, { "epoch": 0.430129327558136, "grad_norm": 0.36219048500061035, "learning_rate": 1.7827002409637174e-05, "loss": 0.6045, "step": 14002 }, { "epoch": 0.4301600466930851, "grad_norm": 0.3576197326183319, "learning_rate": 1.7826701609236897e-05, "loss": 0.4269, "step": 14003 }, { "epoch": 0.4301907658280343, "grad_norm": 0.4546017646789551, "learning_rate": 1.7826400790556815e-05, "loss": 0.5766, "step": 14004 }, { "epoch": 0.43022148496298346, "grad_norm": 0.32744723558425903, "learning_rate": 1.7826099953597623e-05, "loss": 0.5788, "step": 14005 }, { "epoch": 0.4302522040979326, "grad_norm": 0.364206463098526, "learning_rate": 1.7825799098360033e-05, "loss": 0.5645, "step": 14006 }, { "epoch": 0.43028292323288175, "grad_norm": 0.32736027240753174, "learning_rate": 1.782549822484474e-05, "loss": 0.5397, "step": 14007 }, { "epoch": 0.43031364236783093, "grad_norm": 0.3680122196674347, "learning_rate": 1.782519733305245e-05, "loss": 0.5549, "step": 14008 }, { "epoch": 0.4303443615027801, "grad_norm": 0.3656328618526459, "learning_rate": 1.782489642298386e-05, "loss": 0.5142, "step": 14009 }, { "epoch": 0.4303750806377292, "grad_norm": 0.3467338979244232, "learning_rate": 1.7824595494639684e-05, "loss": 0.4972, "step": 14010 }, { "epoch": 0.4304057997726784, "grad_norm": 0.3757704794406891, "learning_rate": 1.782429454802062e-05, "loss": 0.5745, "step": 14011 }, { "epoch": 0.4304365189076276, "grad_norm": 0.32824987173080444, "learning_rate": 1.7823993583127362e-05, "loss": 0.4919, "step": 14012 }, { "epoch": 0.4304672380425767, "grad_norm": 0.4591667056083679, "learning_rate": 1.7823692599960628e-05, "loss": 0.5816, "step": 14013 }, { "epoch": 0.43049795717752587, "grad_norm": 0.33681610226631165, "learning_rate": 1.7823391598521112e-05, "loss": 0.5571, "step": 14014 }, { "epoch": 0.43052867631247504, "grad_norm": 0.3204104006290436, "learning_rate": 1.7823090578809517e-05, "loss": 0.5941, "step": 14015 }, { "epoch": 0.4305593954474242, "grad_norm": 0.35368356108665466, "learning_rate": 1.7822789540826547e-05, "loss": 0.6106, "step": 14016 }, { "epoch": 0.43059011458237334, "grad_norm": 0.3433784544467926, "learning_rate": 1.7822488484572907e-05, "loss": 0.5726, "step": 14017 }, { "epoch": 0.4306208337173225, "grad_norm": 0.3323821723461151, "learning_rate": 1.7822187410049298e-05, "loss": 0.5974, "step": 14018 }, { "epoch": 0.4306515528522717, "grad_norm": 0.3336169719696045, "learning_rate": 1.7821886317256425e-05, "loss": 0.4815, "step": 14019 }, { "epoch": 0.43068227198722087, "grad_norm": 0.34214162826538086, "learning_rate": 1.782158520619499e-05, "loss": 0.55, "step": 14020 }, { "epoch": 0.43071299112217, "grad_norm": 0.3206334710121155, "learning_rate": 1.7821284076865697e-05, "loss": 0.4738, "step": 14021 }, { "epoch": 0.43074371025711916, "grad_norm": 0.36164259910583496, "learning_rate": 1.7820982929269247e-05, "loss": 0.5811, "step": 14022 }, { "epoch": 0.43077442939206834, "grad_norm": 0.37583765387535095, "learning_rate": 1.7820681763406345e-05, "loss": 0.6295, "step": 14023 }, { "epoch": 0.43080514852701746, "grad_norm": 0.3614848554134369, "learning_rate": 1.7820380579277696e-05, "loss": 0.6356, "step": 14024 }, { "epoch": 0.43083586766196663, "grad_norm": 0.418822318315506, "learning_rate": 1.7820079376884e-05, "loss": 0.5472, "step": 14025 }, { "epoch": 0.4308665867969158, "grad_norm": 0.4004127085208893, "learning_rate": 1.7819778156225966e-05, "loss": 0.5911, "step": 14026 }, { "epoch": 0.430897305931865, "grad_norm": 0.31576067209243774, "learning_rate": 1.7819476917304293e-05, "loss": 0.5224, "step": 14027 }, { "epoch": 0.4309280250668141, "grad_norm": 0.3303540349006653, "learning_rate": 1.7819175660119686e-05, "loss": 0.535, "step": 14028 }, { "epoch": 0.4309587442017633, "grad_norm": 0.3555672764778137, "learning_rate": 1.7818874384672846e-05, "loss": 0.5914, "step": 14029 }, { "epoch": 0.43098946333671245, "grad_norm": 0.3350389301776886, "learning_rate": 1.781857309096448e-05, "loss": 0.5667, "step": 14030 }, { "epoch": 0.4310201824716616, "grad_norm": 0.3363313674926758, "learning_rate": 1.781827177899529e-05, "loss": 0.6035, "step": 14031 }, { "epoch": 0.43105090160661075, "grad_norm": 0.352491557598114, "learning_rate": 1.781797044876598e-05, "loss": 0.5453, "step": 14032 }, { "epoch": 0.4310816207415599, "grad_norm": 0.3529573380947113, "learning_rate": 1.7817669100277255e-05, "loss": 0.5096, "step": 14033 }, { "epoch": 0.4311123398765091, "grad_norm": 0.3337060213088989, "learning_rate": 1.7817367733529817e-05, "loss": 0.4639, "step": 14034 }, { "epoch": 0.4311430590114582, "grad_norm": 0.3545793890953064, "learning_rate": 1.7817066348524374e-05, "loss": 0.5838, "step": 14035 }, { "epoch": 0.4311737781464074, "grad_norm": 0.3472176790237427, "learning_rate": 1.781676494526162e-05, "loss": 0.5551, "step": 14036 }, { "epoch": 0.43120449728135657, "grad_norm": 3.9404070377349854, "learning_rate": 1.781646352374227e-05, "loss": 0.6578, "step": 14037 }, { "epoch": 0.43123521641630574, "grad_norm": 0.3509845733642578, "learning_rate": 1.7816162083967023e-05, "loss": 0.5259, "step": 14038 }, { "epoch": 0.43126593555125486, "grad_norm": 0.3957943618297577, "learning_rate": 1.7815860625936583e-05, "loss": 0.5651, "step": 14039 }, { "epoch": 0.43129665468620404, "grad_norm": 0.3421042561531067, "learning_rate": 1.7815559149651654e-05, "loss": 0.5591, "step": 14040 }, { "epoch": 0.4313273738211532, "grad_norm": 0.3734426498413086, "learning_rate": 1.781525765511294e-05, "loss": 0.672, "step": 14041 }, { "epoch": 0.43135809295610233, "grad_norm": 0.3344945013523102, "learning_rate": 1.781495614232115e-05, "loss": 0.5299, "step": 14042 }, { "epoch": 0.4313888120910515, "grad_norm": 0.3785218298435211, "learning_rate": 1.781465461127698e-05, "loss": 0.554, "step": 14043 }, { "epoch": 0.4314195312260007, "grad_norm": 0.3682476580142975, "learning_rate": 1.7814353061981143e-05, "loss": 0.5271, "step": 14044 }, { "epoch": 0.43145025036094986, "grad_norm": 0.3543630838394165, "learning_rate": 1.7814051494434333e-05, "loss": 0.4966, "step": 14045 }, { "epoch": 0.431480969495899, "grad_norm": 0.39185863733291626, "learning_rate": 1.7813749908637263e-05, "loss": 0.648, "step": 14046 }, { "epoch": 0.43151168863084816, "grad_norm": 0.30123528838157654, "learning_rate": 1.7813448304590638e-05, "loss": 0.5836, "step": 14047 }, { "epoch": 0.43154240776579733, "grad_norm": 0.33181896805763245, "learning_rate": 1.7813146682295152e-05, "loss": 0.5904, "step": 14048 }, { "epoch": 0.43157312690074645, "grad_norm": 0.436411052942276, "learning_rate": 1.7812845041751517e-05, "loss": 0.6115, "step": 14049 }, { "epoch": 0.4316038460356956, "grad_norm": 0.34418633580207825, "learning_rate": 1.7812543382960445e-05, "loss": 0.6063, "step": 14050 }, { "epoch": 0.4316345651706448, "grad_norm": 0.36330944299697876, "learning_rate": 1.7812241705922622e-05, "loss": 0.5133, "step": 14051 }, { "epoch": 0.431665284305594, "grad_norm": 0.3952060341835022, "learning_rate": 1.781194001063877e-05, "loss": 0.6001, "step": 14052 }, { "epoch": 0.4316960034405431, "grad_norm": 0.3745608925819397, "learning_rate": 1.781163829710958e-05, "loss": 0.5869, "step": 14053 }, { "epoch": 0.43172672257549227, "grad_norm": 0.3405171036720276, "learning_rate": 1.781133656533577e-05, "loss": 0.5846, "step": 14054 }, { "epoch": 0.43175744171044145, "grad_norm": 0.3398650288581848, "learning_rate": 1.7811034815318034e-05, "loss": 0.4699, "step": 14055 }, { "epoch": 0.43178816084539057, "grad_norm": 0.33874982595443726, "learning_rate": 1.7810733047057084e-05, "loss": 0.5377, "step": 14056 }, { "epoch": 0.43181887998033974, "grad_norm": 0.3904070258140564, "learning_rate": 1.781043126055362e-05, "loss": 0.6461, "step": 14057 }, { "epoch": 0.4318495991152889, "grad_norm": 0.3501852750778198, "learning_rate": 1.781012945580834e-05, "loss": 0.4952, "step": 14058 }, { "epoch": 0.4318803182502381, "grad_norm": 0.3335086405277252, "learning_rate": 1.780982763282197e-05, "loss": 0.5933, "step": 14059 }, { "epoch": 0.4319110373851872, "grad_norm": 0.359163373708725, "learning_rate": 1.7809525791595198e-05, "loss": 0.6267, "step": 14060 }, { "epoch": 0.4319417565201364, "grad_norm": 0.34758326411247253, "learning_rate": 1.780922393212873e-05, "loss": 0.6013, "step": 14061 }, { "epoch": 0.43197247565508556, "grad_norm": 0.34582433104515076, "learning_rate": 1.7808922054423275e-05, "loss": 0.6195, "step": 14062 }, { "epoch": 0.43200319479003474, "grad_norm": 0.37053945660591125, "learning_rate": 1.7808620158479538e-05, "loss": 0.6155, "step": 14063 }, { "epoch": 0.43203391392498386, "grad_norm": 0.32199084758758545, "learning_rate": 1.7808318244298223e-05, "loss": 0.5255, "step": 14064 }, { "epoch": 0.43206463305993303, "grad_norm": 0.34385210275650024, "learning_rate": 1.7808016311880035e-05, "loss": 0.588, "step": 14065 }, { "epoch": 0.4320953521948822, "grad_norm": 0.3364744782447815, "learning_rate": 1.780771436122568e-05, "loss": 0.5194, "step": 14066 }, { "epoch": 0.43212607132983133, "grad_norm": 0.3476884961128235, "learning_rate": 1.7807412392335864e-05, "loss": 0.5379, "step": 14067 }, { "epoch": 0.4321567904647805, "grad_norm": 0.3295537829399109, "learning_rate": 1.780711040521129e-05, "loss": 0.5437, "step": 14068 }, { "epoch": 0.4321875095997297, "grad_norm": 0.3496818542480469, "learning_rate": 1.7806808399852666e-05, "loss": 0.6457, "step": 14069 }, { "epoch": 0.43221822873467886, "grad_norm": 0.35949525237083435, "learning_rate": 1.7806506376260695e-05, "loss": 0.5075, "step": 14070 }, { "epoch": 0.432248947869628, "grad_norm": 0.3994367718696594, "learning_rate": 1.7806204334436082e-05, "loss": 0.5983, "step": 14071 }, { "epoch": 0.43227966700457715, "grad_norm": 0.33497360348701477, "learning_rate": 1.7805902274379536e-05, "loss": 0.528, "step": 14072 }, { "epoch": 0.4323103861395263, "grad_norm": 0.3600756824016571, "learning_rate": 1.7805600196091757e-05, "loss": 0.461, "step": 14073 }, { "epoch": 0.43234110527447545, "grad_norm": 0.3500407040119171, "learning_rate": 1.7805298099573454e-05, "loss": 0.5921, "step": 14074 }, { "epoch": 0.4323718244094246, "grad_norm": 0.3436615765094757, "learning_rate": 1.7804995984825332e-05, "loss": 0.5546, "step": 14075 }, { "epoch": 0.4324025435443738, "grad_norm": 0.3904936611652374, "learning_rate": 1.78046938518481e-05, "loss": 0.5561, "step": 14076 }, { "epoch": 0.43243326267932297, "grad_norm": 0.3782128393650055, "learning_rate": 1.780439170064246e-05, "loss": 0.584, "step": 14077 }, { "epoch": 0.4324639818142721, "grad_norm": 0.3535205125808716, "learning_rate": 1.7804089531209114e-05, "loss": 0.5545, "step": 14078 }, { "epoch": 0.43249470094922127, "grad_norm": 0.42000874876976013, "learning_rate": 1.7803787343548776e-05, "loss": 0.6064, "step": 14079 }, { "epoch": 0.43252542008417044, "grad_norm": 0.34127211570739746, "learning_rate": 1.7803485137662148e-05, "loss": 0.5054, "step": 14080 }, { "epoch": 0.43255613921911956, "grad_norm": 0.38031402230262756, "learning_rate": 1.780318291354993e-05, "loss": 0.5975, "step": 14081 }, { "epoch": 0.43258685835406874, "grad_norm": 0.4616107940673828, "learning_rate": 1.7802880671212835e-05, "loss": 0.5863, "step": 14082 }, { "epoch": 0.4326175774890179, "grad_norm": 0.37628796696662903, "learning_rate": 1.780257841065157e-05, "loss": 0.4712, "step": 14083 }, { "epoch": 0.4326482966239671, "grad_norm": 0.3431304097175598, "learning_rate": 1.780227613186684e-05, "loss": 0.5703, "step": 14084 }, { "epoch": 0.4326790157589162, "grad_norm": 0.3855423629283905, "learning_rate": 1.7801973834859345e-05, "loss": 0.5352, "step": 14085 }, { "epoch": 0.4327097348938654, "grad_norm": 0.35889288783073425, "learning_rate": 1.7801671519629793e-05, "loss": 0.6124, "step": 14086 }, { "epoch": 0.43274045402881456, "grad_norm": 0.32931676506996155, "learning_rate": 1.78013691861789e-05, "loss": 0.5326, "step": 14087 }, { "epoch": 0.43277117316376373, "grad_norm": 0.3353606164455414, "learning_rate": 1.7801066834507358e-05, "loss": 0.5406, "step": 14088 }, { "epoch": 0.43280189229871285, "grad_norm": 0.323148250579834, "learning_rate": 1.7800764464615883e-05, "loss": 0.6221, "step": 14089 }, { "epoch": 0.43283261143366203, "grad_norm": 0.3337446451187134, "learning_rate": 1.7800462076505177e-05, "loss": 0.4909, "step": 14090 }, { "epoch": 0.4328633305686112, "grad_norm": 0.3643101751804352, "learning_rate": 1.7800159670175948e-05, "loss": 0.6364, "step": 14091 }, { "epoch": 0.4328940497035603, "grad_norm": 0.3274995684623718, "learning_rate": 1.7799857245628897e-05, "loss": 0.5392, "step": 14092 }, { "epoch": 0.4329247688385095, "grad_norm": 0.3249182105064392, "learning_rate": 1.7799554802864742e-05, "loss": 0.5483, "step": 14093 }, { "epoch": 0.4329554879734587, "grad_norm": 0.3623753488063812, "learning_rate": 1.7799252341884178e-05, "loss": 0.5827, "step": 14094 }, { "epoch": 0.43298620710840785, "grad_norm": 0.352922260761261, "learning_rate": 1.7798949862687913e-05, "loss": 0.6414, "step": 14095 }, { "epoch": 0.43301692624335697, "grad_norm": 0.3454199433326721, "learning_rate": 1.7798647365276654e-05, "loss": 0.5856, "step": 14096 }, { "epoch": 0.43304764537830615, "grad_norm": 0.3610476851463318, "learning_rate": 1.779834484965112e-05, "loss": 0.6194, "step": 14097 }, { "epoch": 0.4330783645132553, "grad_norm": 0.4210748076438904, "learning_rate": 1.7798042315811995e-05, "loss": 0.596, "step": 14098 }, { "epoch": 0.43310908364820444, "grad_norm": 0.38875463604927063, "learning_rate": 1.7797739763760007e-05, "loss": 0.6318, "step": 14099 }, { "epoch": 0.4331398027831536, "grad_norm": 0.36456891894340515, "learning_rate": 1.779743719349585e-05, "loss": 0.5859, "step": 14100 }, { "epoch": 0.4331705219181028, "grad_norm": 0.39892101287841797, "learning_rate": 1.779713460502023e-05, "loss": 0.5571, "step": 14101 }, { "epoch": 0.43320124105305197, "grad_norm": 0.32761135697364807, "learning_rate": 1.7796831998333862e-05, "loss": 0.5396, "step": 14102 }, { "epoch": 0.4332319601880011, "grad_norm": 0.3717096745967865, "learning_rate": 1.7796529373437447e-05, "loss": 0.5945, "step": 14103 }, { "epoch": 0.43326267932295026, "grad_norm": 0.3694019317626953, "learning_rate": 1.7796226730331694e-05, "loss": 0.5795, "step": 14104 }, { "epoch": 0.43329339845789944, "grad_norm": 0.3513898551464081, "learning_rate": 1.7795924069017308e-05, "loss": 0.5542, "step": 14105 }, { "epoch": 0.4333241175928486, "grad_norm": 0.34485021233558655, "learning_rate": 1.7795621389494996e-05, "loss": 0.5944, "step": 14106 }, { "epoch": 0.43335483672779773, "grad_norm": 0.3408079147338867, "learning_rate": 1.7795318691765467e-05, "loss": 0.628, "step": 14107 }, { "epoch": 0.4333855558627469, "grad_norm": 0.415396511554718, "learning_rate": 1.779501597582943e-05, "loss": 0.5824, "step": 14108 }, { "epoch": 0.4334162749976961, "grad_norm": 0.3730771243572235, "learning_rate": 1.7794713241687583e-05, "loss": 0.5455, "step": 14109 }, { "epoch": 0.4334469941326452, "grad_norm": 0.30827391147613525, "learning_rate": 1.7794410489340645e-05, "loss": 0.5552, "step": 14110 }, { "epoch": 0.4334777132675944, "grad_norm": 0.39067542552948, "learning_rate": 1.7794107718789312e-05, "loss": 0.6153, "step": 14111 }, { "epoch": 0.43350843240254355, "grad_norm": 0.4319620430469513, "learning_rate": 1.7793804930034295e-05, "loss": 0.5631, "step": 14112 }, { "epoch": 0.43353915153749273, "grad_norm": 0.35792991518974304, "learning_rate": 1.7793502123076303e-05, "loss": 0.5773, "step": 14113 }, { "epoch": 0.43356987067244185, "grad_norm": 0.4077709913253784, "learning_rate": 1.7793199297916045e-05, "loss": 0.5724, "step": 14114 }, { "epoch": 0.433600589807391, "grad_norm": 0.3254302144050598, "learning_rate": 1.7792896454554223e-05, "loss": 0.5487, "step": 14115 }, { "epoch": 0.4336313089423402, "grad_norm": 0.3333504796028137, "learning_rate": 1.779259359299155e-05, "loss": 0.5782, "step": 14116 }, { "epoch": 0.4336620280772893, "grad_norm": 0.3515288233757019, "learning_rate": 1.779229071322873e-05, "loss": 0.5173, "step": 14117 }, { "epoch": 0.4336927472122385, "grad_norm": 0.35341939330101013, "learning_rate": 1.779198781526647e-05, "loss": 0.6481, "step": 14118 }, { "epoch": 0.43372346634718767, "grad_norm": 0.3244360089302063, "learning_rate": 1.779168489910548e-05, "loss": 0.5018, "step": 14119 }, { "epoch": 0.43375418548213684, "grad_norm": 0.38168272376060486, "learning_rate": 1.7791381964746466e-05, "loss": 0.5838, "step": 14120 }, { "epoch": 0.43378490461708596, "grad_norm": 0.37901026010513306, "learning_rate": 1.779107901219013e-05, "loss": 0.5706, "step": 14121 }, { "epoch": 0.43381562375203514, "grad_norm": 1.8165149688720703, "learning_rate": 1.779077604143719e-05, "loss": 0.5345, "step": 14122 }, { "epoch": 0.4338463428869843, "grad_norm": 0.41119998693466187, "learning_rate": 1.779047305248835e-05, "loss": 0.6311, "step": 14123 }, { "epoch": 0.43387706202193344, "grad_norm": 0.3407804071903229, "learning_rate": 1.7790170045344312e-05, "loss": 0.5509, "step": 14124 }, { "epoch": 0.4339077811568826, "grad_norm": 0.3709058463573456, "learning_rate": 1.778986702000579e-05, "loss": 0.6314, "step": 14125 }, { "epoch": 0.4339385002918318, "grad_norm": 0.32058826088905334, "learning_rate": 1.7789563976473485e-05, "loss": 0.5289, "step": 14126 }, { "epoch": 0.43396921942678096, "grad_norm": 0.3521212339401245, "learning_rate": 1.7789260914748112e-05, "loss": 0.6386, "step": 14127 }, { "epoch": 0.4339999385617301, "grad_norm": 0.4155852794647217, "learning_rate": 1.7788957834830378e-05, "loss": 0.5375, "step": 14128 }, { "epoch": 0.43403065769667926, "grad_norm": 0.4325881004333496, "learning_rate": 1.7788654736720986e-05, "loss": 0.6083, "step": 14129 }, { "epoch": 0.43406137683162843, "grad_norm": 0.32859671115875244, "learning_rate": 1.7788351620420653e-05, "loss": 0.596, "step": 14130 }, { "epoch": 0.4340920959665776, "grad_norm": 0.32109612226486206, "learning_rate": 1.7788048485930077e-05, "loss": 0.5518, "step": 14131 }, { "epoch": 0.4341228151015267, "grad_norm": 0.3376460373401642, "learning_rate": 1.7787745333249967e-05, "loss": 0.6468, "step": 14132 }, { "epoch": 0.4341535342364759, "grad_norm": 0.36458465456962585, "learning_rate": 1.7787442162381037e-05, "loss": 0.6563, "step": 14133 }, { "epoch": 0.4341842533714251, "grad_norm": 0.361511766910553, "learning_rate": 1.778713897332399e-05, "loss": 0.4513, "step": 14134 }, { "epoch": 0.4342149725063742, "grad_norm": 0.35300761461257935, "learning_rate": 1.7786835766079538e-05, "loss": 0.5864, "step": 14135 }, { "epoch": 0.4342456916413234, "grad_norm": 0.3205384910106659, "learning_rate": 1.7786532540648388e-05, "loss": 0.623, "step": 14136 }, { "epoch": 0.43427641077627255, "grad_norm": 0.35529613494873047, "learning_rate": 1.7786229297031244e-05, "loss": 0.5181, "step": 14137 }, { "epoch": 0.4343071299112217, "grad_norm": 0.35337451100349426, "learning_rate": 1.7785926035228818e-05, "loss": 0.5473, "step": 14138 }, { "epoch": 0.43433784904617084, "grad_norm": 0.37767311930656433, "learning_rate": 1.778562275524182e-05, "loss": 0.5599, "step": 14139 }, { "epoch": 0.43436856818112, "grad_norm": 0.3489845097064972, "learning_rate": 1.7785319457070958e-05, "loss": 0.6391, "step": 14140 }, { "epoch": 0.4343992873160692, "grad_norm": 0.3840947151184082, "learning_rate": 1.7785016140716937e-05, "loss": 0.5665, "step": 14141 }, { "epoch": 0.4344300064510183, "grad_norm": 0.4219222962856293, "learning_rate": 1.7784712806180466e-05, "loss": 0.5258, "step": 14142 }, { "epoch": 0.4344607255859675, "grad_norm": 0.37674856185913086, "learning_rate": 1.7784409453462256e-05, "loss": 0.5358, "step": 14143 }, { "epoch": 0.43449144472091666, "grad_norm": 0.36477482318878174, "learning_rate": 1.7784106082563013e-05, "loss": 0.6023, "step": 14144 }, { "epoch": 0.43452216385586584, "grad_norm": 0.3386416435241699, "learning_rate": 1.7783802693483448e-05, "loss": 0.646, "step": 14145 }, { "epoch": 0.43455288299081496, "grad_norm": 0.4059995114803314, "learning_rate": 1.7783499286224265e-05, "loss": 0.6579, "step": 14146 }, { "epoch": 0.43458360212576413, "grad_norm": 0.32824641466140747, "learning_rate": 1.778319586078618e-05, "loss": 0.565, "step": 14147 }, { "epoch": 0.4346143212607133, "grad_norm": 0.3500811457633972, "learning_rate": 1.7782892417169893e-05, "loss": 0.5379, "step": 14148 }, { "epoch": 0.4346450403956625, "grad_norm": 0.3675687611103058, "learning_rate": 1.7782588955376124e-05, "loss": 0.5982, "step": 14149 }, { "epoch": 0.4346757595306116, "grad_norm": 0.3126089870929718, "learning_rate": 1.778228547540557e-05, "loss": 0.5784, "step": 14150 }, { "epoch": 0.4347064786655608, "grad_norm": 0.3648272156715393, "learning_rate": 1.7781981977258945e-05, "loss": 0.6872, "step": 14151 }, { "epoch": 0.43473719780050996, "grad_norm": 0.34676945209503174, "learning_rate": 1.778167846093696e-05, "loss": 0.6091, "step": 14152 }, { "epoch": 0.4347679169354591, "grad_norm": 0.37069517374038696, "learning_rate": 1.7781374926440315e-05, "loss": 0.5269, "step": 14153 }, { "epoch": 0.43479863607040825, "grad_norm": 0.33704468607902527, "learning_rate": 1.7781071373769733e-05, "loss": 0.6143, "step": 14154 }, { "epoch": 0.4348293552053574, "grad_norm": 0.37161785364151, "learning_rate": 1.7780767802925913e-05, "loss": 0.5632, "step": 14155 }, { "epoch": 0.4348600743403066, "grad_norm": 0.3203677237033844, "learning_rate": 1.7780464213909563e-05, "loss": 0.5582, "step": 14156 }, { "epoch": 0.4348907934752557, "grad_norm": 0.32953810691833496, "learning_rate": 1.77801606067214e-05, "loss": 0.5769, "step": 14157 }, { "epoch": 0.4349215126102049, "grad_norm": 0.3666595220565796, "learning_rate": 1.7779856981362126e-05, "loss": 0.6176, "step": 14158 }, { "epoch": 0.43495223174515407, "grad_norm": 0.3485622704029083, "learning_rate": 1.777955333783245e-05, "loss": 0.6037, "step": 14159 }, { "epoch": 0.4349829508801032, "grad_norm": 0.34679925441741943, "learning_rate": 1.777924967613309e-05, "loss": 0.584, "step": 14160 }, { "epoch": 0.43501367001505237, "grad_norm": 0.36865296959877014, "learning_rate": 1.7778945996264747e-05, "loss": 0.5707, "step": 14161 }, { "epoch": 0.43504438915000154, "grad_norm": 0.34387657046318054, "learning_rate": 1.777864229822813e-05, "loss": 0.5766, "step": 14162 }, { "epoch": 0.4350751082849507, "grad_norm": 0.3513590097427368, "learning_rate": 1.7778338582023954e-05, "loss": 0.5421, "step": 14163 }, { "epoch": 0.43510582741989984, "grad_norm": 0.43151214718818665, "learning_rate": 1.7778034847652922e-05, "loss": 0.6009, "step": 14164 }, { "epoch": 0.435136546554849, "grad_norm": 0.40378648042678833, "learning_rate": 1.7777731095115748e-05, "loss": 0.6232, "step": 14165 }, { "epoch": 0.4351672656897982, "grad_norm": 0.3418087959289551, "learning_rate": 1.7777427324413142e-05, "loss": 0.5737, "step": 14166 }, { "epoch": 0.4351979848247473, "grad_norm": 0.35335269570350647, "learning_rate": 1.7777123535545806e-05, "loss": 0.5589, "step": 14167 }, { "epoch": 0.4352287039596965, "grad_norm": 0.3550027906894684, "learning_rate": 1.7776819728514462e-05, "loss": 0.573, "step": 14168 }, { "epoch": 0.43525942309464566, "grad_norm": 0.33285048604011536, "learning_rate": 1.7776515903319804e-05, "loss": 0.54, "step": 14169 }, { "epoch": 0.43529014222959483, "grad_norm": 0.3957229256629944, "learning_rate": 1.7776212059962557e-05, "loss": 0.6433, "step": 14170 }, { "epoch": 0.43532086136454395, "grad_norm": 0.3049108386039734, "learning_rate": 1.777590819844342e-05, "loss": 0.4789, "step": 14171 }, { "epoch": 0.43535158049949313, "grad_norm": 0.33933356404304504, "learning_rate": 1.7775604318763107e-05, "loss": 0.5636, "step": 14172 }, { "epoch": 0.4353822996344423, "grad_norm": 0.3896392285823822, "learning_rate": 1.7775300420922326e-05, "loss": 0.584, "step": 14173 }, { "epoch": 0.4354130187693915, "grad_norm": 0.49082276225090027, "learning_rate": 1.7774996504921792e-05, "loss": 0.495, "step": 14174 }, { "epoch": 0.4354437379043406, "grad_norm": 0.3327676057815552, "learning_rate": 1.7774692570762207e-05, "loss": 0.5894, "step": 14175 }, { "epoch": 0.4354744570392898, "grad_norm": 0.3611638844013214, "learning_rate": 1.7774388618444285e-05, "loss": 0.6036, "step": 14176 }, { "epoch": 0.43550517617423895, "grad_norm": 0.41548240184783936, "learning_rate": 1.7774084647968735e-05, "loss": 0.5336, "step": 14177 }, { "epoch": 0.43553589530918807, "grad_norm": 0.33642345666885376, "learning_rate": 1.7773780659336268e-05, "loss": 0.5838, "step": 14178 }, { "epoch": 0.43556661444413725, "grad_norm": 0.39118537306785583, "learning_rate": 1.7773476652547592e-05, "loss": 0.5286, "step": 14179 }, { "epoch": 0.4355973335790864, "grad_norm": 0.35646969079971313, "learning_rate": 1.7773172627603422e-05, "loss": 0.5319, "step": 14180 }, { "epoch": 0.4356280527140356, "grad_norm": 0.35202494263648987, "learning_rate": 1.777286858450446e-05, "loss": 0.6109, "step": 14181 }, { "epoch": 0.4356587718489847, "grad_norm": 0.36491599678993225, "learning_rate": 1.7772564523251423e-05, "loss": 0.628, "step": 14182 }, { "epoch": 0.4356894909839339, "grad_norm": 0.36916038393974304, "learning_rate": 1.777226044384502e-05, "loss": 0.5616, "step": 14183 }, { "epoch": 0.43572021011888307, "grad_norm": 0.35997137427330017, "learning_rate": 1.777195634628596e-05, "loss": 0.6086, "step": 14184 }, { "epoch": 0.4357509292538322, "grad_norm": 0.3545610010623932, "learning_rate": 1.777165223057495e-05, "loss": 0.6037, "step": 14185 }, { "epoch": 0.43578164838878136, "grad_norm": 0.8912855386734009, "learning_rate": 1.7771348096712702e-05, "loss": 0.5368, "step": 14186 }, { "epoch": 0.43581236752373054, "grad_norm": 0.3654244840145111, "learning_rate": 1.7771043944699933e-05, "loss": 0.5713, "step": 14187 }, { "epoch": 0.4358430866586797, "grad_norm": 0.33516934514045715, "learning_rate": 1.7770739774537348e-05, "loss": 0.6269, "step": 14188 }, { "epoch": 0.43587380579362883, "grad_norm": 0.3412504494190216, "learning_rate": 1.7770435586225656e-05, "loss": 0.5419, "step": 14189 }, { "epoch": 0.435904524928578, "grad_norm": 0.3467404842376709, "learning_rate": 1.7770131379765568e-05, "loss": 0.5868, "step": 14190 }, { "epoch": 0.4359352440635272, "grad_norm": 0.33678242564201355, "learning_rate": 1.7769827155157795e-05, "loss": 0.5608, "step": 14191 }, { "epoch": 0.43596596319847636, "grad_norm": 0.34189653396606445, "learning_rate": 1.776952291240305e-05, "loss": 0.5627, "step": 14192 }, { "epoch": 0.4359966823334255, "grad_norm": 0.3240010440349579, "learning_rate": 1.776921865150204e-05, "loss": 0.556, "step": 14193 }, { "epoch": 0.43602740146837465, "grad_norm": 0.3834017515182495, "learning_rate": 1.7768914372455478e-05, "loss": 0.6739, "step": 14194 }, { "epoch": 0.43605812060332383, "grad_norm": 0.33283960819244385, "learning_rate": 1.7768610075264073e-05, "loss": 0.559, "step": 14195 }, { "epoch": 0.43608883973827295, "grad_norm": 0.4497429430484772, "learning_rate": 1.776830575992854e-05, "loss": 0.5669, "step": 14196 }, { "epoch": 0.4361195588732221, "grad_norm": 0.3447738587856293, "learning_rate": 1.7768001426449582e-05, "loss": 0.5653, "step": 14197 }, { "epoch": 0.4361502780081713, "grad_norm": 0.34786882996559143, "learning_rate": 1.7767697074827915e-05, "loss": 0.5077, "step": 14198 }, { "epoch": 0.4361809971431205, "grad_norm": 0.33079618215560913, "learning_rate": 1.776739270506425e-05, "loss": 0.6603, "step": 14199 }, { "epoch": 0.4362117162780696, "grad_norm": 0.3275967836380005, "learning_rate": 1.77670883171593e-05, "loss": 0.5526, "step": 14200 }, { "epoch": 0.43624243541301877, "grad_norm": 0.3224526643753052, "learning_rate": 1.776678391111377e-05, "loss": 0.5275, "step": 14201 }, { "epoch": 0.43627315454796795, "grad_norm": 0.4158262610435486, "learning_rate": 1.776647948692837e-05, "loss": 0.5599, "step": 14202 }, { "epoch": 0.43630387368291706, "grad_norm": 0.34095528721809387, "learning_rate": 1.776617504460382e-05, "loss": 0.4922, "step": 14203 }, { "epoch": 0.43633459281786624, "grad_norm": 0.3436516523361206, "learning_rate": 1.7765870584140822e-05, "loss": 0.5973, "step": 14204 }, { "epoch": 0.4363653119528154, "grad_norm": 0.34132513403892517, "learning_rate": 1.7765566105540094e-05, "loss": 0.6132, "step": 14205 }, { "epoch": 0.4363960310877646, "grad_norm": 0.3520374000072479, "learning_rate": 1.7765261608802343e-05, "loss": 0.5781, "step": 14206 }, { "epoch": 0.4364267502227137, "grad_norm": 0.35770121216773987, "learning_rate": 1.7764957093928282e-05, "loss": 0.5763, "step": 14207 }, { "epoch": 0.4364574693576629, "grad_norm": 0.35713455080986023, "learning_rate": 1.776465256091862e-05, "loss": 0.5689, "step": 14208 }, { "epoch": 0.43648818849261206, "grad_norm": 0.37546348571777344, "learning_rate": 1.776434800977407e-05, "loss": 0.5492, "step": 14209 }, { "epoch": 0.4365189076275612, "grad_norm": 0.35421621799468994, "learning_rate": 1.7764043440495343e-05, "loss": 0.6074, "step": 14210 }, { "epoch": 0.43654962676251036, "grad_norm": 0.34438377618789673, "learning_rate": 1.7763738853083152e-05, "loss": 0.4985, "step": 14211 }, { "epoch": 0.43658034589745953, "grad_norm": 0.3532830774784088, "learning_rate": 1.7763434247538206e-05, "loss": 0.4865, "step": 14212 }, { "epoch": 0.4366110650324087, "grad_norm": 0.37198832631111145, "learning_rate": 1.7763129623861215e-05, "loss": 0.5437, "step": 14213 }, { "epoch": 0.4366417841673578, "grad_norm": 0.31544315814971924, "learning_rate": 1.7762824982052894e-05, "loss": 0.5304, "step": 14214 }, { "epoch": 0.436672503302307, "grad_norm": 0.33390846848487854, "learning_rate": 1.776252032211395e-05, "loss": 0.5407, "step": 14215 }, { "epoch": 0.4367032224372562, "grad_norm": 0.38420671224594116, "learning_rate": 1.7762215644045105e-05, "loss": 0.6147, "step": 14216 }, { "epoch": 0.43673394157220535, "grad_norm": 0.3537635803222656, "learning_rate": 1.7761910947847057e-05, "loss": 0.5994, "step": 14217 }, { "epoch": 0.4367646607071545, "grad_norm": 0.38762685656547546, "learning_rate": 1.7761606233520525e-05, "loss": 0.5836, "step": 14218 }, { "epoch": 0.43679537984210365, "grad_norm": 0.3135058581829071, "learning_rate": 1.776130150106622e-05, "loss": 0.5881, "step": 14219 }, { "epoch": 0.4368260989770528, "grad_norm": 0.34566715359687805, "learning_rate": 1.7760996750484856e-05, "loss": 0.5912, "step": 14220 }, { "epoch": 0.43685681811200194, "grad_norm": 1.158207654953003, "learning_rate": 1.7760691981777142e-05, "loss": 0.525, "step": 14221 }, { "epoch": 0.4368875372469511, "grad_norm": 0.36657896637916565, "learning_rate": 1.7760387194943784e-05, "loss": 0.5789, "step": 14222 }, { "epoch": 0.4369182563819003, "grad_norm": 0.3606001138687134, "learning_rate": 1.7760082389985506e-05, "loss": 0.6178, "step": 14223 }, { "epoch": 0.43694897551684947, "grad_norm": 0.32222068309783936, "learning_rate": 1.775977756690301e-05, "loss": 0.5607, "step": 14224 }, { "epoch": 0.4369796946517986, "grad_norm": 0.36108943819999695, "learning_rate": 1.775947272569701e-05, "loss": 0.6129, "step": 14225 }, { "epoch": 0.43701041378674776, "grad_norm": 0.3425448536872864, "learning_rate": 1.7759167866368223e-05, "loss": 0.5115, "step": 14226 }, { "epoch": 0.43704113292169694, "grad_norm": 0.36817580461502075, "learning_rate": 1.7758862988917356e-05, "loss": 0.536, "step": 14227 }, { "epoch": 0.43707185205664606, "grad_norm": 0.3567482829093933, "learning_rate": 1.7758558093345123e-05, "loss": 0.6773, "step": 14228 }, { "epoch": 0.43710257119159523, "grad_norm": 0.48154836893081665, "learning_rate": 1.775825317965224e-05, "loss": 0.5214, "step": 14229 }, { "epoch": 0.4371332903265444, "grad_norm": 0.334797203540802, "learning_rate": 1.7757948247839407e-05, "loss": 0.5026, "step": 14230 }, { "epoch": 0.4371640094614936, "grad_norm": 0.35755568742752075, "learning_rate": 1.7757643297907347e-05, "loss": 0.5759, "step": 14231 }, { "epoch": 0.4371947285964427, "grad_norm": 0.5552546977996826, "learning_rate": 1.7757338329856772e-05, "loss": 0.48, "step": 14232 }, { "epoch": 0.4372254477313919, "grad_norm": 0.3797314465045929, "learning_rate": 1.775703334368839e-05, "loss": 0.5319, "step": 14233 }, { "epoch": 0.43725616686634106, "grad_norm": 0.34438809752464294, "learning_rate": 1.7756728339402913e-05, "loss": 0.6046, "step": 14234 }, { "epoch": 0.4372868860012902, "grad_norm": 0.4181865155696869, "learning_rate": 1.775642331700106e-05, "loss": 0.5711, "step": 14235 }, { "epoch": 0.43731760513623935, "grad_norm": 0.31704065203666687, "learning_rate": 1.775611827648353e-05, "loss": 0.5373, "step": 14236 }, { "epoch": 0.4373483242711885, "grad_norm": 0.3285945653915405, "learning_rate": 1.7755813217851052e-05, "loss": 0.632, "step": 14237 }, { "epoch": 0.4373790434061377, "grad_norm": 0.3679514229297638, "learning_rate": 1.775550814110433e-05, "loss": 0.5437, "step": 14238 }, { "epoch": 0.4374097625410868, "grad_norm": 0.39210349321365356, "learning_rate": 1.7755203046244073e-05, "loss": 0.5316, "step": 14239 }, { "epoch": 0.437440481676036, "grad_norm": 0.5005276203155518, "learning_rate": 1.7754897933271e-05, "loss": 0.6263, "step": 14240 }, { "epoch": 0.4374712008109852, "grad_norm": 0.34972837567329407, "learning_rate": 1.775459280218582e-05, "loss": 0.5653, "step": 14241 }, { "epoch": 0.43750191994593435, "grad_norm": 0.37817996740341187, "learning_rate": 1.775428765298925e-05, "loss": 0.6414, "step": 14242 }, { "epoch": 0.43753263908088347, "grad_norm": 0.35831791162490845, "learning_rate": 1.7753982485681998e-05, "loss": 0.5781, "step": 14243 }, { "epoch": 0.43756335821583264, "grad_norm": 0.3686431646347046, "learning_rate": 1.7753677300264777e-05, "loss": 0.6753, "step": 14244 }, { "epoch": 0.4375940773507818, "grad_norm": 0.3821297585964203, "learning_rate": 1.7753372096738303e-05, "loss": 0.587, "step": 14245 }, { "epoch": 0.43762479648573094, "grad_norm": 0.33246541023254395, "learning_rate": 1.7753066875103285e-05, "loss": 0.6368, "step": 14246 }, { "epoch": 0.4376555156206801, "grad_norm": 0.35600319504737854, "learning_rate": 1.7752761635360444e-05, "loss": 0.688, "step": 14247 }, { "epoch": 0.4376862347556293, "grad_norm": 0.35839492082595825, "learning_rate": 1.7752456377510478e-05, "loss": 0.5808, "step": 14248 }, { "epoch": 0.43771695389057846, "grad_norm": 0.3427979350090027, "learning_rate": 1.7752151101554112e-05, "loss": 0.5082, "step": 14249 }, { "epoch": 0.4377476730255276, "grad_norm": 0.5108407139778137, "learning_rate": 1.7751845807492055e-05, "loss": 0.5359, "step": 14250 }, { "epoch": 0.43777839216047676, "grad_norm": 0.35261499881744385, "learning_rate": 1.7751540495325024e-05, "loss": 0.5538, "step": 14251 }, { "epoch": 0.43780911129542593, "grad_norm": 0.369702011346817, "learning_rate": 1.7751235165053727e-05, "loss": 0.4815, "step": 14252 }, { "epoch": 0.43783983043037505, "grad_norm": 0.36828210949897766, "learning_rate": 1.775092981667888e-05, "loss": 0.5719, "step": 14253 }, { "epoch": 0.43787054956532423, "grad_norm": 0.34233590960502625, "learning_rate": 1.775062445020119e-05, "loss": 0.5407, "step": 14254 }, { "epoch": 0.4379012687002734, "grad_norm": 0.31937241554260254, "learning_rate": 1.775031906562138e-05, "loss": 0.5333, "step": 14255 }, { "epoch": 0.4379319878352226, "grad_norm": 0.45620062947273254, "learning_rate": 1.775001366294016e-05, "loss": 0.5866, "step": 14256 }, { "epoch": 0.4379627069701717, "grad_norm": 0.3589589297771454, "learning_rate": 1.7749708242158238e-05, "loss": 0.595, "step": 14257 }, { "epoch": 0.4379934261051209, "grad_norm": 0.32726746797561646, "learning_rate": 1.7749402803276335e-05, "loss": 0.5217, "step": 14258 }, { "epoch": 0.43802414524007005, "grad_norm": 0.35578712821006775, "learning_rate": 1.7749097346295157e-05, "loss": 0.6111, "step": 14259 }, { "epoch": 0.4380548643750192, "grad_norm": 0.34946709871292114, "learning_rate": 1.7748791871215423e-05, "loss": 0.6107, "step": 14260 }, { "epoch": 0.43808558350996835, "grad_norm": 0.3259239196777344, "learning_rate": 1.774848637803784e-05, "loss": 0.5499, "step": 14261 }, { "epoch": 0.4381163026449175, "grad_norm": 0.44467243552207947, "learning_rate": 1.7748180866763132e-05, "loss": 0.603, "step": 14262 }, { "epoch": 0.4381470217798667, "grad_norm": 0.36500421166419983, "learning_rate": 1.7747875337392005e-05, "loss": 0.5784, "step": 14263 }, { "epoch": 0.4381777409148158, "grad_norm": 0.33670860528945923, "learning_rate": 1.774756978992517e-05, "loss": 0.5465, "step": 14264 }, { "epoch": 0.438208460049765, "grad_norm": 0.3340592086315155, "learning_rate": 1.774726422436335e-05, "loss": 0.5326, "step": 14265 }, { "epoch": 0.43823917918471417, "grad_norm": 0.31837770342826843, "learning_rate": 1.774695864070725e-05, "loss": 0.5278, "step": 14266 }, { "epoch": 0.43826989831966334, "grad_norm": 0.3197746276855469, "learning_rate": 1.7746653038957592e-05, "loss": 0.6102, "step": 14267 }, { "epoch": 0.43830061745461246, "grad_norm": 0.35229936242103577, "learning_rate": 1.774634741911508e-05, "loss": 0.6001, "step": 14268 }, { "epoch": 0.43833133658956164, "grad_norm": 0.30653634667396545, "learning_rate": 1.7746041781180433e-05, "loss": 0.5343, "step": 14269 }, { "epoch": 0.4383620557245108, "grad_norm": 0.3498680293560028, "learning_rate": 1.7745736125154363e-05, "loss": 0.5474, "step": 14270 }, { "epoch": 0.43839277485945993, "grad_norm": 0.3348331153392792, "learning_rate": 1.7745430451037587e-05, "loss": 0.593, "step": 14271 }, { "epoch": 0.4384234939944091, "grad_norm": 0.35992082953453064, "learning_rate": 1.7745124758830816e-05, "loss": 0.5638, "step": 14272 }, { "epoch": 0.4384542131293583, "grad_norm": 0.32295194268226624, "learning_rate": 1.774481904853477e-05, "loss": 0.5006, "step": 14273 }, { "epoch": 0.43848493226430746, "grad_norm": 0.34848153591156006, "learning_rate": 1.7744513320150155e-05, "loss": 0.5628, "step": 14274 }, { "epoch": 0.4385156513992566, "grad_norm": 7.059569358825684, "learning_rate": 1.7744207573677686e-05, "loss": 0.6619, "step": 14275 }, { "epoch": 0.43854637053420575, "grad_norm": 0.3423803448677063, "learning_rate": 1.7743901809118083e-05, "loss": 0.6147, "step": 14276 }, { "epoch": 0.43857708966915493, "grad_norm": 0.34741172194480896, "learning_rate": 1.7743596026472053e-05, "loss": 0.6077, "step": 14277 }, { "epoch": 0.43860780880410405, "grad_norm": 0.3526429831981659, "learning_rate": 1.7743290225740317e-05, "loss": 0.5595, "step": 14278 }, { "epoch": 0.4386385279390532, "grad_norm": 0.427121102809906, "learning_rate": 1.7742984406923585e-05, "loss": 0.5831, "step": 14279 }, { "epoch": 0.4386692470740024, "grad_norm": 0.3596748113632202, "learning_rate": 1.774267857002257e-05, "loss": 0.5694, "step": 14280 }, { "epoch": 0.4386999662089516, "grad_norm": 0.34419772028923035, "learning_rate": 1.7742372715037992e-05, "loss": 0.5466, "step": 14281 }, { "epoch": 0.4387306853439007, "grad_norm": 0.33897197246551514, "learning_rate": 1.774206684197056e-05, "loss": 0.6009, "step": 14282 }, { "epoch": 0.43876140447884987, "grad_norm": 0.30232781171798706, "learning_rate": 1.774176095082099e-05, "loss": 0.5444, "step": 14283 }, { "epoch": 0.43879212361379905, "grad_norm": 0.35238775610923767, "learning_rate": 1.7741455041589995e-05, "loss": 0.6259, "step": 14284 }, { "epoch": 0.4388228427487482, "grad_norm": 0.3369317948818207, "learning_rate": 1.7741149114278296e-05, "loss": 0.5902, "step": 14285 }, { "epoch": 0.43885356188369734, "grad_norm": 0.36147332191467285, "learning_rate": 1.7740843168886598e-05, "loss": 0.5429, "step": 14286 }, { "epoch": 0.4388842810186465, "grad_norm": 0.36988022923469543, "learning_rate": 1.774053720541562e-05, "loss": 0.6789, "step": 14287 }, { "epoch": 0.4389150001535957, "grad_norm": 0.383041650056839, "learning_rate": 1.774023122386608e-05, "loss": 0.6354, "step": 14288 }, { "epoch": 0.4389457192885448, "grad_norm": 0.3515559732913971, "learning_rate": 1.773992522423869e-05, "loss": 0.5958, "step": 14289 }, { "epoch": 0.438976438423494, "grad_norm": 0.3988370895385742, "learning_rate": 1.7739619206534162e-05, "loss": 0.5714, "step": 14290 }, { "epoch": 0.43900715755844316, "grad_norm": 0.3688338100910187, "learning_rate": 1.7739313170753213e-05, "loss": 0.5677, "step": 14291 }, { "epoch": 0.43903787669339234, "grad_norm": 0.33290618658065796, "learning_rate": 1.7739007116896557e-05, "loss": 0.554, "step": 14292 }, { "epoch": 0.43906859582834146, "grad_norm": 0.41673341393470764, "learning_rate": 1.7738701044964906e-05, "loss": 0.5526, "step": 14293 }, { "epoch": 0.43909931496329063, "grad_norm": 0.3694443702697754, "learning_rate": 1.7738394954958985e-05, "loss": 0.5749, "step": 14294 }, { "epoch": 0.4391300340982398, "grad_norm": 0.34277305006980896, "learning_rate": 1.7738088846879498e-05, "loss": 0.6033, "step": 14295 }, { "epoch": 0.4391607532331889, "grad_norm": 0.3422392010688782, "learning_rate": 1.7737782720727167e-05, "loss": 0.5299, "step": 14296 }, { "epoch": 0.4391914723681381, "grad_norm": 0.34064429998397827, "learning_rate": 1.77374765765027e-05, "loss": 0.625, "step": 14297 }, { "epoch": 0.4392221915030873, "grad_norm": 0.34688007831573486, "learning_rate": 1.773717041420682e-05, "loss": 0.5782, "step": 14298 }, { "epoch": 0.43925291063803645, "grad_norm": 0.3510338366031647, "learning_rate": 1.7736864233840238e-05, "loss": 0.6232, "step": 14299 }, { "epoch": 0.4392836297729856, "grad_norm": 0.3520839512348175, "learning_rate": 1.7736558035403667e-05, "loss": 0.612, "step": 14300 }, { "epoch": 0.43931434890793475, "grad_norm": 0.37809768319129944, "learning_rate": 1.7736251818897824e-05, "loss": 0.5998, "step": 14301 }, { "epoch": 0.4393450680428839, "grad_norm": 0.361564964056015, "learning_rate": 1.7735945584323423e-05, "loss": 0.5373, "step": 14302 }, { "epoch": 0.4393757871778331, "grad_norm": 0.34981024265289307, "learning_rate": 1.773563933168119e-05, "loss": 0.5242, "step": 14303 }, { "epoch": 0.4394065063127822, "grad_norm": 0.36069369316101074, "learning_rate": 1.773533306097182e-05, "loss": 0.5692, "step": 14304 }, { "epoch": 0.4394372254477314, "grad_norm": 0.46777164936065674, "learning_rate": 1.7735026772196045e-05, "loss": 0.5395, "step": 14305 }, { "epoch": 0.43946794458268057, "grad_norm": 0.32218876481056213, "learning_rate": 1.7734720465354575e-05, "loss": 0.5374, "step": 14306 }, { "epoch": 0.4394986637176297, "grad_norm": 0.36235660314559937, "learning_rate": 1.773441414044812e-05, "loss": 0.5549, "step": 14307 }, { "epoch": 0.43952938285257886, "grad_norm": 0.39947476983070374, "learning_rate": 1.7734107797477406e-05, "loss": 0.5998, "step": 14308 }, { "epoch": 0.43956010198752804, "grad_norm": 0.35265904664993286, "learning_rate": 1.7733801436443144e-05, "loss": 0.5829, "step": 14309 }, { "epoch": 0.4395908211224772, "grad_norm": 0.3486497700214386, "learning_rate": 1.7733495057346047e-05, "loss": 0.5804, "step": 14310 }, { "epoch": 0.43962154025742634, "grad_norm": 0.33972474932670593, "learning_rate": 1.773318866018683e-05, "loss": 0.5638, "step": 14311 }, { "epoch": 0.4396522593923755, "grad_norm": 0.3620332181453705, "learning_rate": 1.7732882244966213e-05, "loss": 0.5837, "step": 14312 }, { "epoch": 0.4396829785273247, "grad_norm": 0.3649156987667084, "learning_rate": 1.773257581168491e-05, "loss": 0.6011, "step": 14313 }, { "epoch": 0.4397136976622738, "grad_norm": 0.3722979426383972, "learning_rate": 1.7732269360343633e-05, "loss": 0.5675, "step": 14314 }, { "epoch": 0.439744416797223, "grad_norm": 0.3626846969127655, "learning_rate": 1.7731962890943103e-05, "loss": 0.6391, "step": 14315 }, { "epoch": 0.43977513593217216, "grad_norm": 0.3222985863685608, "learning_rate": 1.7731656403484034e-05, "loss": 0.5525, "step": 14316 }, { "epoch": 0.43980585506712133, "grad_norm": 0.38590696454048157, "learning_rate": 1.7731349897967137e-05, "loss": 0.5347, "step": 14317 }, { "epoch": 0.43983657420207045, "grad_norm": 0.39328327775001526, "learning_rate": 1.773104337439314e-05, "loss": 0.5976, "step": 14318 }, { "epoch": 0.4398672933370196, "grad_norm": 0.6778598427772522, "learning_rate": 1.7730736832762747e-05, "loss": 0.5411, "step": 14319 }, { "epoch": 0.4398980124719688, "grad_norm": 0.33849629759788513, "learning_rate": 1.7730430273076677e-05, "loss": 0.5407, "step": 14320 }, { "epoch": 0.4399287316069179, "grad_norm": 0.3427720367908478, "learning_rate": 1.773012369533565e-05, "loss": 0.6532, "step": 14321 }, { "epoch": 0.4399594507418671, "grad_norm": 0.3153238594532013, "learning_rate": 1.772981709954038e-05, "loss": 0.5607, "step": 14322 }, { "epoch": 0.4399901698768163, "grad_norm": 0.32733747363090515, "learning_rate": 1.772951048569158e-05, "loss": 0.5729, "step": 14323 }, { "epoch": 0.44002088901176545, "grad_norm": 0.3308362066745758, "learning_rate": 1.772920385378997e-05, "loss": 0.6273, "step": 14324 }, { "epoch": 0.44005160814671457, "grad_norm": 0.32945436239242554, "learning_rate": 1.772889720383626e-05, "loss": 0.5634, "step": 14325 }, { "epoch": 0.44008232728166374, "grad_norm": 0.3711308538913727, "learning_rate": 1.7728590535831177e-05, "loss": 0.6482, "step": 14326 }, { "epoch": 0.4401130464166129, "grad_norm": 0.33745384216308594, "learning_rate": 1.7728283849775428e-05, "loss": 0.5576, "step": 14327 }, { "epoch": 0.4401437655515621, "grad_norm": 0.335204154253006, "learning_rate": 1.7727977145669732e-05, "loss": 0.6098, "step": 14328 }, { "epoch": 0.4401744846865112, "grad_norm": 0.3913590908050537, "learning_rate": 1.7727670423514808e-05, "loss": 0.6038, "step": 14329 }, { "epoch": 0.4402052038214604, "grad_norm": 0.3532904386520386, "learning_rate": 1.7727363683311366e-05, "loss": 0.5481, "step": 14330 }, { "epoch": 0.44023592295640956, "grad_norm": 0.35673898458480835, "learning_rate": 1.772705692506013e-05, "loss": 0.6292, "step": 14331 }, { "epoch": 0.4402666420913587, "grad_norm": 0.33524149656295776, "learning_rate": 1.7726750148761812e-05, "loss": 0.5767, "step": 14332 }, { "epoch": 0.44029736122630786, "grad_norm": 0.3363966941833496, "learning_rate": 1.772644335441713e-05, "loss": 0.533, "step": 14333 }, { "epoch": 0.44032808036125703, "grad_norm": 0.3357883393764496, "learning_rate": 1.77261365420268e-05, "loss": 0.6125, "step": 14334 }, { "epoch": 0.4403587994962062, "grad_norm": 0.33486440777778625, "learning_rate": 1.7725829711591537e-05, "loss": 0.5189, "step": 14335 }, { "epoch": 0.44038951863115533, "grad_norm": 0.3394237160682678, "learning_rate": 1.772552286311206e-05, "loss": 0.583, "step": 14336 }, { "epoch": 0.4404202377661045, "grad_norm": 0.35431161522865295, "learning_rate": 1.772521599658908e-05, "loss": 0.5666, "step": 14337 }, { "epoch": 0.4404509569010537, "grad_norm": 0.3571116030216217, "learning_rate": 1.7724909112023328e-05, "loss": 0.5714, "step": 14338 }, { "epoch": 0.4404816760360028, "grad_norm": 0.3483213186264038, "learning_rate": 1.7724602209415508e-05, "loss": 0.6332, "step": 14339 }, { "epoch": 0.440512395170952, "grad_norm": 0.31724298000335693, "learning_rate": 1.7724295288766334e-05, "loss": 0.5886, "step": 14340 }, { "epoch": 0.44054311430590115, "grad_norm": 0.3776123821735382, "learning_rate": 1.7723988350076534e-05, "loss": 0.5305, "step": 14341 }, { "epoch": 0.4405738334408503, "grad_norm": 0.42954614758491516, "learning_rate": 1.7723681393346823e-05, "loss": 0.5804, "step": 14342 }, { "epoch": 0.44060455257579945, "grad_norm": 0.37006157636642456, "learning_rate": 1.7723374418577908e-05, "loss": 0.5753, "step": 14343 }, { "epoch": 0.4406352717107486, "grad_norm": 0.4082850217819214, "learning_rate": 1.7723067425770516e-05, "loss": 0.6435, "step": 14344 }, { "epoch": 0.4406659908456978, "grad_norm": 0.3574734926223755, "learning_rate": 1.772276041492536e-05, "loss": 0.593, "step": 14345 }, { "epoch": 0.440696709980647, "grad_norm": 0.3490654528141022, "learning_rate": 1.7722453386043157e-05, "loss": 0.5184, "step": 14346 }, { "epoch": 0.4407274291155961, "grad_norm": 0.5093880891799927, "learning_rate": 1.7722146339124626e-05, "loss": 0.6313, "step": 14347 }, { "epoch": 0.44075814825054527, "grad_norm": 0.3430178165435791, "learning_rate": 1.772183927417048e-05, "loss": 0.5773, "step": 14348 }, { "epoch": 0.44078886738549444, "grad_norm": 0.3318985402584076, "learning_rate": 1.7721532191181446e-05, "loss": 0.5849, "step": 14349 }, { "epoch": 0.44081958652044356, "grad_norm": 0.5787627100944519, "learning_rate": 1.7721225090158228e-05, "loss": 0.6288, "step": 14350 }, { "epoch": 0.44085030565539274, "grad_norm": 0.38112467527389526, "learning_rate": 1.772091797110155e-05, "loss": 0.6097, "step": 14351 }, { "epoch": 0.4408810247903419, "grad_norm": 0.3537488281726837, "learning_rate": 1.772061083401213e-05, "loss": 0.6533, "step": 14352 }, { "epoch": 0.4409117439252911, "grad_norm": 0.44130390882492065, "learning_rate": 1.7720303678890685e-05, "loss": 0.5872, "step": 14353 }, { "epoch": 0.4409424630602402, "grad_norm": 0.37117642164230347, "learning_rate": 1.7719996505737934e-05, "loss": 0.5507, "step": 14354 }, { "epoch": 0.4409731821951894, "grad_norm": 0.34680962562561035, "learning_rate": 1.7719689314554588e-05, "loss": 0.5966, "step": 14355 }, { "epoch": 0.44100390133013856, "grad_norm": 0.40280216932296753, "learning_rate": 1.771938210534137e-05, "loss": 0.5082, "step": 14356 }, { "epoch": 0.4410346204650877, "grad_norm": 0.3178340494632721, "learning_rate": 1.7719074878098995e-05, "loss": 0.5424, "step": 14357 }, { "epoch": 0.44106533960003685, "grad_norm": 0.5011940002441406, "learning_rate": 1.771876763282818e-05, "loss": 0.5588, "step": 14358 }, { "epoch": 0.44109605873498603, "grad_norm": 0.431881844997406, "learning_rate": 1.771846036952965e-05, "loss": 0.564, "step": 14359 }, { "epoch": 0.4411267778699352, "grad_norm": 0.34585338830947876, "learning_rate": 1.7718153088204113e-05, "loss": 0.6633, "step": 14360 }, { "epoch": 0.4411574970048843, "grad_norm": 0.3224860429763794, "learning_rate": 1.7717845788852292e-05, "loss": 0.5625, "step": 14361 }, { "epoch": 0.4411882161398335, "grad_norm": 0.3466618061065674, "learning_rate": 1.77175384714749e-05, "loss": 0.5572, "step": 14362 }, { "epoch": 0.4412189352747827, "grad_norm": 0.30919936299324036, "learning_rate": 1.7717231136072657e-05, "loss": 0.5754, "step": 14363 }, { "epoch": 0.4412496544097318, "grad_norm": 0.3743022084236145, "learning_rate": 1.7716923782646286e-05, "loss": 0.5681, "step": 14364 }, { "epoch": 0.44128037354468097, "grad_norm": 0.4315684735774994, "learning_rate": 1.7716616411196498e-05, "loss": 0.5852, "step": 14365 }, { "epoch": 0.44131109267963015, "grad_norm": 0.39862287044525146, "learning_rate": 1.7716309021724013e-05, "loss": 0.5552, "step": 14366 }, { "epoch": 0.4413418118145793, "grad_norm": 0.3255908787250519, "learning_rate": 1.771600161422955e-05, "loss": 0.5491, "step": 14367 }, { "epoch": 0.44137253094952844, "grad_norm": 0.6014980673789978, "learning_rate": 1.771569418871383e-05, "loss": 0.6642, "step": 14368 }, { "epoch": 0.4414032500844776, "grad_norm": 0.3889240324497223, "learning_rate": 1.7715386745177562e-05, "loss": 0.6616, "step": 14369 }, { "epoch": 0.4414339692194268, "grad_norm": 0.3290168046951294, "learning_rate": 1.7715079283621473e-05, "loss": 0.5199, "step": 14370 }, { "epoch": 0.44146468835437597, "grad_norm": 0.39319276809692383, "learning_rate": 1.7714771804046275e-05, "loss": 0.5042, "step": 14371 }, { "epoch": 0.4414954074893251, "grad_norm": 0.3477868139743805, "learning_rate": 1.771446430645269e-05, "loss": 0.4988, "step": 14372 }, { "epoch": 0.44152612662427426, "grad_norm": 0.3516961932182312, "learning_rate": 1.7714156790841436e-05, "loss": 0.5517, "step": 14373 }, { "epoch": 0.44155684575922344, "grad_norm": 0.4396090805530548, "learning_rate": 1.7713849257213227e-05, "loss": 0.6114, "step": 14374 }, { "epoch": 0.44158756489417256, "grad_norm": 0.4446621239185333, "learning_rate": 1.7713541705568787e-05, "loss": 0.5545, "step": 14375 }, { "epoch": 0.44161828402912173, "grad_norm": 0.3547326624393463, "learning_rate": 1.7713234135908833e-05, "loss": 0.6002, "step": 14376 }, { "epoch": 0.4416490031640709, "grad_norm": 0.3308441638946533, "learning_rate": 1.7712926548234078e-05, "loss": 0.5999, "step": 14377 }, { "epoch": 0.4416797222990201, "grad_norm": 0.3283008635044098, "learning_rate": 1.7712618942545246e-05, "loss": 0.5698, "step": 14378 }, { "epoch": 0.4417104414339692, "grad_norm": 0.34067362546920776, "learning_rate": 1.7712311318843052e-05, "loss": 0.5987, "step": 14379 }, { "epoch": 0.4417411605689184, "grad_norm": 0.3517695963382721, "learning_rate": 1.771200367712822e-05, "loss": 0.6379, "step": 14380 }, { "epoch": 0.44177187970386755, "grad_norm": 0.3319064974784851, "learning_rate": 1.7711696017401463e-05, "loss": 0.6053, "step": 14381 }, { "epoch": 0.4418025988388167, "grad_norm": 0.34226247668266296, "learning_rate": 1.7711388339663503e-05, "loss": 0.5165, "step": 14382 }, { "epoch": 0.44183331797376585, "grad_norm": 0.33781927824020386, "learning_rate": 1.7711080643915056e-05, "loss": 0.5895, "step": 14383 }, { "epoch": 0.441864037108715, "grad_norm": 0.33314603567123413, "learning_rate": 1.7710772930156837e-05, "loss": 0.5603, "step": 14384 }, { "epoch": 0.4418947562436642, "grad_norm": 0.39530614018440247, "learning_rate": 1.7710465198389574e-05, "loss": 0.6072, "step": 14385 }, { "epoch": 0.4419254753786133, "grad_norm": 0.35518336296081543, "learning_rate": 1.7710157448613983e-05, "loss": 0.5753, "step": 14386 }, { "epoch": 0.4419561945135625, "grad_norm": 0.3127215802669525, "learning_rate": 1.7709849680830777e-05, "loss": 0.5615, "step": 14387 }, { "epoch": 0.44198691364851167, "grad_norm": 0.3844248652458191, "learning_rate": 1.770954189504068e-05, "loss": 0.6431, "step": 14388 }, { "epoch": 0.44201763278346085, "grad_norm": 0.312347412109375, "learning_rate": 1.7709234091244408e-05, "loss": 0.5864, "step": 14389 }, { "epoch": 0.44204835191840997, "grad_norm": 0.33770477771759033, "learning_rate": 1.7708926269442685e-05, "loss": 0.5577, "step": 14390 }, { "epoch": 0.44207907105335914, "grad_norm": 0.40297818183898926, "learning_rate": 1.7708618429636223e-05, "loss": 0.6169, "step": 14391 }, { "epoch": 0.4421097901883083, "grad_norm": 0.32883283495903015, "learning_rate": 1.7708310571825748e-05, "loss": 0.5745, "step": 14392 }, { "epoch": 0.44214050932325744, "grad_norm": 0.4138140082359314, "learning_rate": 1.770800269601197e-05, "loss": 0.613, "step": 14393 }, { "epoch": 0.4421712284582066, "grad_norm": 0.33626285195350647, "learning_rate": 1.7707694802195617e-05, "loss": 0.5874, "step": 14394 }, { "epoch": 0.4422019475931558, "grad_norm": 0.3760296106338501, "learning_rate": 1.7707386890377403e-05, "loss": 0.5143, "step": 14395 }, { "epoch": 0.44223266672810496, "grad_norm": 0.34088101983070374, "learning_rate": 1.7707078960558048e-05, "loss": 0.5952, "step": 14396 }, { "epoch": 0.4422633858630541, "grad_norm": 0.3247789740562439, "learning_rate": 1.7706771012738273e-05, "loss": 0.5437, "step": 14397 }, { "epoch": 0.44229410499800326, "grad_norm": 0.3649034798145294, "learning_rate": 1.7706463046918798e-05, "loss": 0.579, "step": 14398 }, { "epoch": 0.44232482413295243, "grad_norm": 0.36861708760261536, "learning_rate": 1.7706155063100336e-05, "loss": 0.5821, "step": 14399 }, { "epoch": 0.44235554326790155, "grad_norm": 0.38890573382377625, "learning_rate": 1.770584706128361e-05, "loss": 0.622, "step": 14400 }, { "epoch": 0.4423862624028507, "grad_norm": 0.3418840169906616, "learning_rate": 1.7705539041469346e-05, "loss": 0.5426, "step": 14401 }, { "epoch": 0.4424169815377999, "grad_norm": 0.38287848234176636, "learning_rate": 1.7705231003658253e-05, "loss": 0.619, "step": 14402 }, { "epoch": 0.4424477006727491, "grad_norm": 0.34152477979660034, "learning_rate": 1.7704922947851058e-05, "loss": 0.5123, "step": 14403 }, { "epoch": 0.4424784198076982, "grad_norm": 0.3509504795074463, "learning_rate": 1.7704614874048475e-05, "loss": 0.5844, "step": 14404 }, { "epoch": 0.4425091389426474, "grad_norm": 0.36065199971199036, "learning_rate": 1.7704306782251226e-05, "loss": 0.5344, "step": 14405 }, { "epoch": 0.44253985807759655, "grad_norm": 0.3357742130756378, "learning_rate": 1.7703998672460033e-05, "loss": 0.5864, "step": 14406 }, { "epoch": 0.44257057721254567, "grad_norm": 0.3335370421409607, "learning_rate": 1.770369054467561e-05, "loss": 0.5715, "step": 14407 }, { "epoch": 0.44260129634749484, "grad_norm": 0.4311310052871704, "learning_rate": 1.7703382398898682e-05, "loss": 0.5512, "step": 14408 }, { "epoch": 0.442632015482444, "grad_norm": 0.3526076078414917, "learning_rate": 1.7703074235129965e-05, "loss": 0.575, "step": 14409 }, { "epoch": 0.4426627346173932, "grad_norm": 0.3444397449493408, "learning_rate": 1.770276605337018e-05, "loss": 0.5156, "step": 14410 }, { "epoch": 0.4426934537523423, "grad_norm": 0.3343066871166229, "learning_rate": 1.7702457853620046e-05, "loss": 0.5155, "step": 14411 }, { "epoch": 0.4427241728872915, "grad_norm": 0.41618362069129944, "learning_rate": 1.7702149635880287e-05, "loss": 0.4983, "step": 14412 }, { "epoch": 0.44275489202224066, "grad_norm": 0.39311859011650085, "learning_rate": 1.7701841400151615e-05, "loss": 0.6126, "step": 14413 }, { "epoch": 0.44278561115718984, "grad_norm": 0.39372608065605164, "learning_rate": 1.7701533146434758e-05, "loss": 0.6333, "step": 14414 }, { "epoch": 0.44281633029213896, "grad_norm": 0.35354864597320557, "learning_rate": 1.770122487473043e-05, "loss": 0.5943, "step": 14415 }, { "epoch": 0.44284704942708814, "grad_norm": 0.3821197748184204, "learning_rate": 1.7700916585039357e-05, "loss": 0.6857, "step": 14416 }, { "epoch": 0.4428777685620373, "grad_norm": 0.328844279050827, "learning_rate": 1.7700608277362253e-05, "loss": 0.5147, "step": 14417 }, { "epoch": 0.44290848769698643, "grad_norm": 0.3554466664791107, "learning_rate": 1.770029995169984e-05, "loss": 0.4796, "step": 14418 }, { "epoch": 0.4429392068319356, "grad_norm": 0.35396063327789307, "learning_rate": 1.7699991608052842e-05, "loss": 0.5542, "step": 14419 }, { "epoch": 0.4429699259668848, "grad_norm": 0.3436600863933563, "learning_rate": 1.7699683246421972e-05, "loss": 0.6363, "step": 14420 }, { "epoch": 0.44300064510183396, "grad_norm": 0.36035022139549255, "learning_rate": 1.7699374866807955e-05, "loss": 0.5646, "step": 14421 }, { "epoch": 0.4430313642367831, "grad_norm": 0.33530282974243164, "learning_rate": 1.769906646921151e-05, "loss": 0.619, "step": 14422 }, { "epoch": 0.44306208337173225, "grad_norm": 0.3621293306350708, "learning_rate": 1.769875805363336e-05, "loss": 0.5642, "step": 14423 }, { "epoch": 0.4430928025066814, "grad_norm": 0.34471067786216736, "learning_rate": 1.7698449620074224e-05, "loss": 0.5634, "step": 14424 }, { "epoch": 0.44312352164163055, "grad_norm": 0.4045447111129761, "learning_rate": 1.7698141168534818e-05, "loss": 0.6635, "step": 14425 }, { "epoch": 0.4431542407765797, "grad_norm": 0.355633944272995, "learning_rate": 1.7697832699015864e-05, "loss": 0.5933, "step": 14426 }, { "epoch": 0.4431849599115289, "grad_norm": 0.33961841464042664, "learning_rate": 1.7697524211518086e-05, "loss": 0.5367, "step": 14427 }, { "epoch": 0.4432156790464781, "grad_norm": 0.357686311006546, "learning_rate": 1.7697215706042204e-05, "loss": 0.6368, "step": 14428 }, { "epoch": 0.4432463981814272, "grad_norm": 0.34730127453804016, "learning_rate": 1.7696907182588935e-05, "loss": 0.604, "step": 14429 }, { "epoch": 0.44327711731637637, "grad_norm": 0.35805749893188477, "learning_rate": 1.7696598641159005e-05, "loss": 0.636, "step": 14430 }, { "epoch": 0.44330783645132554, "grad_norm": 0.3320354223251343, "learning_rate": 1.7696290081753128e-05, "loss": 0.5497, "step": 14431 }, { "epoch": 0.44333855558627466, "grad_norm": 0.37473922967910767, "learning_rate": 1.769598150437203e-05, "loss": 0.6409, "step": 14432 }, { "epoch": 0.44336927472122384, "grad_norm": 0.32529258728027344, "learning_rate": 1.7695672909016427e-05, "loss": 0.5962, "step": 14433 }, { "epoch": 0.443399993856173, "grad_norm": 0.34258878231048584, "learning_rate": 1.7695364295687047e-05, "loss": 0.5617, "step": 14434 }, { "epoch": 0.4434307129911222, "grad_norm": 0.3777921497821808, "learning_rate": 1.7695055664384603e-05, "loss": 0.6416, "step": 14435 }, { "epoch": 0.4434614321260713, "grad_norm": 0.3296549320220947, "learning_rate": 1.769474701510982e-05, "loss": 0.4582, "step": 14436 }, { "epoch": 0.4434921512610205, "grad_norm": 0.3402400016784668, "learning_rate": 1.769443834786342e-05, "loss": 0.5641, "step": 14437 }, { "epoch": 0.44352287039596966, "grad_norm": 0.36236947774887085, "learning_rate": 1.769412966264612e-05, "loss": 0.5869, "step": 14438 }, { "epoch": 0.44355358953091883, "grad_norm": 0.3429669439792633, "learning_rate": 1.7693820959458642e-05, "loss": 0.6266, "step": 14439 }, { "epoch": 0.44358430866586795, "grad_norm": 0.3691180646419525, "learning_rate": 1.7693512238301707e-05, "loss": 0.522, "step": 14440 }, { "epoch": 0.44361502780081713, "grad_norm": 0.43263405561447144, "learning_rate": 1.769320349917604e-05, "loss": 0.6892, "step": 14441 }, { "epoch": 0.4436457469357663, "grad_norm": 0.577573299407959, "learning_rate": 1.7692894742082355e-05, "loss": 0.5721, "step": 14442 }, { "epoch": 0.4436764660707154, "grad_norm": 0.5038996338844299, "learning_rate": 1.7692585967021378e-05, "loss": 0.6109, "step": 14443 }, { "epoch": 0.4437071852056646, "grad_norm": 0.32382866740226746, "learning_rate": 1.769227717399383e-05, "loss": 0.4721, "step": 14444 }, { "epoch": 0.4437379043406138, "grad_norm": 0.3490365445613861, "learning_rate": 1.769196836300043e-05, "loss": 0.6404, "step": 14445 }, { "epoch": 0.44376862347556295, "grad_norm": 0.32229188084602356, "learning_rate": 1.7691659534041906e-05, "loss": 0.5726, "step": 14446 }, { "epoch": 0.44379934261051207, "grad_norm": 0.3198053538799286, "learning_rate": 1.7691350687118973e-05, "loss": 0.5774, "step": 14447 }, { "epoch": 0.44383006174546125, "grad_norm": 0.36743035912513733, "learning_rate": 1.769104182223235e-05, "loss": 0.5829, "step": 14448 }, { "epoch": 0.4438607808804104, "grad_norm": 0.3117993474006653, "learning_rate": 1.769073293938276e-05, "loss": 0.4982, "step": 14449 }, { "epoch": 0.44389150001535954, "grad_norm": 0.3494184911251068, "learning_rate": 1.7690424038570928e-05, "loss": 0.6244, "step": 14450 }, { "epoch": 0.4439222191503087, "grad_norm": 0.3165338337421417, "learning_rate": 1.7690115119797574e-05, "loss": 0.5485, "step": 14451 }, { "epoch": 0.4439529382852579, "grad_norm": 0.3749319911003113, "learning_rate": 1.7689806183063418e-05, "loss": 0.5326, "step": 14452 }, { "epoch": 0.44398365742020707, "grad_norm": 0.3382202684879303, "learning_rate": 1.7689497228369186e-05, "loss": 0.5421, "step": 14453 }, { "epoch": 0.4440143765551562, "grad_norm": 0.463530957698822, "learning_rate": 1.7689188255715595e-05, "loss": 0.643, "step": 14454 }, { "epoch": 0.44404509569010536, "grad_norm": 0.38087329268455505, "learning_rate": 1.7688879265103364e-05, "loss": 0.5614, "step": 14455 }, { "epoch": 0.44407581482505454, "grad_norm": 0.3366742432117462, "learning_rate": 1.7688570256533225e-05, "loss": 0.5622, "step": 14456 }, { "epoch": 0.4441065339600037, "grad_norm": 0.39341115951538086, "learning_rate": 1.7688261230005886e-05, "loss": 0.658, "step": 14457 }, { "epoch": 0.44413725309495283, "grad_norm": 0.4516869783401489, "learning_rate": 1.7687952185522082e-05, "loss": 0.5871, "step": 14458 }, { "epoch": 0.444167972229902, "grad_norm": 1.0472359657287598, "learning_rate": 1.7687643123082524e-05, "loss": 0.4992, "step": 14459 }, { "epoch": 0.4441986913648512, "grad_norm": 0.35419365763664246, "learning_rate": 1.7687334042687942e-05, "loss": 0.5208, "step": 14460 }, { "epoch": 0.4442294104998003, "grad_norm": 0.3995177745819092, "learning_rate": 1.7687024944339053e-05, "loss": 0.6178, "step": 14461 }, { "epoch": 0.4442601296347495, "grad_norm": 0.3688625991344452, "learning_rate": 1.768671582803658e-05, "loss": 0.4923, "step": 14462 }, { "epoch": 0.44429084876969865, "grad_norm": 0.33791232109069824, "learning_rate": 1.7686406693781245e-05, "loss": 0.5565, "step": 14463 }, { "epoch": 0.44432156790464783, "grad_norm": 0.37763020396232605, "learning_rate": 1.7686097541573773e-05, "loss": 0.4946, "step": 14464 }, { "epoch": 0.44435228703959695, "grad_norm": 0.33691805601119995, "learning_rate": 1.768578837141488e-05, "loss": 0.5827, "step": 14465 }, { "epoch": 0.4443830061745461, "grad_norm": 0.3628377914428711, "learning_rate": 1.7685479183305293e-05, "loss": 0.5699, "step": 14466 }, { "epoch": 0.4444137253094953, "grad_norm": 0.3813329041004181, "learning_rate": 1.7685169977245736e-05, "loss": 0.6511, "step": 14467 }, { "epoch": 0.4444444444444444, "grad_norm": 0.37670478224754333, "learning_rate": 1.7684860753236925e-05, "loss": 0.5641, "step": 14468 }, { "epoch": 0.4444751635793936, "grad_norm": 0.3357093930244446, "learning_rate": 1.7684551511279583e-05, "loss": 0.5507, "step": 14469 }, { "epoch": 0.44450588271434277, "grad_norm": 0.4183429181575775, "learning_rate": 1.7684242251374436e-05, "loss": 0.5735, "step": 14470 }, { "epoch": 0.44453660184929195, "grad_norm": 0.40480169653892517, "learning_rate": 1.7683932973522203e-05, "loss": 0.5874, "step": 14471 }, { "epoch": 0.44456732098424107, "grad_norm": 0.4052973985671997, "learning_rate": 1.7683623677723608e-05, "loss": 0.5392, "step": 14472 }, { "epoch": 0.44459804011919024, "grad_norm": 0.335418701171875, "learning_rate": 1.7683314363979377e-05, "loss": 0.617, "step": 14473 }, { "epoch": 0.4446287592541394, "grad_norm": 0.3877299129962921, "learning_rate": 1.7683005032290224e-05, "loss": 0.5653, "step": 14474 }, { "epoch": 0.44465947838908854, "grad_norm": 0.5879667401313782, "learning_rate": 1.7682695682656882e-05, "loss": 0.4518, "step": 14475 }, { "epoch": 0.4446901975240377, "grad_norm": 0.35718315839767456, "learning_rate": 1.7682386315080065e-05, "loss": 0.5177, "step": 14476 }, { "epoch": 0.4447209166589869, "grad_norm": 0.3779819905757904, "learning_rate": 1.7682076929560498e-05, "loss": 0.5974, "step": 14477 }, { "epoch": 0.44475163579393606, "grad_norm": 0.3239845633506775, "learning_rate": 1.76817675260989e-05, "loss": 0.5209, "step": 14478 }, { "epoch": 0.4447823549288852, "grad_norm": 0.35350364446640015, "learning_rate": 1.7681458104696e-05, "loss": 0.5594, "step": 14479 }, { "epoch": 0.44481307406383436, "grad_norm": 0.3465787172317505, "learning_rate": 1.7681148665352522e-05, "loss": 0.5663, "step": 14480 }, { "epoch": 0.44484379319878353, "grad_norm": 0.3453620970249176, "learning_rate": 1.768083920806918e-05, "loss": 0.5587, "step": 14481 }, { "epoch": 0.4448745123337327, "grad_norm": 0.4374029040336609, "learning_rate": 1.7680529732846703e-05, "loss": 0.6008, "step": 14482 }, { "epoch": 0.4449052314686818, "grad_norm": 0.35281476378440857, "learning_rate": 1.7680220239685813e-05, "loss": 0.5821, "step": 14483 }, { "epoch": 0.444935950603631, "grad_norm": 0.3403327465057373, "learning_rate": 1.7679910728587232e-05, "loss": 0.5764, "step": 14484 }, { "epoch": 0.4449666697385802, "grad_norm": 0.3224913775920868, "learning_rate": 1.7679601199551684e-05, "loss": 0.6342, "step": 14485 }, { "epoch": 0.4449973888735293, "grad_norm": 0.36708948016166687, "learning_rate": 1.767929165257989e-05, "loss": 0.6848, "step": 14486 }, { "epoch": 0.4450281080084785, "grad_norm": 0.35330089926719666, "learning_rate": 1.7678982087672574e-05, "loss": 0.5947, "step": 14487 }, { "epoch": 0.44505882714342765, "grad_norm": 0.3859032094478607, "learning_rate": 1.7678672504830458e-05, "loss": 0.5774, "step": 14488 }, { "epoch": 0.4450895462783768, "grad_norm": 0.3687684237957001, "learning_rate": 1.767836290405427e-05, "loss": 0.6309, "step": 14489 }, { "epoch": 0.44512026541332594, "grad_norm": 0.37691372632980347, "learning_rate": 1.7678053285344726e-05, "loss": 0.504, "step": 14490 }, { "epoch": 0.4451509845482751, "grad_norm": 0.3886066675186157, "learning_rate": 1.7677743648702554e-05, "loss": 0.5779, "step": 14491 }, { "epoch": 0.4451817036832243, "grad_norm": 0.32917094230651855, "learning_rate": 1.7677433994128474e-05, "loss": 0.5488, "step": 14492 }, { "epoch": 0.4452124228181734, "grad_norm": 0.3439317047595978, "learning_rate": 1.767712432162321e-05, "loss": 0.5755, "step": 14493 }, { "epoch": 0.4452431419531226, "grad_norm": 0.31596508622169495, "learning_rate": 1.7676814631187487e-05, "loss": 0.5506, "step": 14494 }, { "epoch": 0.44527386108807177, "grad_norm": 0.33660900592803955, "learning_rate": 1.7676504922822027e-05, "loss": 0.5834, "step": 14495 }, { "epoch": 0.44530458022302094, "grad_norm": 0.36636632680892944, "learning_rate": 1.7676195196527556e-05, "loss": 0.56, "step": 14496 }, { "epoch": 0.44533529935797006, "grad_norm": 0.33357831835746765, "learning_rate": 1.767588545230479e-05, "loss": 0.6151, "step": 14497 }, { "epoch": 0.44536601849291924, "grad_norm": 0.325295627117157, "learning_rate": 1.7675575690154462e-05, "loss": 0.569, "step": 14498 }, { "epoch": 0.4453967376278684, "grad_norm": 0.3227526545524597, "learning_rate": 1.767526591007729e-05, "loss": 0.5686, "step": 14499 }, { "epoch": 0.4454274567628176, "grad_norm": 0.4261064827442169, "learning_rate": 1.7674956112073994e-05, "loss": 0.5913, "step": 14500 }, { "epoch": 0.4454581758977667, "grad_norm": 0.3343321979045868, "learning_rate": 1.7674646296145308e-05, "loss": 0.5884, "step": 14501 }, { "epoch": 0.4454888950327159, "grad_norm": 0.3269988000392914, "learning_rate": 1.7674336462291948e-05, "loss": 0.5771, "step": 14502 }, { "epoch": 0.44551961416766506, "grad_norm": 0.37660086154937744, "learning_rate": 1.7674026610514638e-05, "loss": 0.5609, "step": 14503 }, { "epoch": 0.4455503333026142, "grad_norm": 0.35835233330726624, "learning_rate": 1.76737167408141e-05, "loss": 0.5795, "step": 14504 }, { "epoch": 0.44558105243756335, "grad_norm": 0.33055150508880615, "learning_rate": 1.767340685319107e-05, "loss": 0.5502, "step": 14505 }, { "epoch": 0.4456117715725125, "grad_norm": 0.3358340263366699, "learning_rate": 1.7673096947646253e-05, "loss": 0.591, "step": 14506 }, { "epoch": 0.4456424907074617, "grad_norm": 0.3528597950935364, "learning_rate": 1.7672787024180387e-05, "loss": 0.5686, "step": 14507 }, { "epoch": 0.4456732098424108, "grad_norm": 0.33811014890670776, "learning_rate": 1.767247708279419e-05, "loss": 0.5873, "step": 14508 }, { "epoch": 0.44570392897736, "grad_norm": 0.36651870608329773, "learning_rate": 1.7672167123488388e-05, "loss": 0.5495, "step": 14509 }, { "epoch": 0.4457346481123092, "grad_norm": 0.39970284700393677, "learning_rate": 1.7671857146263703e-05, "loss": 0.5405, "step": 14510 }, { "epoch": 0.4457653672472583, "grad_norm": 0.3306867778301239, "learning_rate": 1.767154715112086e-05, "loss": 0.6268, "step": 14511 }, { "epoch": 0.44579608638220747, "grad_norm": 0.2905316948890686, "learning_rate": 1.767123713806058e-05, "loss": 0.485, "step": 14512 }, { "epoch": 0.44582680551715664, "grad_norm": 0.34061872959136963, "learning_rate": 1.7670927107083594e-05, "loss": 0.6034, "step": 14513 }, { "epoch": 0.4458575246521058, "grad_norm": 0.3769204616546631, "learning_rate": 1.7670617058190614e-05, "loss": 0.5478, "step": 14514 }, { "epoch": 0.44588824378705494, "grad_norm": 0.3645727038383484, "learning_rate": 1.7670306991382383e-05, "loss": 0.4971, "step": 14515 }, { "epoch": 0.4459189629220041, "grad_norm": 0.3683595657348633, "learning_rate": 1.766999690665961e-05, "loss": 0.5963, "step": 14516 }, { "epoch": 0.4459496820569533, "grad_norm": 0.3500445783138275, "learning_rate": 1.766968680402302e-05, "loss": 0.6167, "step": 14517 }, { "epoch": 0.4459804011919024, "grad_norm": 0.3325667083263397, "learning_rate": 1.7669376683473347e-05, "loss": 0.6296, "step": 14518 }, { "epoch": 0.4460111203268516, "grad_norm": 0.3168543875217438, "learning_rate": 1.7669066545011305e-05, "loss": 0.5949, "step": 14519 }, { "epoch": 0.44604183946180076, "grad_norm": 0.36013907194137573, "learning_rate": 1.766875638863762e-05, "loss": 0.5768, "step": 14520 }, { "epoch": 0.44607255859674994, "grad_norm": 0.32453230023384094, "learning_rate": 1.7668446214353025e-05, "loss": 0.6236, "step": 14521 }, { "epoch": 0.44610327773169905, "grad_norm": 0.3510715365409851, "learning_rate": 1.7668136022158236e-05, "loss": 0.5393, "step": 14522 }, { "epoch": 0.44613399686664823, "grad_norm": 0.33518102765083313, "learning_rate": 1.766782581205398e-05, "loss": 0.5664, "step": 14523 }, { "epoch": 0.4461647160015974, "grad_norm": 0.3735114336013794, "learning_rate": 1.7667515584040978e-05, "loss": 0.5912, "step": 14524 }, { "epoch": 0.4461954351365466, "grad_norm": 0.3990814983844757, "learning_rate": 1.7667205338119963e-05, "loss": 0.5338, "step": 14525 }, { "epoch": 0.4462261542714957, "grad_norm": 0.3400079011917114, "learning_rate": 1.7666895074291652e-05, "loss": 0.6007, "step": 14526 }, { "epoch": 0.4462568734064449, "grad_norm": 0.3165518343448639, "learning_rate": 1.7666584792556773e-05, "loss": 0.4777, "step": 14527 }, { "epoch": 0.44628759254139405, "grad_norm": 0.3399011492729187, "learning_rate": 1.7666274492916047e-05, "loss": 0.5186, "step": 14528 }, { "epoch": 0.44631831167634317, "grad_norm": 0.3606298565864563, "learning_rate": 1.7665964175370204e-05, "loss": 0.5621, "step": 14529 }, { "epoch": 0.44634903081129235, "grad_norm": 0.32291704416275024, "learning_rate": 1.766565383991997e-05, "loss": 0.5884, "step": 14530 }, { "epoch": 0.4463797499462415, "grad_norm": 0.37435853481292725, "learning_rate": 1.766534348656606e-05, "loss": 0.6658, "step": 14531 }, { "epoch": 0.4464104690811907, "grad_norm": 0.32451504468917847, "learning_rate": 1.7665033115309205e-05, "loss": 0.5097, "step": 14532 }, { "epoch": 0.4464411882161398, "grad_norm": 0.34154096245765686, "learning_rate": 1.766472272615013e-05, "loss": 0.5272, "step": 14533 }, { "epoch": 0.446471907351089, "grad_norm": 0.3158506155014038, "learning_rate": 1.7664412319089565e-05, "loss": 0.5724, "step": 14534 }, { "epoch": 0.44650262648603817, "grad_norm": 0.3471941649913788, "learning_rate": 1.7664101894128225e-05, "loss": 0.5706, "step": 14535 }, { "epoch": 0.4465333456209873, "grad_norm": 0.3755855858325958, "learning_rate": 1.7663791451266842e-05, "loss": 0.6149, "step": 14536 }, { "epoch": 0.44656406475593646, "grad_norm": 0.4016554653644562, "learning_rate": 1.766348099050614e-05, "loss": 0.6122, "step": 14537 }, { "epoch": 0.44659478389088564, "grad_norm": 0.40238261222839355, "learning_rate": 1.766317051184684e-05, "loss": 0.5851, "step": 14538 }, { "epoch": 0.4466255030258348, "grad_norm": 0.3415824770927429, "learning_rate": 1.766286001528967e-05, "loss": 0.4549, "step": 14539 }, { "epoch": 0.44665622216078393, "grad_norm": 0.3512069880962372, "learning_rate": 1.7662549500835355e-05, "loss": 0.5321, "step": 14540 }, { "epoch": 0.4466869412957331, "grad_norm": 0.3830816447734833, "learning_rate": 1.7662238968484623e-05, "loss": 0.6255, "step": 14541 }, { "epoch": 0.4467176604306823, "grad_norm": 0.40642398595809937, "learning_rate": 1.7661928418238196e-05, "loss": 0.4874, "step": 14542 }, { "epoch": 0.44674837956563146, "grad_norm": 0.36085155606269836, "learning_rate": 1.7661617850096798e-05, "loss": 0.6411, "step": 14543 }, { "epoch": 0.4467790987005806, "grad_norm": 0.36699268221855164, "learning_rate": 1.7661307264061158e-05, "loss": 0.6217, "step": 14544 }, { "epoch": 0.44680981783552975, "grad_norm": 0.35000157356262207, "learning_rate": 1.7660996660132e-05, "loss": 0.5703, "step": 14545 }, { "epoch": 0.44684053697047893, "grad_norm": 0.34919920563697815, "learning_rate": 1.766068603831005e-05, "loss": 0.5583, "step": 14546 }, { "epoch": 0.44687125610542805, "grad_norm": 0.40133655071258545, "learning_rate": 1.7660375398596032e-05, "loss": 0.5302, "step": 14547 }, { "epoch": 0.4469019752403772, "grad_norm": 0.36922842264175415, "learning_rate": 1.7660064740990672e-05, "loss": 0.5926, "step": 14548 }, { "epoch": 0.4469326943753264, "grad_norm": 0.3412439525127411, "learning_rate": 1.7659754065494696e-05, "loss": 0.5513, "step": 14549 }, { "epoch": 0.4469634135102756, "grad_norm": 0.4693240523338318, "learning_rate": 1.7659443372108828e-05, "loss": 0.5439, "step": 14550 }, { "epoch": 0.4469941326452247, "grad_norm": 0.3350710868835449, "learning_rate": 1.7659132660833797e-05, "loss": 0.549, "step": 14551 }, { "epoch": 0.44702485178017387, "grad_norm": 0.36688852310180664, "learning_rate": 1.7658821931670328e-05, "loss": 0.5687, "step": 14552 }, { "epoch": 0.44705557091512305, "grad_norm": 0.34582966566085815, "learning_rate": 1.7658511184619142e-05, "loss": 0.6105, "step": 14553 }, { "epoch": 0.44708629005007217, "grad_norm": 0.3220674395561218, "learning_rate": 1.765820041968097e-05, "loss": 0.6121, "step": 14554 }, { "epoch": 0.44711700918502134, "grad_norm": 0.3510936200618744, "learning_rate": 1.7657889636856537e-05, "loss": 0.5622, "step": 14555 }, { "epoch": 0.4471477283199705, "grad_norm": 0.3531995713710785, "learning_rate": 1.7657578836146566e-05, "loss": 0.5852, "step": 14556 }, { "epoch": 0.4471784474549197, "grad_norm": 0.40028756856918335, "learning_rate": 1.7657268017551787e-05, "loss": 0.6036, "step": 14557 }, { "epoch": 0.4472091665898688, "grad_norm": 0.40450119972229004, "learning_rate": 1.765695718107292e-05, "loss": 0.5329, "step": 14558 }, { "epoch": 0.447239885724818, "grad_norm": 0.315731018781662, "learning_rate": 1.7656646326710696e-05, "loss": 0.5243, "step": 14559 }, { "epoch": 0.44727060485976716, "grad_norm": 0.37794724106788635, "learning_rate": 1.7656335454465843e-05, "loss": 0.5627, "step": 14560 }, { "epoch": 0.4473013239947163, "grad_norm": 0.3782263398170471, "learning_rate": 1.765602456433908e-05, "loss": 0.5219, "step": 14561 }, { "epoch": 0.44733204312966546, "grad_norm": 0.3534214198589325, "learning_rate": 1.7655713656331143e-05, "loss": 0.4584, "step": 14562 }, { "epoch": 0.44736276226461463, "grad_norm": 0.4122005105018616, "learning_rate": 1.7655402730442744e-05, "loss": 0.5435, "step": 14563 }, { "epoch": 0.4473934813995638, "grad_norm": 0.4781878888607025, "learning_rate": 1.765509178667462e-05, "loss": 0.4806, "step": 14564 }, { "epoch": 0.44742420053451293, "grad_norm": 0.3651675581932068, "learning_rate": 1.76547808250275e-05, "loss": 0.5207, "step": 14565 }, { "epoch": 0.4474549196694621, "grad_norm": 0.4997875392436981, "learning_rate": 1.7654469845502098e-05, "loss": 0.6394, "step": 14566 }, { "epoch": 0.4474856388044113, "grad_norm": 0.39089369773864746, "learning_rate": 1.765415884809915e-05, "loss": 0.6243, "step": 14567 }, { "epoch": 0.44751635793936045, "grad_norm": 0.34501081705093384, "learning_rate": 1.765384783281938e-05, "loss": 0.5732, "step": 14568 }, { "epoch": 0.4475470770743096, "grad_norm": 0.42848825454711914, "learning_rate": 1.7653536799663512e-05, "loss": 0.5084, "step": 14569 }, { "epoch": 0.44757779620925875, "grad_norm": 0.3647865056991577, "learning_rate": 1.7653225748632273e-05, "loss": 0.6216, "step": 14570 }, { "epoch": 0.4476085153442079, "grad_norm": 0.3179760277271271, "learning_rate": 1.7652914679726393e-05, "loss": 0.5214, "step": 14571 }, { "epoch": 0.44763923447915704, "grad_norm": 0.3342128098011017, "learning_rate": 1.76526035929466e-05, "loss": 0.5685, "step": 14572 }, { "epoch": 0.4476699536141062, "grad_norm": 0.391663134098053, "learning_rate": 1.765229248829361e-05, "loss": 0.5536, "step": 14573 }, { "epoch": 0.4477006727490554, "grad_norm": 0.3587493598461151, "learning_rate": 1.765198136576816e-05, "loss": 0.5077, "step": 14574 }, { "epoch": 0.44773139188400457, "grad_norm": 0.3560161292552948, "learning_rate": 1.7651670225370975e-05, "loss": 0.6576, "step": 14575 }, { "epoch": 0.4477621110189537, "grad_norm": 0.35324323177337646, "learning_rate": 1.7651359067102776e-05, "loss": 0.566, "step": 14576 }, { "epoch": 0.44779283015390287, "grad_norm": 0.36529064178466797, "learning_rate": 1.7651047890964297e-05, "loss": 0.5551, "step": 14577 }, { "epoch": 0.44782354928885204, "grad_norm": 0.3382253050804138, "learning_rate": 1.7650736696956258e-05, "loss": 0.5987, "step": 14578 }, { "epoch": 0.44785426842380116, "grad_norm": 0.5261014699935913, "learning_rate": 1.765042548507939e-05, "loss": 0.6349, "step": 14579 }, { "epoch": 0.44788498755875034, "grad_norm": 0.35101914405822754, "learning_rate": 1.765011425533442e-05, "loss": 0.616, "step": 14580 }, { "epoch": 0.4479157066936995, "grad_norm": 0.3530002534389496, "learning_rate": 1.7649803007722074e-05, "loss": 0.5617, "step": 14581 }, { "epoch": 0.4479464258286487, "grad_norm": 0.3710082173347473, "learning_rate": 1.7649491742243076e-05, "loss": 0.5991, "step": 14582 }, { "epoch": 0.4479771449635978, "grad_norm": 0.36374491453170776, "learning_rate": 1.7649180458898157e-05, "loss": 0.6251, "step": 14583 }, { "epoch": 0.448007864098547, "grad_norm": 0.37365466356277466, "learning_rate": 1.7648869157688046e-05, "loss": 0.6179, "step": 14584 }, { "epoch": 0.44803858323349616, "grad_norm": 0.3989616632461548, "learning_rate": 1.7648557838613465e-05, "loss": 0.6517, "step": 14585 }, { "epoch": 0.4480693023684453, "grad_norm": 0.3215179443359375, "learning_rate": 1.7648246501675142e-05, "loss": 0.5616, "step": 14586 }, { "epoch": 0.44810002150339445, "grad_norm": 0.49504348635673523, "learning_rate": 1.7647935146873805e-05, "loss": 0.5023, "step": 14587 }, { "epoch": 0.4481307406383436, "grad_norm": 0.3509659469127655, "learning_rate": 1.7647623774210184e-05, "loss": 0.5642, "step": 14588 }, { "epoch": 0.4481614597732928, "grad_norm": 2.6187334060668945, "learning_rate": 1.7647312383685e-05, "loss": 0.586, "step": 14589 }, { "epoch": 0.4481921789082419, "grad_norm": 0.3609037697315216, "learning_rate": 1.7647000975298986e-05, "loss": 0.6448, "step": 14590 }, { "epoch": 0.4482228980431911, "grad_norm": 0.41279155015945435, "learning_rate": 1.7646689549052863e-05, "loss": 0.5348, "step": 14591 }, { "epoch": 0.4482536171781403, "grad_norm": 0.4316806495189667, "learning_rate": 1.764637810494737e-05, "loss": 0.6497, "step": 14592 }, { "epoch": 0.44828433631308945, "grad_norm": 0.3354444205760956, "learning_rate": 1.7646066642983223e-05, "loss": 0.5668, "step": 14593 }, { "epoch": 0.44831505544803857, "grad_norm": 0.6927863955497742, "learning_rate": 1.764575516316115e-05, "loss": 0.5994, "step": 14594 }, { "epoch": 0.44834577458298774, "grad_norm": 0.44309791922569275, "learning_rate": 1.7645443665481887e-05, "loss": 0.6003, "step": 14595 }, { "epoch": 0.4483764937179369, "grad_norm": 0.3199245035648346, "learning_rate": 1.7645132149946156e-05, "loss": 0.5172, "step": 14596 }, { "epoch": 0.44840721285288604, "grad_norm": 0.40287402272224426, "learning_rate": 1.764482061655468e-05, "loss": 0.582, "step": 14597 }, { "epoch": 0.4484379319878352, "grad_norm": 0.34839561581611633, "learning_rate": 1.76445090653082e-05, "loss": 0.5908, "step": 14598 }, { "epoch": 0.4484686511227844, "grad_norm": 0.32405781745910645, "learning_rate": 1.7644197496207426e-05, "loss": 0.5695, "step": 14599 }, { "epoch": 0.44849937025773357, "grad_norm": 0.3710947632789612, "learning_rate": 1.76438859092531e-05, "loss": 0.5989, "step": 14600 }, { "epoch": 0.4485300893926827, "grad_norm": 0.31610697507858276, "learning_rate": 1.7643574304445946e-05, "loss": 0.5547, "step": 14601 }, { "epoch": 0.44856080852763186, "grad_norm": 0.3428916335105896, "learning_rate": 1.764326268178669e-05, "loss": 0.5492, "step": 14602 }, { "epoch": 0.44859152766258104, "grad_norm": 0.31076595187187195, "learning_rate": 1.7642951041276054e-05, "loss": 0.5831, "step": 14603 }, { "epoch": 0.44862224679753016, "grad_norm": 0.4130495488643646, "learning_rate": 1.7642639382914775e-05, "loss": 0.583, "step": 14604 }, { "epoch": 0.44865296593247933, "grad_norm": 0.4093203544616699, "learning_rate": 1.7642327706703582e-05, "loss": 0.512, "step": 14605 }, { "epoch": 0.4486836850674285, "grad_norm": 0.3227510452270508, "learning_rate": 1.76420160126432e-05, "loss": 0.5877, "step": 14606 }, { "epoch": 0.4487144042023777, "grad_norm": 0.41135093569755554, "learning_rate": 1.7641704300734347e-05, "loss": 0.5996, "step": 14607 }, { "epoch": 0.4487451233373268, "grad_norm": 0.35143789649009705, "learning_rate": 1.7641392570977766e-05, "loss": 0.6484, "step": 14608 }, { "epoch": 0.448775842472276, "grad_norm": 0.32897576689720154, "learning_rate": 1.764108082337418e-05, "loss": 0.5958, "step": 14609 }, { "epoch": 0.44880656160722515, "grad_norm": 0.3982601761817932, "learning_rate": 1.7640769057924315e-05, "loss": 0.5222, "step": 14610 }, { "epoch": 0.4488372807421743, "grad_norm": 1.812828779220581, "learning_rate": 1.76404572746289e-05, "loss": 0.5391, "step": 14611 }, { "epoch": 0.44886799987712345, "grad_norm": 0.33562928438186646, "learning_rate": 1.764014547348866e-05, "loss": 0.574, "step": 14612 }, { "epoch": 0.4488987190120726, "grad_norm": 0.36209362745285034, "learning_rate": 1.7639833654504334e-05, "loss": 0.5672, "step": 14613 }, { "epoch": 0.4489294381470218, "grad_norm": 0.35998642444610596, "learning_rate": 1.7639521817676635e-05, "loss": 0.524, "step": 14614 }, { "epoch": 0.4489601572819709, "grad_norm": 0.3280092477798462, "learning_rate": 1.7639209963006303e-05, "loss": 0.55, "step": 14615 }, { "epoch": 0.4489908764169201, "grad_norm": 0.368806391954422, "learning_rate": 1.7638898090494062e-05, "loss": 0.4653, "step": 14616 }, { "epoch": 0.44902159555186927, "grad_norm": 0.351606547832489, "learning_rate": 1.763858620014064e-05, "loss": 0.613, "step": 14617 }, { "epoch": 0.44905231468681844, "grad_norm": 0.38613951206207275, "learning_rate": 1.7638274291946767e-05, "loss": 0.586, "step": 14618 }, { "epoch": 0.44908303382176756, "grad_norm": 0.351244181394577, "learning_rate": 1.7637962365913174e-05, "loss": 0.5527, "step": 14619 }, { "epoch": 0.44911375295671674, "grad_norm": 0.3361966907978058, "learning_rate": 1.7637650422040585e-05, "loss": 0.593, "step": 14620 }, { "epoch": 0.4491444720916659, "grad_norm": 0.3209379017353058, "learning_rate": 1.763733846032973e-05, "loss": 0.5623, "step": 14621 }, { "epoch": 0.44917519122661503, "grad_norm": 0.35527583956718445, "learning_rate": 1.7637026480781337e-05, "loss": 0.5743, "step": 14622 }, { "epoch": 0.4492059103615642, "grad_norm": 0.41673552989959717, "learning_rate": 1.7636714483396135e-05, "loss": 0.6192, "step": 14623 }, { "epoch": 0.4492366294965134, "grad_norm": 0.37408727407455444, "learning_rate": 1.7636402468174854e-05, "loss": 0.6088, "step": 14624 }, { "epoch": 0.44926734863146256, "grad_norm": 0.36594337224960327, "learning_rate": 1.763609043511822e-05, "loss": 0.5952, "step": 14625 }, { "epoch": 0.4492980677664117, "grad_norm": 0.34434813261032104, "learning_rate": 1.7635778384226963e-05, "loss": 0.6034, "step": 14626 }, { "epoch": 0.44932878690136085, "grad_norm": 0.35090628266334534, "learning_rate": 1.7635466315501814e-05, "loss": 0.5864, "step": 14627 }, { "epoch": 0.44935950603631003, "grad_norm": 0.3258262872695923, "learning_rate": 1.76351542289435e-05, "loss": 0.4915, "step": 14628 }, { "epoch": 0.44939022517125915, "grad_norm": 0.3040028512477875, "learning_rate": 1.763484212455275e-05, "loss": 0.5325, "step": 14629 }, { "epoch": 0.4494209443062083, "grad_norm": 0.45368489623069763, "learning_rate": 1.7634530002330293e-05, "loss": 0.6308, "step": 14630 }, { "epoch": 0.4494516634411575, "grad_norm": 0.5585698485374451, "learning_rate": 1.7634217862276858e-05, "loss": 0.5739, "step": 14631 }, { "epoch": 0.4494823825761067, "grad_norm": 0.520238995552063, "learning_rate": 1.7633905704393176e-05, "loss": 0.5898, "step": 14632 }, { "epoch": 0.4495131017110558, "grad_norm": 1.4467670917510986, "learning_rate": 1.7633593528679972e-05, "loss": 0.5842, "step": 14633 }, { "epoch": 0.44954382084600497, "grad_norm": 0.35766932368278503, "learning_rate": 1.7633281335137975e-05, "loss": 0.4861, "step": 14634 }, { "epoch": 0.44957453998095415, "grad_norm": 0.31568828225135803, "learning_rate": 1.763296912376792e-05, "loss": 0.5322, "step": 14635 }, { "epoch": 0.4496052591159033, "grad_norm": 0.3698199689388275, "learning_rate": 1.763265689457053e-05, "loss": 0.5517, "step": 14636 }, { "epoch": 0.44963597825085244, "grad_norm": 0.329062283039093, "learning_rate": 1.7632344647546538e-05, "loss": 0.5417, "step": 14637 }, { "epoch": 0.4496666973858016, "grad_norm": 0.39632275700569153, "learning_rate": 1.7632032382696673e-05, "loss": 0.6803, "step": 14638 }, { "epoch": 0.4496974165207508, "grad_norm": 0.3501470983028412, "learning_rate": 1.763172010002166e-05, "loss": 0.6155, "step": 14639 }, { "epoch": 0.4497281356556999, "grad_norm": 0.4525614380836487, "learning_rate": 1.7631407799522237e-05, "loss": 0.5782, "step": 14640 }, { "epoch": 0.4497588547906491, "grad_norm": 0.3361092805862427, "learning_rate": 1.7631095481199124e-05, "loss": 0.5527, "step": 14641 }, { "epoch": 0.44978957392559826, "grad_norm": 0.42806243896484375, "learning_rate": 1.7630783145053058e-05, "loss": 0.5914, "step": 14642 }, { "epoch": 0.44982029306054744, "grad_norm": 0.4150003492832184, "learning_rate": 1.763047079108476e-05, "loss": 0.603, "step": 14643 }, { "epoch": 0.44985101219549656, "grad_norm": 0.364383339881897, "learning_rate": 1.7630158419294968e-05, "loss": 0.5472, "step": 14644 }, { "epoch": 0.44988173133044573, "grad_norm": 0.3281328082084656, "learning_rate": 1.7629846029684406e-05, "loss": 0.4991, "step": 14645 }, { "epoch": 0.4499124504653949, "grad_norm": 0.32557401061058044, "learning_rate": 1.7629533622253806e-05, "loss": 0.5996, "step": 14646 }, { "epoch": 0.44994316960034403, "grad_norm": 0.3677683472633362, "learning_rate": 1.76292211970039e-05, "loss": 0.605, "step": 14647 }, { "epoch": 0.4499738887352932, "grad_norm": 0.3465220332145691, "learning_rate": 1.7628908753935416e-05, "loss": 0.5707, "step": 14648 }, { "epoch": 0.4500046078702424, "grad_norm": 0.3369542062282562, "learning_rate": 1.7628596293049075e-05, "loss": 0.554, "step": 14649 }, { "epoch": 0.45003532700519155, "grad_norm": 0.3415788412094116, "learning_rate": 1.7628283814345623e-05, "loss": 0.5747, "step": 14650 }, { "epoch": 0.4500660461401407, "grad_norm": 0.5039899349212646, "learning_rate": 1.7627971317825778e-05, "loss": 0.5788, "step": 14651 }, { "epoch": 0.45009676527508985, "grad_norm": 0.4049831032752991, "learning_rate": 1.7627658803490272e-05, "loss": 0.6636, "step": 14652 }, { "epoch": 0.450127484410039, "grad_norm": 0.39964428544044495, "learning_rate": 1.762734627133984e-05, "loss": 0.5516, "step": 14653 }, { "epoch": 0.4501582035449882, "grad_norm": 0.3761940896511078, "learning_rate": 1.7627033721375206e-05, "loss": 0.5651, "step": 14654 }, { "epoch": 0.4501889226799373, "grad_norm": 0.34671202301979065, "learning_rate": 1.76267211535971e-05, "loss": 0.5619, "step": 14655 }, { "epoch": 0.4502196418148865, "grad_norm": 0.3564152717590332, "learning_rate": 1.7626408568006257e-05, "loss": 0.5361, "step": 14656 }, { "epoch": 0.45025036094983567, "grad_norm": 0.39534419775009155, "learning_rate": 1.7626095964603404e-05, "loss": 0.583, "step": 14657 }, { "epoch": 0.4502810800847848, "grad_norm": 0.34327054023742676, "learning_rate": 1.7625783343389268e-05, "loss": 0.6214, "step": 14658 }, { "epoch": 0.45031179921973397, "grad_norm": 0.34277549386024475, "learning_rate": 1.7625470704364586e-05, "loss": 0.5562, "step": 14659 }, { "epoch": 0.45034251835468314, "grad_norm": 0.43000638484954834, "learning_rate": 1.7625158047530085e-05, "loss": 0.5628, "step": 14660 }, { "epoch": 0.4503732374896323, "grad_norm": 0.46507853269577026, "learning_rate": 1.7624845372886493e-05, "loss": 0.538, "step": 14661 }, { "epoch": 0.45040395662458144, "grad_norm": 0.40938612818717957, "learning_rate": 1.762453268043454e-05, "loss": 0.5555, "step": 14662 }, { "epoch": 0.4504346757595306, "grad_norm": 0.553307831287384, "learning_rate": 1.762421997017496e-05, "loss": 0.568, "step": 14663 }, { "epoch": 0.4504653948944798, "grad_norm": 0.4095250368118286, "learning_rate": 1.7623907242108486e-05, "loss": 0.5014, "step": 14664 }, { "epoch": 0.4504961140294289, "grad_norm": 0.3502374589443207, "learning_rate": 1.762359449623584e-05, "loss": 0.5931, "step": 14665 }, { "epoch": 0.4505268331643781, "grad_norm": 0.323743999004364, "learning_rate": 1.7623281732557758e-05, "loss": 0.5804, "step": 14666 }, { "epoch": 0.45055755229932726, "grad_norm": 0.3815695643424988, "learning_rate": 1.762296895107497e-05, "loss": 0.5109, "step": 14667 }, { "epoch": 0.45058827143427643, "grad_norm": 0.32901710271835327, "learning_rate": 1.7622656151788202e-05, "loss": 0.6471, "step": 14668 }, { "epoch": 0.45061899056922555, "grad_norm": 0.39116477966308594, "learning_rate": 1.762234333469819e-05, "loss": 0.6286, "step": 14669 }, { "epoch": 0.45064970970417473, "grad_norm": 0.33530768752098083, "learning_rate": 1.7622030499805665e-05, "loss": 0.4921, "step": 14670 }, { "epoch": 0.4506804288391239, "grad_norm": 0.33936721086502075, "learning_rate": 1.7621717647111353e-05, "loss": 0.6457, "step": 14671 }, { "epoch": 0.450711147974073, "grad_norm": 0.35841304063796997, "learning_rate": 1.762140477661599e-05, "loss": 0.4566, "step": 14672 }, { "epoch": 0.4507418671090222, "grad_norm": 0.34048306941986084, "learning_rate": 1.76210918883203e-05, "loss": 0.5837, "step": 14673 }, { "epoch": 0.4507725862439714, "grad_norm": 0.3594250977039337, "learning_rate": 1.762077898222502e-05, "loss": 0.594, "step": 14674 }, { "epoch": 0.45080330537892055, "grad_norm": 0.3369969129562378, "learning_rate": 1.7620466058330875e-05, "loss": 0.5684, "step": 14675 }, { "epoch": 0.45083402451386967, "grad_norm": 0.41737985610961914, "learning_rate": 1.7620153116638604e-05, "loss": 0.646, "step": 14676 }, { "epoch": 0.45086474364881884, "grad_norm": 0.3636126220226288, "learning_rate": 1.761984015714893e-05, "loss": 0.5288, "step": 14677 }, { "epoch": 0.450895462783768, "grad_norm": 0.3746907114982605, "learning_rate": 1.761952717986259e-05, "loss": 0.5852, "step": 14678 }, { "epoch": 0.4509261819187172, "grad_norm": 0.9287598133087158, "learning_rate": 1.7619214184780313e-05, "loss": 0.602, "step": 14679 }, { "epoch": 0.4509569010536663, "grad_norm": 0.34421584010124207, "learning_rate": 1.7618901171902824e-05, "loss": 0.5444, "step": 14680 }, { "epoch": 0.4509876201886155, "grad_norm": 0.4049091637134552, "learning_rate": 1.761858814123086e-05, "loss": 0.5352, "step": 14681 }, { "epoch": 0.45101833932356467, "grad_norm": 0.3336898982524872, "learning_rate": 1.7618275092765156e-05, "loss": 0.5906, "step": 14682 }, { "epoch": 0.4510490584585138, "grad_norm": 0.3777966797351837, "learning_rate": 1.7617962026506435e-05, "loss": 0.5896, "step": 14683 }, { "epoch": 0.45107977759346296, "grad_norm": 0.35507166385650635, "learning_rate": 1.7617648942455434e-05, "loss": 0.4673, "step": 14684 }, { "epoch": 0.45111049672841214, "grad_norm": 0.39237406849861145, "learning_rate": 1.761733584061288e-05, "loss": 0.602, "step": 14685 }, { "epoch": 0.4511412158633613, "grad_norm": 0.3521982729434967, "learning_rate": 1.7617022720979508e-05, "loss": 0.5939, "step": 14686 }, { "epoch": 0.45117193499831043, "grad_norm": 0.3830109238624573, "learning_rate": 1.7616709583556046e-05, "loss": 0.5976, "step": 14687 }, { "epoch": 0.4512026541332596, "grad_norm": 0.36372435092926025, "learning_rate": 1.7616396428343225e-05, "loss": 0.4328, "step": 14688 }, { "epoch": 0.4512333732682088, "grad_norm": 0.3342306613922119, "learning_rate": 1.7616083255341777e-05, "loss": 0.58, "step": 14689 }, { "epoch": 0.4512640924031579, "grad_norm": 0.8010926842689514, "learning_rate": 1.761577006455244e-05, "loss": 0.5886, "step": 14690 }, { "epoch": 0.4512948115381071, "grad_norm": 0.3348539173603058, "learning_rate": 1.761545685597594e-05, "loss": 0.6187, "step": 14691 }, { "epoch": 0.45132553067305625, "grad_norm": 0.48828890919685364, "learning_rate": 1.7615143629613004e-05, "loss": 0.6604, "step": 14692 }, { "epoch": 0.4513562498080054, "grad_norm": 0.42751824855804443, "learning_rate": 1.761483038546437e-05, "loss": 0.5743, "step": 14693 }, { "epoch": 0.45138696894295455, "grad_norm": 0.3485841155052185, "learning_rate": 1.761451712353077e-05, "loss": 0.5388, "step": 14694 }, { "epoch": 0.4514176880779037, "grad_norm": 0.3946281969547272, "learning_rate": 1.761420384381293e-05, "loss": 0.6387, "step": 14695 }, { "epoch": 0.4514484072128529, "grad_norm": 0.35686057806015015, "learning_rate": 1.761389054631159e-05, "loss": 0.4854, "step": 14696 }, { "epoch": 0.4514791263478021, "grad_norm": 0.35842210054397583, "learning_rate": 1.761357723102747e-05, "loss": 0.4847, "step": 14697 }, { "epoch": 0.4515098454827512, "grad_norm": 0.34427908062934875, "learning_rate": 1.7613263897961313e-05, "loss": 0.5763, "step": 14698 }, { "epoch": 0.45154056461770037, "grad_norm": 0.33906134963035583, "learning_rate": 1.761295054711385e-05, "loss": 0.6735, "step": 14699 }, { "epoch": 0.45157128375264954, "grad_norm": 0.3573475778102875, "learning_rate": 1.7612637178485803e-05, "loss": 0.6079, "step": 14700 }, { "epoch": 0.45160200288759866, "grad_norm": 0.3679570257663727, "learning_rate": 1.761232379207791e-05, "loss": 0.5329, "step": 14701 }, { "epoch": 0.45163272202254784, "grad_norm": 0.3498518466949463, "learning_rate": 1.761201038789091e-05, "loss": 0.5867, "step": 14702 }, { "epoch": 0.451663441157497, "grad_norm": 0.37355297803878784, "learning_rate": 1.7611696965925526e-05, "loss": 0.595, "step": 14703 }, { "epoch": 0.4516941602924462, "grad_norm": 0.3685338497161865, "learning_rate": 1.7611383526182487e-05, "loss": 0.5552, "step": 14704 }, { "epoch": 0.4517248794273953, "grad_norm": 0.3283860683441162, "learning_rate": 1.7611070068662533e-05, "loss": 0.5421, "step": 14705 }, { "epoch": 0.4517555985623445, "grad_norm": 0.3737379014492035, "learning_rate": 1.7610756593366394e-05, "loss": 0.6216, "step": 14706 }, { "epoch": 0.45178631769729366, "grad_norm": 0.3656452000141144, "learning_rate": 1.76104431002948e-05, "loss": 0.6094, "step": 14707 }, { "epoch": 0.4518170368322428, "grad_norm": 0.3365270495414734, "learning_rate": 1.7610129589448485e-05, "loss": 0.5847, "step": 14708 }, { "epoch": 0.45184775596719196, "grad_norm": 0.470643550157547, "learning_rate": 1.7609816060828183e-05, "loss": 0.5899, "step": 14709 }, { "epoch": 0.45187847510214113, "grad_norm": 0.346583753824234, "learning_rate": 1.7609502514434622e-05, "loss": 0.5049, "step": 14710 }, { "epoch": 0.4519091942370903, "grad_norm": 0.3707916736602783, "learning_rate": 1.7609188950268538e-05, "loss": 0.5964, "step": 14711 }, { "epoch": 0.4519399133720394, "grad_norm": 0.35933446884155273, "learning_rate": 1.760887536833066e-05, "loss": 0.6334, "step": 14712 }, { "epoch": 0.4519706325069886, "grad_norm": 0.36831361055374146, "learning_rate": 1.7608561768621724e-05, "loss": 0.5583, "step": 14713 }, { "epoch": 0.4520013516419378, "grad_norm": 0.4090663492679596, "learning_rate": 1.760824815114246e-05, "loss": 0.5698, "step": 14714 }, { "epoch": 0.4520320707768869, "grad_norm": 0.4253503084182739, "learning_rate": 1.76079345158936e-05, "loss": 0.5676, "step": 14715 }, { "epoch": 0.45206278991183607, "grad_norm": 0.6666960120201111, "learning_rate": 1.7607620862875877e-05, "loss": 0.5328, "step": 14716 }, { "epoch": 0.45209350904678525, "grad_norm": 0.33213210105895996, "learning_rate": 1.7607307192090026e-05, "loss": 0.5736, "step": 14717 }, { "epoch": 0.4521242281817344, "grad_norm": 0.37527552247047424, "learning_rate": 1.7606993503536774e-05, "loss": 0.6435, "step": 14718 }, { "epoch": 0.45215494731668354, "grad_norm": 0.38226738572120667, "learning_rate": 1.7606679797216864e-05, "loss": 0.566, "step": 14719 }, { "epoch": 0.4521856664516327, "grad_norm": 0.3939184546470642, "learning_rate": 1.7606366073131017e-05, "loss": 0.5601, "step": 14720 }, { "epoch": 0.4522163855865819, "grad_norm": 0.40327689051628113, "learning_rate": 1.7606052331279976e-05, "loss": 0.573, "step": 14721 }, { "epoch": 0.45224710472153107, "grad_norm": 0.35520294308662415, "learning_rate": 1.7605738571664464e-05, "loss": 0.6318, "step": 14722 }, { "epoch": 0.4522778238564802, "grad_norm": 0.3091677725315094, "learning_rate": 1.760542479428522e-05, "loss": 0.541, "step": 14723 }, { "epoch": 0.45230854299142936, "grad_norm": 0.40154436230659485, "learning_rate": 1.7605110999142974e-05, "loss": 0.6535, "step": 14724 }, { "epoch": 0.45233926212637854, "grad_norm": 0.3543560802936554, "learning_rate": 1.7604797186238462e-05, "loss": 0.584, "step": 14725 }, { "epoch": 0.45236998126132766, "grad_norm": 0.39388057589530945, "learning_rate": 1.760448335557241e-05, "loss": 0.5411, "step": 14726 }, { "epoch": 0.45240070039627683, "grad_norm": 0.3948739469051361, "learning_rate": 1.7604169507145563e-05, "loss": 0.6182, "step": 14727 }, { "epoch": 0.452431419531226, "grad_norm": 0.39936211705207825, "learning_rate": 1.7603855640958643e-05, "loss": 0.5069, "step": 14728 }, { "epoch": 0.4524621386661752, "grad_norm": 0.3140372633934021, "learning_rate": 1.760354175701239e-05, "loss": 0.5096, "step": 14729 }, { "epoch": 0.4524928578011243, "grad_norm": 0.37498196959495544, "learning_rate": 1.760322785530753e-05, "loss": 0.5659, "step": 14730 }, { "epoch": 0.4525235769360735, "grad_norm": 0.361552894115448, "learning_rate": 1.7602913935844802e-05, "loss": 0.5645, "step": 14731 }, { "epoch": 0.45255429607102265, "grad_norm": 0.346391499042511, "learning_rate": 1.760259999862494e-05, "loss": 0.5815, "step": 14732 }, { "epoch": 0.4525850152059718, "grad_norm": 0.3138768672943115, "learning_rate": 1.7602286043648673e-05, "loss": 0.5841, "step": 14733 }, { "epoch": 0.45261573434092095, "grad_norm": 0.4010869562625885, "learning_rate": 1.7601972070916734e-05, "loss": 0.589, "step": 14734 }, { "epoch": 0.4526464534758701, "grad_norm": 0.3154456615447998, "learning_rate": 1.760165808042986e-05, "loss": 0.507, "step": 14735 }, { "epoch": 0.4526771726108193, "grad_norm": 0.3401024639606476, "learning_rate": 1.7601344072188783e-05, "loss": 0.5675, "step": 14736 }, { "epoch": 0.4527078917457684, "grad_norm": 0.36966732144355774, "learning_rate": 1.7601030046194238e-05, "loss": 0.5803, "step": 14737 }, { "epoch": 0.4527386108807176, "grad_norm": 0.4297854006290436, "learning_rate": 1.7600716002446952e-05, "loss": 0.5426, "step": 14738 }, { "epoch": 0.45276933001566677, "grad_norm": 0.35616594552993774, "learning_rate": 1.7600401940947665e-05, "loss": 0.5951, "step": 14739 }, { "epoch": 0.45280004915061595, "grad_norm": 0.3660380244255066, "learning_rate": 1.7600087861697113e-05, "loss": 0.5358, "step": 14740 }, { "epoch": 0.45283076828556507, "grad_norm": 0.353437215089798, "learning_rate": 1.759977376469602e-05, "loss": 0.5779, "step": 14741 }, { "epoch": 0.45286148742051424, "grad_norm": 0.3488626480102539, "learning_rate": 1.7599459649945126e-05, "loss": 0.5871, "step": 14742 }, { "epoch": 0.4528922065554634, "grad_norm": 0.3275333642959595, "learning_rate": 1.7599145517445162e-05, "loss": 0.5889, "step": 14743 }, { "epoch": 0.45292292569041254, "grad_norm": 0.34206753969192505, "learning_rate": 1.7598831367196863e-05, "loss": 0.6233, "step": 14744 }, { "epoch": 0.4529536448253617, "grad_norm": 0.31300365924835205, "learning_rate": 1.7598517199200964e-05, "loss": 0.5673, "step": 14745 }, { "epoch": 0.4529843639603109, "grad_norm": 0.33142104744911194, "learning_rate": 1.7598203013458193e-05, "loss": 0.5358, "step": 14746 }, { "epoch": 0.45301508309526006, "grad_norm": 0.31612086296081543, "learning_rate": 1.7597888809969296e-05, "loss": 0.486, "step": 14747 }, { "epoch": 0.4530458022302092, "grad_norm": 0.36908385157585144, "learning_rate": 1.7597574588734995e-05, "loss": 0.5313, "step": 14748 }, { "epoch": 0.45307652136515836, "grad_norm": 0.36004775762557983, "learning_rate": 1.7597260349756027e-05, "loss": 0.5428, "step": 14749 }, { "epoch": 0.45310724050010753, "grad_norm": 0.3703804910182953, "learning_rate": 1.759694609303313e-05, "loss": 0.5339, "step": 14750 }, { "epoch": 0.45313795963505665, "grad_norm": 0.39009618759155273, "learning_rate": 1.7596631818567028e-05, "loss": 0.5497, "step": 14751 }, { "epoch": 0.45316867877000583, "grad_norm": 0.35303637385368347, "learning_rate": 1.759631752635847e-05, "loss": 0.6286, "step": 14752 }, { "epoch": 0.453199397904955, "grad_norm": 0.34755733609199524, "learning_rate": 1.7596003216408174e-05, "loss": 0.6485, "step": 14753 }, { "epoch": 0.4532301170399042, "grad_norm": 0.3399241268634796, "learning_rate": 1.7595688888716887e-05, "loss": 0.532, "step": 14754 }, { "epoch": 0.4532608361748533, "grad_norm": 0.36805614829063416, "learning_rate": 1.759537454328534e-05, "loss": 0.6116, "step": 14755 }, { "epoch": 0.4532915553098025, "grad_norm": 0.34877872467041016, "learning_rate": 1.7595060180114262e-05, "loss": 0.5789, "step": 14756 }, { "epoch": 0.45332227444475165, "grad_norm": 0.33930259943008423, "learning_rate": 1.759474579920439e-05, "loss": 0.5195, "step": 14757 }, { "epoch": 0.45335299357970077, "grad_norm": 0.48760148882865906, "learning_rate": 1.7594431400556463e-05, "loss": 0.5917, "step": 14758 }, { "epoch": 0.45338371271464994, "grad_norm": 0.3429584503173828, "learning_rate": 1.759411698417121e-05, "loss": 0.5629, "step": 14759 }, { "epoch": 0.4534144318495991, "grad_norm": 0.36942026019096375, "learning_rate": 1.7593802550049362e-05, "loss": 0.5722, "step": 14760 }, { "epoch": 0.4534451509845483, "grad_norm": 0.356810986995697, "learning_rate": 1.7593488098191658e-05, "loss": 0.6008, "step": 14761 }, { "epoch": 0.4534758701194974, "grad_norm": 0.30681416392326355, "learning_rate": 1.7593173628598833e-05, "loss": 0.5444, "step": 14762 }, { "epoch": 0.4535065892544466, "grad_norm": 0.38577985763549805, "learning_rate": 1.7592859141271624e-05, "loss": 0.529, "step": 14763 }, { "epoch": 0.45353730838939577, "grad_norm": 0.37435266375541687, "learning_rate": 1.759254463621076e-05, "loss": 0.5455, "step": 14764 }, { "epoch": 0.45356802752434494, "grad_norm": 0.33314743638038635, "learning_rate": 1.759223011341698e-05, "loss": 0.5857, "step": 14765 }, { "epoch": 0.45359874665929406, "grad_norm": 0.32554998993873596, "learning_rate": 1.7591915572891013e-05, "loss": 0.5701, "step": 14766 }, { "epoch": 0.45362946579424324, "grad_norm": 0.3387303054332733, "learning_rate": 1.75916010146336e-05, "loss": 0.5834, "step": 14767 }, { "epoch": 0.4536601849291924, "grad_norm": 0.36572393774986267, "learning_rate": 1.7591286438645472e-05, "loss": 0.5689, "step": 14768 }, { "epoch": 0.45369090406414153, "grad_norm": 0.3605833649635315, "learning_rate": 1.759097184492736e-05, "loss": 0.6464, "step": 14769 }, { "epoch": 0.4537216231990907, "grad_norm": 0.34780094027519226, "learning_rate": 1.7590657233480007e-05, "loss": 0.6044, "step": 14770 }, { "epoch": 0.4537523423340399, "grad_norm": 0.364944726228714, "learning_rate": 1.7590342604304145e-05, "loss": 0.6232, "step": 14771 }, { "epoch": 0.45378306146898906, "grad_norm": 0.3233402371406555, "learning_rate": 1.7590027957400507e-05, "loss": 0.5588, "step": 14772 }, { "epoch": 0.4538137806039382, "grad_norm": 0.32655465602874756, "learning_rate": 1.7589713292769827e-05, "loss": 0.4942, "step": 14773 }, { "epoch": 0.45384449973888735, "grad_norm": 0.36105531454086304, "learning_rate": 1.7589398610412846e-05, "loss": 0.5474, "step": 14774 }, { "epoch": 0.45387521887383653, "grad_norm": 0.3568357527256012, "learning_rate": 1.7589083910330287e-05, "loss": 0.5741, "step": 14775 }, { "epoch": 0.45390593800878565, "grad_norm": 0.36982589960098267, "learning_rate": 1.75887691925229e-05, "loss": 0.4824, "step": 14776 }, { "epoch": 0.4539366571437348, "grad_norm": 0.33987388014793396, "learning_rate": 1.7588454456991406e-05, "loss": 0.5823, "step": 14777 }, { "epoch": 0.453967376278684, "grad_norm": 0.6266050338745117, "learning_rate": 1.758813970373655e-05, "loss": 0.4933, "step": 14778 }, { "epoch": 0.4539980954136332, "grad_norm": 0.3435169756412506, "learning_rate": 1.758782493275906e-05, "loss": 0.5577, "step": 14779 }, { "epoch": 0.4540288145485823, "grad_norm": 0.3395032286643982, "learning_rate": 1.7587510144059682e-05, "loss": 0.6532, "step": 14780 }, { "epoch": 0.45405953368353147, "grad_norm": 0.368660569190979, "learning_rate": 1.758719533763914e-05, "loss": 0.5385, "step": 14781 }, { "epoch": 0.45409025281848064, "grad_norm": 0.5005515217781067, "learning_rate": 1.7586880513498174e-05, "loss": 0.4494, "step": 14782 }, { "epoch": 0.45412097195342976, "grad_norm": 0.39519476890563965, "learning_rate": 1.758656567163752e-05, "loss": 0.64, "step": 14783 }, { "epoch": 0.45415169108837894, "grad_norm": 0.34761038422584534, "learning_rate": 1.758625081205791e-05, "loss": 0.6484, "step": 14784 }, { "epoch": 0.4541824102233281, "grad_norm": 0.3295415937900543, "learning_rate": 1.758593593476008e-05, "loss": 0.6081, "step": 14785 }, { "epoch": 0.4542131293582773, "grad_norm": 0.35725101828575134, "learning_rate": 1.7585621039744772e-05, "loss": 0.5848, "step": 14786 }, { "epoch": 0.4542438484932264, "grad_norm": 0.3419910669326782, "learning_rate": 1.758530612701271e-05, "loss": 0.6219, "step": 14787 }, { "epoch": 0.4542745676281756, "grad_norm": 0.7743199467658997, "learning_rate": 1.758499119656464e-05, "loss": 0.5589, "step": 14788 }, { "epoch": 0.45430528676312476, "grad_norm": 0.3317103981971741, "learning_rate": 1.758467624840129e-05, "loss": 0.598, "step": 14789 }, { "epoch": 0.45433600589807394, "grad_norm": 0.3548225462436676, "learning_rate": 1.7584361282523405e-05, "loss": 0.5315, "step": 14790 }, { "epoch": 0.45436672503302306, "grad_norm": 0.30635207891464233, "learning_rate": 1.758404629893171e-05, "loss": 0.5058, "step": 14791 }, { "epoch": 0.45439744416797223, "grad_norm": 0.3996305763721466, "learning_rate": 1.7583731297626944e-05, "loss": 0.5754, "step": 14792 }, { "epoch": 0.4544281633029214, "grad_norm": 0.33714714646339417, "learning_rate": 1.7583416278609847e-05, "loss": 0.5332, "step": 14793 }, { "epoch": 0.4544588824378705, "grad_norm": 0.33379510045051575, "learning_rate": 1.758310124188115e-05, "loss": 0.5678, "step": 14794 }, { "epoch": 0.4544896015728197, "grad_norm": 0.4315635859966278, "learning_rate": 1.758278618744159e-05, "loss": 0.5427, "step": 14795 }, { "epoch": 0.4545203207077689, "grad_norm": 0.36857613921165466, "learning_rate": 1.7582471115291905e-05, "loss": 0.6229, "step": 14796 }, { "epoch": 0.45455103984271805, "grad_norm": 0.3525790572166443, "learning_rate": 1.7582156025432827e-05, "loss": 0.6351, "step": 14797 }, { "epoch": 0.45458175897766717, "grad_norm": 0.3383762836456299, "learning_rate": 1.7581840917865096e-05, "loss": 0.6126, "step": 14798 }, { "epoch": 0.45461247811261635, "grad_norm": 0.3619092106819153, "learning_rate": 1.7581525792589447e-05, "loss": 0.5603, "step": 14799 }, { "epoch": 0.4546431972475655, "grad_norm": 0.3275085985660553, "learning_rate": 1.7581210649606614e-05, "loss": 0.5719, "step": 14800 }, { "epoch": 0.45467391638251464, "grad_norm": 0.3604305684566498, "learning_rate": 1.7580895488917334e-05, "loss": 0.6195, "step": 14801 }, { "epoch": 0.4547046355174638, "grad_norm": 0.3329273760318756, "learning_rate": 1.758058031052234e-05, "loss": 0.5715, "step": 14802 }, { "epoch": 0.454735354652413, "grad_norm": 0.40876078605651855, "learning_rate": 1.7580265114422377e-05, "loss": 0.5808, "step": 14803 }, { "epoch": 0.45476607378736217, "grad_norm": 0.3279174268245697, "learning_rate": 1.757994990061817e-05, "loss": 0.5513, "step": 14804 }, { "epoch": 0.4547967929223113, "grad_norm": 0.4939618408679962, "learning_rate": 1.7579634669110465e-05, "loss": 0.4538, "step": 14805 }, { "epoch": 0.45482751205726046, "grad_norm": 0.3662663400173187, "learning_rate": 1.757931941989999e-05, "loss": 0.5512, "step": 14806 }, { "epoch": 0.45485823119220964, "grad_norm": 0.3453003466129303, "learning_rate": 1.7579004152987486e-05, "loss": 0.5102, "step": 14807 }, { "epoch": 0.4548889503271588, "grad_norm": 0.3408786952495575, "learning_rate": 1.7578688868373694e-05, "loss": 0.6323, "step": 14808 }, { "epoch": 0.45491966946210793, "grad_norm": 0.40161749720573425, "learning_rate": 1.7578373566059336e-05, "loss": 0.6289, "step": 14809 }, { "epoch": 0.4549503885970571, "grad_norm": 0.3815045952796936, "learning_rate": 1.7578058246045164e-05, "loss": 0.6258, "step": 14810 }, { "epoch": 0.4549811077320063, "grad_norm": 0.3351389765739441, "learning_rate": 1.7577742908331906e-05, "loss": 0.4604, "step": 14811 }, { "epoch": 0.4550118268669554, "grad_norm": 0.37020763754844666, "learning_rate": 1.75774275529203e-05, "loss": 0.5461, "step": 14812 }, { "epoch": 0.4550425460019046, "grad_norm": 0.3447721302509308, "learning_rate": 1.757711217981108e-05, "loss": 0.5591, "step": 14813 }, { "epoch": 0.45507326513685376, "grad_norm": 0.45211368799209595, "learning_rate": 1.757679678900499e-05, "loss": 0.6562, "step": 14814 }, { "epoch": 0.45510398427180293, "grad_norm": 0.32899636030197144, "learning_rate": 1.757648138050276e-05, "loss": 0.5418, "step": 14815 }, { "epoch": 0.45513470340675205, "grad_norm": 0.35579565167427063, "learning_rate": 1.7576165954305127e-05, "loss": 0.5667, "step": 14816 }, { "epoch": 0.4551654225417012, "grad_norm": 0.39517873525619507, "learning_rate": 1.757585051041283e-05, "loss": 0.5706, "step": 14817 }, { "epoch": 0.4551961416766504, "grad_norm": 0.33052220940589905, "learning_rate": 1.7575535048826605e-05, "loss": 0.5376, "step": 14818 }, { "epoch": 0.4552268608115995, "grad_norm": 0.3311185836791992, "learning_rate": 1.757521956954719e-05, "loss": 0.5643, "step": 14819 }, { "epoch": 0.4552575799465487, "grad_norm": 0.39226725697517395, "learning_rate": 1.7574904072575322e-05, "loss": 0.5786, "step": 14820 }, { "epoch": 0.45528829908149787, "grad_norm": 0.3479754328727722, "learning_rate": 1.7574588557911732e-05, "loss": 0.593, "step": 14821 }, { "epoch": 0.45531901821644705, "grad_norm": 0.32683905959129333, "learning_rate": 1.7574273025557164e-05, "loss": 0.5539, "step": 14822 }, { "epoch": 0.45534973735139617, "grad_norm": 0.3613910377025604, "learning_rate": 1.7573957475512354e-05, "loss": 0.5281, "step": 14823 }, { "epoch": 0.45538045648634534, "grad_norm": 0.33554190397262573, "learning_rate": 1.7573641907778033e-05, "loss": 0.5942, "step": 14824 }, { "epoch": 0.4554111756212945, "grad_norm": 0.3428145945072174, "learning_rate": 1.7573326322354947e-05, "loss": 0.5757, "step": 14825 }, { "epoch": 0.45544189475624364, "grad_norm": 0.3339780867099762, "learning_rate": 1.7573010719243827e-05, "loss": 0.5269, "step": 14826 }, { "epoch": 0.4554726138911928, "grad_norm": 0.3802330195903778, "learning_rate": 1.757269509844541e-05, "loss": 0.5683, "step": 14827 }, { "epoch": 0.455503333026142, "grad_norm": 0.34515050053596497, "learning_rate": 1.757237945996044e-05, "loss": 0.5556, "step": 14828 }, { "epoch": 0.45553405216109116, "grad_norm": 0.35292544960975647, "learning_rate": 1.757206380378964e-05, "loss": 0.5606, "step": 14829 }, { "epoch": 0.4555647712960403, "grad_norm": 0.34009286761283875, "learning_rate": 1.7571748129933764e-05, "loss": 0.5568, "step": 14830 }, { "epoch": 0.45559549043098946, "grad_norm": 0.40338024497032166, "learning_rate": 1.7571432438393537e-05, "loss": 0.5823, "step": 14831 }, { "epoch": 0.45562620956593863, "grad_norm": 0.3686307966709137, "learning_rate": 1.75711167291697e-05, "loss": 0.567, "step": 14832 }, { "epoch": 0.4556569287008878, "grad_norm": 0.3606644868850708, "learning_rate": 1.7570801002262995e-05, "loss": 0.6015, "step": 14833 }, { "epoch": 0.45568764783583693, "grad_norm": 0.42062658071517944, "learning_rate": 1.757048525767415e-05, "loss": 0.6453, "step": 14834 }, { "epoch": 0.4557183669707861, "grad_norm": 0.3746088147163391, "learning_rate": 1.7570169495403914e-05, "loss": 0.5045, "step": 14835 }, { "epoch": 0.4557490861057353, "grad_norm": 0.3008498251438141, "learning_rate": 1.7569853715453017e-05, "loss": 0.5022, "step": 14836 }, { "epoch": 0.4557798052406844, "grad_norm": 0.3292538821697235, "learning_rate": 1.7569537917822197e-05, "loss": 0.5356, "step": 14837 }, { "epoch": 0.4558105243756336, "grad_norm": 0.33369964361190796, "learning_rate": 1.7569222102512194e-05, "loss": 0.5758, "step": 14838 }, { "epoch": 0.45584124351058275, "grad_norm": 0.3226816952228546, "learning_rate": 1.7568906269523743e-05, "loss": 0.5592, "step": 14839 }, { "epoch": 0.4558719626455319, "grad_norm": 0.35098299384117126, "learning_rate": 1.756859041885758e-05, "loss": 0.5322, "step": 14840 }, { "epoch": 0.45590268178048104, "grad_norm": 0.3510268032550812, "learning_rate": 1.756827455051445e-05, "loss": 0.5734, "step": 14841 }, { "epoch": 0.4559334009154302, "grad_norm": 0.3882661759853363, "learning_rate": 1.7567958664495088e-05, "loss": 0.5497, "step": 14842 }, { "epoch": 0.4559641200503794, "grad_norm": 0.394694447517395, "learning_rate": 1.7567642760800222e-05, "loss": 0.67, "step": 14843 }, { "epoch": 0.4559948391853285, "grad_norm": 0.33559751510620117, "learning_rate": 1.7567326839430604e-05, "loss": 0.6244, "step": 14844 }, { "epoch": 0.4560255583202777, "grad_norm": 0.3713327646255493, "learning_rate": 1.7567010900386963e-05, "loss": 0.5745, "step": 14845 }, { "epoch": 0.45605627745522687, "grad_norm": 0.3466780185699463, "learning_rate": 1.756669494367004e-05, "loss": 0.5849, "step": 14846 }, { "epoch": 0.45608699659017604, "grad_norm": 0.3614327907562256, "learning_rate": 1.7566378969280572e-05, "loss": 0.6031, "step": 14847 }, { "epoch": 0.45611771572512516, "grad_norm": 0.331123024225235, "learning_rate": 1.75660629772193e-05, "loss": 0.561, "step": 14848 }, { "epoch": 0.45614843486007434, "grad_norm": 0.3737928867340088, "learning_rate": 1.7565746967486958e-05, "loss": 0.6026, "step": 14849 }, { "epoch": 0.4561791539950235, "grad_norm": 0.3417396545410156, "learning_rate": 1.7565430940084285e-05, "loss": 0.5618, "step": 14850 }, { "epoch": 0.4562098731299727, "grad_norm": 0.3488541841506958, "learning_rate": 1.756511489501202e-05, "loss": 0.567, "step": 14851 }, { "epoch": 0.4562405922649218, "grad_norm": 0.3534490168094635, "learning_rate": 1.75647988322709e-05, "loss": 0.5846, "step": 14852 }, { "epoch": 0.456271311399871, "grad_norm": 0.320895254611969, "learning_rate": 1.7564482751861665e-05, "loss": 0.579, "step": 14853 }, { "epoch": 0.45630203053482016, "grad_norm": 0.37051814794540405, "learning_rate": 1.7564166653785054e-05, "loss": 0.554, "step": 14854 }, { "epoch": 0.4563327496697693, "grad_norm": 0.37271347641944885, "learning_rate": 1.7563850538041802e-05, "loss": 0.5825, "step": 14855 }, { "epoch": 0.45636346880471845, "grad_norm": 0.3426390588283539, "learning_rate": 1.756353440463265e-05, "loss": 0.5516, "step": 14856 }, { "epoch": 0.45639418793966763, "grad_norm": 0.3523322343826294, "learning_rate": 1.756321825355833e-05, "loss": 0.6199, "step": 14857 }, { "epoch": 0.4564249070746168, "grad_norm": 0.3579956591129303, "learning_rate": 1.7562902084819592e-05, "loss": 0.6005, "step": 14858 }, { "epoch": 0.4564556262095659, "grad_norm": 0.33915185928344727, "learning_rate": 1.7562585898417163e-05, "loss": 0.5016, "step": 14859 }, { "epoch": 0.4564863453445151, "grad_norm": 0.4568637013435364, "learning_rate": 1.756226969435179e-05, "loss": 0.5837, "step": 14860 }, { "epoch": 0.4565170644794643, "grad_norm": 0.3499954044818878, "learning_rate": 1.756195347262421e-05, "loss": 0.5244, "step": 14861 }, { "epoch": 0.4565477836144134, "grad_norm": 0.3435781002044678, "learning_rate": 1.7561637233235153e-05, "loss": 0.561, "step": 14862 }, { "epoch": 0.45657850274936257, "grad_norm": 0.3518495559692383, "learning_rate": 1.756132097618537e-05, "loss": 0.6242, "step": 14863 }, { "epoch": 0.45660922188431174, "grad_norm": 0.32434141635894775, "learning_rate": 1.7561004701475588e-05, "loss": 0.5147, "step": 14864 }, { "epoch": 0.4566399410192609, "grad_norm": 0.3508748412132263, "learning_rate": 1.7560688409106556e-05, "loss": 0.6141, "step": 14865 }, { "epoch": 0.45667066015421004, "grad_norm": 0.33934295177459717, "learning_rate": 1.7560372099079005e-05, "loss": 0.5306, "step": 14866 }, { "epoch": 0.4567013792891592, "grad_norm": 0.34875357151031494, "learning_rate": 1.756005577139368e-05, "loss": 0.5648, "step": 14867 }, { "epoch": 0.4567320984241084, "grad_norm": 0.35613417625427246, "learning_rate": 1.7559739426051315e-05, "loss": 0.4906, "step": 14868 }, { "epoch": 0.4567628175590575, "grad_norm": 0.44192421436309814, "learning_rate": 1.755942306305265e-05, "loss": 0.549, "step": 14869 }, { "epoch": 0.4567935366940067, "grad_norm": 0.43918323516845703, "learning_rate": 1.755910668239843e-05, "loss": 0.5201, "step": 14870 }, { "epoch": 0.45682425582895586, "grad_norm": 0.6091206073760986, "learning_rate": 1.7558790284089383e-05, "loss": 0.5686, "step": 14871 }, { "epoch": 0.45685497496390504, "grad_norm": 0.36402517557144165, "learning_rate": 1.7558473868126254e-05, "loss": 0.5314, "step": 14872 }, { "epoch": 0.45688569409885416, "grad_norm": 0.35422322154045105, "learning_rate": 1.755815743450978e-05, "loss": 0.5273, "step": 14873 }, { "epoch": 0.45691641323380333, "grad_norm": 0.34074053168296814, "learning_rate": 1.7557840983240703e-05, "loss": 0.5345, "step": 14874 }, { "epoch": 0.4569471323687525, "grad_norm": 0.3740515112876892, "learning_rate": 1.7557524514319762e-05, "loss": 0.6278, "step": 14875 }, { "epoch": 0.4569778515037017, "grad_norm": 0.6121364831924438, "learning_rate": 1.7557208027747692e-05, "loss": 0.5265, "step": 14876 }, { "epoch": 0.4570085706386508, "grad_norm": 0.3226225674152374, "learning_rate": 1.755689152352524e-05, "loss": 0.6067, "step": 14877 }, { "epoch": 0.4570392897736, "grad_norm": 0.323478639125824, "learning_rate": 1.7556575001653136e-05, "loss": 0.5323, "step": 14878 }, { "epoch": 0.45707000890854915, "grad_norm": 0.3535043001174927, "learning_rate": 1.7556258462132125e-05, "loss": 0.6066, "step": 14879 }, { "epoch": 0.4571007280434983, "grad_norm": 0.32660573720932007, "learning_rate": 1.755594190496294e-05, "loss": 0.5383, "step": 14880 }, { "epoch": 0.45713144717844745, "grad_norm": 0.3388632833957672, "learning_rate": 1.7555625330146328e-05, "loss": 0.7079, "step": 14881 }, { "epoch": 0.4571621663133966, "grad_norm": 0.3633915185928345, "learning_rate": 1.7555308737683026e-05, "loss": 0.5709, "step": 14882 }, { "epoch": 0.4571928854483458, "grad_norm": 0.3545389771461487, "learning_rate": 1.7554992127573772e-05, "loss": 0.6435, "step": 14883 }, { "epoch": 0.4572236045832949, "grad_norm": 0.3471790850162506, "learning_rate": 1.7554675499819307e-05, "loss": 0.589, "step": 14884 }, { "epoch": 0.4572543237182441, "grad_norm": 0.37680748105049133, "learning_rate": 1.755435885442037e-05, "loss": 0.5615, "step": 14885 }, { "epoch": 0.45728504285319327, "grad_norm": 0.35151341557502747, "learning_rate": 1.7554042191377702e-05, "loss": 0.6009, "step": 14886 }, { "epoch": 0.4573157619881424, "grad_norm": 0.35122454166412354, "learning_rate": 1.7553725510692037e-05, "loss": 0.5911, "step": 14887 }, { "epoch": 0.45734648112309156, "grad_norm": 0.3701283931732178, "learning_rate": 1.755340881236412e-05, "loss": 0.6167, "step": 14888 }, { "epoch": 0.45737720025804074, "grad_norm": 0.42722538113594055, "learning_rate": 1.7553092096394692e-05, "loss": 0.5699, "step": 14889 }, { "epoch": 0.4574079193929899, "grad_norm": 0.45421740412712097, "learning_rate": 1.7552775362784486e-05, "loss": 0.5998, "step": 14890 }, { "epoch": 0.45743863852793903, "grad_norm": 0.5588492155075073, "learning_rate": 1.7552458611534247e-05, "loss": 0.6178, "step": 14891 }, { "epoch": 0.4574693576628882, "grad_norm": 0.5668898224830627, "learning_rate": 1.7552141842644713e-05, "loss": 0.5635, "step": 14892 }, { "epoch": 0.4575000767978374, "grad_norm": 0.37723931670188904, "learning_rate": 1.7551825056116626e-05, "loss": 0.5893, "step": 14893 }, { "epoch": 0.45753079593278656, "grad_norm": 0.37540265917778015, "learning_rate": 1.7551508251950724e-05, "loss": 0.5535, "step": 14894 }, { "epoch": 0.4575615150677357, "grad_norm": 0.334349125623703, "learning_rate": 1.7551191430147746e-05, "loss": 0.5615, "step": 14895 }, { "epoch": 0.45759223420268486, "grad_norm": 0.35603782534599304, "learning_rate": 1.755087459070843e-05, "loss": 0.6242, "step": 14896 }, { "epoch": 0.45762295333763403, "grad_norm": 0.3564946949481964, "learning_rate": 1.7550557733633525e-05, "loss": 0.6469, "step": 14897 }, { "epoch": 0.45765367247258315, "grad_norm": 0.3363029360771179, "learning_rate": 1.755024085892376e-05, "loss": 0.5711, "step": 14898 }, { "epoch": 0.4576843916075323, "grad_norm": 0.37370792031288147, "learning_rate": 1.7549923966579883e-05, "loss": 0.5756, "step": 14899 }, { "epoch": 0.4577151107424815, "grad_norm": 0.4230613708496094, "learning_rate": 1.7549607056602632e-05, "loss": 0.5749, "step": 14900 }, { "epoch": 0.4577458298774307, "grad_norm": 0.30758658051490784, "learning_rate": 1.7549290128992744e-05, "loss": 0.5068, "step": 14901 }, { "epoch": 0.4577765490123798, "grad_norm": 0.32288849353790283, "learning_rate": 1.7548973183750964e-05, "loss": 0.5122, "step": 14902 }, { "epoch": 0.45780726814732897, "grad_norm": 0.34583914279937744, "learning_rate": 1.754865622087803e-05, "loss": 0.5887, "step": 14903 }, { "epoch": 0.45783798728227815, "grad_norm": 0.3827151358127594, "learning_rate": 1.7548339240374682e-05, "loss": 0.5925, "step": 14904 }, { "epoch": 0.45786870641722727, "grad_norm": 0.32615333795547485, "learning_rate": 1.7548022242241657e-05, "loss": 0.5622, "step": 14905 }, { "epoch": 0.45789942555217644, "grad_norm": 0.4096027612686157, "learning_rate": 1.7547705226479702e-05, "loss": 0.5535, "step": 14906 }, { "epoch": 0.4579301446871256, "grad_norm": 0.33866146206855774, "learning_rate": 1.754738819308955e-05, "loss": 0.5368, "step": 14907 }, { "epoch": 0.4579608638220748, "grad_norm": 0.3856838345527649, "learning_rate": 1.7547071142071953e-05, "loss": 0.6375, "step": 14908 }, { "epoch": 0.4579915829570239, "grad_norm": 0.334995836019516, "learning_rate": 1.754675407342764e-05, "loss": 0.5137, "step": 14909 }, { "epoch": 0.4580223020919731, "grad_norm": 0.5355144143104553, "learning_rate": 1.7546436987157357e-05, "loss": 0.5362, "step": 14910 }, { "epoch": 0.45805302122692226, "grad_norm": 0.3374653458595276, "learning_rate": 1.7546119883261844e-05, "loss": 0.5874, "step": 14911 }, { "epoch": 0.4580837403618714, "grad_norm": 0.38145512342453003, "learning_rate": 1.7545802761741838e-05, "loss": 0.6004, "step": 14912 }, { "epoch": 0.45811445949682056, "grad_norm": 0.3533911108970642, "learning_rate": 1.7545485622598085e-05, "loss": 0.6404, "step": 14913 }, { "epoch": 0.45814517863176973, "grad_norm": 0.3750940263271332, "learning_rate": 1.7545168465831324e-05, "loss": 0.539, "step": 14914 }, { "epoch": 0.4581758977667189, "grad_norm": 0.37020593881607056, "learning_rate": 1.7544851291442297e-05, "loss": 0.5717, "step": 14915 }, { "epoch": 0.45820661690166803, "grad_norm": 0.31016111373901367, "learning_rate": 1.7544534099431735e-05, "loss": 0.5788, "step": 14916 }, { "epoch": 0.4582373360366172, "grad_norm": 0.3931322693824768, "learning_rate": 1.7544216889800394e-05, "loss": 0.6035, "step": 14917 }, { "epoch": 0.4582680551715664, "grad_norm": 0.33466702699661255, "learning_rate": 1.7543899662549006e-05, "loss": 0.5676, "step": 14918 }, { "epoch": 0.45829877430651556, "grad_norm": 0.3280591368675232, "learning_rate": 1.7543582417678313e-05, "loss": 0.5407, "step": 14919 }, { "epoch": 0.4583294934414647, "grad_norm": 0.3598901033401489, "learning_rate": 1.7543265155189053e-05, "loss": 0.6052, "step": 14920 }, { "epoch": 0.45836021257641385, "grad_norm": 0.3527345061302185, "learning_rate": 1.7542947875081975e-05, "loss": 0.662, "step": 14921 }, { "epoch": 0.458390931711363, "grad_norm": 0.41162171959877014, "learning_rate": 1.7542630577357815e-05, "loss": 0.6154, "step": 14922 }, { "epoch": 0.45842165084631215, "grad_norm": 0.3654109239578247, "learning_rate": 1.7542313262017315e-05, "loss": 0.4905, "step": 14923 }, { "epoch": 0.4584523699812613, "grad_norm": 0.5760310292243958, "learning_rate": 1.7541995929061212e-05, "loss": 0.6116, "step": 14924 }, { "epoch": 0.4584830891162105, "grad_norm": 0.36771395802497864, "learning_rate": 1.7541678578490253e-05, "loss": 0.6434, "step": 14925 }, { "epoch": 0.45851380825115967, "grad_norm": 0.36297619342803955, "learning_rate": 1.7541361210305177e-05, "loss": 0.5816, "step": 14926 }, { "epoch": 0.4585445273861088, "grad_norm": 0.347223162651062, "learning_rate": 1.7541043824506724e-05, "loss": 0.5967, "step": 14927 }, { "epoch": 0.45857524652105797, "grad_norm": 0.3490247130393982, "learning_rate": 1.7540726421095637e-05, "loss": 0.6237, "step": 14928 }, { "epoch": 0.45860596565600714, "grad_norm": 0.3637227416038513, "learning_rate": 1.754040900007266e-05, "loss": 0.5485, "step": 14929 }, { "epoch": 0.45863668479095626, "grad_norm": 0.3685954213142395, "learning_rate": 1.7540091561438527e-05, "loss": 0.5714, "step": 14930 }, { "epoch": 0.45866740392590544, "grad_norm": 0.34634625911712646, "learning_rate": 1.7539774105193984e-05, "loss": 0.5468, "step": 14931 }, { "epoch": 0.4586981230608546, "grad_norm": 0.3964401185512543, "learning_rate": 1.7539456631339775e-05, "loss": 0.5354, "step": 14932 }, { "epoch": 0.4587288421958038, "grad_norm": 1.3535985946655273, "learning_rate": 1.753913913987664e-05, "loss": 0.5269, "step": 14933 }, { "epoch": 0.4587595613307529, "grad_norm": 0.3245680630207062, "learning_rate": 1.7538821630805314e-05, "loss": 0.5721, "step": 14934 }, { "epoch": 0.4587902804657021, "grad_norm": 0.3759658932685852, "learning_rate": 1.7538504104126542e-05, "loss": 0.5471, "step": 14935 }, { "epoch": 0.45882099960065126, "grad_norm": 0.4034577012062073, "learning_rate": 1.753818655984107e-05, "loss": 0.6558, "step": 14936 }, { "epoch": 0.4588517187356004, "grad_norm": 0.32194286584854126, "learning_rate": 1.7537868997949637e-05, "loss": 0.4624, "step": 14937 }, { "epoch": 0.45888243787054955, "grad_norm": 0.3612785339355469, "learning_rate": 1.7537551418452987e-05, "loss": 0.5782, "step": 14938 }, { "epoch": 0.45891315700549873, "grad_norm": 0.3218516409397125, "learning_rate": 1.753723382135186e-05, "loss": 0.5331, "step": 14939 }, { "epoch": 0.4589438761404479, "grad_norm": 0.363787442445755, "learning_rate": 1.753691620664699e-05, "loss": 0.5032, "step": 14940 }, { "epoch": 0.458974595275397, "grad_norm": 0.3287619948387146, "learning_rate": 1.7536598574339132e-05, "loss": 0.561, "step": 14941 }, { "epoch": 0.4590053144103462, "grad_norm": 0.3187781274318695, "learning_rate": 1.753628092442902e-05, "loss": 0.5481, "step": 14942 }, { "epoch": 0.4590360335452954, "grad_norm": 0.4004264771938324, "learning_rate": 1.7535963256917393e-05, "loss": 0.635, "step": 14943 }, { "epoch": 0.45906675268024455, "grad_norm": 0.43304935097694397, "learning_rate": 1.7535645571805005e-05, "loss": 0.5569, "step": 14944 }, { "epoch": 0.45909747181519367, "grad_norm": 0.36263129115104675, "learning_rate": 1.7535327869092587e-05, "loss": 0.6444, "step": 14945 }, { "epoch": 0.45912819095014284, "grad_norm": 0.3390050232410431, "learning_rate": 1.753501014878088e-05, "loss": 0.6889, "step": 14946 }, { "epoch": 0.459158910085092, "grad_norm": 0.31136438250541687, "learning_rate": 1.7534692410870636e-05, "loss": 0.6272, "step": 14947 }, { "epoch": 0.45918962922004114, "grad_norm": 0.32567089796066284, "learning_rate": 1.7534374655362593e-05, "loss": 0.5816, "step": 14948 }, { "epoch": 0.4592203483549903, "grad_norm": 0.39119428396224976, "learning_rate": 1.7534056882257492e-05, "loss": 0.5947, "step": 14949 }, { "epoch": 0.4592510674899395, "grad_norm": 0.3525576591491699, "learning_rate": 1.7533739091556074e-05, "loss": 0.6033, "step": 14950 }, { "epoch": 0.45928178662488867, "grad_norm": 0.35392841696739197, "learning_rate": 1.7533421283259078e-05, "loss": 0.5157, "step": 14951 }, { "epoch": 0.4593125057598378, "grad_norm": 0.3410913050174713, "learning_rate": 1.7533103457367252e-05, "loss": 0.5943, "step": 14952 }, { "epoch": 0.45934322489478696, "grad_norm": 0.3890867233276367, "learning_rate": 1.7532785613881336e-05, "loss": 0.5841, "step": 14953 }, { "epoch": 0.45937394402973614, "grad_norm": 0.3270435333251953, "learning_rate": 1.753246775280208e-05, "loss": 0.5219, "step": 14954 }, { "epoch": 0.45940466316468526, "grad_norm": 0.3465689718723297, "learning_rate": 1.753214987413021e-05, "loss": 0.559, "step": 14955 }, { "epoch": 0.45943538229963443, "grad_norm": 0.34675613045692444, "learning_rate": 1.7531831977866485e-05, "loss": 0.5828, "step": 14956 }, { "epoch": 0.4594661014345836, "grad_norm": 0.33259934186935425, "learning_rate": 1.753151406401164e-05, "loss": 0.5814, "step": 14957 }, { "epoch": 0.4594968205695328, "grad_norm": 0.3885191082954407, "learning_rate": 1.7531196132566414e-05, "loss": 0.6061, "step": 14958 }, { "epoch": 0.4595275397044819, "grad_norm": 0.3273746371269226, "learning_rate": 1.7530878183531553e-05, "loss": 0.5998, "step": 14959 }, { "epoch": 0.4595582588394311, "grad_norm": 0.4218716025352478, "learning_rate": 1.7530560216907806e-05, "loss": 0.5709, "step": 14960 }, { "epoch": 0.45958897797438025, "grad_norm": 0.33008357882499695, "learning_rate": 1.7530242232695904e-05, "loss": 0.4888, "step": 14961 }, { "epoch": 0.45961969710932943, "grad_norm": 0.35682862997055054, "learning_rate": 1.75299242308966e-05, "loss": 0.5728, "step": 14962 }, { "epoch": 0.45965041624427855, "grad_norm": 0.34896549582481384, "learning_rate": 1.7529606211510627e-05, "loss": 0.57, "step": 14963 }, { "epoch": 0.4596811353792277, "grad_norm": 0.6465823650360107, "learning_rate": 1.7529288174538734e-05, "loss": 0.6176, "step": 14964 }, { "epoch": 0.4597118545141769, "grad_norm": 0.37932538986206055, "learning_rate": 1.7528970119981664e-05, "loss": 0.6198, "step": 14965 }, { "epoch": 0.459742573649126, "grad_norm": 0.3673594295978546, "learning_rate": 1.752865204784016e-05, "loss": 0.5377, "step": 14966 }, { "epoch": 0.4597732927840752, "grad_norm": 0.35222306847572327, "learning_rate": 1.7528333958114962e-05, "loss": 0.532, "step": 14967 }, { "epoch": 0.45980401191902437, "grad_norm": 0.4675556719303131, "learning_rate": 1.7528015850806818e-05, "loss": 0.4967, "step": 14968 }, { "epoch": 0.45983473105397354, "grad_norm": 0.3285087049007416, "learning_rate": 1.7527697725916462e-05, "loss": 0.5379, "step": 14969 }, { "epoch": 0.45986545018892266, "grad_norm": 0.3432493209838867, "learning_rate": 1.7527379583444643e-05, "loss": 0.5832, "step": 14970 }, { "epoch": 0.45989616932387184, "grad_norm": 0.3686516582965851, "learning_rate": 1.7527061423392102e-05, "loss": 0.5489, "step": 14971 }, { "epoch": 0.459926888458821, "grad_norm": 0.3398393988609314, "learning_rate": 1.7526743245759586e-05, "loss": 0.5415, "step": 14972 }, { "epoch": 0.45995760759377013, "grad_norm": 0.3922847509384155, "learning_rate": 1.7526425050547833e-05, "loss": 0.6031, "step": 14973 }, { "epoch": 0.4599883267287193, "grad_norm": 0.3710283637046814, "learning_rate": 1.7526106837757592e-05, "loss": 0.5965, "step": 14974 }, { "epoch": 0.4600190458636685, "grad_norm": 0.42710816860198975, "learning_rate": 1.75257886073896e-05, "loss": 0.5612, "step": 14975 }, { "epoch": 0.46004976499861766, "grad_norm": 0.3521771728992462, "learning_rate": 1.7525470359444606e-05, "loss": 0.5708, "step": 14976 }, { "epoch": 0.4600804841335668, "grad_norm": 0.3011832535266876, "learning_rate": 1.7525152093923348e-05, "loss": 0.52, "step": 14977 }, { "epoch": 0.46011120326851596, "grad_norm": 0.3888145685195923, "learning_rate": 1.7524833810826574e-05, "loss": 0.5614, "step": 14978 }, { "epoch": 0.46014192240346513, "grad_norm": 0.362020879983902, "learning_rate": 1.752451551015502e-05, "loss": 0.5802, "step": 14979 }, { "epoch": 0.46017264153841425, "grad_norm": 0.3551254868507385, "learning_rate": 1.752419719190944e-05, "loss": 0.6016, "step": 14980 }, { "epoch": 0.4602033606733634, "grad_norm": 0.4265778958797455, "learning_rate": 1.7523878856090572e-05, "loss": 0.5478, "step": 14981 }, { "epoch": 0.4602340798083126, "grad_norm": 1.3012709617614746, "learning_rate": 1.7523560502699153e-05, "loss": 0.5658, "step": 14982 }, { "epoch": 0.4602647989432618, "grad_norm": 0.4265270233154297, "learning_rate": 1.7523242131735938e-05, "loss": 0.678, "step": 14983 }, { "epoch": 0.4602955180782109, "grad_norm": 0.4003060460090637, "learning_rate": 1.7522923743201667e-05, "loss": 0.4192, "step": 14984 }, { "epoch": 0.46032623721316007, "grad_norm": 0.3987058699131012, "learning_rate": 1.7522605337097078e-05, "loss": 0.5728, "step": 14985 }, { "epoch": 0.46035695634810925, "grad_norm": 0.38765034079551697, "learning_rate": 1.7522286913422924e-05, "loss": 0.5066, "step": 14986 }, { "epoch": 0.4603876754830584, "grad_norm": 0.39925310015678406, "learning_rate": 1.752196847217994e-05, "loss": 0.5065, "step": 14987 }, { "epoch": 0.46041839461800754, "grad_norm": 0.3321259319782257, "learning_rate": 1.7521650013368875e-05, "loss": 0.5677, "step": 14988 }, { "epoch": 0.4604491137529567, "grad_norm": 0.4296007454395294, "learning_rate": 1.7521331536990468e-05, "loss": 0.5136, "step": 14989 }, { "epoch": 0.4604798328879059, "grad_norm": 0.3290773630142212, "learning_rate": 1.752101304304547e-05, "loss": 0.5882, "step": 14990 }, { "epoch": 0.460510552022855, "grad_norm": 0.3107479214668274, "learning_rate": 1.7520694531534618e-05, "loss": 0.4692, "step": 14991 }, { "epoch": 0.4605412711578042, "grad_norm": 0.33969900012016296, "learning_rate": 1.752037600245866e-05, "loss": 0.5355, "step": 14992 }, { "epoch": 0.46057199029275336, "grad_norm": 0.3866717517375946, "learning_rate": 1.7520057455818337e-05, "loss": 0.5334, "step": 14993 }, { "epoch": 0.46060270942770254, "grad_norm": 0.3429889678955078, "learning_rate": 1.7519738891614397e-05, "loss": 0.651, "step": 14994 }, { "epoch": 0.46063342856265166, "grad_norm": 0.3592424988746643, "learning_rate": 1.7519420309847578e-05, "loss": 0.5188, "step": 14995 }, { "epoch": 0.46066414769760083, "grad_norm": 0.3526012599468231, "learning_rate": 1.751910171051863e-05, "loss": 0.5125, "step": 14996 }, { "epoch": 0.46069486683255, "grad_norm": 0.3529806435108185, "learning_rate": 1.7518783093628297e-05, "loss": 0.6407, "step": 14997 }, { "epoch": 0.46072558596749913, "grad_norm": 0.3347106873989105, "learning_rate": 1.7518464459177316e-05, "loss": 0.5496, "step": 14998 }, { "epoch": 0.4607563051024483, "grad_norm": 0.3469483554363251, "learning_rate": 1.751814580716644e-05, "loss": 0.5195, "step": 14999 }, { "epoch": 0.4607870242373975, "grad_norm": 0.3423756957054138, "learning_rate": 1.7517827137596407e-05, "loss": 0.5799, "step": 15000 }, { "epoch": 0.46081774337234666, "grad_norm": 0.3763512969017029, "learning_rate": 1.7517508450467966e-05, "loss": 0.5622, "step": 15001 }, { "epoch": 0.4608484625072958, "grad_norm": 0.3555516004562378, "learning_rate": 1.751718974578186e-05, "loss": 0.4256, "step": 15002 }, { "epoch": 0.46087918164224495, "grad_norm": 0.33605489134788513, "learning_rate": 1.751687102353883e-05, "loss": 0.4706, "step": 15003 }, { "epoch": 0.4609099007771941, "grad_norm": 0.3154623210430145, "learning_rate": 1.751655228373962e-05, "loss": 0.587, "step": 15004 }, { "epoch": 0.4609406199121433, "grad_norm": 0.33751019835472107, "learning_rate": 1.7516233526384983e-05, "loss": 0.5623, "step": 15005 }, { "epoch": 0.4609713390470924, "grad_norm": 0.39987844228744507, "learning_rate": 1.7515914751475655e-05, "loss": 0.6252, "step": 15006 }, { "epoch": 0.4610020581820416, "grad_norm": 0.34484097361564636, "learning_rate": 1.7515595959012385e-05, "loss": 0.6, "step": 15007 }, { "epoch": 0.46103277731699077, "grad_norm": 0.33656638860702515, "learning_rate": 1.7515277148995913e-05, "loss": 0.5846, "step": 15008 }, { "epoch": 0.4610634964519399, "grad_norm": 0.3522637188434601, "learning_rate": 1.7514958321426987e-05, "loss": 0.579, "step": 15009 }, { "epoch": 0.46109421558688907, "grad_norm": 0.3216972053050995, "learning_rate": 1.7514639476306352e-05, "loss": 0.6024, "step": 15010 }, { "epoch": 0.46112493472183824, "grad_norm": 0.44168075919151306, "learning_rate": 1.751432061363475e-05, "loss": 0.5436, "step": 15011 }, { "epoch": 0.4611556538567874, "grad_norm": 0.3661257326602936, "learning_rate": 1.751400173341293e-05, "loss": 0.5597, "step": 15012 }, { "epoch": 0.46118637299173654, "grad_norm": 0.35021716356277466, "learning_rate": 1.7513682835641634e-05, "loss": 0.5738, "step": 15013 }, { "epoch": 0.4612170921266857, "grad_norm": 0.3283958435058594, "learning_rate": 1.7513363920321605e-05, "loss": 0.4848, "step": 15014 }, { "epoch": 0.4612478112616349, "grad_norm": 0.3549174666404724, "learning_rate": 1.751304498745359e-05, "loss": 0.5874, "step": 15015 }, { "epoch": 0.461278530396584, "grad_norm": 0.31910598278045654, "learning_rate": 1.7512726037038335e-05, "loss": 0.5498, "step": 15016 }, { "epoch": 0.4613092495315332, "grad_norm": 0.328264981508255, "learning_rate": 1.751240706907658e-05, "loss": 0.5718, "step": 15017 }, { "epoch": 0.46133996866648236, "grad_norm": 0.3728581964969635, "learning_rate": 1.751208808356908e-05, "loss": 0.5135, "step": 15018 }, { "epoch": 0.46137068780143153, "grad_norm": 0.3672035336494446, "learning_rate": 1.7511769080516566e-05, "loss": 0.5397, "step": 15019 }, { "epoch": 0.46140140693638065, "grad_norm": 0.36930862069129944, "learning_rate": 1.75114500599198e-05, "loss": 0.5635, "step": 15020 }, { "epoch": 0.46143212607132983, "grad_norm": 0.4263584613800049, "learning_rate": 1.7511131021779508e-05, "loss": 0.5529, "step": 15021 }, { "epoch": 0.461462845206279, "grad_norm": 0.40738645195961, "learning_rate": 1.7510811966096454e-05, "loss": 0.5871, "step": 15022 }, { "epoch": 0.4614935643412281, "grad_norm": 0.32516762614250183, "learning_rate": 1.7510492892871368e-05, "loss": 0.5884, "step": 15023 }, { "epoch": 0.4615242834761773, "grad_norm": 0.3572433590888977, "learning_rate": 1.7510173802105002e-05, "loss": 0.5987, "step": 15024 }, { "epoch": 0.4615550026111265, "grad_norm": 0.3289932310581207, "learning_rate": 1.75098546937981e-05, "loss": 0.5942, "step": 15025 }, { "epoch": 0.46158572174607565, "grad_norm": 0.3523715138435364, "learning_rate": 1.7509535567951412e-05, "loss": 0.5079, "step": 15026 }, { "epoch": 0.46161644088102477, "grad_norm": 0.3423430919647217, "learning_rate": 1.7509216424565675e-05, "loss": 0.5953, "step": 15027 }, { "epoch": 0.46164716001597395, "grad_norm": 0.3147362470626831, "learning_rate": 1.750889726364164e-05, "loss": 0.6107, "step": 15028 }, { "epoch": 0.4616778791509231, "grad_norm": 0.3728281259536743, "learning_rate": 1.7508578085180053e-05, "loss": 0.5692, "step": 15029 }, { "epoch": 0.4617085982858723, "grad_norm": 0.3653053939342499, "learning_rate": 1.7508258889181654e-05, "loss": 0.6088, "step": 15030 }, { "epoch": 0.4617393174208214, "grad_norm": 0.4072294533252716, "learning_rate": 1.7507939675647193e-05, "loss": 0.5832, "step": 15031 }, { "epoch": 0.4617700365557706, "grad_norm": 0.3860301673412323, "learning_rate": 1.750762044457742e-05, "loss": 0.4962, "step": 15032 }, { "epoch": 0.46180075569071977, "grad_norm": 0.35495525598526, "learning_rate": 1.7507301195973067e-05, "loss": 0.5423, "step": 15033 }, { "epoch": 0.4618314748256689, "grad_norm": 0.31472501158714294, "learning_rate": 1.7506981929834892e-05, "loss": 0.5926, "step": 15034 }, { "epoch": 0.46186219396061806, "grad_norm": 0.35317564010620117, "learning_rate": 1.7506662646163633e-05, "loss": 0.5524, "step": 15035 }, { "epoch": 0.46189291309556724, "grad_norm": 0.3853405714035034, "learning_rate": 1.7506343344960042e-05, "loss": 0.6111, "step": 15036 }, { "epoch": 0.4619236322305164, "grad_norm": 0.43699702620506287, "learning_rate": 1.7506024026224862e-05, "loss": 0.5828, "step": 15037 }, { "epoch": 0.46195435136546553, "grad_norm": 0.3564210534095764, "learning_rate": 1.750570468995884e-05, "loss": 0.5776, "step": 15038 }, { "epoch": 0.4619850705004147, "grad_norm": 0.3246304392814636, "learning_rate": 1.750538533616272e-05, "loss": 0.4745, "step": 15039 }, { "epoch": 0.4620157896353639, "grad_norm": 0.4104432463645935, "learning_rate": 1.7505065964837246e-05, "loss": 0.6043, "step": 15040 }, { "epoch": 0.462046508770313, "grad_norm": 0.37187686562538147, "learning_rate": 1.7504746575983168e-05, "loss": 0.5863, "step": 15041 }, { "epoch": 0.4620772279052622, "grad_norm": 0.40208783745765686, "learning_rate": 1.750442716960123e-05, "loss": 0.5953, "step": 15042 }, { "epoch": 0.46210794704021135, "grad_norm": 0.393187552690506, "learning_rate": 1.7504107745692177e-05, "loss": 0.5493, "step": 15043 }, { "epoch": 0.46213866617516053, "grad_norm": 0.34870779514312744, "learning_rate": 1.750378830425676e-05, "loss": 0.6438, "step": 15044 }, { "epoch": 0.46216938531010965, "grad_norm": 0.35420823097229004, "learning_rate": 1.750346884529572e-05, "loss": 0.5098, "step": 15045 }, { "epoch": 0.4622001044450588, "grad_norm": 0.36294931173324585, "learning_rate": 1.75031493688098e-05, "loss": 0.5864, "step": 15046 }, { "epoch": 0.462230823580008, "grad_norm": 0.3392777442932129, "learning_rate": 1.7502829874799755e-05, "loss": 0.5106, "step": 15047 }, { "epoch": 0.4622615427149572, "grad_norm": 0.3675919771194458, "learning_rate": 1.7502510363266327e-05, "loss": 0.6076, "step": 15048 }, { "epoch": 0.4622922618499063, "grad_norm": 0.3371598720550537, "learning_rate": 1.7502190834210257e-05, "loss": 0.6014, "step": 15049 }, { "epoch": 0.46232298098485547, "grad_norm": 0.34554463624954224, "learning_rate": 1.75018712876323e-05, "loss": 0.5316, "step": 15050 }, { "epoch": 0.46235370011980464, "grad_norm": 0.38676148653030396, "learning_rate": 1.75015517235332e-05, "loss": 0.5589, "step": 15051 }, { "epoch": 0.46238441925475376, "grad_norm": 0.35091301798820496, "learning_rate": 1.75012321419137e-05, "loss": 0.572, "step": 15052 }, { "epoch": 0.46241513838970294, "grad_norm": 0.35504066944122314, "learning_rate": 1.7500912542774552e-05, "loss": 0.6727, "step": 15053 }, { "epoch": 0.4624458575246521, "grad_norm": 0.32074254751205444, "learning_rate": 1.7500592926116495e-05, "loss": 0.5949, "step": 15054 }, { "epoch": 0.4624765766596013, "grad_norm": 0.3243458867073059, "learning_rate": 1.750027329194028e-05, "loss": 0.5358, "step": 15055 }, { "epoch": 0.4625072957945504, "grad_norm": 0.36217111349105835, "learning_rate": 1.749995364024665e-05, "loss": 0.5549, "step": 15056 }, { "epoch": 0.4625380149294996, "grad_norm": 0.35141074657440186, "learning_rate": 1.749963397103636e-05, "loss": 0.6285, "step": 15057 }, { "epoch": 0.46256873406444876, "grad_norm": 0.34706398844718933, "learning_rate": 1.7499314284310148e-05, "loss": 0.5816, "step": 15058 }, { "epoch": 0.4625994531993979, "grad_norm": 0.3664652109146118, "learning_rate": 1.7498994580068765e-05, "loss": 0.5482, "step": 15059 }, { "epoch": 0.46263017233434706, "grad_norm": 0.3815475106239319, "learning_rate": 1.7498674858312957e-05, "loss": 0.5974, "step": 15060 }, { "epoch": 0.46266089146929623, "grad_norm": 0.35861366987228394, "learning_rate": 1.749835511904347e-05, "loss": 0.6207, "step": 15061 }, { "epoch": 0.4626916106042454, "grad_norm": 0.516350269317627, "learning_rate": 1.749803536226105e-05, "loss": 0.5951, "step": 15062 }, { "epoch": 0.4627223297391945, "grad_norm": 0.3613710403442383, "learning_rate": 1.7497715587966444e-05, "loss": 0.6417, "step": 15063 }, { "epoch": 0.4627530488741437, "grad_norm": 0.40708261728286743, "learning_rate": 1.7497395796160402e-05, "loss": 0.6, "step": 15064 }, { "epoch": 0.4627837680090929, "grad_norm": 0.39028021693229675, "learning_rate": 1.7497075986843667e-05, "loss": 0.5655, "step": 15065 }, { "epoch": 0.462814487144042, "grad_norm": 0.37652525305747986, "learning_rate": 1.7496756160016987e-05, "loss": 0.6009, "step": 15066 }, { "epoch": 0.4628452062789912, "grad_norm": 0.3639780282974243, "learning_rate": 1.7496436315681108e-05, "loss": 0.5678, "step": 15067 }, { "epoch": 0.46287592541394035, "grad_norm": 0.3541998267173767, "learning_rate": 1.749611645383678e-05, "loss": 0.5582, "step": 15068 }, { "epoch": 0.4629066445488895, "grad_norm": 0.40462344884872437, "learning_rate": 1.749579657448475e-05, "loss": 0.5663, "step": 15069 }, { "epoch": 0.46293736368383864, "grad_norm": 0.3566315472126007, "learning_rate": 1.749547667762576e-05, "loss": 0.5003, "step": 15070 }, { "epoch": 0.4629680828187878, "grad_norm": 0.34842565655708313, "learning_rate": 1.7495156763260567e-05, "loss": 0.5908, "step": 15071 }, { "epoch": 0.462998801953737, "grad_norm": 0.3843683898448944, "learning_rate": 1.7494836831389908e-05, "loss": 0.5468, "step": 15072 }, { "epoch": 0.46302952108868617, "grad_norm": 0.38187551498413086, "learning_rate": 1.7494516882014534e-05, "loss": 0.5245, "step": 15073 }, { "epoch": 0.4630602402236353, "grad_norm": 0.36740607023239136, "learning_rate": 1.7494196915135194e-05, "loss": 0.5823, "step": 15074 }, { "epoch": 0.46309095935858446, "grad_norm": 0.32930728793144226, "learning_rate": 1.749387693075263e-05, "loss": 0.5777, "step": 15075 }, { "epoch": 0.46312167849353364, "grad_norm": 0.3276188373565674, "learning_rate": 1.7493556928867597e-05, "loss": 0.5752, "step": 15076 }, { "epoch": 0.46315239762848276, "grad_norm": 0.35650742053985596, "learning_rate": 1.7493236909480836e-05, "loss": 0.5487, "step": 15077 }, { "epoch": 0.46318311676343193, "grad_norm": 0.6112621426582336, "learning_rate": 1.74929168725931e-05, "loss": 0.575, "step": 15078 }, { "epoch": 0.4632138358983811, "grad_norm": 0.33477744460105896, "learning_rate": 1.749259681820513e-05, "loss": 0.4576, "step": 15079 }, { "epoch": 0.4632445550333303, "grad_norm": 0.35465744137763977, "learning_rate": 1.7492276746317677e-05, "loss": 0.5286, "step": 15080 }, { "epoch": 0.4632752741682794, "grad_norm": 0.449050635099411, "learning_rate": 1.749195665693149e-05, "loss": 0.6035, "step": 15081 }, { "epoch": 0.4633059933032286, "grad_norm": 0.3368692696094513, "learning_rate": 1.7491636550047315e-05, "loss": 0.4526, "step": 15082 }, { "epoch": 0.46333671243817776, "grad_norm": 0.3351900279521942, "learning_rate": 1.7491316425665898e-05, "loss": 0.5761, "step": 15083 }, { "epoch": 0.4633674315731269, "grad_norm": 0.34053584933280945, "learning_rate": 1.7490996283787992e-05, "loss": 0.5836, "step": 15084 }, { "epoch": 0.46339815070807605, "grad_norm": 0.4583267569541931, "learning_rate": 1.7490676124414337e-05, "loss": 0.6679, "step": 15085 }, { "epoch": 0.4634288698430252, "grad_norm": 0.34589558839797974, "learning_rate": 1.7490355947545686e-05, "loss": 0.6653, "step": 15086 }, { "epoch": 0.4634595889779744, "grad_norm": 0.37623822689056396, "learning_rate": 1.7490035753182787e-05, "loss": 0.5978, "step": 15087 }, { "epoch": 0.4634903081129235, "grad_norm": 0.3566945791244507, "learning_rate": 1.748971554132638e-05, "loss": 0.5583, "step": 15088 }, { "epoch": 0.4635210272478727, "grad_norm": 0.3129368722438812, "learning_rate": 1.748939531197723e-05, "loss": 0.5496, "step": 15089 }, { "epoch": 0.46355174638282187, "grad_norm": 0.37627917528152466, "learning_rate": 1.7489075065136068e-05, "loss": 0.517, "step": 15090 }, { "epoch": 0.463582465517771, "grad_norm": 0.4804825484752655, "learning_rate": 1.7488754800803646e-05, "loss": 0.5651, "step": 15091 }, { "epoch": 0.46361318465272017, "grad_norm": 0.35902753472328186, "learning_rate": 1.748843451898072e-05, "loss": 0.5889, "step": 15092 }, { "epoch": 0.46364390378766934, "grad_norm": 0.33882755041122437, "learning_rate": 1.7488114219668028e-05, "loss": 0.5341, "step": 15093 }, { "epoch": 0.4636746229226185, "grad_norm": 0.3482252061367035, "learning_rate": 1.7487793902866323e-05, "loss": 0.5646, "step": 15094 }, { "epoch": 0.46370534205756764, "grad_norm": 0.3154295086860657, "learning_rate": 1.7487473568576352e-05, "loss": 0.5565, "step": 15095 }, { "epoch": 0.4637360611925168, "grad_norm": 0.3413528800010681, "learning_rate": 1.7487153216798864e-05, "loss": 0.523, "step": 15096 }, { "epoch": 0.463766780327466, "grad_norm": 0.35352572798728943, "learning_rate": 1.748683284753461e-05, "loss": 0.5779, "step": 15097 }, { "epoch": 0.46379749946241516, "grad_norm": 0.32871922850608826, "learning_rate": 1.748651246078433e-05, "loss": 0.5846, "step": 15098 }, { "epoch": 0.4638282185973643, "grad_norm": 0.36766594648361206, "learning_rate": 1.7486192056548777e-05, "loss": 0.5511, "step": 15099 }, { "epoch": 0.46385893773231346, "grad_norm": 0.3402208983898163, "learning_rate": 1.7485871634828702e-05, "loss": 0.5607, "step": 15100 }, { "epoch": 0.46388965686726263, "grad_norm": 0.3490822911262512, "learning_rate": 1.7485551195624855e-05, "loss": 0.5323, "step": 15101 }, { "epoch": 0.46392037600221175, "grad_norm": 0.37798061966896057, "learning_rate": 1.7485230738937976e-05, "loss": 0.6239, "step": 15102 }, { "epoch": 0.46395109513716093, "grad_norm": 0.3951759338378906, "learning_rate": 1.7484910264768818e-05, "loss": 0.5648, "step": 15103 }, { "epoch": 0.4639818142721101, "grad_norm": 0.37493181228637695, "learning_rate": 1.7484589773118127e-05, "loss": 0.6052, "step": 15104 }, { "epoch": 0.4640125334070593, "grad_norm": 0.39984196424484253, "learning_rate": 1.7484269263986657e-05, "loss": 0.4754, "step": 15105 }, { "epoch": 0.4640432525420084, "grad_norm": 0.6800329685211182, "learning_rate": 1.7483948737375152e-05, "loss": 0.545, "step": 15106 }, { "epoch": 0.4640739716769576, "grad_norm": 0.32180270552635193, "learning_rate": 1.7483628193284367e-05, "loss": 0.5699, "step": 15107 }, { "epoch": 0.46410469081190675, "grad_norm": 0.3560450077056885, "learning_rate": 1.748330763171504e-05, "loss": 0.6114, "step": 15108 }, { "epoch": 0.46413540994685587, "grad_norm": 0.3579772114753723, "learning_rate": 1.748298705266793e-05, "loss": 0.5937, "step": 15109 }, { "epoch": 0.46416612908180505, "grad_norm": 0.3183712661266327, "learning_rate": 1.7482666456143774e-05, "loss": 0.5608, "step": 15110 }, { "epoch": 0.4641968482167542, "grad_norm": 0.3678797781467438, "learning_rate": 1.748234584214333e-05, "loss": 0.5926, "step": 15111 }, { "epoch": 0.4642275673517034, "grad_norm": 0.33418789505958557, "learning_rate": 1.7482025210667348e-05, "loss": 0.6211, "step": 15112 }, { "epoch": 0.4642582864866525, "grad_norm": 0.3380526304244995, "learning_rate": 1.7481704561716574e-05, "loss": 0.5834, "step": 15113 }, { "epoch": 0.4642890056216017, "grad_norm": 0.36274921894073486, "learning_rate": 1.7481383895291753e-05, "loss": 0.6581, "step": 15114 }, { "epoch": 0.46431972475655087, "grad_norm": 0.3203476667404175, "learning_rate": 1.748106321139364e-05, "loss": 0.4972, "step": 15115 }, { "epoch": 0.46435044389150004, "grad_norm": 0.3548239469528198, "learning_rate": 1.748074251002298e-05, "loss": 0.5914, "step": 15116 }, { "epoch": 0.46438116302644916, "grad_norm": 0.3879430592060089, "learning_rate": 1.7480421791180527e-05, "loss": 0.5576, "step": 15117 }, { "epoch": 0.46441188216139834, "grad_norm": 0.374919056892395, "learning_rate": 1.748010105486702e-05, "loss": 0.5701, "step": 15118 }, { "epoch": 0.4644426012963475, "grad_norm": 0.36940738558769226, "learning_rate": 1.747978030108322e-05, "loss": 0.5908, "step": 15119 }, { "epoch": 0.46447332043129663, "grad_norm": 0.36034899950027466, "learning_rate": 1.747945952982987e-05, "loss": 0.5448, "step": 15120 }, { "epoch": 0.4645040395662458, "grad_norm": 0.38456836342811584, "learning_rate": 1.747913874110772e-05, "loss": 0.5984, "step": 15121 }, { "epoch": 0.464534758701195, "grad_norm": 0.3516639471054077, "learning_rate": 1.7478817934917517e-05, "loss": 0.562, "step": 15122 }, { "epoch": 0.46456547783614416, "grad_norm": 0.3458364009857178, "learning_rate": 1.7478497111260014e-05, "loss": 0.5103, "step": 15123 }, { "epoch": 0.4645961969710933, "grad_norm": 0.37188270688056946, "learning_rate": 1.7478176270135957e-05, "loss": 0.5391, "step": 15124 }, { "epoch": 0.46462691610604245, "grad_norm": 0.34533023834228516, "learning_rate": 1.74778554115461e-05, "loss": 0.5586, "step": 15125 }, { "epoch": 0.46465763524099163, "grad_norm": 0.34897857904434204, "learning_rate": 1.7477534535491186e-05, "loss": 0.546, "step": 15126 }, { "epoch": 0.46468835437594075, "grad_norm": 0.3389512598514557, "learning_rate": 1.7477213641971968e-05, "loss": 0.55, "step": 15127 }, { "epoch": 0.4647190735108899, "grad_norm": 0.3915907144546509, "learning_rate": 1.74768927309892e-05, "loss": 0.5452, "step": 15128 }, { "epoch": 0.4647497926458391, "grad_norm": 0.37128928303718567, "learning_rate": 1.747657180254362e-05, "loss": 0.6162, "step": 15129 }, { "epoch": 0.4647805117807883, "grad_norm": 0.4151308834552765, "learning_rate": 1.7476250856635987e-05, "loss": 0.6249, "step": 15130 }, { "epoch": 0.4648112309157374, "grad_norm": 0.3289475739002228, "learning_rate": 1.747592989326705e-05, "loss": 0.5773, "step": 15131 }, { "epoch": 0.46484195005068657, "grad_norm": 0.3350173830986023, "learning_rate": 1.7475608912437556e-05, "loss": 0.4872, "step": 15132 }, { "epoch": 0.46487266918563575, "grad_norm": 0.35706743597984314, "learning_rate": 1.7475287914148256e-05, "loss": 0.5096, "step": 15133 }, { "epoch": 0.46490338832058486, "grad_norm": 0.3954257667064667, "learning_rate": 1.7474966898399897e-05, "loss": 0.6365, "step": 15134 }, { "epoch": 0.46493410745553404, "grad_norm": 0.3492475152015686, "learning_rate": 1.7474645865193232e-05, "loss": 0.6, "step": 15135 }, { "epoch": 0.4649648265904832, "grad_norm": 0.34497517347335815, "learning_rate": 1.747432481452901e-05, "loss": 0.6018, "step": 15136 }, { "epoch": 0.4649955457254324, "grad_norm": 0.36866170167922974, "learning_rate": 1.7474003746407978e-05, "loss": 0.624, "step": 15137 }, { "epoch": 0.4650262648603815, "grad_norm": 0.3482343554496765, "learning_rate": 1.747368266083089e-05, "loss": 0.6397, "step": 15138 }, { "epoch": 0.4650569839953307, "grad_norm": 0.3666715919971466, "learning_rate": 1.7473361557798495e-05, "loss": 0.531, "step": 15139 }, { "epoch": 0.46508770313027986, "grad_norm": 0.38273510336875916, "learning_rate": 1.747304043731154e-05, "loss": 0.5447, "step": 15140 }, { "epoch": 0.46511842226522904, "grad_norm": 0.3599989116191864, "learning_rate": 1.747271929937078e-05, "loss": 0.5381, "step": 15141 }, { "epoch": 0.46514914140017816, "grad_norm": 0.3552003502845764, "learning_rate": 1.747239814397696e-05, "loss": 0.6233, "step": 15142 }, { "epoch": 0.46517986053512733, "grad_norm": 0.3514392077922821, "learning_rate": 1.7472076971130832e-05, "loss": 0.6007, "step": 15143 }, { "epoch": 0.4652105796700765, "grad_norm": 0.46120914816856384, "learning_rate": 1.7471755780833146e-05, "loss": 0.5072, "step": 15144 }, { "epoch": 0.4652412988050256, "grad_norm": 0.3423146903514862, "learning_rate": 1.7471434573084657e-05, "loss": 0.5895, "step": 15145 }, { "epoch": 0.4652720179399748, "grad_norm": 0.3601244390010834, "learning_rate": 1.7471113347886106e-05, "loss": 0.5584, "step": 15146 }, { "epoch": 0.465302737074924, "grad_norm": 0.34826943278312683, "learning_rate": 1.747079210523825e-05, "loss": 0.5135, "step": 15147 }, { "epoch": 0.46533345620987315, "grad_norm": 0.4933094084262848, "learning_rate": 1.7470470845141837e-05, "loss": 0.5825, "step": 15148 }, { "epoch": 0.4653641753448223, "grad_norm": 0.3589712679386139, "learning_rate": 1.747014956759762e-05, "loss": 0.5514, "step": 15149 }, { "epoch": 0.46539489447977145, "grad_norm": 0.327723890542984, "learning_rate": 1.7469828272606343e-05, "loss": 0.5845, "step": 15150 }, { "epoch": 0.4654256136147206, "grad_norm": 0.35572993755340576, "learning_rate": 1.7469506960168762e-05, "loss": 0.6729, "step": 15151 }, { "epoch": 0.46545633274966974, "grad_norm": 0.36295169591903687, "learning_rate": 1.7469185630285626e-05, "loss": 0.6093, "step": 15152 }, { "epoch": 0.4654870518846189, "grad_norm": 0.33789241313934326, "learning_rate": 1.746886428295768e-05, "loss": 0.5104, "step": 15153 }, { "epoch": 0.4655177710195681, "grad_norm": 0.3676663935184479, "learning_rate": 1.7468542918185685e-05, "loss": 0.651, "step": 15154 }, { "epoch": 0.46554849015451727, "grad_norm": 0.35232865810394287, "learning_rate": 1.7468221535970388e-05, "loss": 0.5563, "step": 15155 }, { "epoch": 0.4655792092894664, "grad_norm": 0.3533463776111603, "learning_rate": 1.7467900136312537e-05, "loss": 0.5845, "step": 15156 }, { "epoch": 0.46560992842441556, "grad_norm": 0.3538619577884674, "learning_rate": 1.7467578719212882e-05, "loss": 0.5243, "step": 15157 }, { "epoch": 0.46564064755936474, "grad_norm": 0.3469616174697876, "learning_rate": 1.7467257284672175e-05, "loss": 0.6121, "step": 15158 }, { "epoch": 0.4656713666943139, "grad_norm": 0.3317453861236572, "learning_rate": 1.746693583269117e-05, "loss": 0.6073, "step": 15159 }, { "epoch": 0.46570208582926303, "grad_norm": 0.349765807390213, "learning_rate": 1.7466614363270613e-05, "loss": 0.6371, "step": 15160 }, { "epoch": 0.4657328049642122, "grad_norm": 0.38808029890060425, "learning_rate": 1.7466292876411256e-05, "loss": 0.6297, "step": 15161 }, { "epoch": 0.4657635240991614, "grad_norm": 0.3559630215167999, "learning_rate": 1.746597137211385e-05, "loss": 0.608, "step": 15162 }, { "epoch": 0.4657942432341105, "grad_norm": 0.3407887816429138, "learning_rate": 1.7465649850379153e-05, "loss": 0.5416, "step": 15163 }, { "epoch": 0.4658249623690597, "grad_norm": 0.44187670946121216, "learning_rate": 1.7465328311207904e-05, "loss": 0.6258, "step": 15164 }, { "epoch": 0.46585568150400886, "grad_norm": 0.4043227434158325, "learning_rate": 1.7465006754600856e-05, "loss": 0.6137, "step": 15165 }, { "epoch": 0.46588640063895803, "grad_norm": 0.37306439876556396, "learning_rate": 1.746468518055877e-05, "loss": 0.5262, "step": 15166 }, { "epoch": 0.46591711977390715, "grad_norm": 0.3654891550540924, "learning_rate": 1.7464363589082386e-05, "loss": 0.5415, "step": 15167 }, { "epoch": 0.4659478389088563, "grad_norm": 0.3746562898159027, "learning_rate": 1.7464041980172462e-05, "loss": 0.6117, "step": 15168 }, { "epoch": 0.4659785580438055, "grad_norm": 0.40146467089653015, "learning_rate": 1.7463720353829746e-05, "loss": 0.4936, "step": 15169 }, { "epoch": 0.4660092771787546, "grad_norm": 0.3937714695930481, "learning_rate": 1.7463398710054988e-05, "loss": 0.6054, "step": 15170 }, { "epoch": 0.4660399963137038, "grad_norm": 0.3870045840740204, "learning_rate": 1.7463077048848945e-05, "loss": 0.6131, "step": 15171 }, { "epoch": 0.466070715448653, "grad_norm": 0.36773884296417236, "learning_rate": 1.746275537021236e-05, "loss": 0.5301, "step": 15172 }, { "epoch": 0.46610143458360215, "grad_norm": 0.3215831518173218, "learning_rate": 1.7462433674145992e-05, "loss": 0.5216, "step": 15173 }, { "epoch": 0.46613215371855127, "grad_norm": 0.3669358193874359, "learning_rate": 1.7462111960650586e-05, "loss": 0.6133, "step": 15174 }, { "epoch": 0.46616287285350044, "grad_norm": 0.34286028146743774, "learning_rate": 1.7461790229726898e-05, "loss": 0.4966, "step": 15175 }, { "epoch": 0.4661935919884496, "grad_norm": 0.4131510555744171, "learning_rate": 1.746146848137568e-05, "loss": 0.6388, "step": 15176 }, { "epoch": 0.46622431112339874, "grad_norm": 0.3755442202091217, "learning_rate": 1.746114671559768e-05, "loss": 0.578, "step": 15177 }, { "epoch": 0.4662550302583479, "grad_norm": 0.45600154995918274, "learning_rate": 1.7460824932393648e-05, "loss": 0.5211, "step": 15178 }, { "epoch": 0.4662857493932971, "grad_norm": 0.3173108994960785, "learning_rate": 1.746050313176434e-05, "loss": 0.5938, "step": 15179 }, { "epoch": 0.46631646852824626, "grad_norm": 0.3853795826435089, "learning_rate": 1.7460181313710504e-05, "loss": 0.5195, "step": 15180 }, { "epoch": 0.4663471876631954, "grad_norm": 0.3663421869277954, "learning_rate": 1.7459859478232894e-05, "loss": 0.5964, "step": 15181 }, { "epoch": 0.46637790679814456, "grad_norm": 0.33931228518486023, "learning_rate": 1.7459537625332267e-05, "loss": 0.5258, "step": 15182 }, { "epoch": 0.46640862593309373, "grad_norm": 0.32092925906181335, "learning_rate": 1.745921575500936e-05, "loss": 0.5611, "step": 15183 }, { "epoch": 0.4664393450680429, "grad_norm": 0.3394658863544464, "learning_rate": 1.745889386726494e-05, "loss": 0.5516, "step": 15184 }, { "epoch": 0.46647006420299203, "grad_norm": 0.341662734746933, "learning_rate": 1.745857196209975e-05, "loss": 0.4938, "step": 15185 }, { "epoch": 0.4665007833379412, "grad_norm": 0.4204656779766083, "learning_rate": 1.745825003951455e-05, "loss": 0.5744, "step": 15186 }, { "epoch": 0.4665315024728904, "grad_norm": 0.3704216480255127, "learning_rate": 1.745792809951008e-05, "loss": 0.4843, "step": 15187 }, { "epoch": 0.4665622216078395, "grad_norm": 0.3445076644420624, "learning_rate": 1.7457606142087097e-05, "loss": 0.503, "step": 15188 }, { "epoch": 0.4665929407427887, "grad_norm": 0.38017070293426514, "learning_rate": 1.7457284167246357e-05, "loss": 0.5861, "step": 15189 }, { "epoch": 0.46662365987773785, "grad_norm": 0.3213556408882141, "learning_rate": 1.7456962174988606e-05, "loss": 0.5253, "step": 15190 }, { "epoch": 0.466654379012687, "grad_norm": 0.353863000869751, "learning_rate": 1.74566401653146e-05, "loss": 0.5023, "step": 15191 }, { "epoch": 0.46668509814763615, "grad_norm": 0.36468854546546936, "learning_rate": 1.7456318138225094e-05, "loss": 0.5525, "step": 15192 }, { "epoch": 0.4667158172825853, "grad_norm": 0.44338807463645935, "learning_rate": 1.7455996093720834e-05, "loss": 0.5456, "step": 15193 }, { "epoch": 0.4667465364175345, "grad_norm": 0.4711027443408966, "learning_rate": 1.7455674031802576e-05, "loss": 0.5583, "step": 15194 }, { "epoch": 0.4667772555524836, "grad_norm": 0.3705287277698517, "learning_rate": 1.7455351952471068e-05, "loss": 0.6584, "step": 15195 }, { "epoch": 0.4668079746874328, "grad_norm": 0.3871898949146271, "learning_rate": 1.7455029855727064e-05, "loss": 0.5748, "step": 15196 }, { "epoch": 0.46683869382238197, "grad_norm": 0.4193970263004303, "learning_rate": 1.745470774157132e-05, "loss": 0.544, "step": 15197 }, { "epoch": 0.46686941295733114, "grad_norm": 0.39517489075660706, "learning_rate": 1.7454385610004583e-05, "loss": 0.563, "step": 15198 }, { "epoch": 0.46690013209228026, "grad_norm": 0.3505682051181793, "learning_rate": 1.7454063461027607e-05, "loss": 0.544, "step": 15199 }, { "epoch": 0.46693085122722944, "grad_norm": 0.3354949951171875, "learning_rate": 1.745374129464115e-05, "loss": 0.5592, "step": 15200 }, { "epoch": 0.4669615703621786, "grad_norm": 0.4253388047218323, "learning_rate": 1.7453419110845954e-05, "loss": 0.4677, "step": 15201 }, { "epoch": 0.4669922894971278, "grad_norm": 0.4756682813167572, "learning_rate": 1.7453096909642783e-05, "loss": 0.6168, "step": 15202 }, { "epoch": 0.4670230086320769, "grad_norm": 0.38148006796836853, "learning_rate": 1.7452774691032378e-05, "loss": 0.5274, "step": 15203 }, { "epoch": 0.4670537277670261, "grad_norm": 0.3887532651424408, "learning_rate": 1.74524524550155e-05, "loss": 0.5458, "step": 15204 }, { "epoch": 0.46708444690197526, "grad_norm": 0.3874104917049408, "learning_rate": 1.74521302015929e-05, "loss": 0.5857, "step": 15205 }, { "epoch": 0.4671151660369244, "grad_norm": 0.3933415710926056, "learning_rate": 1.745180793076533e-05, "loss": 0.6115, "step": 15206 }, { "epoch": 0.46714588517187355, "grad_norm": 0.3427456021308899, "learning_rate": 1.745148564253354e-05, "loss": 0.5692, "step": 15207 }, { "epoch": 0.46717660430682273, "grad_norm": 0.3112364113330841, "learning_rate": 1.7451163336898283e-05, "loss": 0.5216, "step": 15208 }, { "epoch": 0.4672073234417719, "grad_norm": 0.3043906092643738, "learning_rate": 1.7450841013860316e-05, "loss": 0.5586, "step": 15209 }, { "epoch": 0.467238042576721, "grad_norm": 0.33489498496055603, "learning_rate": 1.745051867342039e-05, "loss": 0.5464, "step": 15210 }, { "epoch": 0.4672687617116702, "grad_norm": 0.40852952003479004, "learning_rate": 1.745019631557925e-05, "loss": 0.6345, "step": 15211 }, { "epoch": 0.4672994808466194, "grad_norm": 0.38891974091529846, "learning_rate": 1.744987394033767e-05, "loss": 0.5907, "step": 15212 }, { "epoch": 0.4673301999815685, "grad_norm": 0.4153209924697876, "learning_rate": 1.7449551547696378e-05, "loss": 0.6519, "step": 15213 }, { "epoch": 0.46736091911651767, "grad_norm": 0.42603862285614014, "learning_rate": 1.7449229137656144e-05, "loss": 0.6679, "step": 15214 }, { "epoch": 0.46739163825146685, "grad_norm": 0.3691406846046448, "learning_rate": 1.7448906710217714e-05, "loss": 0.5229, "step": 15215 }, { "epoch": 0.467422357386416, "grad_norm": 0.37575843930244446, "learning_rate": 1.7448584265381837e-05, "loss": 0.5511, "step": 15216 }, { "epoch": 0.46745307652136514, "grad_norm": 0.3647887706756592, "learning_rate": 1.744826180314928e-05, "loss": 0.695, "step": 15217 }, { "epoch": 0.4674837956563143, "grad_norm": 0.36543455719947815, "learning_rate": 1.744793932352078e-05, "loss": 0.5484, "step": 15218 }, { "epoch": 0.4675145147912635, "grad_norm": 0.3195529878139496, "learning_rate": 1.7447616826497097e-05, "loss": 0.5425, "step": 15219 }, { "epoch": 0.4675452339262126, "grad_norm": 0.3481314778327942, "learning_rate": 1.744729431207899e-05, "loss": 0.481, "step": 15220 }, { "epoch": 0.4675759530611618, "grad_norm": 0.33704105019569397, "learning_rate": 1.7446971780267206e-05, "loss": 0.5655, "step": 15221 }, { "epoch": 0.46760667219611096, "grad_norm": 0.4021010100841522, "learning_rate": 1.7446649231062498e-05, "loss": 0.6044, "step": 15222 }, { "epoch": 0.46763739133106014, "grad_norm": 2.205193281173706, "learning_rate": 1.744632666446562e-05, "loss": 0.626, "step": 15223 }, { "epoch": 0.46766811046600926, "grad_norm": 0.3273734748363495, "learning_rate": 1.744600408047733e-05, "loss": 0.553, "step": 15224 }, { "epoch": 0.46769882960095843, "grad_norm": 0.3629317581653595, "learning_rate": 1.7445681479098372e-05, "loss": 0.5214, "step": 15225 }, { "epoch": 0.4677295487359076, "grad_norm": 0.3555382490158081, "learning_rate": 1.7445358860329507e-05, "loss": 0.5452, "step": 15226 }, { "epoch": 0.4677602678708568, "grad_norm": 0.40340808033943176, "learning_rate": 1.7445036224171486e-05, "loss": 0.5904, "step": 15227 }, { "epoch": 0.4677909870058059, "grad_norm": 0.34748902916908264, "learning_rate": 1.7444713570625064e-05, "loss": 0.5874, "step": 15228 }, { "epoch": 0.4678217061407551, "grad_norm": 0.33446264266967773, "learning_rate": 1.7444390899690995e-05, "loss": 0.5423, "step": 15229 }, { "epoch": 0.46785242527570425, "grad_norm": 0.32036179304122925, "learning_rate": 1.7444068211370032e-05, "loss": 0.4863, "step": 15230 }, { "epoch": 0.4678831444106534, "grad_norm": 0.34019848704338074, "learning_rate": 1.7443745505662925e-05, "loss": 0.6146, "step": 15231 }, { "epoch": 0.46791386354560255, "grad_norm": 0.32779327034950256, "learning_rate": 1.744342278257043e-05, "loss": 0.5833, "step": 15232 }, { "epoch": 0.4679445826805517, "grad_norm": 0.3636252284049988, "learning_rate": 1.7443100042093298e-05, "loss": 0.5466, "step": 15233 }, { "epoch": 0.4679753018155009, "grad_norm": 0.3531205952167511, "learning_rate": 1.7442777284232292e-05, "loss": 0.5561, "step": 15234 }, { "epoch": 0.46800602095045, "grad_norm": 0.4053487777709961, "learning_rate": 1.7442454508988155e-05, "loss": 0.6501, "step": 15235 }, { "epoch": 0.4680367400853992, "grad_norm": 0.39670121669769287, "learning_rate": 1.744213171636165e-05, "loss": 0.5674, "step": 15236 }, { "epoch": 0.46806745922034837, "grad_norm": 0.3441225290298462, "learning_rate": 1.7441808906353526e-05, "loss": 0.5852, "step": 15237 }, { "epoch": 0.4680981783552975, "grad_norm": 0.3644164502620697, "learning_rate": 1.744148607896454e-05, "loss": 0.546, "step": 15238 }, { "epoch": 0.46812889749024666, "grad_norm": 0.357755184173584, "learning_rate": 1.744116323419544e-05, "loss": 0.5753, "step": 15239 }, { "epoch": 0.46815961662519584, "grad_norm": 0.39328598976135254, "learning_rate": 1.744084037204698e-05, "loss": 0.653, "step": 15240 }, { "epoch": 0.468190335760145, "grad_norm": 0.3690771758556366, "learning_rate": 1.7440517492519925e-05, "loss": 0.607, "step": 15241 }, { "epoch": 0.46822105489509414, "grad_norm": 0.33504247665405273, "learning_rate": 1.7440194595615015e-05, "loss": 0.5466, "step": 15242 }, { "epoch": 0.4682517740300433, "grad_norm": 0.3672151565551758, "learning_rate": 1.7439871681333018e-05, "loss": 0.6, "step": 15243 }, { "epoch": 0.4682824931649925, "grad_norm": 0.3523682951927185, "learning_rate": 1.7439548749674675e-05, "loss": 0.6853, "step": 15244 }, { "epoch": 0.46831321229994166, "grad_norm": 0.36045971512794495, "learning_rate": 1.7439225800640745e-05, "loss": 0.6592, "step": 15245 }, { "epoch": 0.4683439314348908, "grad_norm": 0.3483760952949524, "learning_rate": 1.743890283423199e-05, "loss": 0.5969, "step": 15246 }, { "epoch": 0.46837465056983996, "grad_norm": 0.34445443749427795, "learning_rate": 1.7438579850449155e-05, "loss": 0.6136, "step": 15247 }, { "epoch": 0.46840536970478913, "grad_norm": 0.30732402205467224, "learning_rate": 1.7438256849293e-05, "loss": 0.5944, "step": 15248 }, { "epoch": 0.46843608883973825, "grad_norm": 0.36761805415153503, "learning_rate": 1.743793383076427e-05, "loss": 0.5694, "step": 15249 }, { "epoch": 0.4684668079746874, "grad_norm": 0.33591607213020325, "learning_rate": 1.7437610794863732e-05, "loss": 0.5372, "step": 15250 }, { "epoch": 0.4684975271096366, "grad_norm": 0.3615039885044098, "learning_rate": 1.7437287741592132e-05, "loss": 0.5287, "step": 15251 }, { "epoch": 0.4685282462445858, "grad_norm": 0.29399213194847107, "learning_rate": 1.7436964670950228e-05, "loss": 0.5423, "step": 15252 }, { "epoch": 0.4685589653795349, "grad_norm": 0.4320829510688782, "learning_rate": 1.7436641582938775e-05, "loss": 0.5594, "step": 15253 }, { "epoch": 0.4685896845144841, "grad_norm": 0.3440847396850586, "learning_rate": 1.743631847755852e-05, "loss": 0.5951, "step": 15254 }, { "epoch": 0.46862040364943325, "grad_norm": 0.37401899695396423, "learning_rate": 1.7435995354810233e-05, "loss": 0.6045, "step": 15255 }, { "epoch": 0.46865112278438237, "grad_norm": 0.3436407148838043, "learning_rate": 1.7435672214694656e-05, "loss": 0.5933, "step": 15256 }, { "epoch": 0.46868184191933154, "grad_norm": 0.3635391592979431, "learning_rate": 1.7435349057212545e-05, "loss": 0.5822, "step": 15257 }, { "epoch": 0.4687125610542807, "grad_norm": 0.39782479405403137, "learning_rate": 1.7435025882364657e-05, "loss": 0.5331, "step": 15258 }, { "epoch": 0.4687432801892299, "grad_norm": 0.34352582693099976, "learning_rate": 1.743470269015175e-05, "loss": 0.5754, "step": 15259 }, { "epoch": 0.468773999324179, "grad_norm": 0.3317194879055023, "learning_rate": 1.7434379480574574e-05, "loss": 0.5793, "step": 15260 }, { "epoch": 0.4688047184591282, "grad_norm": 0.32368239760398865, "learning_rate": 1.7434056253633885e-05, "loss": 0.5821, "step": 15261 }, { "epoch": 0.46883543759407736, "grad_norm": 0.33947303891181946, "learning_rate": 1.7433733009330436e-05, "loss": 0.6215, "step": 15262 }, { "epoch": 0.4688661567290265, "grad_norm": 0.3037901520729065, "learning_rate": 1.7433409747664987e-05, "loss": 0.494, "step": 15263 }, { "epoch": 0.46889687586397566, "grad_norm": 0.3486025035381317, "learning_rate": 1.743308646863829e-05, "loss": 0.6336, "step": 15264 }, { "epoch": 0.46892759499892483, "grad_norm": 0.32500913739204407, "learning_rate": 1.74327631722511e-05, "loss": 0.5801, "step": 15265 }, { "epoch": 0.468958314133874, "grad_norm": 0.35308313369750977, "learning_rate": 1.7432439858504175e-05, "loss": 0.554, "step": 15266 }, { "epoch": 0.46898903326882313, "grad_norm": 0.39290860295295715, "learning_rate": 1.7432116527398264e-05, "loss": 0.6761, "step": 15267 }, { "epoch": 0.4690197524037723, "grad_norm": 0.35500627756118774, "learning_rate": 1.743179317893413e-05, "loss": 0.5729, "step": 15268 }, { "epoch": 0.4690504715387215, "grad_norm": 0.3482816815376282, "learning_rate": 1.743146981311252e-05, "loss": 0.6146, "step": 15269 }, { "epoch": 0.46908119067367066, "grad_norm": 0.40450194478034973, "learning_rate": 1.7431146429934197e-05, "loss": 0.5505, "step": 15270 }, { "epoch": 0.4691119098086198, "grad_norm": 0.32359185814857483, "learning_rate": 1.743082302939991e-05, "loss": 0.5259, "step": 15271 }, { "epoch": 0.46914262894356895, "grad_norm": 0.3848201334476471, "learning_rate": 1.7430499611510418e-05, "loss": 0.6152, "step": 15272 }, { "epoch": 0.4691733480785181, "grad_norm": 0.35029691457748413, "learning_rate": 1.7430176176266474e-05, "loss": 0.5363, "step": 15273 }, { "epoch": 0.46920406721346725, "grad_norm": 0.35482972860336304, "learning_rate": 1.7429852723668835e-05, "loss": 0.5567, "step": 15274 }, { "epoch": 0.4692347863484164, "grad_norm": 0.3562295138835907, "learning_rate": 1.7429529253718252e-05, "loss": 0.6454, "step": 15275 }, { "epoch": 0.4692655054833656, "grad_norm": 0.5990108251571655, "learning_rate": 1.742920576641549e-05, "loss": 0.5533, "step": 15276 }, { "epoch": 0.4692962246183148, "grad_norm": 0.3691100478172302, "learning_rate": 1.7428882261761298e-05, "loss": 0.595, "step": 15277 }, { "epoch": 0.4693269437532639, "grad_norm": 0.4669521749019623, "learning_rate": 1.7428558739756438e-05, "loss": 0.5794, "step": 15278 }, { "epoch": 0.46935766288821307, "grad_norm": 0.39610689878463745, "learning_rate": 1.7428235200401652e-05, "loss": 0.6119, "step": 15279 }, { "epoch": 0.46938838202316224, "grad_norm": 0.34193655848503113, "learning_rate": 1.7427911643697707e-05, "loss": 0.5013, "step": 15280 }, { "epoch": 0.46941910115811136, "grad_norm": 0.4000982344150543, "learning_rate": 1.7427588069645352e-05, "loss": 0.5702, "step": 15281 }, { "epoch": 0.46944982029306054, "grad_norm": 0.3355148136615753, "learning_rate": 1.7427264478245353e-05, "loss": 0.5401, "step": 15282 }, { "epoch": 0.4694805394280097, "grad_norm": 0.370853453874588, "learning_rate": 1.7426940869498453e-05, "loss": 0.5816, "step": 15283 }, { "epoch": 0.4695112585629589, "grad_norm": 0.31844350695610046, "learning_rate": 1.742661724340542e-05, "loss": 0.5665, "step": 15284 }, { "epoch": 0.469541977697908, "grad_norm": 0.34829646348953247, "learning_rate": 1.7426293599966996e-05, "loss": 0.5409, "step": 15285 }, { "epoch": 0.4695726968328572, "grad_norm": 0.3407652974128723, "learning_rate": 1.742596993918395e-05, "loss": 0.5604, "step": 15286 }, { "epoch": 0.46960341596780636, "grad_norm": 0.3789529800415039, "learning_rate": 1.7425646261057032e-05, "loss": 0.5709, "step": 15287 }, { "epoch": 0.4696341351027555, "grad_norm": 0.37514784932136536, "learning_rate": 1.7425322565587e-05, "loss": 0.6046, "step": 15288 }, { "epoch": 0.46966485423770465, "grad_norm": 0.31911760568618774, "learning_rate": 1.7424998852774605e-05, "loss": 0.594, "step": 15289 }, { "epoch": 0.46969557337265383, "grad_norm": 0.3476778268814087, "learning_rate": 1.742467512262061e-05, "loss": 0.558, "step": 15290 }, { "epoch": 0.469726292507603, "grad_norm": 0.3762684166431427, "learning_rate": 1.7424351375125764e-05, "loss": 0.6216, "step": 15291 }, { "epoch": 0.4697570116425521, "grad_norm": 0.39758408069610596, "learning_rate": 1.742402761029083e-05, "loss": 0.603, "step": 15292 }, { "epoch": 0.4697877307775013, "grad_norm": 0.3249804377555847, "learning_rate": 1.742370382811656e-05, "loss": 0.5367, "step": 15293 }, { "epoch": 0.4698184499124505, "grad_norm": 0.3834912180900574, "learning_rate": 1.742338002860371e-05, "loss": 0.5795, "step": 15294 }, { "epoch": 0.46984916904739965, "grad_norm": 0.3593645989894867, "learning_rate": 1.7423056211753038e-05, "loss": 0.5836, "step": 15295 }, { "epoch": 0.46987988818234877, "grad_norm": 0.32722145318984985, "learning_rate": 1.74227323775653e-05, "loss": 0.6014, "step": 15296 }, { "epoch": 0.46991060731729795, "grad_norm": 0.32020458579063416, "learning_rate": 1.7422408526041253e-05, "loss": 0.5382, "step": 15297 }, { "epoch": 0.4699413264522471, "grad_norm": 0.35709813237190247, "learning_rate": 1.7422084657181652e-05, "loss": 0.5873, "step": 15298 }, { "epoch": 0.46997204558719624, "grad_norm": 0.3579193353652954, "learning_rate": 1.7421760770987255e-05, "loss": 0.5302, "step": 15299 }, { "epoch": 0.4700027647221454, "grad_norm": 0.35152876377105713, "learning_rate": 1.7421436867458815e-05, "loss": 0.6017, "step": 15300 }, { "epoch": 0.4700334838570946, "grad_norm": 0.33050861954689026, "learning_rate": 1.7421112946597097e-05, "loss": 0.5791, "step": 15301 }, { "epoch": 0.47006420299204377, "grad_norm": 0.3487630784511566, "learning_rate": 1.7420789008402846e-05, "loss": 0.6117, "step": 15302 }, { "epoch": 0.4700949221269929, "grad_norm": 0.3623282015323639, "learning_rate": 1.7420465052876822e-05, "loss": 0.5141, "step": 15303 }, { "epoch": 0.47012564126194206, "grad_norm": 0.3954010009765625, "learning_rate": 1.7420141080019787e-05, "loss": 0.659, "step": 15304 }, { "epoch": 0.47015636039689124, "grad_norm": 0.34622788429260254, "learning_rate": 1.7419817089832495e-05, "loss": 0.6316, "step": 15305 }, { "epoch": 0.47018707953184036, "grad_norm": 0.323954701423645, "learning_rate": 1.74194930823157e-05, "loss": 0.5222, "step": 15306 }, { "epoch": 0.47021779866678953, "grad_norm": 0.3590388298034668, "learning_rate": 1.741916905747016e-05, "loss": 0.5765, "step": 15307 }, { "epoch": 0.4702485178017387, "grad_norm": 0.3885011076927185, "learning_rate": 1.7418845015296636e-05, "loss": 0.498, "step": 15308 }, { "epoch": 0.4702792369366879, "grad_norm": 0.36577433347702026, "learning_rate": 1.7418520955795878e-05, "loss": 0.5543, "step": 15309 }, { "epoch": 0.470309956071637, "grad_norm": 0.408633291721344, "learning_rate": 1.741819687896865e-05, "loss": 0.4945, "step": 15310 }, { "epoch": 0.4703406752065862, "grad_norm": 0.3728128969669342, "learning_rate": 1.74178727848157e-05, "loss": 0.579, "step": 15311 }, { "epoch": 0.47037139434153535, "grad_norm": 0.37763017416000366, "learning_rate": 1.7417548673337795e-05, "loss": 0.6617, "step": 15312 }, { "epoch": 0.47040211347648453, "grad_norm": 0.35760852694511414, "learning_rate": 1.7417224544535685e-05, "loss": 0.498, "step": 15313 }, { "epoch": 0.47043283261143365, "grad_norm": 0.343004047870636, "learning_rate": 1.7416900398410125e-05, "loss": 0.5362, "step": 15314 }, { "epoch": 0.4704635517463828, "grad_norm": 0.339054673910141, "learning_rate": 1.741657623496188e-05, "loss": 0.5511, "step": 15315 }, { "epoch": 0.470494270881332, "grad_norm": 0.3606122136116028, "learning_rate": 1.7416252054191704e-05, "loss": 0.5679, "step": 15316 }, { "epoch": 0.4705249900162811, "grad_norm": 0.35711488127708435, "learning_rate": 1.741592785610035e-05, "loss": 0.5644, "step": 15317 }, { "epoch": 0.4705557091512303, "grad_norm": 0.36628398299217224, "learning_rate": 1.7415603640688584e-05, "loss": 0.5793, "step": 15318 }, { "epoch": 0.47058642828617947, "grad_norm": 0.3806704580783844, "learning_rate": 1.7415279407957154e-05, "loss": 0.6053, "step": 15319 }, { "epoch": 0.47061714742112865, "grad_norm": 0.39401543140411377, "learning_rate": 1.741495515790682e-05, "loss": 0.4859, "step": 15320 }, { "epoch": 0.47064786655607777, "grad_norm": 0.34477099776268005, "learning_rate": 1.7414630890538348e-05, "loss": 0.613, "step": 15321 }, { "epoch": 0.47067858569102694, "grad_norm": 0.3804318606853485, "learning_rate": 1.741430660585248e-05, "loss": 0.5649, "step": 15322 }, { "epoch": 0.4707093048259761, "grad_norm": 0.3644489347934723, "learning_rate": 1.741398230384998e-05, "loss": 0.5647, "step": 15323 }, { "epoch": 0.47074002396092524, "grad_norm": 0.331885427236557, "learning_rate": 1.741365798453161e-05, "loss": 0.5959, "step": 15324 }, { "epoch": 0.4707707430958744, "grad_norm": 0.38938108086586, "learning_rate": 1.741333364789813e-05, "loss": 0.5793, "step": 15325 }, { "epoch": 0.4708014622308236, "grad_norm": 0.3644320070743561, "learning_rate": 1.741300929395028e-05, "loss": 0.5639, "step": 15326 }, { "epoch": 0.47083218136577276, "grad_norm": 0.35625073313713074, "learning_rate": 1.7412684922688835e-05, "loss": 0.6127, "step": 15327 }, { "epoch": 0.4708629005007219, "grad_norm": 0.3974081575870514, "learning_rate": 1.7412360534114548e-05, "loss": 0.6112, "step": 15328 }, { "epoch": 0.47089361963567106, "grad_norm": 0.35002923011779785, "learning_rate": 1.741203612822817e-05, "loss": 0.5888, "step": 15329 }, { "epoch": 0.47092433877062023, "grad_norm": 0.3566001355648041, "learning_rate": 1.741171170503047e-05, "loss": 0.6191, "step": 15330 }, { "epoch": 0.47095505790556935, "grad_norm": 0.3353597819805145, "learning_rate": 1.7411387264522198e-05, "loss": 0.5727, "step": 15331 }, { "epoch": 0.4709857770405185, "grad_norm": 0.3826921880245209, "learning_rate": 1.741106280670411e-05, "loss": 0.5885, "step": 15332 }, { "epoch": 0.4710164961754677, "grad_norm": 0.45124170184135437, "learning_rate": 1.741073833157697e-05, "loss": 0.5848, "step": 15333 }, { "epoch": 0.4710472153104169, "grad_norm": 0.35391658544540405, "learning_rate": 1.7410413839141532e-05, "loss": 0.5403, "step": 15334 }, { "epoch": 0.471077934445366, "grad_norm": 0.3340965509414673, "learning_rate": 1.7410089329398558e-05, "loss": 0.5217, "step": 15335 }, { "epoch": 0.4711086535803152, "grad_norm": 0.6232472062110901, "learning_rate": 1.74097648023488e-05, "loss": 0.5552, "step": 15336 }, { "epoch": 0.47113937271526435, "grad_norm": 0.33544471859931946, "learning_rate": 1.7409440257993018e-05, "loss": 0.632, "step": 15337 }, { "epoch": 0.4711700918502135, "grad_norm": 0.3478744626045227, "learning_rate": 1.7409115696331973e-05, "loss": 0.5717, "step": 15338 }, { "epoch": 0.47120081098516264, "grad_norm": 0.3837464153766632, "learning_rate": 1.740879111736642e-05, "loss": 0.5558, "step": 15339 }, { "epoch": 0.4712315301201118, "grad_norm": 0.34578627347946167, "learning_rate": 1.7408466521097117e-05, "loss": 0.6185, "step": 15340 }, { "epoch": 0.471262249255061, "grad_norm": 0.3589742183685303, "learning_rate": 1.7408141907524824e-05, "loss": 0.6259, "step": 15341 }, { "epoch": 0.4712929683900101, "grad_norm": 0.3176213204860687, "learning_rate": 1.7407817276650298e-05, "loss": 0.5577, "step": 15342 }, { "epoch": 0.4713236875249593, "grad_norm": 0.357126921415329, "learning_rate": 1.74074926284743e-05, "loss": 0.5417, "step": 15343 }, { "epoch": 0.47135440665990846, "grad_norm": 0.4538467824459076, "learning_rate": 1.7407167962997583e-05, "loss": 0.5456, "step": 15344 }, { "epoch": 0.47138512579485764, "grad_norm": 0.3514200448989868, "learning_rate": 1.7406843280220903e-05, "loss": 0.5387, "step": 15345 }, { "epoch": 0.47141584492980676, "grad_norm": 0.3799632489681244, "learning_rate": 1.740651858014503e-05, "loss": 0.5909, "step": 15346 }, { "epoch": 0.47144656406475594, "grad_norm": 0.32820528745651245, "learning_rate": 1.7406193862770715e-05, "loss": 0.5233, "step": 15347 }, { "epoch": 0.4714772831997051, "grad_norm": 0.33566027879714966, "learning_rate": 1.7405869128098714e-05, "loss": 0.4723, "step": 15348 }, { "epoch": 0.47150800233465423, "grad_norm": 0.3658502399921417, "learning_rate": 1.7405544376129793e-05, "loss": 0.528, "step": 15349 }, { "epoch": 0.4715387214696034, "grad_norm": 0.3867243528366089, "learning_rate": 1.7405219606864703e-05, "loss": 0.6594, "step": 15350 }, { "epoch": 0.4715694406045526, "grad_norm": 0.45039427280426025, "learning_rate": 1.7404894820304203e-05, "loss": 0.5655, "step": 15351 }, { "epoch": 0.47160015973950176, "grad_norm": 0.35454899072647095, "learning_rate": 1.740457001644906e-05, "loss": 0.5764, "step": 15352 }, { "epoch": 0.4716308788744509, "grad_norm": 0.3163306415081024, "learning_rate": 1.7404245195300022e-05, "loss": 0.6119, "step": 15353 }, { "epoch": 0.47166159800940005, "grad_norm": 0.33900687098503113, "learning_rate": 1.740392035685785e-05, "loss": 0.5472, "step": 15354 }, { "epoch": 0.4716923171443492, "grad_norm": 0.3471027612686157, "learning_rate": 1.7403595501123313e-05, "loss": 0.5136, "step": 15355 }, { "epoch": 0.4717230362792984, "grad_norm": 0.4253346621990204, "learning_rate": 1.7403270628097154e-05, "loss": 0.573, "step": 15356 }, { "epoch": 0.4717537554142475, "grad_norm": 0.41095292568206787, "learning_rate": 1.7402945737780143e-05, "loss": 0.5619, "step": 15357 }, { "epoch": 0.4717844745491967, "grad_norm": 0.3558420240879059, "learning_rate": 1.7402620830173034e-05, "loss": 0.5648, "step": 15358 }, { "epoch": 0.4718151936841459, "grad_norm": 0.32900944352149963, "learning_rate": 1.7402295905276587e-05, "loss": 0.5073, "step": 15359 }, { "epoch": 0.471845912819095, "grad_norm": 0.40555018186569214, "learning_rate": 1.740197096309156e-05, "loss": 0.6127, "step": 15360 }, { "epoch": 0.47187663195404417, "grad_norm": 0.3589453101158142, "learning_rate": 1.7401646003618717e-05, "loss": 0.5772, "step": 15361 }, { "epoch": 0.47190735108899334, "grad_norm": 0.34511032700538635, "learning_rate": 1.740132102685881e-05, "loss": 0.6069, "step": 15362 }, { "epoch": 0.4719380702239425, "grad_norm": 0.3218221664428711, "learning_rate": 1.74009960328126e-05, "loss": 0.5878, "step": 15363 }, { "epoch": 0.47196878935889164, "grad_norm": 0.3963966369628906, "learning_rate": 1.7400671021480852e-05, "loss": 0.5272, "step": 15364 }, { "epoch": 0.4719995084938408, "grad_norm": 0.331624835729599, "learning_rate": 1.7400345992864312e-05, "loss": 0.5608, "step": 15365 }, { "epoch": 0.47203022762879, "grad_norm": 0.3387339115142822, "learning_rate": 1.7400020946963754e-05, "loss": 0.5184, "step": 15366 }, { "epoch": 0.4720609467637391, "grad_norm": 0.3463359475135803, "learning_rate": 1.7399695883779925e-05, "loss": 0.5516, "step": 15367 }, { "epoch": 0.4720916658986883, "grad_norm": 0.3713415563106537, "learning_rate": 1.7399370803313592e-05, "loss": 0.64, "step": 15368 }, { "epoch": 0.47212238503363746, "grad_norm": 0.36474600434303284, "learning_rate": 1.7399045705565513e-05, "loss": 0.5268, "step": 15369 }, { "epoch": 0.47215310416858663, "grad_norm": 0.3478292226791382, "learning_rate": 1.7398720590536444e-05, "loss": 0.6496, "step": 15370 }, { "epoch": 0.47218382330353575, "grad_norm": 0.4192321002483368, "learning_rate": 1.7398395458227148e-05, "loss": 0.5663, "step": 15371 }, { "epoch": 0.47221454243848493, "grad_norm": 0.34683674573898315, "learning_rate": 1.7398070308638382e-05, "loss": 0.5462, "step": 15372 }, { "epoch": 0.4722452615734341, "grad_norm": 0.3588685989379883, "learning_rate": 1.7397745141770904e-05, "loss": 0.4931, "step": 15373 }, { "epoch": 0.4722759807083832, "grad_norm": 0.3438624143600464, "learning_rate": 1.7397419957625478e-05, "loss": 0.5519, "step": 15374 }, { "epoch": 0.4723066998433324, "grad_norm": 0.3559296727180481, "learning_rate": 1.739709475620286e-05, "loss": 0.5946, "step": 15375 }, { "epoch": 0.4723374189782816, "grad_norm": 0.3427197337150574, "learning_rate": 1.7396769537503813e-05, "loss": 0.5045, "step": 15376 }, { "epoch": 0.47236813811323075, "grad_norm": 0.3533492386341095, "learning_rate": 1.739644430152909e-05, "loss": 0.5435, "step": 15377 }, { "epoch": 0.47239885724817987, "grad_norm": 0.34056904911994934, "learning_rate": 1.7396119048279455e-05, "loss": 0.5711, "step": 15378 }, { "epoch": 0.47242957638312905, "grad_norm": 0.329437792301178, "learning_rate": 1.7395793777755676e-05, "loss": 0.5524, "step": 15379 }, { "epoch": 0.4724602955180782, "grad_norm": 0.360853910446167, "learning_rate": 1.7395468489958492e-05, "loss": 0.5532, "step": 15380 }, { "epoch": 0.4724910146530274, "grad_norm": 0.33704832196235657, "learning_rate": 1.739514318488868e-05, "loss": 0.6089, "step": 15381 }, { "epoch": 0.4725217337879765, "grad_norm": 0.41825735569000244, "learning_rate": 1.7394817862546997e-05, "loss": 0.5251, "step": 15382 }, { "epoch": 0.4725524529229257, "grad_norm": 0.35778966546058655, "learning_rate": 1.7394492522934197e-05, "loss": 0.5291, "step": 15383 }, { "epoch": 0.47258317205787487, "grad_norm": 0.3360001742839813, "learning_rate": 1.7394167166051045e-05, "loss": 0.5417, "step": 15384 }, { "epoch": 0.472613891192824, "grad_norm": 0.35137075185775757, "learning_rate": 1.73938417918983e-05, "loss": 0.5746, "step": 15385 }, { "epoch": 0.47264461032777316, "grad_norm": 0.31396105885505676, "learning_rate": 1.739351640047672e-05, "loss": 0.601, "step": 15386 }, { "epoch": 0.47267532946272234, "grad_norm": 0.3746577799320221, "learning_rate": 1.7393190991787067e-05, "loss": 0.5979, "step": 15387 }, { "epoch": 0.4727060485976715, "grad_norm": 0.3909800350666046, "learning_rate": 1.73928655658301e-05, "loss": 0.5798, "step": 15388 }, { "epoch": 0.47273676773262063, "grad_norm": 0.3468107581138611, "learning_rate": 1.7392540122606575e-05, "loss": 0.5636, "step": 15389 }, { "epoch": 0.4727674868675698, "grad_norm": 0.32917284965515137, "learning_rate": 1.739221466211726e-05, "loss": 0.5569, "step": 15390 }, { "epoch": 0.472798206002519, "grad_norm": 0.3897896111011505, "learning_rate": 1.739188918436291e-05, "loss": 0.5705, "step": 15391 }, { "epoch": 0.4728289251374681, "grad_norm": 0.3404683470726013, "learning_rate": 1.739156368934429e-05, "loss": 0.6007, "step": 15392 }, { "epoch": 0.4728596442724173, "grad_norm": 0.4632088541984558, "learning_rate": 1.7391238177062152e-05, "loss": 0.596, "step": 15393 }, { "epoch": 0.47289036340736645, "grad_norm": 0.35257378220558167, "learning_rate": 1.7390912647517263e-05, "loss": 0.5708, "step": 15394 }, { "epoch": 0.47292108254231563, "grad_norm": 0.40153005719184875, "learning_rate": 1.7390587100710385e-05, "loss": 0.5677, "step": 15395 }, { "epoch": 0.47295180167726475, "grad_norm": 0.32786792516708374, "learning_rate": 1.739026153664227e-05, "loss": 0.5583, "step": 15396 }, { "epoch": 0.4729825208122139, "grad_norm": 0.37363845109939575, "learning_rate": 1.7389935955313685e-05, "loss": 0.5815, "step": 15397 }, { "epoch": 0.4730132399471631, "grad_norm": 0.3570508360862732, "learning_rate": 1.7389610356725386e-05, "loss": 0.5681, "step": 15398 }, { "epoch": 0.4730439590821123, "grad_norm": 0.3244105875492096, "learning_rate": 1.738928474087814e-05, "loss": 0.5336, "step": 15399 }, { "epoch": 0.4730746782170614, "grad_norm": 0.3286208510398865, "learning_rate": 1.73889591077727e-05, "loss": 0.528, "step": 15400 }, { "epoch": 0.47310539735201057, "grad_norm": 0.34112223982810974, "learning_rate": 1.7388633457409828e-05, "loss": 0.5789, "step": 15401 }, { "epoch": 0.47313611648695975, "grad_norm": 0.3537975549697876, "learning_rate": 1.738830778979029e-05, "loss": 0.5086, "step": 15402 }, { "epoch": 0.47316683562190887, "grad_norm": 0.3271577060222626, "learning_rate": 1.7387982104914844e-05, "loss": 0.4894, "step": 15403 }, { "epoch": 0.47319755475685804, "grad_norm": 0.3944144546985626, "learning_rate": 1.738765640278425e-05, "loss": 0.647, "step": 15404 }, { "epoch": 0.4732282738918072, "grad_norm": 0.33719468116760254, "learning_rate": 1.7387330683399268e-05, "loss": 0.4425, "step": 15405 }, { "epoch": 0.4732589930267564, "grad_norm": 0.31243443489074707, "learning_rate": 1.7387004946760657e-05, "loss": 0.5578, "step": 15406 }, { "epoch": 0.4732897121617055, "grad_norm": 0.35303404927253723, "learning_rate": 1.7386679192869182e-05, "loss": 0.5627, "step": 15407 }, { "epoch": 0.4733204312966547, "grad_norm": 0.36785897612571716, "learning_rate": 1.73863534217256e-05, "loss": 0.647, "step": 15408 }, { "epoch": 0.47335115043160386, "grad_norm": 0.377309113740921, "learning_rate": 1.7386027633330678e-05, "loss": 0.5362, "step": 15409 }, { "epoch": 0.473381869566553, "grad_norm": 0.4327535331249237, "learning_rate": 1.7385701827685168e-05, "loss": 0.5643, "step": 15410 }, { "epoch": 0.47341258870150216, "grad_norm": 0.37282103300094604, "learning_rate": 1.7385376004789838e-05, "loss": 0.59, "step": 15411 }, { "epoch": 0.47344330783645133, "grad_norm": 0.3619648516178131, "learning_rate": 1.7385050164645445e-05, "loss": 0.5509, "step": 15412 }, { "epoch": 0.4734740269714005, "grad_norm": 0.42223024368286133, "learning_rate": 1.7384724307252754e-05, "loss": 0.5923, "step": 15413 }, { "epoch": 0.4735047461063496, "grad_norm": 0.3543432354927063, "learning_rate": 1.738439843261252e-05, "loss": 0.6084, "step": 15414 }, { "epoch": 0.4735354652412988, "grad_norm": 0.34810566902160645, "learning_rate": 1.7384072540725513e-05, "loss": 0.5863, "step": 15415 }, { "epoch": 0.473566184376248, "grad_norm": 0.3524484634399414, "learning_rate": 1.7383746631592486e-05, "loss": 0.5153, "step": 15416 }, { "epoch": 0.4735969035111971, "grad_norm": 0.3708361089229584, "learning_rate": 1.7383420705214202e-05, "loss": 0.6095, "step": 15417 }, { "epoch": 0.4736276226461463, "grad_norm": 0.35317492485046387, "learning_rate": 1.7383094761591423e-05, "loss": 0.4745, "step": 15418 }, { "epoch": 0.47365834178109545, "grad_norm": 0.5683931708335876, "learning_rate": 1.738276880072491e-05, "loss": 0.567, "step": 15419 }, { "epoch": 0.4736890609160446, "grad_norm": 0.33836662769317627, "learning_rate": 1.7382442822615427e-05, "loss": 0.5194, "step": 15420 }, { "epoch": 0.47371978005099374, "grad_norm": 0.41718798875808716, "learning_rate": 1.738211682726373e-05, "loss": 0.5154, "step": 15421 }, { "epoch": 0.4737504991859429, "grad_norm": 0.3242138922214508, "learning_rate": 1.738179081467059e-05, "loss": 0.5382, "step": 15422 }, { "epoch": 0.4737812183208921, "grad_norm": 0.3555530309677124, "learning_rate": 1.738146478483676e-05, "loss": 0.5064, "step": 15423 }, { "epoch": 0.47381193745584127, "grad_norm": 0.32245007157325745, "learning_rate": 1.7381138737763003e-05, "loss": 0.5564, "step": 15424 }, { "epoch": 0.4738426565907904, "grad_norm": 0.33517858386039734, "learning_rate": 1.738081267345008e-05, "loss": 0.5411, "step": 15425 }, { "epoch": 0.47387337572573957, "grad_norm": 0.36820682883262634, "learning_rate": 1.7380486591898752e-05, "loss": 0.5689, "step": 15426 }, { "epoch": 0.47390409486068874, "grad_norm": 0.33861497044563293, "learning_rate": 1.7380160493109782e-05, "loss": 0.5269, "step": 15427 }, { "epoch": 0.47393481399563786, "grad_norm": 0.3603004813194275, "learning_rate": 1.7379834377083932e-05, "loss": 0.6129, "step": 15428 }, { "epoch": 0.47396553313058704, "grad_norm": 0.3664097487926483, "learning_rate": 1.7379508243821965e-05, "loss": 0.5917, "step": 15429 }, { "epoch": 0.4739962522655362, "grad_norm": 3.2881994247436523, "learning_rate": 1.737918209332464e-05, "loss": 0.6012, "step": 15430 }, { "epoch": 0.4740269714004854, "grad_norm": 0.31972256302833557, "learning_rate": 1.7378855925592723e-05, "loss": 0.5462, "step": 15431 }, { "epoch": 0.4740576905354345, "grad_norm": 0.3096390962600708, "learning_rate": 1.737852974062697e-05, "loss": 0.5317, "step": 15432 }, { "epoch": 0.4740884096703837, "grad_norm": 0.33041590452194214, "learning_rate": 1.7378203538428145e-05, "loss": 0.5623, "step": 15433 }, { "epoch": 0.47411912880533286, "grad_norm": 0.35450103878974915, "learning_rate": 1.7377877318997008e-05, "loss": 0.5921, "step": 15434 }, { "epoch": 0.474149847940282, "grad_norm": 0.35953226685523987, "learning_rate": 1.737755108233433e-05, "loss": 0.5699, "step": 15435 }, { "epoch": 0.47418056707523115, "grad_norm": 0.3646673560142517, "learning_rate": 1.7377224828440858e-05, "loss": 0.6024, "step": 15436 }, { "epoch": 0.4742112862101803, "grad_norm": 0.36138322949409485, "learning_rate": 1.737689855731737e-05, "loss": 0.5128, "step": 15437 }, { "epoch": 0.4742420053451295, "grad_norm": 0.35722148418426514, "learning_rate": 1.737657226896462e-05, "loss": 0.5279, "step": 15438 }, { "epoch": 0.4742727244800786, "grad_norm": 0.33894839882850647, "learning_rate": 1.7376245963383366e-05, "loss": 0.5808, "step": 15439 }, { "epoch": 0.4743034436150278, "grad_norm": 0.43053486943244934, "learning_rate": 1.737591964057437e-05, "loss": 0.5822, "step": 15440 }, { "epoch": 0.474334162749977, "grad_norm": 0.3565564453601837, "learning_rate": 1.737559330053841e-05, "loss": 0.5848, "step": 15441 }, { "epoch": 0.4743648818849261, "grad_norm": 0.3573339283466339, "learning_rate": 1.737526694327623e-05, "loss": 0.6071, "step": 15442 }, { "epoch": 0.47439560101987527, "grad_norm": 0.3496690094470978, "learning_rate": 1.73749405687886e-05, "loss": 0.5908, "step": 15443 }, { "epoch": 0.47442632015482444, "grad_norm": 0.3777221441268921, "learning_rate": 1.737461417707628e-05, "loss": 0.5115, "step": 15444 }, { "epoch": 0.4744570392897736, "grad_norm": 0.4103451669216156, "learning_rate": 1.7374287768140036e-05, "loss": 0.5845, "step": 15445 }, { "epoch": 0.47448775842472274, "grad_norm": 0.3451443612575531, "learning_rate": 1.7373961341980625e-05, "loss": 0.5935, "step": 15446 }, { "epoch": 0.4745184775596719, "grad_norm": 0.3928951323032379, "learning_rate": 1.7373634898598814e-05, "loss": 0.5371, "step": 15447 }, { "epoch": 0.4745491966946211, "grad_norm": 0.43516427278518677, "learning_rate": 1.737330843799536e-05, "loss": 0.5686, "step": 15448 }, { "epoch": 0.47457991582957026, "grad_norm": 0.3772067129611969, "learning_rate": 1.7372981960171036e-05, "loss": 0.6182, "step": 15449 }, { "epoch": 0.4746106349645194, "grad_norm": 0.44056281447410583, "learning_rate": 1.7372655465126595e-05, "loss": 0.6052, "step": 15450 }, { "epoch": 0.47464135409946856, "grad_norm": 0.3694130778312683, "learning_rate": 1.7372328952862802e-05, "loss": 0.55, "step": 15451 }, { "epoch": 0.47467207323441774, "grad_norm": 0.37444737553596497, "learning_rate": 1.7372002423380418e-05, "loss": 0.54, "step": 15452 }, { "epoch": 0.47470279236936685, "grad_norm": 0.34410399198532104, "learning_rate": 1.737167587668021e-05, "loss": 0.5607, "step": 15453 }, { "epoch": 0.47473351150431603, "grad_norm": 0.3485325574874878, "learning_rate": 1.7371349312762936e-05, "loss": 0.534, "step": 15454 }, { "epoch": 0.4747642306392652, "grad_norm": 0.31502020359039307, "learning_rate": 1.737102273162936e-05, "loss": 0.562, "step": 15455 }, { "epoch": 0.4747949497742144, "grad_norm": 0.3495251536369324, "learning_rate": 1.737069613328025e-05, "loss": 0.5428, "step": 15456 }, { "epoch": 0.4748256689091635, "grad_norm": 0.32383665442466736, "learning_rate": 1.7370369517716364e-05, "loss": 0.6065, "step": 15457 }, { "epoch": 0.4748563880441127, "grad_norm": 0.3613456189632416, "learning_rate": 1.7370042884938464e-05, "loss": 0.4792, "step": 15458 }, { "epoch": 0.47488710717906185, "grad_norm": 0.32448649406433105, "learning_rate": 1.736971623494731e-05, "loss": 0.5584, "step": 15459 }, { "epoch": 0.47491782631401097, "grad_norm": 0.3634085953235626, "learning_rate": 1.7369389567743674e-05, "loss": 0.6018, "step": 15460 }, { "epoch": 0.47494854544896015, "grad_norm": 0.3299083709716797, "learning_rate": 1.7369062883328315e-05, "loss": 0.5265, "step": 15461 }, { "epoch": 0.4749792645839093, "grad_norm": 0.3562838137149811, "learning_rate": 1.736873618170199e-05, "loss": 0.5947, "step": 15462 }, { "epoch": 0.4750099837188585, "grad_norm": 0.3531027138233185, "learning_rate": 1.736840946286547e-05, "loss": 0.5737, "step": 15463 }, { "epoch": 0.4750407028538076, "grad_norm": 0.3587208390235901, "learning_rate": 1.7368082726819512e-05, "loss": 0.5661, "step": 15464 }, { "epoch": 0.4750714219887568, "grad_norm": 0.34445270895957947, "learning_rate": 1.7367755973564887e-05, "loss": 0.5997, "step": 15465 }, { "epoch": 0.47510214112370597, "grad_norm": 0.3432323932647705, "learning_rate": 1.736742920310235e-05, "loss": 0.5105, "step": 15466 }, { "epoch": 0.47513286025865514, "grad_norm": 0.3533845841884613, "learning_rate": 1.736710241543267e-05, "loss": 0.6384, "step": 15467 }, { "epoch": 0.47516357939360426, "grad_norm": 0.4978148341178894, "learning_rate": 1.7366775610556608e-05, "loss": 0.6828, "step": 15468 }, { "epoch": 0.47519429852855344, "grad_norm": 0.3501807153224945, "learning_rate": 1.7366448788474925e-05, "loss": 0.5218, "step": 15469 }, { "epoch": 0.4752250176635026, "grad_norm": 0.3370748460292816, "learning_rate": 1.7366121949188387e-05, "loss": 0.5536, "step": 15470 }, { "epoch": 0.47525573679845173, "grad_norm": 0.3426799178123474, "learning_rate": 1.7365795092697756e-05, "loss": 0.4973, "step": 15471 }, { "epoch": 0.4752864559334009, "grad_norm": 0.33971211314201355, "learning_rate": 1.7365468219003795e-05, "loss": 0.528, "step": 15472 }, { "epoch": 0.4753171750683501, "grad_norm": 0.35255709290504456, "learning_rate": 1.736514132810727e-05, "loss": 0.5769, "step": 15473 }, { "epoch": 0.47534789420329926, "grad_norm": 0.6923590302467346, "learning_rate": 1.7364814420008946e-05, "loss": 0.5776, "step": 15474 }, { "epoch": 0.4753786133382484, "grad_norm": 0.32761672139167786, "learning_rate": 1.736448749470958e-05, "loss": 0.6439, "step": 15475 }, { "epoch": 0.47540933247319755, "grad_norm": 0.36152154207229614, "learning_rate": 1.7364160552209943e-05, "loss": 0.5368, "step": 15476 }, { "epoch": 0.47544005160814673, "grad_norm": 0.39028510451316833, "learning_rate": 1.7363833592510793e-05, "loss": 0.525, "step": 15477 }, { "epoch": 0.47547077074309585, "grad_norm": 0.3498074412345886, "learning_rate": 1.7363506615612894e-05, "loss": 0.586, "step": 15478 }, { "epoch": 0.475501489878045, "grad_norm": 0.3765280544757843, "learning_rate": 1.736317962151701e-05, "loss": 0.4832, "step": 15479 }, { "epoch": 0.4755322090129942, "grad_norm": 0.3340771794319153, "learning_rate": 1.7362852610223906e-05, "loss": 0.4542, "step": 15480 }, { "epoch": 0.4755629281479434, "grad_norm": 0.3544197082519531, "learning_rate": 1.7362525581734348e-05, "loss": 0.615, "step": 15481 }, { "epoch": 0.4755936472828925, "grad_norm": 0.44871070981025696, "learning_rate": 1.7362198536049096e-05, "loss": 0.6306, "step": 15482 }, { "epoch": 0.47562436641784167, "grad_norm": 0.33035537600517273, "learning_rate": 1.7361871473168913e-05, "loss": 0.5436, "step": 15483 }, { "epoch": 0.47565508555279085, "grad_norm": 0.4565214216709137, "learning_rate": 1.736154439309457e-05, "loss": 0.4984, "step": 15484 }, { "epoch": 0.47568580468773997, "grad_norm": 0.4872463345527649, "learning_rate": 1.736121729582682e-05, "loss": 0.6539, "step": 15485 }, { "epoch": 0.47571652382268914, "grad_norm": 0.3396097421646118, "learning_rate": 1.7360890181366438e-05, "loss": 0.4875, "step": 15486 }, { "epoch": 0.4757472429576383, "grad_norm": 0.3498178720474243, "learning_rate": 1.736056304971418e-05, "loss": 0.5335, "step": 15487 }, { "epoch": 0.4757779620925875, "grad_norm": 0.3377169966697693, "learning_rate": 1.7360235900870815e-05, "loss": 0.6058, "step": 15488 }, { "epoch": 0.4758086812275366, "grad_norm": 0.341140478849411, "learning_rate": 1.7359908734837106e-05, "loss": 0.6038, "step": 15489 }, { "epoch": 0.4758394003624858, "grad_norm": 0.3533953130245209, "learning_rate": 1.7359581551613812e-05, "loss": 0.4922, "step": 15490 }, { "epoch": 0.47587011949743496, "grad_norm": 0.37248048186302185, "learning_rate": 1.7359254351201702e-05, "loss": 0.6141, "step": 15491 }, { "epoch": 0.47590083863238414, "grad_norm": 0.3305078446865082, "learning_rate": 1.7358927133601542e-05, "loss": 0.5955, "step": 15492 }, { "epoch": 0.47593155776733326, "grad_norm": 0.385486364364624, "learning_rate": 1.735859989881409e-05, "loss": 0.5165, "step": 15493 }, { "epoch": 0.47596227690228243, "grad_norm": 0.3364427089691162, "learning_rate": 1.7358272646840118e-05, "loss": 0.5742, "step": 15494 }, { "epoch": 0.4759929960372316, "grad_norm": 0.36814379692077637, "learning_rate": 1.7357945377680385e-05, "loss": 0.5584, "step": 15495 }, { "epoch": 0.47602371517218073, "grad_norm": 0.32925596833229065, "learning_rate": 1.7357618091335658e-05, "loss": 0.5296, "step": 15496 }, { "epoch": 0.4760544343071299, "grad_norm": 0.35337576270103455, "learning_rate": 1.73572907878067e-05, "loss": 0.5746, "step": 15497 }, { "epoch": 0.4760851534420791, "grad_norm": 0.37300413846969604, "learning_rate": 1.735696346709427e-05, "loss": 0.6244, "step": 15498 }, { "epoch": 0.47611587257702825, "grad_norm": 0.36432334780693054, "learning_rate": 1.7356636129199146e-05, "loss": 0.4879, "step": 15499 }, { "epoch": 0.4761465917119774, "grad_norm": 0.3723975419998169, "learning_rate": 1.735630877412208e-05, "loss": 0.591, "step": 15500 }, { "epoch": 0.47617731084692655, "grad_norm": 0.33878466486930847, "learning_rate": 1.735598140186384e-05, "loss": 0.5512, "step": 15501 }, { "epoch": 0.4762080299818757, "grad_norm": 0.3155069649219513, "learning_rate": 1.7355654012425195e-05, "loss": 0.4979, "step": 15502 }, { "epoch": 0.47623874911682484, "grad_norm": 0.3724271357059479, "learning_rate": 1.7355326605806903e-05, "loss": 0.4522, "step": 15503 }, { "epoch": 0.476269468251774, "grad_norm": 0.41059738397598267, "learning_rate": 1.7354999182009735e-05, "loss": 0.6705, "step": 15504 }, { "epoch": 0.4763001873867232, "grad_norm": 0.34528741240501404, "learning_rate": 1.7354671741034454e-05, "loss": 0.5977, "step": 15505 }, { "epoch": 0.47633090652167237, "grad_norm": 0.37746959924697876, "learning_rate": 1.735434428288182e-05, "loss": 0.5277, "step": 15506 }, { "epoch": 0.4763616256566215, "grad_norm": 0.3260432779788971, "learning_rate": 1.7354016807552603e-05, "loss": 0.5848, "step": 15507 }, { "epoch": 0.47639234479157067, "grad_norm": 0.4086877107620239, "learning_rate": 1.7353689315047565e-05, "loss": 0.5444, "step": 15508 }, { "epoch": 0.47642306392651984, "grad_norm": 0.32031816244125366, "learning_rate": 1.7353361805367473e-05, "loss": 0.4838, "step": 15509 }, { "epoch": 0.476453783061469, "grad_norm": 0.30442360043525696, "learning_rate": 1.735303427851309e-05, "loss": 0.5809, "step": 15510 }, { "epoch": 0.47648450219641814, "grad_norm": 0.32364585995674133, "learning_rate": 1.7352706734485183e-05, "loss": 0.642, "step": 15511 }, { "epoch": 0.4765152213313673, "grad_norm": 0.3506607413291931, "learning_rate": 1.7352379173284516e-05, "loss": 0.5361, "step": 15512 }, { "epoch": 0.4765459404663165, "grad_norm": 0.3446476459503174, "learning_rate": 1.735205159491185e-05, "loss": 0.5515, "step": 15513 }, { "epoch": 0.4765766596012656, "grad_norm": 0.36923685669898987, "learning_rate": 1.735172399936796e-05, "loss": 0.5728, "step": 15514 }, { "epoch": 0.4766073787362148, "grad_norm": 0.3805038034915924, "learning_rate": 1.73513963866536e-05, "loss": 0.5902, "step": 15515 }, { "epoch": 0.47663809787116396, "grad_norm": 0.4298056662082672, "learning_rate": 1.7351068756769545e-05, "loss": 0.5331, "step": 15516 }, { "epoch": 0.47666881700611313, "grad_norm": 0.40244024991989136, "learning_rate": 1.7350741109716552e-05, "loss": 0.6518, "step": 15517 }, { "epoch": 0.47669953614106225, "grad_norm": 0.4099474251270294, "learning_rate": 1.735041344549539e-05, "loss": 0.5545, "step": 15518 }, { "epoch": 0.4767302552760114, "grad_norm": 0.36238330602645874, "learning_rate": 1.7350085764106827e-05, "loss": 0.5716, "step": 15519 }, { "epoch": 0.4767609744109606, "grad_norm": 0.34830427169799805, "learning_rate": 1.7349758065551622e-05, "loss": 0.5524, "step": 15520 }, { "epoch": 0.4767916935459097, "grad_norm": 0.35460561513900757, "learning_rate": 1.7349430349830546e-05, "loss": 0.533, "step": 15521 }, { "epoch": 0.4768224126808589, "grad_norm": 0.7615846991539001, "learning_rate": 1.7349102616944364e-05, "loss": 0.5596, "step": 15522 }, { "epoch": 0.4768531318158081, "grad_norm": 0.35641202330589294, "learning_rate": 1.7348774866893835e-05, "loss": 0.5862, "step": 15523 }, { "epoch": 0.47688385095075725, "grad_norm": 0.356607586145401, "learning_rate": 1.7348447099679732e-05, "loss": 0.6011, "step": 15524 }, { "epoch": 0.47691457008570637, "grad_norm": 0.39400485157966614, "learning_rate": 1.7348119315302817e-05, "loss": 0.5555, "step": 15525 }, { "epoch": 0.47694528922065554, "grad_norm": 0.33883944153785706, "learning_rate": 1.7347791513763855e-05, "loss": 0.5038, "step": 15526 }, { "epoch": 0.4769760083556047, "grad_norm": 0.32018017768859863, "learning_rate": 1.7347463695063617e-05, "loss": 0.503, "step": 15527 }, { "epoch": 0.47700672749055384, "grad_norm": 0.3719457685947418, "learning_rate": 1.734713585920286e-05, "loss": 0.6017, "step": 15528 }, { "epoch": 0.477037446625503, "grad_norm": 0.35656479001045227, "learning_rate": 1.7346808006182354e-05, "loss": 0.5756, "step": 15529 }, { "epoch": 0.4770681657604522, "grad_norm": 0.36224791407585144, "learning_rate": 1.7346480136002865e-05, "loss": 0.5556, "step": 15530 }, { "epoch": 0.47709888489540137, "grad_norm": 0.3198019862174988, "learning_rate": 1.734615224866516e-05, "loss": 0.5666, "step": 15531 }, { "epoch": 0.4771296040303505, "grad_norm": 0.3308795392513275, "learning_rate": 1.7345824344170003e-05, "loss": 0.4819, "step": 15532 }, { "epoch": 0.47716032316529966, "grad_norm": 0.39748963713645935, "learning_rate": 1.734549642251816e-05, "loss": 0.5292, "step": 15533 }, { "epoch": 0.47719104230024884, "grad_norm": 0.3615792989730835, "learning_rate": 1.7345168483710396e-05, "loss": 0.6319, "step": 15534 }, { "epoch": 0.477221761435198, "grad_norm": 0.34500452876091003, "learning_rate": 1.734484052774748e-05, "loss": 0.5686, "step": 15535 }, { "epoch": 0.47725248057014713, "grad_norm": 0.32462650537490845, "learning_rate": 1.7344512554630177e-05, "loss": 0.5136, "step": 15536 }, { "epoch": 0.4772831997050963, "grad_norm": 0.6345045566558838, "learning_rate": 1.734418456435925e-05, "loss": 0.5988, "step": 15537 }, { "epoch": 0.4773139188400455, "grad_norm": 0.317025750875473, "learning_rate": 1.7343856556935467e-05, "loss": 0.5295, "step": 15538 }, { "epoch": 0.4773446379749946, "grad_norm": 0.32173606753349304, "learning_rate": 1.734352853235959e-05, "loss": 0.5458, "step": 15539 }, { "epoch": 0.4773753571099438, "grad_norm": 0.33228832483291626, "learning_rate": 1.7343200490632397e-05, "loss": 0.5765, "step": 15540 }, { "epoch": 0.47740607624489295, "grad_norm": 0.33370694518089294, "learning_rate": 1.7342872431754645e-05, "loss": 0.4812, "step": 15541 }, { "epoch": 0.4774367953798421, "grad_norm": 0.3632431626319885, "learning_rate": 1.73425443557271e-05, "loss": 0.5558, "step": 15542 }, { "epoch": 0.47746751451479125, "grad_norm": 0.3835834860801697, "learning_rate": 1.7342216262550525e-05, "loss": 0.5274, "step": 15543 }, { "epoch": 0.4774982336497404, "grad_norm": 0.34986671805381775, "learning_rate": 1.73418881522257e-05, "loss": 0.63, "step": 15544 }, { "epoch": 0.4775289527846896, "grad_norm": 0.4356629550457001, "learning_rate": 1.7341560024753376e-05, "loss": 0.614, "step": 15545 }, { "epoch": 0.4775596719196387, "grad_norm": 0.36890482902526855, "learning_rate": 1.7341231880134328e-05, "loss": 0.5676, "step": 15546 }, { "epoch": 0.4775903910545879, "grad_norm": 0.36310893297195435, "learning_rate": 1.734090371836932e-05, "loss": 0.5643, "step": 15547 }, { "epoch": 0.47762111018953707, "grad_norm": 0.3365352749824524, "learning_rate": 1.734057553945912e-05, "loss": 0.6023, "step": 15548 }, { "epoch": 0.47765182932448624, "grad_norm": 0.34148409962654114, "learning_rate": 1.7340247343404493e-05, "loss": 0.5787, "step": 15549 }, { "epoch": 0.47768254845943536, "grad_norm": 0.42471635341644287, "learning_rate": 1.73399191302062e-05, "loss": 0.5867, "step": 15550 }, { "epoch": 0.47771326759438454, "grad_norm": 0.40844249725341797, "learning_rate": 1.7339590899865023e-05, "loss": 0.5728, "step": 15551 }, { "epoch": 0.4777439867293337, "grad_norm": 0.36694976687431335, "learning_rate": 1.733926265238171e-05, "loss": 0.6207, "step": 15552 }, { "epoch": 0.4777747058642829, "grad_norm": 0.35196033120155334, "learning_rate": 1.7338934387757044e-05, "loss": 0.5868, "step": 15553 }, { "epoch": 0.477805424999232, "grad_norm": 0.33903130888938904, "learning_rate": 1.733860610599178e-05, "loss": 0.5904, "step": 15554 }, { "epoch": 0.4778361441341812, "grad_norm": 0.3848533630371094, "learning_rate": 1.7338277807086687e-05, "loss": 0.5236, "step": 15555 }, { "epoch": 0.47786686326913036, "grad_norm": 0.355689138174057, "learning_rate": 1.733794949104254e-05, "loss": 0.5313, "step": 15556 }, { "epoch": 0.4778975824040795, "grad_norm": 0.3349611163139343, "learning_rate": 1.7337621157860093e-05, "loss": 0.5969, "step": 15557 }, { "epoch": 0.47792830153902865, "grad_norm": 0.3639744520187378, "learning_rate": 1.7337292807540123e-05, "loss": 0.6318, "step": 15558 }, { "epoch": 0.47795902067397783, "grad_norm": 0.3650461733341217, "learning_rate": 1.7336964440083393e-05, "loss": 0.5988, "step": 15559 }, { "epoch": 0.477989739808927, "grad_norm": 0.3726778030395508, "learning_rate": 1.7336636055490668e-05, "loss": 0.5469, "step": 15560 }, { "epoch": 0.4780204589438761, "grad_norm": 0.33550629019737244, "learning_rate": 1.7336307653762717e-05, "loss": 0.5445, "step": 15561 }, { "epoch": 0.4780511780788253, "grad_norm": 0.36267101764678955, "learning_rate": 1.733597923490031e-05, "loss": 0.663, "step": 15562 }, { "epoch": 0.4780818972137745, "grad_norm": 0.3375217616558075, "learning_rate": 1.733565079890421e-05, "loss": 0.6398, "step": 15563 }, { "epoch": 0.4781126163487236, "grad_norm": 0.3433980643749237, "learning_rate": 1.7335322345775187e-05, "loss": 0.4885, "step": 15564 }, { "epoch": 0.47814333548367277, "grad_norm": 0.3828219473361969, "learning_rate": 1.7334993875514e-05, "loss": 0.6184, "step": 15565 }, { "epoch": 0.47817405461862195, "grad_norm": 0.4106205701828003, "learning_rate": 1.733466538812143e-05, "loss": 0.6139, "step": 15566 }, { "epoch": 0.4782047737535711, "grad_norm": 0.3172977864742279, "learning_rate": 1.7334336883598234e-05, "loss": 0.5434, "step": 15567 }, { "epoch": 0.47823549288852024, "grad_norm": 0.36794474720954895, "learning_rate": 1.733400836194518e-05, "loss": 0.5874, "step": 15568 }, { "epoch": 0.4782662120234694, "grad_norm": 0.31145453453063965, "learning_rate": 1.733367982316304e-05, "loss": 0.4875, "step": 15569 }, { "epoch": 0.4782969311584186, "grad_norm": 0.3452140986919403, "learning_rate": 1.7333351267252574e-05, "loss": 0.5303, "step": 15570 }, { "epoch": 0.4783276502933677, "grad_norm": 0.3451298177242279, "learning_rate": 1.733302269421456e-05, "loss": 0.5658, "step": 15571 }, { "epoch": 0.4783583694283169, "grad_norm": 0.31771114468574524, "learning_rate": 1.7332694104049755e-05, "loss": 0.5669, "step": 15572 }, { "epoch": 0.47838908856326606, "grad_norm": 0.3339412808418274, "learning_rate": 1.7332365496758933e-05, "loss": 0.5252, "step": 15573 }, { "epoch": 0.47841980769821524, "grad_norm": 0.3589840829372406, "learning_rate": 1.7332036872342862e-05, "loss": 0.5849, "step": 15574 }, { "epoch": 0.47845052683316436, "grad_norm": 0.34008628129959106, "learning_rate": 1.7331708230802303e-05, "loss": 0.5053, "step": 15575 }, { "epoch": 0.47848124596811353, "grad_norm": 0.339419424533844, "learning_rate": 1.7331379572138027e-05, "loss": 0.5722, "step": 15576 }, { "epoch": 0.4785119651030627, "grad_norm": 0.3015213906764984, "learning_rate": 1.7331050896350804e-05, "loss": 0.5297, "step": 15577 }, { "epoch": 0.4785426842380119, "grad_norm": 0.36969149112701416, "learning_rate": 1.73307222034414e-05, "loss": 0.5679, "step": 15578 }, { "epoch": 0.478573403372961, "grad_norm": 0.33238035440444946, "learning_rate": 1.7330393493410576e-05, "loss": 0.5208, "step": 15579 }, { "epoch": 0.4786041225079102, "grad_norm": 0.42245933413505554, "learning_rate": 1.7330064766259113e-05, "loss": 0.6127, "step": 15580 }, { "epoch": 0.47863484164285935, "grad_norm": 0.3486548662185669, "learning_rate": 1.7329736021987772e-05, "loss": 0.5675, "step": 15581 }, { "epoch": 0.4786655607778085, "grad_norm": 0.3555736243724823, "learning_rate": 1.7329407260597316e-05, "loss": 0.5216, "step": 15582 }, { "epoch": 0.47869627991275765, "grad_norm": 0.35806798934936523, "learning_rate": 1.732907848208852e-05, "loss": 0.5915, "step": 15583 }, { "epoch": 0.4787269990477068, "grad_norm": 0.4145442843437195, "learning_rate": 1.732874968646215e-05, "loss": 0.6514, "step": 15584 }, { "epoch": 0.478757718182656, "grad_norm": 0.3206360638141632, "learning_rate": 1.732842087371897e-05, "loss": 0.5593, "step": 15585 }, { "epoch": 0.4787884373176051, "grad_norm": 0.3387293517589569, "learning_rate": 1.7328092043859758e-05, "loss": 0.5423, "step": 15586 }, { "epoch": 0.4788191564525543, "grad_norm": 0.3330969214439392, "learning_rate": 1.732776319688527e-05, "loss": 0.5814, "step": 15587 }, { "epoch": 0.47884987558750347, "grad_norm": 0.36168035864830017, "learning_rate": 1.732743433279628e-05, "loss": 0.5738, "step": 15588 }, { "epoch": 0.4788805947224526, "grad_norm": 0.35744935274124146, "learning_rate": 1.7327105451593555e-05, "loss": 0.5268, "step": 15589 }, { "epoch": 0.47891131385740177, "grad_norm": 0.34030646085739136, "learning_rate": 1.7326776553277868e-05, "loss": 0.515, "step": 15590 }, { "epoch": 0.47894203299235094, "grad_norm": 0.4648550748825073, "learning_rate": 1.7326447637849978e-05, "loss": 0.559, "step": 15591 }, { "epoch": 0.4789727521273001, "grad_norm": 0.34286704659461975, "learning_rate": 1.7326118705310657e-05, "loss": 0.5494, "step": 15592 }, { "epoch": 0.47900347126224924, "grad_norm": 0.32803720235824585, "learning_rate": 1.7325789755660677e-05, "loss": 0.5424, "step": 15593 }, { "epoch": 0.4790341903971984, "grad_norm": 0.35223308205604553, "learning_rate": 1.73254607889008e-05, "loss": 0.5252, "step": 15594 }, { "epoch": 0.4790649095321476, "grad_norm": 0.39793962240219116, "learning_rate": 1.73251318050318e-05, "loss": 0.5671, "step": 15595 }, { "epoch": 0.4790956286670967, "grad_norm": 0.3672584295272827, "learning_rate": 1.7324802804054447e-05, "loss": 0.5433, "step": 15596 }, { "epoch": 0.4791263478020459, "grad_norm": 0.3914015293121338, "learning_rate": 1.73244737859695e-05, "loss": 0.5224, "step": 15597 }, { "epoch": 0.47915706693699506, "grad_norm": 0.3070962131023407, "learning_rate": 1.7324144750777735e-05, "loss": 0.5346, "step": 15598 }, { "epoch": 0.47918778607194423, "grad_norm": 0.3557225167751312, "learning_rate": 1.732381569847992e-05, "loss": 0.6141, "step": 15599 }, { "epoch": 0.47921850520689335, "grad_norm": 0.3362705409526825, "learning_rate": 1.7323486629076817e-05, "loss": 0.5333, "step": 15600 }, { "epoch": 0.47924922434184253, "grad_norm": 0.3504515588283539, "learning_rate": 1.7323157542569204e-05, "loss": 0.5322, "step": 15601 }, { "epoch": 0.4792799434767917, "grad_norm": 0.35242152214050293, "learning_rate": 1.7322828438957843e-05, "loss": 0.539, "step": 15602 }, { "epoch": 0.4793106626117409, "grad_norm": 0.3624297082424164, "learning_rate": 1.73224993182435e-05, "loss": 0.5646, "step": 15603 }, { "epoch": 0.47934138174669, "grad_norm": 0.33676859736442566, "learning_rate": 1.732217018042696e-05, "loss": 0.5818, "step": 15604 }, { "epoch": 0.4793721008816392, "grad_norm": 0.3213867247104645, "learning_rate": 1.732184102550897e-05, "loss": 0.5668, "step": 15605 }, { "epoch": 0.47940282001658835, "grad_norm": 0.3640679717063904, "learning_rate": 1.7321511853490313e-05, "loss": 0.5232, "step": 15606 }, { "epoch": 0.47943353915153747, "grad_norm": 2.695023775100708, "learning_rate": 1.7321182664371753e-05, "loss": 0.5115, "step": 15607 }, { "epoch": 0.47946425828648664, "grad_norm": 0.34890151023864746, "learning_rate": 1.7320853458154063e-05, "loss": 0.7063, "step": 15608 }, { "epoch": 0.4794949774214358, "grad_norm": 0.5789188742637634, "learning_rate": 1.7320524234838004e-05, "loss": 0.5945, "step": 15609 }, { "epoch": 0.479525696556385, "grad_norm": 0.3746485114097595, "learning_rate": 1.732019499442435e-05, "loss": 0.6292, "step": 15610 }, { "epoch": 0.4795564156913341, "grad_norm": 0.3128478527069092, "learning_rate": 1.731986573691387e-05, "loss": 0.5149, "step": 15611 }, { "epoch": 0.4795871348262833, "grad_norm": 0.3518771529197693, "learning_rate": 1.7319536462307334e-05, "loss": 0.5543, "step": 15612 }, { "epoch": 0.47961785396123247, "grad_norm": 0.35659435391426086, "learning_rate": 1.7319207170605508e-05, "loss": 0.5237, "step": 15613 }, { "epoch": 0.4796485730961816, "grad_norm": 0.3587920665740967, "learning_rate": 1.7318877861809164e-05, "loss": 0.5303, "step": 15614 }, { "epoch": 0.47967929223113076, "grad_norm": 0.42373526096343994, "learning_rate": 1.731854853591907e-05, "loss": 0.5338, "step": 15615 }, { "epoch": 0.47971001136607994, "grad_norm": 0.38490405678749084, "learning_rate": 1.731821919293599e-05, "loss": 0.5507, "step": 15616 }, { "epoch": 0.4797407305010291, "grad_norm": 0.32971012592315674, "learning_rate": 1.7317889832860703e-05, "loss": 0.6029, "step": 15617 }, { "epoch": 0.47977144963597823, "grad_norm": 0.3246597349643707, "learning_rate": 1.731756045569397e-05, "loss": 0.5256, "step": 15618 }, { "epoch": 0.4798021687709274, "grad_norm": 0.3797847032546997, "learning_rate": 1.7317231061436562e-05, "loss": 0.6221, "step": 15619 }, { "epoch": 0.4798328879058766, "grad_norm": 0.35837382078170776, "learning_rate": 1.7316901650089255e-05, "loss": 0.5673, "step": 15620 }, { "epoch": 0.47986360704082576, "grad_norm": 0.35814476013183594, "learning_rate": 1.731657222165281e-05, "loss": 0.5321, "step": 15621 }, { "epoch": 0.4798943261757749, "grad_norm": 0.37409883737564087, "learning_rate": 1.7316242776128e-05, "loss": 0.584, "step": 15622 }, { "epoch": 0.47992504531072405, "grad_norm": 0.3886256515979767, "learning_rate": 1.7315913313515595e-05, "loss": 0.5904, "step": 15623 }, { "epoch": 0.4799557644456732, "grad_norm": 0.3207681179046631, "learning_rate": 1.7315583833816364e-05, "loss": 0.5696, "step": 15624 }, { "epoch": 0.47998648358062235, "grad_norm": 0.46518924832344055, "learning_rate": 1.7315254337031073e-05, "loss": 0.682, "step": 15625 }, { "epoch": 0.4800172027155715, "grad_norm": 0.31617146730422974, "learning_rate": 1.7314924823160497e-05, "loss": 0.5584, "step": 15626 }, { "epoch": 0.4800479218505207, "grad_norm": 0.3801088333129883, "learning_rate": 1.7314595292205406e-05, "loss": 0.5757, "step": 15627 }, { "epoch": 0.4800786409854699, "grad_norm": 0.32563695311546326, "learning_rate": 1.731426574416656e-05, "loss": 0.5555, "step": 15628 }, { "epoch": 0.480109360120419, "grad_norm": 0.33472543954849243, "learning_rate": 1.731393617904474e-05, "loss": 0.4885, "step": 15629 }, { "epoch": 0.48014007925536817, "grad_norm": 0.3942820131778717, "learning_rate": 1.7313606596840712e-05, "loss": 0.646, "step": 15630 }, { "epoch": 0.48017079839031734, "grad_norm": 0.4719225764274597, "learning_rate": 1.7313276997555242e-05, "loss": 0.5954, "step": 15631 }, { "epoch": 0.48020151752526646, "grad_norm": 0.3505592346191406, "learning_rate": 1.7312947381189102e-05, "loss": 0.5503, "step": 15632 }, { "epoch": 0.48023223666021564, "grad_norm": 0.34374257922172546, "learning_rate": 1.7312617747743065e-05, "loss": 0.4728, "step": 15633 }, { "epoch": 0.4802629557951648, "grad_norm": 0.41130590438842773, "learning_rate": 1.73122880972179e-05, "loss": 0.6454, "step": 15634 }, { "epoch": 0.480293674930114, "grad_norm": 0.3677595555782318, "learning_rate": 1.7311958429614374e-05, "loss": 0.5642, "step": 15635 }, { "epoch": 0.4803243940650631, "grad_norm": 0.40935736894607544, "learning_rate": 1.7311628744933258e-05, "loss": 0.5125, "step": 15636 }, { "epoch": 0.4803551132000123, "grad_norm": 0.36335334181785583, "learning_rate": 1.731129904317532e-05, "loss": 0.6007, "step": 15637 }, { "epoch": 0.48038583233496146, "grad_norm": 0.32127419114112854, "learning_rate": 1.731096932434134e-05, "loss": 0.5481, "step": 15638 }, { "epoch": 0.4804165514699106, "grad_norm": 0.3516536355018616, "learning_rate": 1.7310639588432074e-05, "loss": 0.5392, "step": 15639 }, { "epoch": 0.48044727060485976, "grad_norm": 0.36510005593299866, "learning_rate": 1.7310309835448302e-05, "loss": 0.6244, "step": 15640 }, { "epoch": 0.48047798973980893, "grad_norm": 0.3016154170036316, "learning_rate": 1.7309980065390788e-05, "loss": 0.5196, "step": 15641 }, { "epoch": 0.4805087088747581, "grad_norm": 0.33173680305480957, "learning_rate": 1.7309650278260306e-05, "loss": 0.5837, "step": 15642 }, { "epoch": 0.4805394280097072, "grad_norm": 0.34379899501800537, "learning_rate": 1.7309320474057627e-05, "loss": 0.6197, "step": 15643 }, { "epoch": 0.4805701471446564, "grad_norm": 0.33790817856788635, "learning_rate": 1.7308990652783518e-05, "loss": 0.5661, "step": 15644 }, { "epoch": 0.4806008662796056, "grad_norm": 0.4911993443965912, "learning_rate": 1.730866081443875e-05, "loss": 0.5832, "step": 15645 }, { "epoch": 0.48063158541455475, "grad_norm": 0.39675939083099365, "learning_rate": 1.7308330959024093e-05, "loss": 0.6151, "step": 15646 }, { "epoch": 0.48066230454950387, "grad_norm": 0.3755030930042267, "learning_rate": 1.730800108654032e-05, "loss": 0.5576, "step": 15647 }, { "epoch": 0.48069302368445305, "grad_norm": 0.32469770312309265, "learning_rate": 1.7307671196988202e-05, "loss": 0.5801, "step": 15648 }, { "epoch": 0.4807237428194022, "grad_norm": 0.37333357334136963, "learning_rate": 1.7307341290368507e-05, "loss": 0.5473, "step": 15649 }, { "epoch": 0.48075446195435134, "grad_norm": 0.33648401498794556, "learning_rate": 1.730701136668201e-05, "loss": 0.504, "step": 15650 }, { "epoch": 0.4807851810893005, "grad_norm": 0.36116909980773926, "learning_rate": 1.730668142592947e-05, "loss": 0.6167, "step": 15651 }, { "epoch": 0.4808159002242497, "grad_norm": 0.3578149974346161, "learning_rate": 1.7306351468111667e-05, "loss": 0.6443, "step": 15652 }, { "epoch": 0.48084661935919887, "grad_norm": 0.32502132654190063, "learning_rate": 1.7306021493229373e-05, "loss": 0.566, "step": 15653 }, { "epoch": 0.480877338494148, "grad_norm": 0.32410433888435364, "learning_rate": 1.7305691501283353e-05, "loss": 0.4995, "step": 15654 }, { "epoch": 0.48090805762909716, "grad_norm": 0.33389967679977417, "learning_rate": 1.730536149227438e-05, "loss": 0.5578, "step": 15655 }, { "epoch": 0.48093877676404634, "grad_norm": 0.3605579435825348, "learning_rate": 1.7305031466203227e-05, "loss": 0.6456, "step": 15656 }, { "epoch": 0.48096949589899546, "grad_norm": 0.33562013506889343, "learning_rate": 1.7304701423070663e-05, "loss": 0.5362, "step": 15657 }, { "epoch": 0.48100021503394463, "grad_norm": 0.4555802643299103, "learning_rate": 1.7304371362877457e-05, "loss": 0.5913, "step": 15658 }, { "epoch": 0.4810309341688938, "grad_norm": 0.3416529595851898, "learning_rate": 1.7304041285624383e-05, "loss": 0.5352, "step": 15659 }, { "epoch": 0.481061653303843, "grad_norm": 0.34348368644714355, "learning_rate": 1.730371119131221e-05, "loss": 0.5316, "step": 15660 }, { "epoch": 0.4810923724387921, "grad_norm": 0.33694422245025635, "learning_rate": 1.7303381079941708e-05, "loss": 0.5997, "step": 15661 }, { "epoch": 0.4811230915737413, "grad_norm": 0.3710257411003113, "learning_rate": 1.7303050951513652e-05, "loss": 0.5799, "step": 15662 }, { "epoch": 0.48115381070869045, "grad_norm": 0.3852437734603882, "learning_rate": 1.7302720806028808e-05, "loss": 0.5481, "step": 15663 }, { "epoch": 0.48118452984363963, "grad_norm": 0.3510846793651581, "learning_rate": 1.730239064348795e-05, "loss": 0.5731, "step": 15664 }, { "epoch": 0.48121524897858875, "grad_norm": 0.34345823526382446, "learning_rate": 1.730206046389185e-05, "loss": 0.5924, "step": 15665 }, { "epoch": 0.4812459681135379, "grad_norm": 0.3569084703922272, "learning_rate": 1.7301730267241276e-05, "loss": 0.5446, "step": 15666 }, { "epoch": 0.4812766872484871, "grad_norm": 0.35493287444114685, "learning_rate": 1.7301400053537002e-05, "loss": 0.604, "step": 15667 }, { "epoch": 0.4813074063834362, "grad_norm": 0.35209769010543823, "learning_rate": 1.73010698227798e-05, "loss": 0.528, "step": 15668 }, { "epoch": 0.4813381255183854, "grad_norm": 0.3469630479812622, "learning_rate": 1.7300739574970435e-05, "loss": 0.6455, "step": 15669 }, { "epoch": 0.48136884465333457, "grad_norm": 0.4557953476905823, "learning_rate": 1.7300409310109688e-05, "loss": 0.5819, "step": 15670 }, { "epoch": 0.48139956378828375, "grad_norm": 0.3378516137599945, "learning_rate": 1.7300079028198323e-05, "loss": 0.5689, "step": 15671 }, { "epoch": 0.48143028292323287, "grad_norm": 0.331480473279953, "learning_rate": 1.729974872923711e-05, "loss": 0.5584, "step": 15672 }, { "epoch": 0.48146100205818204, "grad_norm": 0.31670403480529785, "learning_rate": 1.729941841322683e-05, "loss": 0.5494, "step": 15673 }, { "epoch": 0.4814917211931312, "grad_norm": 0.37127986550331116, "learning_rate": 1.7299088080168245e-05, "loss": 0.6027, "step": 15674 }, { "epoch": 0.48152244032808034, "grad_norm": 0.353407084941864, "learning_rate": 1.7298757730062132e-05, "loss": 0.5948, "step": 15675 }, { "epoch": 0.4815531594630295, "grad_norm": 0.349269300699234, "learning_rate": 1.7298427362909258e-05, "loss": 0.5589, "step": 15676 }, { "epoch": 0.4815838785979787, "grad_norm": 0.3471521735191345, "learning_rate": 1.72980969787104e-05, "loss": 0.5337, "step": 15677 }, { "epoch": 0.48161459773292786, "grad_norm": 0.39009249210357666, "learning_rate": 1.7297766577466327e-05, "loss": 0.6091, "step": 15678 }, { "epoch": 0.481645316867877, "grad_norm": 0.3752479553222656, "learning_rate": 1.729743615917781e-05, "loss": 0.6151, "step": 15679 }, { "epoch": 0.48167603600282616, "grad_norm": 0.34029144048690796, "learning_rate": 1.729710572384562e-05, "loss": 0.6054, "step": 15680 }, { "epoch": 0.48170675513777533, "grad_norm": 0.35930585861206055, "learning_rate": 1.729677527147053e-05, "loss": 0.5155, "step": 15681 }, { "epoch": 0.48173747427272445, "grad_norm": 0.3410246670246124, "learning_rate": 1.7296444802053312e-05, "loss": 0.585, "step": 15682 }, { "epoch": 0.48176819340767363, "grad_norm": 0.3528454303741455, "learning_rate": 1.7296114315594736e-05, "loss": 0.6136, "step": 15683 }, { "epoch": 0.4817989125426228, "grad_norm": 0.3451441824436188, "learning_rate": 1.7295783812095577e-05, "loss": 0.5562, "step": 15684 }, { "epoch": 0.481829631677572, "grad_norm": 0.3526017665863037, "learning_rate": 1.7295453291556606e-05, "loss": 0.5809, "step": 15685 }, { "epoch": 0.4818603508125211, "grad_norm": 0.359002947807312, "learning_rate": 1.7295122753978595e-05, "loss": 0.6447, "step": 15686 }, { "epoch": 0.4818910699474703, "grad_norm": 0.34475526213645935, "learning_rate": 1.729479219936231e-05, "loss": 0.6218, "step": 15687 }, { "epoch": 0.48192178908241945, "grad_norm": 0.3351697325706482, "learning_rate": 1.7294461627708536e-05, "loss": 0.5989, "step": 15688 }, { "epoch": 0.4819525082173686, "grad_norm": 0.35583412647247314, "learning_rate": 1.7294131039018033e-05, "loss": 0.5933, "step": 15689 }, { "epoch": 0.48198322735231774, "grad_norm": 0.33511096239089966, "learning_rate": 1.729380043329158e-05, "loss": 0.5617, "step": 15690 }, { "epoch": 0.4820139464872669, "grad_norm": 0.33694690465927124, "learning_rate": 1.7293469810529942e-05, "loss": 0.5414, "step": 15691 }, { "epoch": 0.4820446656222161, "grad_norm": 0.3298153281211853, "learning_rate": 1.7293139170733902e-05, "loss": 0.5156, "step": 15692 }, { "epoch": 0.4820753847571652, "grad_norm": 0.32541871070861816, "learning_rate": 1.729280851390422e-05, "loss": 0.5028, "step": 15693 }, { "epoch": 0.4821061038921144, "grad_norm": 0.4098460078239441, "learning_rate": 1.729247784004168e-05, "loss": 0.5811, "step": 15694 }, { "epoch": 0.48213682302706357, "grad_norm": 0.34444573521614075, "learning_rate": 1.7292147149147044e-05, "loss": 0.601, "step": 15695 }, { "epoch": 0.48216754216201274, "grad_norm": 0.4556410610675812, "learning_rate": 1.7291816441221092e-05, "loss": 0.5423, "step": 15696 }, { "epoch": 0.48219826129696186, "grad_norm": 0.42567893862724304, "learning_rate": 1.7291485716264593e-05, "loss": 0.5658, "step": 15697 }, { "epoch": 0.48222898043191104, "grad_norm": 0.38303226232528687, "learning_rate": 1.729115497427832e-05, "loss": 0.5345, "step": 15698 }, { "epoch": 0.4822596995668602, "grad_norm": 0.34839552640914917, "learning_rate": 1.7290824215263045e-05, "loss": 0.4974, "step": 15699 }, { "epoch": 0.48229041870180933, "grad_norm": 0.503210723400116, "learning_rate": 1.729049343921954e-05, "loss": 0.5184, "step": 15700 }, { "epoch": 0.4823211378367585, "grad_norm": 0.3348735272884369, "learning_rate": 1.729016264614858e-05, "loss": 0.5717, "step": 15701 }, { "epoch": 0.4823518569717077, "grad_norm": 0.3471105396747589, "learning_rate": 1.7289831836050935e-05, "loss": 0.5545, "step": 15702 }, { "epoch": 0.48238257610665686, "grad_norm": 0.32524940371513367, "learning_rate": 1.7289501008927382e-05, "loss": 0.5922, "step": 15703 }, { "epoch": 0.482413295241606, "grad_norm": 0.44928690791130066, "learning_rate": 1.7289170164778684e-05, "loss": 0.5341, "step": 15704 }, { "epoch": 0.48244401437655515, "grad_norm": 0.37953445315361023, "learning_rate": 1.7288839303605624e-05, "loss": 0.5616, "step": 15705 }, { "epoch": 0.48247473351150433, "grad_norm": 0.34424787759780884, "learning_rate": 1.728850842540897e-05, "loss": 0.5933, "step": 15706 }, { "epoch": 0.4825054526464535, "grad_norm": 0.6616065502166748, "learning_rate": 1.7288177530189494e-05, "loss": 0.6061, "step": 15707 }, { "epoch": 0.4825361717814026, "grad_norm": 0.34685584902763367, "learning_rate": 1.728784661794797e-05, "loss": 0.6059, "step": 15708 }, { "epoch": 0.4825668909163518, "grad_norm": 0.33918502926826477, "learning_rate": 1.7287515688685177e-05, "loss": 0.5006, "step": 15709 }, { "epoch": 0.482597610051301, "grad_norm": 0.3459092080593109, "learning_rate": 1.7287184742401873e-05, "loss": 0.5729, "step": 15710 }, { "epoch": 0.4826283291862501, "grad_norm": 0.3541657030582428, "learning_rate": 1.728685377909885e-05, "loss": 0.6045, "step": 15711 }, { "epoch": 0.48265904832119927, "grad_norm": 0.32489603757858276, "learning_rate": 1.7286522798776865e-05, "loss": 0.5002, "step": 15712 }, { "epoch": 0.48268976745614844, "grad_norm": 0.3707578182220459, "learning_rate": 1.72861918014367e-05, "loss": 0.5721, "step": 15713 }, { "epoch": 0.4827204865910976, "grad_norm": 0.4014342725276947, "learning_rate": 1.7285860787079122e-05, "loss": 0.5388, "step": 15714 }, { "epoch": 0.48275120572604674, "grad_norm": 0.6813092827796936, "learning_rate": 1.7285529755704908e-05, "loss": 0.5699, "step": 15715 }, { "epoch": 0.4827819248609959, "grad_norm": 0.3397083580493927, "learning_rate": 1.7285198707314835e-05, "loss": 0.4955, "step": 15716 }, { "epoch": 0.4828126439959451, "grad_norm": 0.363910436630249, "learning_rate": 1.7284867641909666e-05, "loss": 0.5255, "step": 15717 }, { "epoch": 0.4828433631308942, "grad_norm": 0.36017531156539917, "learning_rate": 1.7284536559490183e-05, "loss": 0.6335, "step": 15718 }, { "epoch": 0.4828740822658434, "grad_norm": 3.620467185974121, "learning_rate": 1.7284205460057153e-05, "loss": 0.6221, "step": 15719 }, { "epoch": 0.48290480140079256, "grad_norm": 0.3365199863910675, "learning_rate": 1.7283874343611357e-05, "loss": 0.6097, "step": 15720 }, { "epoch": 0.48293552053574174, "grad_norm": 0.3482273817062378, "learning_rate": 1.7283543210153562e-05, "loss": 0.5941, "step": 15721 }, { "epoch": 0.48296623967069086, "grad_norm": 0.3374534845352173, "learning_rate": 1.728321205968454e-05, "loss": 0.5191, "step": 15722 }, { "epoch": 0.48299695880564003, "grad_norm": 0.3843139111995697, "learning_rate": 1.728288089220507e-05, "loss": 0.6495, "step": 15723 }, { "epoch": 0.4830276779405892, "grad_norm": 0.3482626974582672, "learning_rate": 1.7282549707715923e-05, "loss": 0.5355, "step": 15724 }, { "epoch": 0.4830583970755383, "grad_norm": 0.341667115688324, "learning_rate": 1.7282218506217875e-05, "loss": 0.5006, "step": 15725 }, { "epoch": 0.4830891162104875, "grad_norm": 0.33193090558052063, "learning_rate": 1.728188728771169e-05, "loss": 0.4926, "step": 15726 }, { "epoch": 0.4831198353454367, "grad_norm": 0.335126668214798, "learning_rate": 1.7281556052198155e-05, "loss": 0.6337, "step": 15727 }, { "epoch": 0.48315055448038585, "grad_norm": 0.36640891432762146, "learning_rate": 1.7281224799678034e-05, "loss": 0.6125, "step": 15728 }, { "epoch": 0.48318127361533497, "grad_norm": 0.33153435587882996, "learning_rate": 1.7280893530152108e-05, "loss": 0.594, "step": 15729 }, { "epoch": 0.48321199275028415, "grad_norm": 0.352229505777359, "learning_rate": 1.7280562243621144e-05, "loss": 0.5155, "step": 15730 }, { "epoch": 0.4832427118852333, "grad_norm": 0.3764321804046631, "learning_rate": 1.728023094008592e-05, "loss": 0.4888, "step": 15731 }, { "epoch": 0.4832734310201825, "grad_norm": 0.4096474051475525, "learning_rate": 1.7279899619547207e-05, "loss": 0.48, "step": 15732 }, { "epoch": 0.4833041501551316, "grad_norm": 0.3503411114215851, "learning_rate": 1.7279568282005778e-05, "loss": 0.5856, "step": 15733 }, { "epoch": 0.4833348692900808, "grad_norm": 0.3748670816421509, "learning_rate": 1.7279236927462413e-05, "loss": 0.5734, "step": 15734 }, { "epoch": 0.48336558842502997, "grad_norm": 0.36833611130714417, "learning_rate": 1.7278905555917876e-05, "loss": 0.6121, "step": 15735 }, { "epoch": 0.4833963075599791, "grad_norm": 0.36095795035362244, "learning_rate": 1.727857416737295e-05, "loss": 0.5843, "step": 15736 }, { "epoch": 0.48342702669492826, "grad_norm": 0.3149845004081726, "learning_rate": 1.7278242761828407e-05, "loss": 0.537, "step": 15737 }, { "epoch": 0.48345774582987744, "grad_norm": 0.3402508795261383, "learning_rate": 1.727791133928502e-05, "loss": 0.5216, "step": 15738 }, { "epoch": 0.4834884649648266, "grad_norm": 0.3475918471813202, "learning_rate": 1.7277579899743565e-05, "loss": 0.6108, "step": 15739 }, { "epoch": 0.48351918409977573, "grad_norm": 0.3776262700557709, "learning_rate": 1.727724844320481e-05, "loss": 0.626, "step": 15740 }, { "epoch": 0.4835499032347249, "grad_norm": 0.39229580760002136, "learning_rate": 1.7276916969669533e-05, "loss": 0.5744, "step": 15741 }, { "epoch": 0.4835806223696741, "grad_norm": 0.38276296854019165, "learning_rate": 1.7276585479138507e-05, "loss": 0.5381, "step": 15742 }, { "epoch": 0.4836113415046232, "grad_norm": 0.37546786665916443, "learning_rate": 1.7276253971612507e-05, "loss": 0.6214, "step": 15743 }, { "epoch": 0.4836420606395724, "grad_norm": 0.3588554859161377, "learning_rate": 1.7275922447092313e-05, "loss": 0.5721, "step": 15744 }, { "epoch": 0.48367277977452156, "grad_norm": 0.35103628039360046, "learning_rate": 1.727559090557869e-05, "loss": 0.6166, "step": 15745 }, { "epoch": 0.48370349890947073, "grad_norm": 0.44209036231040955, "learning_rate": 1.7275259347072416e-05, "loss": 0.6163, "step": 15746 }, { "epoch": 0.48373421804441985, "grad_norm": 0.35991838574409485, "learning_rate": 1.727492777157427e-05, "loss": 0.4842, "step": 15747 }, { "epoch": 0.483764937179369, "grad_norm": 0.33891910314559937, "learning_rate": 1.7274596179085016e-05, "loss": 0.5582, "step": 15748 }, { "epoch": 0.4837956563143182, "grad_norm": 0.349960058927536, "learning_rate": 1.7274264569605435e-05, "loss": 0.5847, "step": 15749 }, { "epoch": 0.4838263754492674, "grad_norm": 0.36290282011032104, "learning_rate": 1.7273932943136308e-05, "loss": 0.6041, "step": 15750 }, { "epoch": 0.4838570945842165, "grad_norm": 0.3306903839111328, "learning_rate": 1.7273601299678396e-05, "loss": 0.507, "step": 15751 }, { "epoch": 0.48388781371916567, "grad_norm": 0.3445807695388794, "learning_rate": 1.727326963923248e-05, "loss": 0.5412, "step": 15752 }, { "epoch": 0.48391853285411485, "grad_norm": 0.36060070991516113, "learning_rate": 1.7272937961799342e-05, "loss": 0.6137, "step": 15753 }, { "epoch": 0.48394925198906397, "grad_norm": 0.3841051161289215, "learning_rate": 1.7272606267379744e-05, "loss": 0.5883, "step": 15754 }, { "epoch": 0.48397997112401314, "grad_norm": 0.3660743534564972, "learning_rate": 1.7272274555974466e-05, "loss": 0.5967, "step": 15755 }, { "epoch": 0.4840106902589623, "grad_norm": 0.46666836738586426, "learning_rate": 1.7271942827584283e-05, "loss": 0.4829, "step": 15756 }, { "epoch": 0.4840414093939115, "grad_norm": 0.463594526052475, "learning_rate": 1.7271611082209968e-05, "loss": 0.6161, "step": 15757 }, { "epoch": 0.4840721285288606, "grad_norm": 0.34602001309394836, "learning_rate": 1.72712793198523e-05, "loss": 0.5772, "step": 15758 }, { "epoch": 0.4841028476638098, "grad_norm": 0.33482825756073, "learning_rate": 1.7270947540512052e-05, "loss": 0.5389, "step": 15759 }, { "epoch": 0.48413356679875896, "grad_norm": 0.39361247420310974, "learning_rate": 1.7270615744189996e-05, "loss": 0.5367, "step": 15760 }, { "epoch": 0.4841642859337081, "grad_norm": 0.3324570655822754, "learning_rate": 1.727028393088691e-05, "loss": 0.6058, "step": 15761 }, { "epoch": 0.48419500506865726, "grad_norm": 0.3352911174297333, "learning_rate": 1.7269952100603568e-05, "loss": 0.5404, "step": 15762 }, { "epoch": 0.48422572420360643, "grad_norm": 0.3528735339641571, "learning_rate": 1.7269620253340747e-05, "loss": 0.5703, "step": 15763 }, { "epoch": 0.4842564433385556, "grad_norm": 0.38353613018989563, "learning_rate": 1.726928838909922e-05, "loss": 0.6298, "step": 15764 }, { "epoch": 0.48428716247350473, "grad_norm": 0.3631245195865631, "learning_rate": 1.726895650787976e-05, "loss": 0.6384, "step": 15765 }, { "epoch": 0.4843178816084539, "grad_norm": 0.363518089056015, "learning_rate": 1.7268624609683146e-05, "loss": 0.6306, "step": 15766 }, { "epoch": 0.4843486007434031, "grad_norm": 0.3514781892299652, "learning_rate": 1.726829269451015e-05, "loss": 0.6773, "step": 15767 }, { "epoch": 0.4843793198783522, "grad_norm": 0.3805536925792694, "learning_rate": 1.726796076236155e-05, "loss": 0.544, "step": 15768 }, { "epoch": 0.4844100390133014, "grad_norm": 0.3302455544471741, "learning_rate": 1.7267628813238122e-05, "loss": 0.4611, "step": 15769 }, { "epoch": 0.48444075814825055, "grad_norm": 0.36925774812698364, "learning_rate": 1.7267296847140635e-05, "loss": 0.5526, "step": 15770 }, { "epoch": 0.4844714772831997, "grad_norm": 0.3502638041973114, "learning_rate": 1.7266964864069874e-05, "loss": 0.5608, "step": 15771 }, { "epoch": 0.48450219641814884, "grad_norm": 0.42221349477767944, "learning_rate": 1.7266632864026606e-05, "loss": 0.5414, "step": 15772 }, { "epoch": 0.484532915553098, "grad_norm": 0.34654757380485535, "learning_rate": 1.726630084701161e-05, "loss": 0.5668, "step": 15773 }, { "epoch": 0.4845636346880472, "grad_norm": 0.40870317816734314, "learning_rate": 1.7265968813025658e-05, "loss": 0.6065, "step": 15774 }, { "epoch": 0.48459435382299637, "grad_norm": 0.4163627624511719, "learning_rate": 1.7265636762069533e-05, "loss": 0.5836, "step": 15775 }, { "epoch": 0.4846250729579455, "grad_norm": 0.3859894573688507, "learning_rate": 1.7265304694144e-05, "loss": 0.5896, "step": 15776 }, { "epoch": 0.48465579209289467, "grad_norm": 0.38113853335380554, "learning_rate": 1.7264972609249845e-05, "loss": 0.5832, "step": 15777 }, { "epoch": 0.48468651122784384, "grad_norm": 0.3458234369754791, "learning_rate": 1.726464050738784e-05, "loss": 0.6286, "step": 15778 }, { "epoch": 0.48471723036279296, "grad_norm": 0.3912731111049652, "learning_rate": 1.726430838855876e-05, "loss": 0.5832, "step": 15779 }, { "epoch": 0.48474794949774214, "grad_norm": 0.38349103927612305, "learning_rate": 1.7263976252763373e-05, "loss": 0.6625, "step": 15780 }, { "epoch": 0.4847786686326913, "grad_norm": 0.8195056915283203, "learning_rate": 1.726364410000247e-05, "loss": 0.5266, "step": 15781 }, { "epoch": 0.4848093877676405, "grad_norm": 0.3457338213920593, "learning_rate": 1.7263311930276816e-05, "loss": 0.5633, "step": 15782 }, { "epoch": 0.4848401069025896, "grad_norm": 0.34510424733161926, "learning_rate": 1.726297974358719e-05, "loss": 0.6204, "step": 15783 }, { "epoch": 0.4848708260375388, "grad_norm": 0.4285133183002472, "learning_rate": 1.726264753993437e-05, "loss": 0.5696, "step": 15784 }, { "epoch": 0.48490154517248796, "grad_norm": 0.35409116744995117, "learning_rate": 1.7262315319319126e-05, "loss": 0.5452, "step": 15785 }, { "epoch": 0.4849322643074371, "grad_norm": 0.39601361751556396, "learning_rate": 1.726198308174224e-05, "loss": 0.6167, "step": 15786 }, { "epoch": 0.48496298344238625, "grad_norm": 0.3748704493045807, "learning_rate": 1.7261650827204483e-05, "loss": 0.6055, "step": 15787 }, { "epoch": 0.48499370257733543, "grad_norm": 0.37444350123405457, "learning_rate": 1.7261318555706634e-05, "loss": 0.5965, "step": 15788 }, { "epoch": 0.4850244217122846, "grad_norm": 0.3443230092525482, "learning_rate": 1.7260986267249467e-05, "loss": 0.5417, "step": 15789 }, { "epoch": 0.4850551408472337, "grad_norm": 0.3777427077293396, "learning_rate": 1.7260653961833762e-05, "loss": 0.5501, "step": 15790 }, { "epoch": 0.4850858599821829, "grad_norm": 0.34792765974998474, "learning_rate": 1.7260321639460292e-05, "loss": 0.559, "step": 15791 }, { "epoch": 0.4851165791171321, "grad_norm": 0.33857953548431396, "learning_rate": 1.7259989300129832e-05, "loss": 0.5298, "step": 15792 }, { "epoch": 0.4851472982520812, "grad_norm": 0.399840384721756, "learning_rate": 1.7259656943843165e-05, "loss": 0.5639, "step": 15793 }, { "epoch": 0.48517801738703037, "grad_norm": 0.34694674611091614, "learning_rate": 1.7259324570601056e-05, "loss": 0.6122, "step": 15794 }, { "epoch": 0.48520873652197954, "grad_norm": 0.3557078242301941, "learning_rate": 1.7258992180404292e-05, "loss": 0.5542, "step": 15795 }, { "epoch": 0.4852394556569287, "grad_norm": 0.342525452375412, "learning_rate": 1.7258659773253642e-05, "loss": 0.5707, "step": 15796 }, { "epoch": 0.48527017479187784, "grad_norm": 0.347680926322937, "learning_rate": 1.7258327349149885e-05, "loss": 0.5873, "step": 15797 }, { "epoch": 0.485300893926827, "grad_norm": 0.35126835107803345, "learning_rate": 1.7257994908093804e-05, "loss": 0.5092, "step": 15798 }, { "epoch": 0.4853316130617762, "grad_norm": 0.40541672706604004, "learning_rate": 1.7257662450086158e-05, "loss": 0.6287, "step": 15799 }, { "epoch": 0.48536233219672537, "grad_norm": 0.3587082028388977, "learning_rate": 1.7257329975127742e-05, "loss": 0.6221, "step": 15800 }, { "epoch": 0.4853930513316745, "grad_norm": 0.4093378186225891, "learning_rate": 1.7256997483219323e-05, "loss": 0.5865, "step": 15801 }, { "epoch": 0.48542377046662366, "grad_norm": 0.3812684416770935, "learning_rate": 1.725666497436168e-05, "loss": 0.5463, "step": 15802 }, { "epoch": 0.48545448960157284, "grad_norm": 0.373402863740921, "learning_rate": 1.7256332448555588e-05, "loss": 0.5813, "step": 15803 }, { "epoch": 0.48548520873652196, "grad_norm": 0.33486655354499817, "learning_rate": 1.7255999905801827e-05, "loss": 0.5027, "step": 15804 }, { "epoch": 0.48551592787147113, "grad_norm": 0.3703971803188324, "learning_rate": 1.725566734610117e-05, "loss": 0.571, "step": 15805 }, { "epoch": 0.4855466470064203, "grad_norm": 0.33081409335136414, "learning_rate": 1.7255334769454396e-05, "loss": 0.5629, "step": 15806 }, { "epoch": 0.4855773661413695, "grad_norm": 0.38928934931755066, "learning_rate": 1.725500217586228e-05, "loss": 0.5497, "step": 15807 }, { "epoch": 0.4856080852763186, "grad_norm": 0.3432006537914276, "learning_rate": 1.72546695653256e-05, "loss": 0.6426, "step": 15808 }, { "epoch": 0.4856388044112678, "grad_norm": 0.34252241253852844, "learning_rate": 1.7254336937845132e-05, "loss": 0.5296, "step": 15809 }, { "epoch": 0.48566952354621695, "grad_norm": 0.33474284410476685, "learning_rate": 1.7254004293421654e-05, "loss": 0.5654, "step": 15810 }, { "epoch": 0.4857002426811661, "grad_norm": 0.3339325785636902, "learning_rate": 1.7253671632055942e-05, "loss": 0.5835, "step": 15811 }, { "epoch": 0.48573096181611525, "grad_norm": 0.3814123868942261, "learning_rate": 1.7253338953748775e-05, "loss": 0.6117, "step": 15812 }, { "epoch": 0.4857616809510644, "grad_norm": 0.3390401601791382, "learning_rate": 1.7253006258500925e-05, "loss": 0.5743, "step": 15813 }, { "epoch": 0.4857924000860136, "grad_norm": 0.3994074761867523, "learning_rate": 1.7252673546313176e-05, "loss": 0.5634, "step": 15814 }, { "epoch": 0.4858231192209627, "grad_norm": 0.37351563572883606, "learning_rate": 1.7252340817186298e-05, "loss": 0.5656, "step": 15815 }, { "epoch": 0.4858538383559119, "grad_norm": 0.3309897184371948, "learning_rate": 1.7252008071121076e-05, "loss": 0.5493, "step": 15816 }, { "epoch": 0.48588455749086107, "grad_norm": 0.33505818247795105, "learning_rate": 1.7251675308118278e-05, "loss": 0.476, "step": 15817 }, { "epoch": 0.48591527662581024, "grad_norm": 0.39564138650894165, "learning_rate": 1.725134252817869e-05, "loss": 0.488, "step": 15818 }, { "epoch": 0.48594599576075936, "grad_norm": 0.33468788862228394, "learning_rate": 1.7251009731303082e-05, "loss": 0.5829, "step": 15819 }, { "epoch": 0.48597671489570854, "grad_norm": 0.32793134450912476, "learning_rate": 1.7250676917492236e-05, "loss": 0.5156, "step": 15820 }, { "epoch": 0.4860074340306577, "grad_norm": 0.35537654161453247, "learning_rate": 1.7250344086746925e-05, "loss": 0.5442, "step": 15821 }, { "epoch": 0.48603815316560683, "grad_norm": 0.33686569333076477, "learning_rate": 1.7250011239067934e-05, "loss": 0.5242, "step": 15822 }, { "epoch": 0.486068872300556, "grad_norm": 0.33361124992370605, "learning_rate": 1.7249678374456032e-05, "loss": 0.5339, "step": 15823 }, { "epoch": 0.4860995914355052, "grad_norm": 0.31749603152275085, "learning_rate": 1.7249345492912e-05, "loss": 0.5874, "step": 15824 }, { "epoch": 0.48613031057045436, "grad_norm": 0.35321488976478577, "learning_rate": 1.7249012594436613e-05, "loss": 0.5806, "step": 15825 }, { "epoch": 0.4861610297054035, "grad_norm": 2.367567777633667, "learning_rate": 1.7248679679030656e-05, "loss": 0.5039, "step": 15826 }, { "epoch": 0.48619174884035266, "grad_norm": 0.3937743306159973, "learning_rate": 1.7248346746694896e-05, "loss": 0.61, "step": 15827 }, { "epoch": 0.48622246797530183, "grad_norm": 0.3079245090484619, "learning_rate": 1.7248013797430122e-05, "loss": 0.4906, "step": 15828 }, { "epoch": 0.48625318711025095, "grad_norm": 0.35592636466026306, "learning_rate": 1.72476808312371e-05, "loss": 0.5336, "step": 15829 }, { "epoch": 0.4862839062452001, "grad_norm": 0.32471057772636414, "learning_rate": 1.7247347848116616e-05, "loss": 0.5375, "step": 15830 }, { "epoch": 0.4863146253801493, "grad_norm": 0.3378676772117615, "learning_rate": 1.7247014848069442e-05, "loss": 0.5836, "step": 15831 }, { "epoch": 0.4863453445150985, "grad_norm": 0.3423118591308594, "learning_rate": 1.7246681831096365e-05, "loss": 0.5357, "step": 15832 }, { "epoch": 0.4863760636500476, "grad_norm": 0.3314421772956848, "learning_rate": 1.724634879719815e-05, "loss": 0.6007, "step": 15833 }, { "epoch": 0.48640678278499677, "grad_norm": 0.324759304523468, "learning_rate": 1.7246015746375585e-05, "loss": 0.6213, "step": 15834 }, { "epoch": 0.48643750191994595, "grad_norm": 0.4024566411972046, "learning_rate": 1.7245682678629442e-05, "loss": 0.5202, "step": 15835 }, { "epoch": 0.48646822105489507, "grad_norm": 0.36835235357284546, "learning_rate": 1.7245349593960502e-05, "loss": 0.5685, "step": 15836 }, { "epoch": 0.48649894018984424, "grad_norm": 0.3754670321941376, "learning_rate": 1.724501649236954e-05, "loss": 0.5762, "step": 15837 }, { "epoch": 0.4865296593247934, "grad_norm": 0.3638819456100464, "learning_rate": 1.7244683373857342e-05, "loss": 0.5912, "step": 15838 }, { "epoch": 0.4865603784597426, "grad_norm": 0.37446022033691406, "learning_rate": 1.7244350238424674e-05, "loss": 0.5361, "step": 15839 }, { "epoch": 0.4865910975946917, "grad_norm": 0.3881780207157135, "learning_rate": 1.7244017086072323e-05, "loss": 0.603, "step": 15840 }, { "epoch": 0.4866218167296409, "grad_norm": 0.35924190282821655, "learning_rate": 1.7243683916801065e-05, "loss": 0.597, "step": 15841 }, { "epoch": 0.48665253586459006, "grad_norm": 0.39396539330482483, "learning_rate": 1.7243350730611675e-05, "loss": 0.6064, "step": 15842 }, { "epoch": 0.48668325499953924, "grad_norm": 0.4001755714416504, "learning_rate": 1.7243017527504934e-05, "loss": 0.5345, "step": 15843 }, { "epoch": 0.48671397413448836, "grad_norm": 0.3800857961177826, "learning_rate": 1.724268430748162e-05, "loss": 0.6216, "step": 15844 }, { "epoch": 0.48674469326943753, "grad_norm": 0.3597661554813385, "learning_rate": 1.7242351070542512e-05, "loss": 0.65, "step": 15845 }, { "epoch": 0.4867754124043867, "grad_norm": 0.3634174168109894, "learning_rate": 1.7242017816688387e-05, "loss": 0.6721, "step": 15846 }, { "epoch": 0.48680613153933583, "grad_norm": 0.38375216722488403, "learning_rate": 1.724168454592002e-05, "loss": 0.4762, "step": 15847 }, { "epoch": 0.486836850674285, "grad_norm": 0.3416902422904968, "learning_rate": 1.7241351258238197e-05, "loss": 0.57, "step": 15848 }, { "epoch": 0.4868675698092342, "grad_norm": 0.3577231466770172, "learning_rate": 1.724101795364369e-05, "loss": 0.5936, "step": 15849 }, { "epoch": 0.48689828894418336, "grad_norm": 0.38180527091026306, "learning_rate": 1.7240684632137282e-05, "loss": 0.6069, "step": 15850 }, { "epoch": 0.4869290080791325, "grad_norm": 0.3200504183769226, "learning_rate": 1.7240351293719748e-05, "loss": 0.5219, "step": 15851 }, { "epoch": 0.48695972721408165, "grad_norm": 0.3919501006603241, "learning_rate": 1.724001793839187e-05, "loss": 0.499, "step": 15852 }, { "epoch": 0.4869904463490308, "grad_norm": 0.4576396942138672, "learning_rate": 1.7239684566154423e-05, "loss": 0.5513, "step": 15853 }, { "epoch": 0.48702116548397995, "grad_norm": 0.32999634742736816, "learning_rate": 1.7239351177008187e-05, "loss": 0.5103, "step": 15854 }, { "epoch": 0.4870518846189291, "grad_norm": 0.33031463623046875, "learning_rate": 1.723901777095394e-05, "loss": 0.5188, "step": 15855 }, { "epoch": 0.4870826037538783, "grad_norm": 0.35687682032585144, "learning_rate": 1.723868434799246e-05, "loss": 0.5422, "step": 15856 }, { "epoch": 0.48711332288882747, "grad_norm": 0.32782912254333496, "learning_rate": 1.7238350908124532e-05, "loss": 0.4894, "step": 15857 }, { "epoch": 0.4871440420237766, "grad_norm": 0.37107110023498535, "learning_rate": 1.7238017451350928e-05, "loss": 0.5679, "step": 15858 }, { "epoch": 0.48717476115872577, "grad_norm": 0.3334061801433563, "learning_rate": 1.7237683977672427e-05, "loss": 0.5554, "step": 15859 }, { "epoch": 0.48720548029367494, "grad_norm": 0.35647809505462646, "learning_rate": 1.7237350487089812e-05, "loss": 0.534, "step": 15860 }, { "epoch": 0.4872361994286241, "grad_norm": 0.48343425989151, "learning_rate": 1.7237016979603854e-05, "loss": 0.5783, "step": 15861 }, { "epoch": 0.48726691856357324, "grad_norm": 0.3353699743747711, "learning_rate": 1.7236683455215344e-05, "loss": 0.5133, "step": 15862 }, { "epoch": 0.4872976376985224, "grad_norm": 0.34549129009246826, "learning_rate": 1.7236349913925054e-05, "loss": 0.5441, "step": 15863 }, { "epoch": 0.4873283568334716, "grad_norm": 0.3315117657184601, "learning_rate": 1.723601635573376e-05, "loss": 0.5424, "step": 15864 }, { "epoch": 0.4873590759684207, "grad_norm": 0.43855223059654236, "learning_rate": 1.7235682780642243e-05, "loss": 0.5976, "step": 15865 }, { "epoch": 0.4873897951033699, "grad_norm": 0.3304395079612732, "learning_rate": 1.7235349188651288e-05, "loss": 0.5031, "step": 15866 }, { "epoch": 0.48742051423831906, "grad_norm": 0.3741317391395569, "learning_rate": 1.7235015579761666e-05, "loss": 0.564, "step": 15867 }, { "epoch": 0.48745123337326823, "grad_norm": 0.3207203447818756, "learning_rate": 1.723468195397416e-05, "loss": 0.642, "step": 15868 }, { "epoch": 0.48748195250821735, "grad_norm": 0.3352016806602478, "learning_rate": 1.723434831128955e-05, "loss": 0.5412, "step": 15869 }, { "epoch": 0.48751267164316653, "grad_norm": 0.32562097907066345, "learning_rate": 1.723401465170862e-05, "loss": 0.4793, "step": 15870 }, { "epoch": 0.4875433907781157, "grad_norm": 0.40395215153694153, "learning_rate": 1.723368097523214e-05, "loss": 0.6683, "step": 15871 }, { "epoch": 0.4875741099130648, "grad_norm": 0.3444645404815674, "learning_rate": 1.7233347281860888e-05, "loss": 0.6402, "step": 15872 }, { "epoch": 0.487604829048014, "grad_norm": 0.7463846802711487, "learning_rate": 1.7233013571595652e-05, "loss": 0.5668, "step": 15873 }, { "epoch": 0.4876355481829632, "grad_norm": 0.35682377219200134, "learning_rate": 1.723267984443721e-05, "loss": 0.5809, "step": 15874 }, { "epoch": 0.48766626731791235, "grad_norm": 0.3387852907180786, "learning_rate": 1.7232346100386334e-05, "loss": 0.5198, "step": 15875 }, { "epoch": 0.48769698645286147, "grad_norm": 0.3553810119628906, "learning_rate": 1.7232012339443812e-05, "loss": 0.5289, "step": 15876 }, { "epoch": 0.48772770558781064, "grad_norm": 0.40099355578422546, "learning_rate": 1.723167856161042e-05, "loss": 0.5984, "step": 15877 }, { "epoch": 0.4877584247227598, "grad_norm": 0.36618998646736145, "learning_rate": 1.7231344766886936e-05, "loss": 0.6132, "step": 15878 }, { "epoch": 0.48778914385770894, "grad_norm": 0.36621296405792236, "learning_rate": 1.723101095527414e-05, "loss": 0.5477, "step": 15879 }, { "epoch": 0.4878198629926581, "grad_norm": 0.46913957595825195, "learning_rate": 1.7230677126772817e-05, "loss": 0.5443, "step": 15880 }, { "epoch": 0.4878505821276073, "grad_norm": 0.35819774866104126, "learning_rate": 1.723034328138374e-05, "loss": 0.6582, "step": 15881 }, { "epoch": 0.48788130126255647, "grad_norm": 0.5940369367599487, "learning_rate": 1.723000941910769e-05, "loss": 0.653, "step": 15882 }, { "epoch": 0.4879120203975056, "grad_norm": 0.3511312007904053, "learning_rate": 1.7229675539945452e-05, "loss": 0.488, "step": 15883 }, { "epoch": 0.48794273953245476, "grad_norm": 0.36265674233436584, "learning_rate": 1.72293416438978e-05, "loss": 0.5873, "step": 15884 }, { "epoch": 0.48797345866740394, "grad_norm": 0.35298094153404236, "learning_rate": 1.7229007730965513e-05, "loss": 0.4741, "step": 15885 }, { "epoch": 0.4880041778023531, "grad_norm": 0.32671990990638733, "learning_rate": 1.722867380114938e-05, "loss": 0.548, "step": 15886 }, { "epoch": 0.48803489693730223, "grad_norm": 0.40459007024765015, "learning_rate": 1.722833985445017e-05, "loss": 0.5353, "step": 15887 }, { "epoch": 0.4880656160722514, "grad_norm": 0.3142535090446472, "learning_rate": 1.7228005890868666e-05, "loss": 0.5382, "step": 15888 }, { "epoch": 0.4880963352072006, "grad_norm": 0.3559851050376892, "learning_rate": 1.7227671910405654e-05, "loss": 0.6412, "step": 15889 }, { "epoch": 0.4881270543421497, "grad_norm": 0.34887343645095825, "learning_rate": 1.7227337913061905e-05, "loss": 0.6142, "step": 15890 }, { "epoch": 0.4881577734770989, "grad_norm": 0.3352544903755188, "learning_rate": 1.7227003898838205e-05, "loss": 0.5521, "step": 15891 }, { "epoch": 0.48818849261204805, "grad_norm": 0.39563021063804626, "learning_rate": 1.722666986773533e-05, "loss": 0.5544, "step": 15892 }, { "epoch": 0.48821921174699723, "grad_norm": 0.3448066711425781, "learning_rate": 1.722633581975407e-05, "loss": 0.5261, "step": 15893 }, { "epoch": 0.48824993088194635, "grad_norm": 0.3542270362377167, "learning_rate": 1.7226001754895194e-05, "loss": 0.5841, "step": 15894 }, { "epoch": 0.4882806500168955, "grad_norm": 0.32017868757247925, "learning_rate": 1.7225667673159485e-05, "loss": 0.4991, "step": 15895 }, { "epoch": 0.4883113691518447, "grad_norm": 0.3435508608818054, "learning_rate": 1.7225333574547723e-05, "loss": 0.6109, "step": 15896 }, { "epoch": 0.4883420882867938, "grad_norm": 0.37082090973854065, "learning_rate": 1.7224999459060696e-05, "loss": 0.6194, "step": 15897 }, { "epoch": 0.488372807421743, "grad_norm": 0.3775657117366791, "learning_rate": 1.7224665326699173e-05, "loss": 0.5796, "step": 15898 }, { "epoch": 0.48840352655669217, "grad_norm": 0.33849036693573, "learning_rate": 1.722433117746394e-05, "loss": 0.5733, "step": 15899 }, { "epoch": 0.48843424569164134, "grad_norm": 0.34664633870124817, "learning_rate": 1.722399701135578e-05, "loss": 0.6114, "step": 15900 }, { "epoch": 0.48846496482659046, "grad_norm": 0.38498640060424805, "learning_rate": 1.722366282837547e-05, "loss": 0.5479, "step": 15901 }, { "epoch": 0.48849568396153964, "grad_norm": 0.3946678042411804, "learning_rate": 1.7223328628523788e-05, "loss": 0.5543, "step": 15902 }, { "epoch": 0.4885264030964888, "grad_norm": 0.39596956968307495, "learning_rate": 1.722299441180152e-05, "loss": 0.605, "step": 15903 }, { "epoch": 0.488557122231438, "grad_norm": 0.3837408125400543, "learning_rate": 1.722266017820944e-05, "loss": 0.5431, "step": 15904 }, { "epoch": 0.4885878413663871, "grad_norm": 0.3592754006385803, "learning_rate": 1.722232592774834e-05, "loss": 0.5169, "step": 15905 }, { "epoch": 0.4886185605013363, "grad_norm": 0.3656991124153137, "learning_rate": 1.722199166041899e-05, "loss": 0.53, "step": 15906 }, { "epoch": 0.48864927963628546, "grad_norm": 0.34409329295158386, "learning_rate": 1.7221657376222173e-05, "loss": 0.5773, "step": 15907 }, { "epoch": 0.4886799987712346, "grad_norm": 0.37829023599624634, "learning_rate": 1.722132307515867e-05, "loss": 0.6214, "step": 15908 }, { "epoch": 0.48871071790618376, "grad_norm": 0.3640749752521515, "learning_rate": 1.7220988757229268e-05, "loss": 0.5434, "step": 15909 }, { "epoch": 0.48874143704113293, "grad_norm": 0.4148114323616028, "learning_rate": 1.7220654422434738e-05, "loss": 0.6619, "step": 15910 }, { "epoch": 0.4887721561760821, "grad_norm": 0.34272676706314087, "learning_rate": 1.7220320070775864e-05, "loss": 0.5431, "step": 15911 }, { "epoch": 0.4888028753110312, "grad_norm": 0.3647187352180481, "learning_rate": 1.7219985702253427e-05, "loss": 0.6327, "step": 15912 }, { "epoch": 0.4888335944459804, "grad_norm": 0.35863637924194336, "learning_rate": 1.7219651316868215e-05, "loss": 0.5217, "step": 15913 }, { "epoch": 0.4888643135809296, "grad_norm": 0.356777161359787, "learning_rate": 1.7219316914621e-05, "loss": 0.6392, "step": 15914 }, { "epoch": 0.4888950327158787, "grad_norm": 0.33717381954193115, "learning_rate": 1.7218982495512566e-05, "loss": 0.5728, "step": 15915 }, { "epoch": 0.48892575185082787, "grad_norm": 0.35855066776275635, "learning_rate": 1.72186480595437e-05, "loss": 0.5042, "step": 15916 }, { "epoch": 0.48895647098577705, "grad_norm": 0.35962846875190735, "learning_rate": 1.7218313606715168e-05, "loss": 0.5168, "step": 15917 }, { "epoch": 0.4889871901207262, "grad_norm": 0.36661437153816223, "learning_rate": 1.721797913702776e-05, "loss": 0.5577, "step": 15918 }, { "epoch": 0.48901790925567534, "grad_norm": 0.3442879915237427, "learning_rate": 1.7217644650482265e-05, "loss": 0.5781, "step": 15919 }, { "epoch": 0.4890486283906245, "grad_norm": 0.35320672392845154, "learning_rate": 1.7217310147079454e-05, "loss": 0.4955, "step": 15920 }, { "epoch": 0.4890793475255737, "grad_norm": 0.33456552028656006, "learning_rate": 1.721697562682011e-05, "loss": 0.578, "step": 15921 }, { "epoch": 0.4891100666605228, "grad_norm": 0.3923182785511017, "learning_rate": 1.7216641089705017e-05, "loss": 0.6727, "step": 15922 }, { "epoch": 0.489140785795472, "grad_norm": 0.3443708121776581, "learning_rate": 1.7216306535734956e-05, "loss": 0.5877, "step": 15923 }, { "epoch": 0.48917150493042116, "grad_norm": 0.3277374804019928, "learning_rate": 1.7215971964910704e-05, "loss": 0.5307, "step": 15924 }, { "epoch": 0.48920222406537034, "grad_norm": 0.31702080368995667, "learning_rate": 1.7215637377233048e-05, "loss": 0.6148, "step": 15925 }, { "epoch": 0.48923294320031946, "grad_norm": 0.34553202986717224, "learning_rate": 1.7215302772702767e-05, "loss": 0.6023, "step": 15926 }, { "epoch": 0.48926366233526863, "grad_norm": 0.32214611768722534, "learning_rate": 1.721496815132064e-05, "loss": 0.5307, "step": 15927 }, { "epoch": 0.4892943814702178, "grad_norm": 0.3310830891132355, "learning_rate": 1.721463351308745e-05, "loss": 0.6052, "step": 15928 }, { "epoch": 0.489325100605167, "grad_norm": 0.3420695960521698, "learning_rate": 1.7214298858003982e-05, "loss": 0.5951, "step": 15929 }, { "epoch": 0.4893558197401161, "grad_norm": 0.35022222995758057, "learning_rate": 1.7213964186071014e-05, "loss": 0.5641, "step": 15930 }, { "epoch": 0.4893865388750653, "grad_norm": 0.5096449851989746, "learning_rate": 1.721362949728933e-05, "loss": 0.5798, "step": 15931 }, { "epoch": 0.48941725801001446, "grad_norm": 0.35328882932662964, "learning_rate": 1.7213294791659714e-05, "loss": 0.5417, "step": 15932 }, { "epoch": 0.4894479771449636, "grad_norm": 0.34181421995162964, "learning_rate": 1.7212960069182937e-05, "loss": 0.6821, "step": 15933 }, { "epoch": 0.48947869627991275, "grad_norm": 0.35211294889450073, "learning_rate": 1.7212625329859794e-05, "loss": 0.6489, "step": 15934 }, { "epoch": 0.4895094154148619, "grad_norm": 0.3599916100502014, "learning_rate": 1.721229057369106e-05, "loss": 0.5301, "step": 15935 }, { "epoch": 0.4895401345498111, "grad_norm": 0.36995020508766174, "learning_rate": 1.7211955800677515e-05, "loss": 0.547, "step": 15936 }, { "epoch": 0.4895708536847602, "grad_norm": 0.33658236265182495, "learning_rate": 1.7211621010819942e-05, "loss": 0.5695, "step": 15937 }, { "epoch": 0.4896015728197094, "grad_norm": 0.35510754585266113, "learning_rate": 1.721128620411913e-05, "loss": 0.5609, "step": 15938 }, { "epoch": 0.48963229195465857, "grad_norm": 0.39970526099205017, "learning_rate": 1.721095138057585e-05, "loss": 0.6374, "step": 15939 }, { "epoch": 0.4896630110896077, "grad_norm": 0.3594372868537903, "learning_rate": 1.7210616540190893e-05, "loss": 0.5748, "step": 15940 }, { "epoch": 0.48969373022455687, "grad_norm": 0.40590742230415344, "learning_rate": 1.7210281682965035e-05, "loss": 0.5243, "step": 15941 }, { "epoch": 0.48972444935950604, "grad_norm": 0.3372746407985687, "learning_rate": 1.720994680889906e-05, "loss": 0.5931, "step": 15942 }, { "epoch": 0.4897551684944552, "grad_norm": 0.3546251952648163, "learning_rate": 1.7209611917993755e-05, "loss": 0.5209, "step": 15943 }, { "epoch": 0.48978588762940434, "grad_norm": 0.33368533849716187, "learning_rate": 1.7209277010249897e-05, "loss": 0.5773, "step": 15944 }, { "epoch": 0.4898166067643535, "grad_norm": 0.37039169669151306, "learning_rate": 1.7208942085668264e-05, "loss": 0.5738, "step": 15945 }, { "epoch": 0.4898473258993027, "grad_norm": 0.3568626344203949, "learning_rate": 1.720860714424965e-05, "loss": 0.5345, "step": 15946 }, { "epoch": 0.4898780450342518, "grad_norm": 0.3629536032676697, "learning_rate": 1.7208272185994824e-05, "loss": 0.5345, "step": 15947 }, { "epoch": 0.489908764169201, "grad_norm": 0.3982233703136444, "learning_rate": 1.7207937210904583e-05, "loss": 0.5649, "step": 15948 }, { "epoch": 0.48993948330415016, "grad_norm": 0.3747924268245697, "learning_rate": 1.7207602218979694e-05, "loss": 0.5651, "step": 15949 }, { "epoch": 0.48997020243909933, "grad_norm": 0.3655373156070709, "learning_rate": 1.720726721022095e-05, "loss": 0.5777, "step": 15950 }, { "epoch": 0.49000092157404845, "grad_norm": 0.3832170069217682, "learning_rate": 1.720693218462913e-05, "loss": 0.6228, "step": 15951 }, { "epoch": 0.49003164070899763, "grad_norm": 0.3464858829975128, "learning_rate": 1.7206597142205015e-05, "loss": 0.5214, "step": 15952 }, { "epoch": 0.4900623598439468, "grad_norm": 0.40284985303878784, "learning_rate": 1.720626208294939e-05, "loss": 0.5047, "step": 15953 }, { "epoch": 0.490093078978896, "grad_norm": 0.3703502118587494, "learning_rate": 1.7205927006863038e-05, "loss": 0.5994, "step": 15954 }, { "epoch": 0.4901237981138451, "grad_norm": 0.33234894275665283, "learning_rate": 1.7205591913946736e-05, "loss": 0.5281, "step": 15955 }, { "epoch": 0.4901545172487943, "grad_norm": 0.404691219329834, "learning_rate": 1.7205256804201275e-05, "loss": 0.5725, "step": 15956 }, { "epoch": 0.49018523638374345, "grad_norm": 0.31443357467651367, "learning_rate": 1.720492167762743e-05, "loss": 0.576, "step": 15957 }, { "epoch": 0.49021595551869257, "grad_norm": 0.4067179560661316, "learning_rate": 1.720458653422599e-05, "loss": 0.5589, "step": 15958 }, { "epoch": 0.49024667465364175, "grad_norm": 0.3446801006793976, "learning_rate": 1.7204251373997735e-05, "loss": 0.5724, "step": 15959 }, { "epoch": 0.4902773937885909, "grad_norm": 0.34321707487106323, "learning_rate": 1.7203916196943448e-05, "loss": 0.5256, "step": 15960 }, { "epoch": 0.4903081129235401, "grad_norm": 0.384342759847641, "learning_rate": 1.720358100306391e-05, "loss": 0.5995, "step": 15961 }, { "epoch": 0.4903388320584892, "grad_norm": 0.3465743660926819, "learning_rate": 1.7203245792359906e-05, "loss": 0.6071, "step": 15962 }, { "epoch": 0.4903695511934384, "grad_norm": 0.46132174134254456, "learning_rate": 1.7202910564832217e-05, "loss": 0.6063, "step": 15963 }, { "epoch": 0.49040027032838757, "grad_norm": 0.32500776648521423, "learning_rate": 1.7202575320481633e-05, "loss": 0.5509, "step": 15964 }, { "epoch": 0.4904309894633367, "grad_norm": 0.3419919013977051, "learning_rate": 1.7202240059308924e-05, "loss": 0.5071, "step": 15965 }, { "epoch": 0.49046170859828586, "grad_norm": 0.3790881037712097, "learning_rate": 1.7201904781314883e-05, "loss": 0.6765, "step": 15966 }, { "epoch": 0.49049242773323504, "grad_norm": 0.314590185880661, "learning_rate": 1.7201569486500292e-05, "loss": 0.5918, "step": 15967 }, { "epoch": 0.4905231468681842, "grad_norm": 0.3656451404094696, "learning_rate": 1.720123417486593e-05, "loss": 0.6002, "step": 15968 }, { "epoch": 0.49055386600313333, "grad_norm": 0.3239993453025818, "learning_rate": 1.7200898846412585e-05, "loss": 0.528, "step": 15969 }, { "epoch": 0.4905845851380825, "grad_norm": 0.387391597032547, "learning_rate": 1.7200563501141038e-05, "loss": 0.5873, "step": 15970 }, { "epoch": 0.4906153042730317, "grad_norm": 0.32247036695480347, "learning_rate": 1.720022813905207e-05, "loss": 0.5477, "step": 15971 }, { "epoch": 0.49064602340798086, "grad_norm": 0.3978271782398224, "learning_rate": 1.719989276014647e-05, "loss": 0.5197, "step": 15972 }, { "epoch": 0.49067674254293, "grad_norm": 0.3417639136314392, "learning_rate": 1.7199557364425015e-05, "loss": 0.5747, "step": 15973 }, { "epoch": 0.49070746167787915, "grad_norm": 0.38877373933792114, "learning_rate": 1.719922195188849e-05, "loss": 0.5572, "step": 15974 }, { "epoch": 0.49073818081282833, "grad_norm": 0.41728532314300537, "learning_rate": 1.7198886522537682e-05, "loss": 0.5796, "step": 15975 }, { "epoch": 0.49076889994777745, "grad_norm": 2.6907005310058594, "learning_rate": 1.719855107637337e-05, "loss": 0.5486, "step": 15976 }, { "epoch": 0.4907996190827266, "grad_norm": 0.638908863067627, "learning_rate": 1.7198215613396336e-05, "loss": 0.5757, "step": 15977 }, { "epoch": 0.4908303382176758, "grad_norm": 0.3629932403564453, "learning_rate": 1.719788013360737e-05, "loss": 0.6705, "step": 15978 }, { "epoch": 0.490861057352625, "grad_norm": 0.33054065704345703, "learning_rate": 1.7197544637007255e-05, "loss": 0.4825, "step": 15979 }, { "epoch": 0.4908917764875741, "grad_norm": 0.34882745146751404, "learning_rate": 1.7197209123596767e-05, "loss": 0.5753, "step": 15980 }, { "epoch": 0.49092249562252327, "grad_norm": 0.3333929479122162, "learning_rate": 1.7196873593376697e-05, "loss": 0.4786, "step": 15981 }, { "epoch": 0.49095321475747244, "grad_norm": 0.37686389684677124, "learning_rate": 1.7196538046347825e-05, "loss": 0.5443, "step": 15982 }, { "epoch": 0.49098393389242156, "grad_norm": 0.39397892355918884, "learning_rate": 1.7196202482510938e-05, "loss": 0.5786, "step": 15983 }, { "epoch": 0.49101465302737074, "grad_norm": 0.34501469135284424, "learning_rate": 1.7195866901866817e-05, "loss": 0.5406, "step": 15984 }, { "epoch": 0.4910453721623199, "grad_norm": 0.34266284108161926, "learning_rate": 1.7195531304416245e-05, "loss": 0.4599, "step": 15985 }, { "epoch": 0.4910760912972691, "grad_norm": 0.33105459809303284, "learning_rate": 1.7195195690160007e-05, "loss": 0.56, "step": 15986 }, { "epoch": 0.4911068104322182, "grad_norm": 0.5027540922164917, "learning_rate": 1.7194860059098886e-05, "loss": 0.4995, "step": 15987 }, { "epoch": 0.4911375295671674, "grad_norm": 0.3897055983543396, "learning_rate": 1.7194524411233668e-05, "loss": 0.5434, "step": 15988 }, { "epoch": 0.49116824870211656, "grad_norm": 0.3765660524368286, "learning_rate": 1.7194188746565135e-05, "loss": 0.587, "step": 15989 }, { "epoch": 0.4911989678370657, "grad_norm": 0.3867647647857666, "learning_rate": 1.719385306509407e-05, "loss": 0.6191, "step": 15990 }, { "epoch": 0.49122968697201486, "grad_norm": 0.3250269293785095, "learning_rate": 1.7193517366821266e-05, "loss": 0.581, "step": 15991 }, { "epoch": 0.49126040610696403, "grad_norm": 0.3176504373550415, "learning_rate": 1.7193181651747493e-05, "loss": 0.641, "step": 15992 }, { "epoch": 0.4912911252419132, "grad_norm": 0.36668869853019714, "learning_rate": 1.7192845919873548e-05, "loss": 0.4744, "step": 15993 }, { "epoch": 0.4913218443768623, "grad_norm": 0.3366771936416626, "learning_rate": 1.7192510171200205e-05, "loss": 0.4658, "step": 15994 }, { "epoch": 0.4913525635118115, "grad_norm": 0.3701540231704712, "learning_rate": 1.719217440572825e-05, "loss": 0.5502, "step": 15995 }, { "epoch": 0.4913832826467607, "grad_norm": 0.38739246129989624, "learning_rate": 1.7191838623458473e-05, "loss": 0.673, "step": 15996 }, { "epoch": 0.49141400178170985, "grad_norm": 0.4320903420448303, "learning_rate": 1.7191502824391653e-05, "loss": 0.5727, "step": 15997 }, { "epoch": 0.491444720916659, "grad_norm": 0.3210753798484802, "learning_rate": 1.7191167008528577e-05, "loss": 0.6042, "step": 15998 }, { "epoch": 0.49147544005160815, "grad_norm": 0.33243492245674133, "learning_rate": 1.7190831175870026e-05, "loss": 0.5327, "step": 15999 }, { "epoch": 0.4915061591865573, "grad_norm": 0.35336172580718994, "learning_rate": 1.719049532641679e-05, "loss": 0.5363, "step": 16000 }, { "epoch": 0.49153687832150644, "grad_norm": 0.40095287561416626, "learning_rate": 1.7190159460169647e-05, "loss": 0.654, "step": 16001 }, { "epoch": 0.4915675974564556, "grad_norm": 0.3448103964328766, "learning_rate": 1.7189823577129384e-05, "loss": 0.536, "step": 16002 }, { "epoch": 0.4915983165914048, "grad_norm": 0.3580012917518616, "learning_rate": 1.7189487677296787e-05, "loss": 0.618, "step": 16003 }, { "epoch": 0.49162903572635397, "grad_norm": 0.36454641819000244, "learning_rate": 1.718915176067264e-05, "loss": 0.6052, "step": 16004 }, { "epoch": 0.4916597548613031, "grad_norm": 0.35853850841522217, "learning_rate": 1.7188815827257728e-05, "loss": 0.5319, "step": 16005 }, { "epoch": 0.49169047399625226, "grad_norm": 0.3889722228050232, "learning_rate": 1.7188479877052834e-05, "loss": 0.5022, "step": 16006 }, { "epoch": 0.49172119313120144, "grad_norm": 0.34823039174079895, "learning_rate": 1.718814391005874e-05, "loss": 0.5774, "step": 16007 }, { "epoch": 0.49175191226615056, "grad_norm": 2.135704517364502, "learning_rate": 1.7187807926276238e-05, "loss": 0.6254, "step": 16008 }, { "epoch": 0.49178263140109973, "grad_norm": 0.344096302986145, "learning_rate": 1.7187471925706108e-05, "loss": 0.5091, "step": 16009 }, { "epoch": 0.4918133505360489, "grad_norm": 0.36905211210250854, "learning_rate": 1.7187135908349133e-05, "loss": 0.6049, "step": 16010 }, { "epoch": 0.4918440696709981, "grad_norm": 0.3473510444164276, "learning_rate": 1.7186799874206103e-05, "loss": 0.5902, "step": 16011 }, { "epoch": 0.4918747888059472, "grad_norm": 0.35697996616363525, "learning_rate": 1.7186463823277797e-05, "loss": 0.576, "step": 16012 }, { "epoch": 0.4919055079408964, "grad_norm": 0.32782480120658875, "learning_rate": 1.7186127755565002e-05, "loss": 0.6106, "step": 16013 }, { "epoch": 0.49193622707584556, "grad_norm": 0.38466644287109375, "learning_rate": 1.718579167106851e-05, "loss": 0.574, "step": 16014 }, { "epoch": 0.49196694621079473, "grad_norm": 0.3448604941368103, "learning_rate": 1.7185455569789096e-05, "loss": 0.6363, "step": 16015 }, { "epoch": 0.49199766534574385, "grad_norm": 0.35466739535331726, "learning_rate": 1.7185119451727547e-05, "loss": 0.5555, "step": 16016 }, { "epoch": 0.492028384480693, "grad_norm": 0.3831886947154999, "learning_rate": 1.718478331688465e-05, "loss": 0.5152, "step": 16017 }, { "epoch": 0.4920591036156422, "grad_norm": 0.4321788251399994, "learning_rate": 1.7184447165261193e-05, "loss": 0.5393, "step": 16018 }, { "epoch": 0.4920898227505913, "grad_norm": 0.3472183048725128, "learning_rate": 1.7184110996857955e-05, "loss": 0.5427, "step": 16019 }, { "epoch": 0.4921205418855405, "grad_norm": 0.37314435839653015, "learning_rate": 1.7183774811675724e-05, "loss": 0.5798, "step": 16020 }, { "epoch": 0.49215126102048967, "grad_norm": 0.3447956442832947, "learning_rate": 1.7183438609715287e-05, "loss": 0.5507, "step": 16021 }, { "epoch": 0.49218198015543885, "grad_norm": 0.39391323924064636, "learning_rate": 1.7183102390977425e-05, "loss": 0.5768, "step": 16022 }, { "epoch": 0.49221269929038797, "grad_norm": 0.34334754943847656, "learning_rate": 1.7182766155462927e-05, "loss": 0.4469, "step": 16023 }, { "epoch": 0.49224341842533714, "grad_norm": 0.35707321763038635, "learning_rate": 1.718242990317258e-05, "loss": 0.6259, "step": 16024 }, { "epoch": 0.4922741375602863, "grad_norm": 0.31464534997940063, "learning_rate": 1.718209363410716e-05, "loss": 0.5927, "step": 16025 }, { "epoch": 0.49230485669523544, "grad_norm": 0.4825928509235382, "learning_rate": 1.7181757348267464e-05, "loss": 0.6371, "step": 16026 }, { "epoch": 0.4923355758301846, "grad_norm": 0.33679071068763733, "learning_rate": 1.718142104565427e-05, "loss": 0.521, "step": 16027 }, { "epoch": 0.4923662949651338, "grad_norm": 0.39400866627693176, "learning_rate": 1.718108472626837e-05, "loss": 0.5891, "step": 16028 }, { "epoch": 0.49239701410008296, "grad_norm": 0.40643712878227234, "learning_rate": 1.718074839011054e-05, "loss": 0.5879, "step": 16029 }, { "epoch": 0.4924277332350321, "grad_norm": 0.36848461627960205, "learning_rate": 1.718041203718157e-05, "loss": 0.6275, "step": 16030 }, { "epoch": 0.49245845236998126, "grad_norm": 0.38838720321655273, "learning_rate": 1.718007566748225e-05, "loss": 0.5839, "step": 16031 }, { "epoch": 0.49248917150493043, "grad_norm": 0.34465518593788147, "learning_rate": 1.717973928101336e-05, "loss": 0.6502, "step": 16032 }, { "epoch": 0.49251989063987955, "grad_norm": 0.34617599844932556, "learning_rate": 1.7179402877775685e-05, "loss": 0.5916, "step": 16033 }, { "epoch": 0.49255060977482873, "grad_norm": 0.352142333984375, "learning_rate": 1.7179066457770015e-05, "loss": 0.5434, "step": 16034 }, { "epoch": 0.4925813289097779, "grad_norm": 0.4902478754520416, "learning_rate": 1.7178730020997138e-05, "loss": 0.6307, "step": 16035 }, { "epoch": 0.4926120480447271, "grad_norm": 0.9816870093345642, "learning_rate": 1.717839356745783e-05, "loss": 0.5148, "step": 16036 }, { "epoch": 0.4926427671796762, "grad_norm": 0.3303186893463135, "learning_rate": 1.7178057097152887e-05, "loss": 0.4978, "step": 16037 }, { "epoch": 0.4926734863146254, "grad_norm": 0.4133071005344391, "learning_rate": 1.7177720610083085e-05, "loss": 0.5698, "step": 16038 }, { "epoch": 0.49270420544957455, "grad_norm": 0.46532776951789856, "learning_rate": 1.717738410624922e-05, "loss": 0.5392, "step": 16039 }, { "epoch": 0.4927349245845237, "grad_norm": 0.5220268368721008, "learning_rate": 1.717704758565207e-05, "loss": 0.5472, "step": 16040 }, { "epoch": 0.49276564371947285, "grad_norm": 0.32274192571640015, "learning_rate": 1.7176711048292426e-05, "loss": 0.5716, "step": 16041 }, { "epoch": 0.492796362854422, "grad_norm": 0.3315335810184479, "learning_rate": 1.7176374494171072e-05, "loss": 0.5312, "step": 16042 }, { "epoch": 0.4928270819893712, "grad_norm": 0.34962522983551025, "learning_rate": 1.7176037923288793e-05, "loss": 0.6082, "step": 16043 }, { "epoch": 0.4928578011243203, "grad_norm": 0.37167149782180786, "learning_rate": 1.7175701335646375e-05, "loss": 0.6236, "step": 16044 }, { "epoch": 0.4928885202592695, "grad_norm": 0.43113940954208374, "learning_rate": 1.717536473124461e-05, "loss": 0.6023, "step": 16045 }, { "epoch": 0.49291923939421867, "grad_norm": 0.3376847803592682, "learning_rate": 1.717502811008428e-05, "loss": 0.5826, "step": 16046 }, { "epoch": 0.49294995852916784, "grad_norm": 0.3770450949668884, "learning_rate": 1.7174691472166162e-05, "loss": 0.5372, "step": 16047 }, { "epoch": 0.49298067766411696, "grad_norm": 0.3559330105781555, "learning_rate": 1.717435481749106e-05, "loss": 0.5777, "step": 16048 }, { "epoch": 0.49301139679906614, "grad_norm": 0.3275180757045746, "learning_rate": 1.7174018146059747e-05, "loss": 0.5757, "step": 16049 }, { "epoch": 0.4930421159340153, "grad_norm": 0.35165905952453613, "learning_rate": 1.7173681457873014e-05, "loss": 0.5506, "step": 16050 }, { "epoch": 0.49307283506896443, "grad_norm": 0.45063549280166626, "learning_rate": 1.7173344752931647e-05, "loss": 0.5324, "step": 16051 }, { "epoch": 0.4931035542039136, "grad_norm": 0.36685696244239807, "learning_rate": 1.7173008031236432e-05, "loss": 0.4984, "step": 16052 }, { "epoch": 0.4931342733388628, "grad_norm": 0.3967823386192322, "learning_rate": 1.717267129278816e-05, "loss": 0.6595, "step": 16053 }, { "epoch": 0.49316499247381196, "grad_norm": 0.3795524835586548, "learning_rate": 1.7172334537587608e-05, "loss": 0.5214, "step": 16054 }, { "epoch": 0.4931957116087611, "grad_norm": 0.3710382878780365, "learning_rate": 1.717199776563557e-05, "loss": 0.6455, "step": 16055 }, { "epoch": 0.49322643074371025, "grad_norm": 0.3118261396884918, "learning_rate": 1.717166097693283e-05, "loss": 0.5051, "step": 16056 }, { "epoch": 0.49325714987865943, "grad_norm": 0.3151918649673462, "learning_rate": 1.7171324171480175e-05, "loss": 0.4948, "step": 16057 }, { "epoch": 0.4932878690136086, "grad_norm": 0.3697718381881714, "learning_rate": 1.7170987349278393e-05, "loss": 0.6015, "step": 16058 }, { "epoch": 0.4933185881485577, "grad_norm": 0.4274137020111084, "learning_rate": 1.7170650510328265e-05, "loss": 0.6054, "step": 16059 }, { "epoch": 0.4933493072835069, "grad_norm": 0.401314914226532, "learning_rate": 1.7170313654630585e-05, "loss": 0.5725, "step": 16060 }, { "epoch": 0.4933800264184561, "grad_norm": 0.35845813155174255, "learning_rate": 1.716997678218614e-05, "loss": 0.5674, "step": 16061 }, { "epoch": 0.4934107455534052, "grad_norm": 0.40052810311317444, "learning_rate": 1.7169639892995705e-05, "loss": 0.5766, "step": 16062 }, { "epoch": 0.49344146468835437, "grad_norm": 0.33729133009910583, "learning_rate": 1.716930298706008e-05, "loss": 0.5981, "step": 16063 }, { "epoch": 0.49347218382330355, "grad_norm": 0.37705668807029724, "learning_rate": 1.7168966064380052e-05, "loss": 0.553, "step": 16064 }, { "epoch": 0.4935029029582527, "grad_norm": 0.34886062145233154, "learning_rate": 1.7168629124956394e-05, "loss": 0.5247, "step": 16065 }, { "epoch": 0.49353362209320184, "grad_norm": 0.3646026849746704, "learning_rate": 1.716829216878991e-05, "loss": 0.5737, "step": 16066 }, { "epoch": 0.493564341228151, "grad_norm": 0.3678354322910309, "learning_rate": 1.7167955195881372e-05, "loss": 0.5171, "step": 16067 }, { "epoch": 0.4935950603631002, "grad_norm": 0.36062222719192505, "learning_rate": 1.716761820623158e-05, "loss": 0.6057, "step": 16068 }, { "epoch": 0.4936257794980493, "grad_norm": 0.35474568605422974, "learning_rate": 1.716728119984131e-05, "loss": 0.5549, "step": 16069 }, { "epoch": 0.4936564986329985, "grad_norm": 0.35734543204307556, "learning_rate": 1.716694417671136e-05, "loss": 0.5149, "step": 16070 }, { "epoch": 0.49368721776794766, "grad_norm": 0.3781082034111023, "learning_rate": 1.7166607136842504e-05, "loss": 0.555, "step": 16071 }, { "epoch": 0.49371793690289684, "grad_norm": 0.42131608724594116, "learning_rate": 1.716627008023554e-05, "loss": 0.5474, "step": 16072 }, { "epoch": 0.49374865603784596, "grad_norm": 0.32663312554359436, "learning_rate": 1.7165933006891258e-05, "loss": 0.5163, "step": 16073 }, { "epoch": 0.49377937517279513, "grad_norm": 0.38046297430992126, "learning_rate": 1.716559591681043e-05, "loss": 0.5158, "step": 16074 }, { "epoch": 0.4938100943077443, "grad_norm": 0.416147917509079, "learning_rate": 1.7165258809993858e-05, "loss": 0.5547, "step": 16075 }, { "epoch": 0.4938408134426934, "grad_norm": 0.34778517484664917, "learning_rate": 1.716492168644232e-05, "loss": 0.5739, "step": 16076 }, { "epoch": 0.4938715325776426, "grad_norm": 0.3457213044166565, "learning_rate": 1.716458454615661e-05, "loss": 0.6156, "step": 16077 }, { "epoch": 0.4939022517125918, "grad_norm": 0.3516850173473358, "learning_rate": 1.716424738913751e-05, "loss": 0.5882, "step": 16078 }, { "epoch": 0.49393297084754095, "grad_norm": 0.37466177344322205, "learning_rate": 1.7163910215385812e-05, "loss": 0.5476, "step": 16079 }, { "epoch": 0.4939636899824901, "grad_norm": 0.3767796754837036, "learning_rate": 1.7163573024902297e-05, "loss": 0.5446, "step": 16080 }, { "epoch": 0.49399440911743925, "grad_norm": 0.44943153858184814, "learning_rate": 1.716323581768776e-05, "loss": 0.66, "step": 16081 }, { "epoch": 0.4940251282523884, "grad_norm": 0.3208168148994446, "learning_rate": 1.7162898593742987e-05, "loss": 0.5191, "step": 16082 }, { "epoch": 0.4940558473873376, "grad_norm": 0.36784353852272034, "learning_rate": 1.716256135306876e-05, "loss": 0.6083, "step": 16083 }, { "epoch": 0.4940865665222867, "grad_norm": 0.39920157194137573, "learning_rate": 1.7162224095665875e-05, "loss": 0.5462, "step": 16084 }, { "epoch": 0.4941172856572359, "grad_norm": 0.355293869972229, "learning_rate": 1.7161886821535115e-05, "loss": 0.4973, "step": 16085 }, { "epoch": 0.49414800479218507, "grad_norm": 0.32970768213272095, "learning_rate": 1.7161549530677267e-05, "loss": 0.4866, "step": 16086 }, { "epoch": 0.4941787239271342, "grad_norm": 0.3696924149990082, "learning_rate": 1.7161212223093117e-05, "loss": 0.5848, "step": 16087 }, { "epoch": 0.49420944306208336, "grad_norm": 0.3471986651420593, "learning_rate": 1.716087489878346e-05, "loss": 0.5317, "step": 16088 }, { "epoch": 0.49424016219703254, "grad_norm": 0.3719319701194763, "learning_rate": 1.716053755774908e-05, "loss": 0.6401, "step": 16089 }, { "epoch": 0.4942708813319817, "grad_norm": 0.33937063813209534, "learning_rate": 1.7160200199990763e-05, "loss": 0.4947, "step": 16090 }, { "epoch": 0.49430160046693083, "grad_norm": 0.5480024814605713, "learning_rate": 1.7159862825509295e-05, "loss": 0.509, "step": 16091 }, { "epoch": 0.49433231960188, "grad_norm": 0.3286420702934265, "learning_rate": 1.715952543430547e-05, "loss": 0.572, "step": 16092 }, { "epoch": 0.4943630387368292, "grad_norm": 0.3450336456298828, "learning_rate": 1.7159188026380073e-05, "loss": 0.5834, "step": 16093 }, { "epoch": 0.4943937578717783, "grad_norm": 0.33128631114959717, "learning_rate": 1.7158850601733892e-05, "loss": 0.5377, "step": 16094 }, { "epoch": 0.4944244770067275, "grad_norm": 0.33700403571128845, "learning_rate": 1.715851316036772e-05, "loss": 0.4692, "step": 16095 }, { "epoch": 0.49445519614167666, "grad_norm": 0.3744939863681793, "learning_rate": 1.7158175702282334e-05, "loss": 0.6211, "step": 16096 }, { "epoch": 0.49448591527662583, "grad_norm": 0.3428981900215149, "learning_rate": 1.7157838227478532e-05, "loss": 0.6094, "step": 16097 }, { "epoch": 0.49451663441157495, "grad_norm": 0.3690149188041687, "learning_rate": 1.71575007359571e-05, "loss": 0.5549, "step": 16098 }, { "epoch": 0.4945473535465241, "grad_norm": 0.34965240955352783, "learning_rate": 1.7157163227718822e-05, "loss": 0.6032, "step": 16099 }, { "epoch": 0.4945780726814733, "grad_norm": 0.4417994022369385, "learning_rate": 1.715682570276449e-05, "loss": 0.5995, "step": 16100 }, { "epoch": 0.4946087918164225, "grad_norm": 0.36217060685157776, "learning_rate": 1.7156488161094894e-05, "loss": 0.5355, "step": 16101 }, { "epoch": 0.4946395109513716, "grad_norm": 0.3584793508052826, "learning_rate": 1.7156150602710817e-05, "loss": 0.5402, "step": 16102 }, { "epoch": 0.4946702300863208, "grad_norm": 0.3597504496574402, "learning_rate": 1.7155813027613052e-05, "loss": 0.5762, "step": 16103 }, { "epoch": 0.49470094922126995, "grad_norm": 0.38149338960647583, "learning_rate": 1.7155475435802385e-05, "loss": 0.576, "step": 16104 }, { "epoch": 0.49473166835621907, "grad_norm": 0.3648521900177002, "learning_rate": 1.715513782727961e-05, "loss": 0.5698, "step": 16105 }, { "epoch": 0.49476238749116824, "grad_norm": 0.3678721785545349, "learning_rate": 1.7154800202045507e-05, "loss": 0.5719, "step": 16106 }, { "epoch": 0.4947931066261174, "grad_norm": 0.3797542154788971, "learning_rate": 1.715446256010087e-05, "loss": 0.6289, "step": 16107 }, { "epoch": 0.4948238257610666, "grad_norm": 0.34845229983329773, "learning_rate": 1.7154124901446484e-05, "loss": 0.5184, "step": 16108 }, { "epoch": 0.4948545448960157, "grad_norm": 0.3655673563480377, "learning_rate": 1.7153787226083142e-05, "loss": 0.5185, "step": 16109 }, { "epoch": 0.4948852640309649, "grad_norm": 0.382609486579895, "learning_rate": 1.7153449534011632e-05, "loss": 0.547, "step": 16110 }, { "epoch": 0.49491598316591406, "grad_norm": 0.3145591616630554, "learning_rate": 1.7153111825232732e-05, "loss": 0.527, "step": 16111 }, { "epoch": 0.4949467023008632, "grad_norm": 0.39271795749664307, "learning_rate": 1.715277409974725e-05, "loss": 0.6052, "step": 16112 }, { "epoch": 0.49497742143581236, "grad_norm": 0.34911495447158813, "learning_rate": 1.715243635755596e-05, "loss": 0.6011, "step": 16113 }, { "epoch": 0.49500814057076153, "grad_norm": 0.3439100384712219, "learning_rate": 1.7152098598659654e-05, "loss": 0.6112, "step": 16114 }, { "epoch": 0.4950388597057107, "grad_norm": 0.3450501561164856, "learning_rate": 1.7151760823059124e-05, "loss": 0.6309, "step": 16115 }, { "epoch": 0.49506957884065983, "grad_norm": 0.38133254647254944, "learning_rate": 1.7151423030755156e-05, "loss": 0.6096, "step": 16116 }, { "epoch": 0.495100297975609, "grad_norm": 0.35599085688591003, "learning_rate": 1.7151085221748545e-05, "loss": 0.5788, "step": 16117 }, { "epoch": 0.4951310171105582, "grad_norm": 0.32541441917419434, "learning_rate": 1.715074739604007e-05, "loss": 0.4408, "step": 16118 }, { "epoch": 0.4951617362455073, "grad_norm": 0.3311357796192169, "learning_rate": 1.7150409553630525e-05, "loss": 0.5301, "step": 16119 }, { "epoch": 0.4951924553804565, "grad_norm": 0.3449161648750305, "learning_rate": 1.71500716945207e-05, "loss": 0.5945, "step": 16120 }, { "epoch": 0.49522317451540565, "grad_norm": 0.3509666323661804, "learning_rate": 1.7149733818711384e-05, "loss": 0.475, "step": 16121 }, { "epoch": 0.4952538936503548, "grad_norm": 0.37429752945899963, "learning_rate": 1.7149395926203364e-05, "loss": 0.5978, "step": 16122 }, { "epoch": 0.49528461278530395, "grad_norm": 0.36888429522514343, "learning_rate": 1.714905801699743e-05, "loss": 0.5358, "step": 16123 }, { "epoch": 0.4953153319202531, "grad_norm": 0.5444343686103821, "learning_rate": 1.7148720091094374e-05, "loss": 0.6178, "step": 16124 }, { "epoch": 0.4953460510552023, "grad_norm": 0.35884007811546326, "learning_rate": 1.7148382148494983e-05, "loss": 0.4773, "step": 16125 }, { "epoch": 0.49537677019015147, "grad_norm": 0.34825074672698975, "learning_rate": 1.7148044189200045e-05, "loss": 0.5269, "step": 16126 }, { "epoch": 0.4954074893251006, "grad_norm": 0.3232600688934326, "learning_rate": 1.7147706213210354e-05, "loss": 0.5016, "step": 16127 }, { "epoch": 0.49543820846004977, "grad_norm": 0.38211506605148315, "learning_rate": 1.7147368220526692e-05, "loss": 0.5622, "step": 16128 }, { "epoch": 0.49546892759499894, "grad_norm": 0.3782847821712494, "learning_rate": 1.714703021114985e-05, "loss": 0.6019, "step": 16129 }, { "epoch": 0.49549964672994806, "grad_norm": 0.386857271194458, "learning_rate": 1.7146692185080624e-05, "loss": 0.6399, "step": 16130 }, { "epoch": 0.49553036586489724, "grad_norm": 0.33477017283439636, "learning_rate": 1.71463541423198e-05, "loss": 0.6053, "step": 16131 }, { "epoch": 0.4955610849998464, "grad_norm": 0.34124788641929626, "learning_rate": 1.7146016082868166e-05, "loss": 0.5582, "step": 16132 }, { "epoch": 0.4955918041347956, "grad_norm": 0.3470323085784912, "learning_rate": 1.7145678006726513e-05, "loss": 0.5648, "step": 16133 }, { "epoch": 0.4956225232697447, "grad_norm": 0.40263572335243225, "learning_rate": 1.7145339913895624e-05, "loss": 0.5413, "step": 16134 }, { "epoch": 0.4956532424046939, "grad_norm": 0.4672381579875946, "learning_rate": 1.71450018043763e-05, "loss": 0.6231, "step": 16135 }, { "epoch": 0.49568396153964306, "grad_norm": 0.3611113131046295, "learning_rate": 1.7144663678169324e-05, "loss": 0.5536, "step": 16136 }, { "epoch": 0.4957146806745922, "grad_norm": 0.337490051984787, "learning_rate": 1.7144325535275488e-05, "loss": 0.5541, "step": 16137 }, { "epoch": 0.49574539980954135, "grad_norm": 0.3622978925704956, "learning_rate": 1.714398737569558e-05, "loss": 0.6305, "step": 16138 }, { "epoch": 0.49577611894449053, "grad_norm": 0.3460816442966461, "learning_rate": 1.714364919943039e-05, "loss": 0.5686, "step": 16139 }, { "epoch": 0.4958068380794397, "grad_norm": 0.3648105263710022, "learning_rate": 1.714331100648071e-05, "loss": 0.6393, "step": 16140 }, { "epoch": 0.4958375572143888, "grad_norm": 0.3274463713169098, "learning_rate": 1.7142972796847324e-05, "loss": 0.4825, "step": 16141 }, { "epoch": 0.495868276349338, "grad_norm": 0.3044370412826538, "learning_rate": 1.7142634570531033e-05, "loss": 0.5528, "step": 16142 }, { "epoch": 0.4958989954842872, "grad_norm": 0.39903002977371216, "learning_rate": 1.7142296327532614e-05, "loss": 0.6012, "step": 16143 }, { "epoch": 0.4959297146192363, "grad_norm": 0.3569788932800293, "learning_rate": 1.714195806785287e-05, "loss": 0.562, "step": 16144 }, { "epoch": 0.49596043375418547, "grad_norm": 0.3397604525089264, "learning_rate": 1.7141619791492577e-05, "loss": 0.5701, "step": 16145 }, { "epoch": 0.49599115288913465, "grad_norm": 0.41729697585105896, "learning_rate": 1.7141281498452536e-05, "loss": 0.5376, "step": 16146 }, { "epoch": 0.4960218720240838, "grad_norm": 0.33528459072113037, "learning_rate": 1.714094318873353e-05, "loss": 0.5371, "step": 16147 }, { "epoch": 0.49605259115903294, "grad_norm": 0.34427887201309204, "learning_rate": 1.7140604862336357e-05, "loss": 0.4686, "step": 16148 }, { "epoch": 0.4960833102939821, "grad_norm": 0.329799085855484, "learning_rate": 1.7140266519261798e-05, "loss": 0.6136, "step": 16149 }, { "epoch": 0.4961140294289313, "grad_norm": 0.34208717942237854, "learning_rate": 1.713992815951065e-05, "loss": 0.5686, "step": 16150 }, { "epoch": 0.49614474856388047, "grad_norm": 0.3535095453262329, "learning_rate": 1.7139589783083702e-05, "loss": 0.5286, "step": 16151 }, { "epoch": 0.4961754676988296, "grad_norm": 0.5538643002510071, "learning_rate": 1.7139251389981744e-05, "loss": 0.5537, "step": 16152 }, { "epoch": 0.49620618683377876, "grad_norm": 0.4334535598754883, "learning_rate": 1.7138912980205567e-05, "loss": 0.5685, "step": 16153 }, { "epoch": 0.49623690596872794, "grad_norm": 0.34803977608680725, "learning_rate": 1.713857455375596e-05, "loss": 0.5357, "step": 16154 }, { "epoch": 0.49626762510367706, "grad_norm": 0.6416824460029602, "learning_rate": 1.7138236110633707e-05, "loss": 0.5432, "step": 16155 }, { "epoch": 0.49629834423862623, "grad_norm": 0.35345712304115295, "learning_rate": 1.713789765083961e-05, "loss": 0.5441, "step": 16156 }, { "epoch": 0.4963290633735754, "grad_norm": 0.3570508360862732, "learning_rate": 1.7137559174374452e-05, "loss": 0.5411, "step": 16157 }, { "epoch": 0.4963597825085246, "grad_norm": 0.3510949909687042, "learning_rate": 1.7137220681239032e-05, "loss": 0.543, "step": 16158 }, { "epoch": 0.4963905016434737, "grad_norm": 0.34698042273521423, "learning_rate": 1.713688217143413e-05, "loss": 0.6281, "step": 16159 }, { "epoch": 0.4964212207784229, "grad_norm": 0.2928002178668976, "learning_rate": 1.713654364496054e-05, "loss": 0.5355, "step": 16160 }, { "epoch": 0.49645193991337205, "grad_norm": 0.3539695739746094, "learning_rate": 1.713620510181906e-05, "loss": 0.6011, "step": 16161 }, { "epoch": 0.4964826590483212, "grad_norm": 0.38938942551612854, "learning_rate": 1.7135866542010467e-05, "loss": 0.589, "step": 16162 }, { "epoch": 0.49651337818327035, "grad_norm": 0.3361605703830719, "learning_rate": 1.7135527965535566e-05, "loss": 0.4858, "step": 16163 }, { "epoch": 0.4965440973182195, "grad_norm": 0.3446982800960541, "learning_rate": 1.7135189372395136e-05, "loss": 0.5482, "step": 16164 }, { "epoch": 0.4965748164531687, "grad_norm": 0.34511733055114746, "learning_rate": 1.7134850762589974e-05, "loss": 0.5856, "step": 16165 }, { "epoch": 0.4966055355881178, "grad_norm": 0.29298555850982666, "learning_rate": 1.713451213612087e-05, "loss": 0.5721, "step": 16166 }, { "epoch": 0.496636254723067, "grad_norm": 0.3356286883354187, "learning_rate": 1.7134173492988617e-05, "loss": 0.519, "step": 16167 }, { "epoch": 0.49666697385801617, "grad_norm": 0.332584410905838, "learning_rate": 1.7133834833194e-05, "loss": 0.6366, "step": 16168 }, { "epoch": 0.49669769299296535, "grad_norm": 0.35788118839263916, "learning_rate": 1.7133496156737816e-05, "loss": 0.5662, "step": 16169 }, { "epoch": 0.49672841212791446, "grad_norm": 0.3622240722179413, "learning_rate": 1.7133157463620854e-05, "loss": 0.6091, "step": 16170 }, { "epoch": 0.49675913126286364, "grad_norm": 0.3872944414615631, "learning_rate": 1.7132818753843903e-05, "loss": 0.6681, "step": 16171 }, { "epoch": 0.4967898503978128, "grad_norm": 0.370185524225235, "learning_rate": 1.7132480027407758e-05, "loss": 0.6052, "step": 16172 }, { "epoch": 0.49682056953276194, "grad_norm": 0.350319504737854, "learning_rate": 1.7132141284313202e-05, "loss": 0.5724, "step": 16173 }, { "epoch": 0.4968512886677111, "grad_norm": 0.3784218430519104, "learning_rate": 1.7131802524561036e-05, "loss": 0.6018, "step": 16174 }, { "epoch": 0.4968820078026603, "grad_norm": 0.35377180576324463, "learning_rate": 1.7131463748152048e-05, "loss": 0.5757, "step": 16175 }, { "epoch": 0.49691272693760946, "grad_norm": 0.3753320574760437, "learning_rate": 1.7131124955087028e-05, "loss": 0.6798, "step": 16176 }, { "epoch": 0.4969434460725586, "grad_norm": 0.38605985045433044, "learning_rate": 1.7130786145366768e-05, "loss": 0.5655, "step": 16177 }, { "epoch": 0.49697416520750776, "grad_norm": 0.33640769124031067, "learning_rate": 1.7130447318992055e-05, "loss": 0.5832, "step": 16178 }, { "epoch": 0.49700488434245693, "grad_norm": 0.36033812165260315, "learning_rate": 1.713010847596369e-05, "loss": 0.6028, "step": 16179 }, { "epoch": 0.49703560347740605, "grad_norm": 0.3410463333129883, "learning_rate": 1.7129769616282455e-05, "loss": 0.6281, "step": 16180 }, { "epoch": 0.4970663226123552, "grad_norm": 0.32996660470962524, "learning_rate": 1.7129430739949146e-05, "loss": 0.4989, "step": 16181 }, { "epoch": 0.4970970417473044, "grad_norm": 0.33075594902038574, "learning_rate": 1.7129091846964555e-05, "loss": 0.572, "step": 16182 }, { "epoch": 0.4971277608822536, "grad_norm": 0.35805004835128784, "learning_rate": 1.7128752937329468e-05, "loss": 0.5878, "step": 16183 }, { "epoch": 0.4971584800172027, "grad_norm": 0.3230906128883362, "learning_rate": 1.712841401104468e-05, "loss": 0.5008, "step": 16184 }, { "epoch": 0.4971891991521519, "grad_norm": 0.36431145668029785, "learning_rate": 1.712807506811099e-05, "loss": 0.6811, "step": 16185 }, { "epoch": 0.49721991828710105, "grad_norm": 0.3858392536640167, "learning_rate": 1.712773610852918e-05, "loss": 0.5549, "step": 16186 }, { "epoch": 0.49725063742205017, "grad_norm": 0.3759765326976776, "learning_rate": 1.7127397132300044e-05, "loss": 0.6106, "step": 16187 }, { "epoch": 0.49728135655699934, "grad_norm": 0.3876376748085022, "learning_rate": 1.7127058139424373e-05, "loss": 0.5963, "step": 16188 }, { "epoch": 0.4973120756919485, "grad_norm": 0.3510383367538452, "learning_rate": 1.7126719129902963e-05, "loss": 0.5508, "step": 16189 }, { "epoch": 0.4973427948268977, "grad_norm": 0.37122195959091187, "learning_rate": 1.71263801037366e-05, "loss": 0.5811, "step": 16190 }, { "epoch": 0.4973735139618468, "grad_norm": 0.3569961190223694, "learning_rate": 1.712604106092608e-05, "loss": 0.5663, "step": 16191 }, { "epoch": 0.497404233096796, "grad_norm": 0.3956550359725952, "learning_rate": 1.7125702001472194e-05, "loss": 0.613, "step": 16192 }, { "epoch": 0.49743495223174516, "grad_norm": 0.4119057059288025, "learning_rate": 1.712536292537573e-05, "loss": 0.5824, "step": 16193 }, { "epoch": 0.49746567136669434, "grad_norm": 0.340414434671402, "learning_rate": 1.7125023832637486e-05, "loss": 0.5869, "step": 16194 }, { "epoch": 0.49749639050164346, "grad_norm": 0.386564165353775, "learning_rate": 1.712468472325825e-05, "loss": 0.5277, "step": 16195 }, { "epoch": 0.49752710963659263, "grad_norm": 0.33003583550453186, "learning_rate": 1.7124345597238816e-05, "loss": 0.5449, "step": 16196 }, { "epoch": 0.4975578287715418, "grad_norm": 0.36396098136901855, "learning_rate": 1.7124006454579974e-05, "loss": 0.5994, "step": 16197 }, { "epoch": 0.49758854790649093, "grad_norm": 0.3917171359062195, "learning_rate": 1.712366729528252e-05, "loss": 0.5923, "step": 16198 }, { "epoch": 0.4976192670414401, "grad_norm": 0.33746424317359924, "learning_rate": 1.7123328119347243e-05, "loss": 0.6029, "step": 16199 }, { "epoch": 0.4976499861763893, "grad_norm": 0.36788761615753174, "learning_rate": 1.7122988926774935e-05, "loss": 0.6127, "step": 16200 }, { "epoch": 0.49768070531133846, "grad_norm": 0.3541843593120575, "learning_rate": 1.7122649717566386e-05, "loss": 0.6154, "step": 16201 }, { "epoch": 0.4977114244462876, "grad_norm": 0.37091901898384094, "learning_rate": 1.7122310491722393e-05, "loss": 0.5658, "step": 16202 }, { "epoch": 0.49774214358123675, "grad_norm": 0.33512431383132935, "learning_rate": 1.7121971249243748e-05, "loss": 0.5845, "step": 16203 }, { "epoch": 0.4977728627161859, "grad_norm": 0.39528974890708923, "learning_rate": 1.7121631990131243e-05, "loss": 0.5655, "step": 16204 }, { "epoch": 0.49780358185113505, "grad_norm": 0.3828268349170685, "learning_rate": 1.7121292714385666e-05, "loss": 0.5883, "step": 16205 }, { "epoch": 0.4978343009860842, "grad_norm": 0.5199773907661438, "learning_rate": 1.7120953422007812e-05, "loss": 0.6736, "step": 16206 }, { "epoch": 0.4978650201210334, "grad_norm": 0.40562018752098083, "learning_rate": 1.712061411299848e-05, "loss": 0.5539, "step": 16207 }, { "epoch": 0.4978957392559826, "grad_norm": 0.37321192026138306, "learning_rate": 1.7120274787358448e-05, "loss": 0.6218, "step": 16208 }, { "epoch": 0.4979264583909317, "grad_norm": 0.31321245431900024, "learning_rate": 1.7119935445088523e-05, "loss": 0.5484, "step": 16209 }, { "epoch": 0.49795717752588087, "grad_norm": 0.34904488921165466, "learning_rate": 1.711959608618949e-05, "loss": 0.6333, "step": 16210 }, { "epoch": 0.49798789666083004, "grad_norm": 0.3702452480792999, "learning_rate": 1.711925671066214e-05, "loss": 0.5488, "step": 16211 }, { "epoch": 0.4980186157957792, "grad_norm": 0.35153141617774963, "learning_rate": 1.7118917318507275e-05, "loss": 0.5431, "step": 16212 }, { "epoch": 0.49804933493072834, "grad_norm": 0.3307999074459076, "learning_rate": 1.7118577909725677e-05, "loss": 0.519, "step": 16213 }, { "epoch": 0.4980800540656775, "grad_norm": 0.3239724040031433, "learning_rate": 1.7118238484318144e-05, "loss": 0.5123, "step": 16214 }, { "epoch": 0.4981107732006267, "grad_norm": 0.355420857667923, "learning_rate": 1.711789904228547e-05, "loss": 0.5595, "step": 16215 }, { "epoch": 0.4981414923355758, "grad_norm": 0.32512855529785156, "learning_rate": 1.711755958362844e-05, "loss": 0.5722, "step": 16216 }, { "epoch": 0.498172211470525, "grad_norm": 0.7446301579475403, "learning_rate": 1.7117220108347858e-05, "loss": 0.6138, "step": 16217 }, { "epoch": 0.49820293060547416, "grad_norm": 0.3186052441596985, "learning_rate": 1.7116880616444508e-05, "loss": 0.547, "step": 16218 }, { "epoch": 0.49823364974042333, "grad_norm": 0.3891338109970093, "learning_rate": 1.7116541107919187e-05, "loss": 0.5015, "step": 16219 }, { "epoch": 0.49826436887537245, "grad_norm": 0.3479968309402466, "learning_rate": 1.7116201582772688e-05, "loss": 0.4802, "step": 16220 }, { "epoch": 0.49829508801032163, "grad_norm": 0.3687351644039154, "learning_rate": 1.7115862041005803e-05, "loss": 0.603, "step": 16221 }, { "epoch": 0.4983258071452708, "grad_norm": 0.40630993247032166, "learning_rate": 1.711552248261933e-05, "loss": 0.5663, "step": 16222 }, { "epoch": 0.4983565262802199, "grad_norm": 0.3254249691963196, "learning_rate": 1.7115182907614047e-05, "loss": 0.5442, "step": 16223 }, { "epoch": 0.4983872454151691, "grad_norm": 0.3433317542076111, "learning_rate": 1.7114843315990762e-05, "loss": 0.5237, "step": 16224 }, { "epoch": 0.4984179645501183, "grad_norm": 0.3494336009025574, "learning_rate": 1.7114503707750266e-05, "loss": 0.4909, "step": 16225 }, { "epoch": 0.49844868368506745, "grad_norm": 0.39335402846336365, "learning_rate": 1.711416408289335e-05, "loss": 0.5194, "step": 16226 }, { "epoch": 0.49847940282001657, "grad_norm": 0.3675476014614105, "learning_rate": 1.7113824441420806e-05, "loss": 0.6027, "step": 16227 }, { "epoch": 0.49851012195496575, "grad_norm": 0.3598644435405731, "learning_rate": 1.7113484783333424e-05, "loss": 0.576, "step": 16228 }, { "epoch": 0.4985408410899149, "grad_norm": 0.340298056602478, "learning_rate": 1.7113145108632006e-05, "loss": 0.5527, "step": 16229 }, { "epoch": 0.49857156022486404, "grad_norm": 0.2931026518344879, "learning_rate": 1.711280541731734e-05, "loss": 0.4913, "step": 16230 }, { "epoch": 0.4986022793598132, "grad_norm": 0.415555864572525, "learning_rate": 1.711246570939022e-05, "loss": 0.6168, "step": 16231 }, { "epoch": 0.4986329984947624, "grad_norm": 0.3646012842655182, "learning_rate": 1.711212598485144e-05, "loss": 0.6026, "step": 16232 }, { "epoch": 0.49866371762971157, "grad_norm": 0.3278540074825287, "learning_rate": 1.7111786243701793e-05, "loss": 0.5854, "step": 16233 }, { "epoch": 0.4986944367646607, "grad_norm": 0.3585405945777893, "learning_rate": 1.7111446485942073e-05, "loss": 0.5386, "step": 16234 }, { "epoch": 0.49872515589960986, "grad_norm": 0.3623751401901245, "learning_rate": 1.7111106711573073e-05, "loss": 0.5619, "step": 16235 }, { "epoch": 0.49875587503455904, "grad_norm": 0.40888792276382446, "learning_rate": 1.7110766920595586e-05, "loss": 0.6784, "step": 16236 }, { "epoch": 0.4987865941695082, "grad_norm": 0.3498009443283081, "learning_rate": 1.7110427113010403e-05, "loss": 0.5596, "step": 16237 }, { "epoch": 0.49881731330445733, "grad_norm": 0.35903069376945496, "learning_rate": 1.7110087288818325e-05, "loss": 0.5916, "step": 16238 }, { "epoch": 0.4988480324394065, "grad_norm": 0.3619460463523865, "learning_rate": 1.7109747448020142e-05, "loss": 0.541, "step": 16239 }, { "epoch": 0.4988787515743557, "grad_norm": 0.40022918581962585, "learning_rate": 1.7109407590616648e-05, "loss": 0.5089, "step": 16240 }, { "epoch": 0.4989094707093048, "grad_norm": 0.35492241382598877, "learning_rate": 1.7109067716608632e-05, "loss": 0.5506, "step": 16241 }, { "epoch": 0.498940189844254, "grad_norm": 0.3719565272331238, "learning_rate": 1.7108727825996892e-05, "loss": 0.5386, "step": 16242 }, { "epoch": 0.49897090897920315, "grad_norm": 0.36096644401550293, "learning_rate": 1.7108387918782226e-05, "loss": 0.6313, "step": 16243 }, { "epoch": 0.49900162811415233, "grad_norm": 0.34093451499938965, "learning_rate": 1.710804799496542e-05, "loss": 0.5459, "step": 16244 }, { "epoch": 0.49903234724910145, "grad_norm": 0.3954748213291168, "learning_rate": 1.7107708054547273e-05, "loss": 0.6104, "step": 16245 }, { "epoch": 0.4990630663840506, "grad_norm": 0.31720325350761414, "learning_rate": 1.7107368097528577e-05, "loss": 0.5322, "step": 16246 }, { "epoch": 0.4990937855189998, "grad_norm": 0.3855191171169281, "learning_rate": 1.7107028123910126e-05, "loss": 0.5174, "step": 16247 }, { "epoch": 0.4991245046539489, "grad_norm": 0.38203349709510803, "learning_rate": 1.7106688133692716e-05, "loss": 0.5581, "step": 16248 }, { "epoch": 0.4991552237888981, "grad_norm": 0.5216809511184692, "learning_rate": 1.7106348126877137e-05, "loss": 0.5637, "step": 16249 }, { "epoch": 0.49918594292384727, "grad_norm": 0.34286627173423767, "learning_rate": 1.7106008103464187e-05, "loss": 0.5356, "step": 16250 }, { "epoch": 0.49921666205879645, "grad_norm": 0.33497512340545654, "learning_rate": 1.710566806345466e-05, "loss": 0.56, "step": 16251 }, { "epoch": 0.49924738119374557, "grad_norm": 0.3284938633441925, "learning_rate": 1.7105328006849346e-05, "loss": 0.5255, "step": 16252 }, { "epoch": 0.49927810032869474, "grad_norm": 0.3722510039806366, "learning_rate": 1.7104987933649043e-05, "loss": 0.5401, "step": 16253 }, { "epoch": 0.4993088194636439, "grad_norm": 0.3595581352710724, "learning_rate": 1.710464784385455e-05, "loss": 0.549, "step": 16254 }, { "epoch": 0.4993395385985931, "grad_norm": 0.35912302136421204, "learning_rate": 1.7104307737466648e-05, "loss": 0.5309, "step": 16255 }, { "epoch": 0.4993702577335422, "grad_norm": 0.3236071765422821, "learning_rate": 1.7103967614486144e-05, "loss": 0.5425, "step": 16256 }, { "epoch": 0.4994009768684914, "grad_norm": 0.3829495906829834, "learning_rate": 1.7103627474913827e-05, "loss": 0.6198, "step": 16257 }, { "epoch": 0.49943169600344056, "grad_norm": 0.433328241109848, "learning_rate": 1.710328731875049e-05, "loss": 0.5892, "step": 16258 }, { "epoch": 0.4994624151383897, "grad_norm": 0.35081401467323303, "learning_rate": 1.710294714599693e-05, "loss": 0.5865, "step": 16259 }, { "epoch": 0.49949313427333886, "grad_norm": 0.3352569043636322, "learning_rate": 1.7102606956653943e-05, "loss": 0.5175, "step": 16260 }, { "epoch": 0.49952385340828803, "grad_norm": 0.3555446267127991, "learning_rate": 1.7102266750722317e-05, "loss": 0.5411, "step": 16261 }, { "epoch": 0.4995545725432372, "grad_norm": 0.33151715993881226, "learning_rate": 1.7101926528202852e-05, "loss": 0.5789, "step": 16262 }, { "epoch": 0.4995852916781863, "grad_norm": 0.3409734070301056, "learning_rate": 1.7101586289096347e-05, "loss": 0.6021, "step": 16263 }, { "epoch": 0.4996160108131355, "grad_norm": 0.41606980562210083, "learning_rate": 1.7101246033403585e-05, "loss": 0.5548, "step": 16264 }, { "epoch": 0.4996467299480847, "grad_norm": 0.32631856203079224, "learning_rate": 1.710090576112537e-05, "loss": 0.5583, "step": 16265 }, { "epoch": 0.4996774490830338, "grad_norm": 0.411122590303421, "learning_rate": 1.7100565472262492e-05, "loss": 0.518, "step": 16266 }, { "epoch": 0.499708168217983, "grad_norm": 0.35872939229011536, "learning_rate": 1.710022516681575e-05, "loss": 0.5328, "step": 16267 }, { "epoch": 0.49973888735293215, "grad_norm": 0.38322392106056213, "learning_rate": 1.7099884844785933e-05, "loss": 0.5845, "step": 16268 }, { "epoch": 0.4997696064878813, "grad_norm": 0.36310213804244995, "learning_rate": 1.7099544506173843e-05, "loss": 0.5563, "step": 16269 }, { "epoch": 0.49980032562283044, "grad_norm": 0.3127036392688751, "learning_rate": 1.7099204150980267e-05, "loss": 0.4914, "step": 16270 }, { "epoch": 0.4998310447577796, "grad_norm": 0.35355547070503235, "learning_rate": 1.7098863779206007e-05, "loss": 0.5829, "step": 16271 }, { "epoch": 0.4998617638927288, "grad_norm": 0.3392949104309082, "learning_rate": 1.7098523390851853e-05, "loss": 0.6026, "step": 16272 }, { "epoch": 0.4998924830276779, "grad_norm": 0.32968395948410034, "learning_rate": 1.70981829859186e-05, "loss": 0.5372, "step": 16273 }, { "epoch": 0.4999232021626271, "grad_norm": 0.3403926193714142, "learning_rate": 1.7097842564407047e-05, "loss": 0.6052, "step": 16274 }, { "epoch": 0.49995392129757626, "grad_norm": 0.34779444336891174, "learning_rate": 1.7097502126317992e-05, "loss": 0.5185, "step": 16275 }, { "epoch": 0.49998464043252544, "grad_norm": 0.33226656913757324, "learning_rate": 1.7097161671652217e-05, "loss": 0.611, "step": 16276 }, { "epoch": 0.5000153595674746, "grad_norm": 0.3588397204875946, "learning_rate": 1.709682120041053e-05, "loss": 0.5196, "step": 16277 }, { "epoch": 0.5000460787024238, "grad_norm": 0.36176252365112305, "learning_rate": 1.709648071259372e-05, "loss": 0.5746, "step": 16278 }, { "epoch": 0.5000767978373729, "grad_norm": 0.3723160922527313, "learning_rate": 1.7096140208202582e-05, "loss": 0.5529, "step": 16279 }, { "epoch": 0.500107516972322, "grad_norm": 0.3814859390258789, "learning_rate": 1.709579968723792e-05, "loss": 0.5405, "step": 16280 }, { "epoch": 0.5001382361072713, "grad_norm": 0.3503668010234833, "learning_rate": 1.7095459149700514e-05, "loss": 0.6461, "step": 16281 }, { "epoch": 0.5001689552422204, "grad_norm": 0.3376065492630005, "learning_rate": 1.709511859559117e-05, "loss": 0.5349, "step": 16282 }, { "epoch": 0.5001996743771695, "grad_norm": 0.3207961916923523, "learning_rate": 1.709477802491068e-05, "loss": 0.5273, "step": 16283 }, { "epoch": 0.5002303935121187, "grad_norm": 0.7023917436599731, "learning_rate": 1.709443743765984e-05, "loss": 0.5573, "step": 16284 }, { "epoch": 0.5002611126470679, "grad_norm": 0.4817909002304077, "learning_rate": 1.7094096833839452e-05, "loss": 0.6578, "step": 16285 }, { "epoch": 0.500291831782017, "grad_norm": 0.3280881345272064, "learning_rate": 1.7093756213450302e-05, "loss": 0.5813, "step": 16286 }, { "epoch": 0.5003225509169662, "grad_norm": 0.34191077947616577, "learning_rate": 1.7093415576493188e-05, "loss": 0.5565, "step": 16287 }, { "epoch": 0.5003532700519153, "grad_norm": 0.3441908657550812, "learning_rate": 1.7093074922968905e-05, "loss": 0.5521, "step": 16288 }, { "epoch": 0.5003839891868646, "grad_norm": 0.3314736783504486, "learning_rate": 1.709273425287825e-05, "loss": 0.7059, "step": 16289 }, { "epoch": 0.5004147083218137, "grad_norm": 0.37093719840049744, "learning_rate": 1.7092393566222023e-05, "loss": 0.6595, "step": 16290 }, { "epoch": 0.5004454274567628, "grad_norm": 0.44469448924064636, "learning_rate": 1.7092052863001013e-05, "loss": 0.5725, "step": 16291 }, { "epoch": 0.500476146591712, "grad_norm": 2.4237515926361084, "learning_rate": 1.709171214321602e-05, "loss": 0.5474, "step": 16292 }, { "epoch": 0.5005068657266611, "grad_norm": 0.40841248631477356, "learning_rate": 1.7091371406867835e-05, "loss": 0.5578, "step": 16293 }, { "epoch": 0.5005375848616103, "grad_norm": 0.34654390811920166, "learning_rate": 1.709103065395726e-05, "loss": 0.5695, "step": 16294 }, { "epoch": 0.5005683039965595, "grad_norm": 0.3386409878730774, "learning_rate": 1.7090689884485087e-05, "loss": 0.6114, "step": 16295 }, { "epoch": 0.5005990231315086, "grad_norm": 0.3363141119480133, "learning_rate": 1.709034909845211e-05, "loss": 0.4811, "step": 16296 }, { "epoch": 0.5006297422664577, "grad_norm": 0.36815038323402405, "learning_rate": 1.7090008295859128e-05, "loss": 0.6178, "step": 16297 }, { "epoch": 0.500660461401407, "grad_norm": 0.3621724843978882, "learning_rate": 1.7089667476706937e-05, "loss": 0.5253, "step": 16298 }, { "epoch": 0.5006911805363561, "grad_norm": 0.3667140305042267, "learning_rate": 1.7089326640996337e-05, "loss": 0.4966, "step": 16299 }, { "epoch": 0.5007218996713052, "grad_norm": 0.3399885296821594, "learning_rate": 1.7088985788728113e-05, "loss": 0.4638, "step": 16300 }, { "epoch": 0.5007526188062544, "grad_norm": 0.4036417007446289, "learning_rate": 1.708864491990307e-05, "loss": 0.6322, "step": 16301 }, { "epoch": 0.5007833379412036, "grad_norm": 0.36664333939552307, "learning_rate": 1.7088304034522e-05, "loss": 0.5237, "step": 16302 }, { "epoch": 0.5008140570761528, "grad_norm": 0.3786278963088989, "learning_rate": 1.7087963132585703e-05, "loss": 0.5767, "step": 16303 }, { "epoch": 0.5008447762111019, "grad_norm": 0.41454869508743286, "learning_rate": 1.7087622214094973e-05, "loss": 0.5245, "step": 16304 }, { "epoch": 0.500875495346051, "grad_norm": 0.3532162606716156, "learning_rate": 1.7087281279050606e-05, "loss": 0.505, "step": 16305 }, { "epoch": 0.5009062144810003, "grad_norm": 0.3434602618217468, "learning_rate": 1.70869403274534e-05, "loss": 0.5293, "step": 16306 }, { "epoch": 0.5009369336159494, "grad_norm": 0.5159266591072083, "learning_rate": 1.708659935930415e-05, "loss": 0.5565, "step": 16307 }, { "epoch": 0.5009676527508985, "grad_norm": 0.39007171988487244, "learning_rate": 1.708625837460365e-05, "loss": 0.5321, "step": 16308 }, { "epoch": 0.5009983718858477, "grad_norm": 0.3637964725494385, "learning_rate": 1.70859173733527e-05, "loss": 0.5267, "step": 16309 }, { "epoch": 0.5010290910207968, "grad_norm": 0.3788507580757141, "learning_rate": 1.7085576355552094e-05, "loss": 0.5727, "step": 16310 }, { "epoch": 0.501059810155746, "grad_norm": 0.3969140648841858, "learning_rate": 1.708523532120263e-05, "loss": 0.5088, "step": 16311 }, { "epoch": 0.5010905292906952, "grad_norm": 0.38724640011787415, "learning_rate": 1.7084894270305103e-05, "loss": 0.5723, "step": 16312 }, { "epoch": 0.5011212484256443, "grad_norm": 0.47763657569885254, "learning_rate": 1.7084553202860315e-05, "loss": 0.6443, "step": 16313 }, { "epoch": 0.5011519675605935, "grad_norm": 0.4860011637210846, "learning_rate": 1.7084212118869054e-05, "loss": 0.5346, "step": 16314 }, { "epoch": 0.5011826866955427, "grad_norm": 0.5435155630111694, "learning_rate": 1.7083871018332122e-05, "loss": 0.7084, "step": 16315 }, { "epoch": 0.5012134058304918, "grad_norm": 0.36341339349746704, "learning_rate": 1.7083529901250316e-05, "loss": 0.5799, "step": 16316 }, { "epoch": 0.501244124965441, "grad_norm": 0.3250170946121216, "learning_rate": 1.7083188767624428e-05, "loss": 0.4741, "step": 16317 }, { "epoch": 0.5012748441003901, "grad_norm": 0.3602892756462097, "learning_rate": 1.7082847617455262e-05, "loss": 0.6095, "step": 16318 }, { "epoch": 0.5013055632353393, "grad_norm": 0.38007640838623047, "learning_rate": 1.7082506450743607e-05, "loss": 0.6418, "step": 16319 }, { "epoch": 0.5013362823702885, "grad_norm": 0.319638192653656, "learning_rate": 1.7082165267490265e-05, "loss": 0.5116, "step": 16320 }, { "epoch": 0.5013670015052376, "grad_norm": 0.3660510182380676, "learning_rate": 1.708182406769603e-05, "loss": 0.5749, "step": 16321 }, { "epoch": 0.5013977206401867, "grad_norm": 0.3154148459434509, "learning_rate": 1.7081482851361702e-05, "loss": 0.6107, "step": 16322 }, { "epoch": 0.501428439775136, "grad_norm": 0.36342787742614746, "learning_rate": 1.7081141618488076e-05, "loss": 0.6379, "step": 16323 }, { "epoch": 0.5014591589100851, "grad_norm": 0.35049375891685486, "learning_rate": 1.708080036907595e-05, "loss": 0.5416, "step": 16324 }, { "epoch": 0.5014898780450342, "grad_norm": 0.3397506773471832, "learning_rate": 1.708045910312612e-05, "loss": 0.5356, "step": 16325 }, { "epoch": 0.5015205971799834, "grad_norm": 0.3801797032356262, "learning_rate": 1.7080117820639383e-05, "loss": 0.5614, "step": 16326 }, { "epoch": 0.5015513163149325, "grad_norm": 0.3901551067829132, "learning_rate": 1.7079776521616533e-05, "loss": 0.4895, "step": 16327 }, { "epoch": 0.5015820354498818, "grad_norm": 0.3401913344860077, "learning_rate": 1.7079435206058376e-05, "loss": 0.5431, "step": 16328 }, { "epoch": 0.5016127545848309, "grad_norm": 0.4118620753288269, "learning_rate": 1.70790938739657e-05, "loss": 0.5893, "step": 16329 }, { "epoch": 0.50164347371978, "grad_norm": 0.33327195048332214, "learning_rate": 1.7078752525339306e-05, "loss": 0.5012, "step": 16330 }, { "epoch": 0.5016741928547293, "grad_norm": 0.33898305892944336, "learning_rate": 1.7078411160179994e-05, "loss": 0.4425, "step": 16331 }, { "epoch": 0.5017049119896784, "grad_norm": 0.384173184633255, "learning_rate": 1.7078069778488554e-05, "loss": 0.5307, "step": 16332 }, { "epoch": 0.5017356311246275, "grad_norm": 0.40704891085624695, "learning_rate": 1.7077728380265793e-05, "loss": 0.5736, "step": 16333 }, { "epoch": 0.5017663502595767, "grad_norm": 0.333587646484375, "learning_rate": 1.7077386965512496e-05, "loss": 0.563, "step": 16334 }, { "epoch": 0.5017970693945258, "grad_norm": 0.3925175368785858, "learning_rate": 1.7077045534229473e-05, "loss": 0.6195, "step": 16335 }, { "epoch": 0.501827788529475, "grad_norm": 0.3436698317527771, "learning_rate": 1.7076704086417516e-05, "loss": 0.5196, "step": 16336 }, { "epoch": 0.5018585076644242, "grad_norm": 0.34620893001556396, "learning_rate": 1.7076362622077423e-05, "loss": 0.5655, "step": 16337 }, { "epoch": 0.5018892267993733, "grad_norm": 0.32041582465171814, "learning_rate": 1.7076021141209985e-05, "loss": 0.558, "step": 16338 }, { "epoch": 0.5019199459343225, "grad_norm": 0.3410366177558899, "learning_rate": 1.707567964381601e-05, "loss": 0.5291, "step": 16339 }, { "epoch": 0.5019506650692717, "grad_norm": 0.383606880903244, "learning_rate": 1.707533812989629e-05, "loss": 0.5616, "step": 16340 }, { "epoch": 0.5019813842042208, "grad_norm": 0.3422068953514099, "learning_rate": 1.7074996599451624e-05, "loss": 0.6607, "step": 16341 }, { "epoch": 0.50201210333917, "grad_norm": 0.3503864109516144, "learning_rate": 1.707465505248281e-05, "loss": 0.5833, "step": 16342 }, { "epoch": 0.5020428224741191, "grad_norm": 0.3243504464626312, "learning_rate": 1.7074313488990646e-05, "loss": 0.5902, "step": 16343 }, { "epoch": 0.5020735416090683, "grad_norm": 0.5102798938751221, "learning_rate": 1.7073971908975928e-05, "loss": 0.5057, "step": 16344 }, { "epoch": 0.5021042607440175, "grad_norm": 0.32992905378341675, "learning_rate": 1.7073630312439454e-05, "loss": 0.5559, "step": 16345 }, { "epoch": 0.5021349798789666, "grad_norm": 0.7724525332450867, "learning_rate": 1.707328869938202e-05, "loss": 0.618, "step": 16346 }, { "epoch": 0.5021656990139157, "grad_norm": 0.5333803296089172, "learning_rate": 1.7072947069804428e-05, "loss": 0.5663, "step": 16347 }, { "epoch": 0.502196418148865, "grad_norm": 0.38434794545173645, "learning_rate": 1.7072605423707473e-05, "loss": 0.5207, "step": 16348 }, { "epoch": 0.5022271372838141, "grad_norm": 0.3617023825645447, "learning_rate": 1.707226376109196e-05, "loss": 0.6022, "step": 16349 }, { "epoch": 0.5022578564187632, "grad_norm": 0.3876306116580963, "learning_rate": 1.7071922081958673e-05, "loss": 0.6925, "step": 16350 }, { "epoch": 0.5022885755537124, "grad_norm": 0.44163450598716736, "learning_rate": 1.7071580386308423e-05, "loss": 0.668, "step": 16351 }, { "epoch": 0.5023192946886615, "grad_norm": 0.36485928297042847, "learning_rate": 1.7071238674142e-05, "loss": 0.6295, "step": 16352 }, { "epoch": 0.5023500138236108, "grad_norm": 0.3317892253398895, "learning_rate": 1.7070896945460207e-05, "loss": 0.5184, "step": 16353 }, { "epoch": 0.5023807329585599, "grad_norm": 0.38474875688552856, "learning_rate": 1.7070555200263843e-05, "loss": 0.5816, "step": 16354 }, { "epoch": 0.502411452093509, "grad_norm": 0.36048623919487, "learning_rate": 1.70702134385537e-05, "loss": 0.5992, "step": 16355 }, { "epoch": 0.5024421712284582, "grad_norm": 0.33131593465805054, "learning_rate": 1.706987166033058e-05, "loss": 0.6509, "step": 16356 }, { "epoch": 0.5024728903634074, "grad_norm": 0.34296703338623047, "learning_rate": 1.7069529865595284e-05, "loss": 0.6338, "step": 16357 }, { "epoch": 0.5025036094983565, "grad_norm": 0.36345380544662476, "learning_rate": 1.70691880543486e-05, "loss": 0.5947, "step": 16358 }, { "epoch": 0.5025343286333057, "grad_norm": 0.3285859227180481, "learning_rate": 1.7068846226591343e-05, "loss": 0.5439, "step": 16359 }, { "epoch": 0.5025650477682548, "grad_norm": 0.3811939060688019, "learning_rate": 1.7068504382324294e-05, "loss": 0.5398, "step": 16360 }, { "epoch": 0.502595766903204, "grad_norm": 0.35813403129577637, "learning_rate": 1.7068162521548265e-05, "loss": 0.5987, "step": 16361 }, { "epoch": 0.5026264860381532, "grad_norm": 0.383483350276947, "learning_rate": 1.706782064426405e-05, "loss": 0.6249, "step": 16362 }, { "epoch": 0.5026572051731023, "grad_norm": 0.38100990653038025, "learning_rate": 1.706747875047244e-05, "loss": 0.6143, "step": 16363 }, { "epoch": 0.5026879243080515, "grad_norm": 0.3434688150882721, "learning_rate": 1.7067136840174243e-05, "loss": 0.5646, "step": 16364 }, { "epoch": 0.5027186434430007, "grad_norm": 0.4042685031890869, "learning_rate": 1.706679491337025e-05, "loss": 0.6102, "step": 16365 }, { "epoch": 0.5027493625779498, "grad_norm": 0.3473168909549713, "learning_rate": 1.7066452970061273e-05, "loss": 0.6315, "step": 16366 }, { "epoch": 0.502780081712899, "grad_norm": 0.3217310309410095, "learning_rate": 1.7066111010248095e-05, "loss": 0.6224, "step": 16367 }, { "epoch": 0.5028108008478481, "grad_norm": 0.34870442748069763, "learning_rate": 1.7065769033931524e-05, "loss": 0.6149, "step": 16368 }, { "epoch": 0.5028415199827972, "grad_norm": 0.43693414330482483, "learning_rate": 1.7065427041112355e-05, "loss": 0.6062, "step": 16369 }, { "epoch": 0.5028722391177465, "grad_norm": 0.32666924595832825, "learning_rate": 1.706508503179139e-05, "loss": 0.5673, "step": 16370 }, { "epoch": 0.5029029582526956, "grad_norm": 0.34299221634864807, "learning_rate": 1.706474300596942e-05, "loss": 0.6193, "step": 16371 }, { "epoch": 0.5029336773876447, "grad_norm": 0.4202262759208679, "learning_rate": 1.706440096364725e-05, "loss": 0.5704, "step": 16372 }, { "epoch": 0.502964396522594, "grad_norm": 0.33028385043144226, "learning_rate": 1.7064058904825683e-05, "loss": 0.5435, "step": 16373 }, { "epoch": 0.5029951156575431, "grad_norm": 0.3666762709617615, "learning_rate": 1.706371682950551e-05, "loss": 0.6267, "step": 16374 }, { "epoch": 0.5030258347924922, "grad_norm": 0.3302522599697113, "learning_rate": 1.706337473768753e-05, "loss": 0.5767, "step": 16375 }, { "epoch": 0.5030565539274414, "grad_norm": 0.3537541925907135, "learning_rate": 1.706303262937255e-05, "loss": 0.5963, "step": 16376 }, { "epoch": 0.5030872730623905, "grad_norm": 0.37116751074790955, "learning_rate": 1.7062690504561366e-05, "loss": 0.6369, "step": 16377 }, { "epoch": 0.5031179921973398, "grad_norm": 0.35066941380500793, "learning_rate": 1.7062348363254767e-05, "loss": 0.6159, "step": 16378 }, { "epoch": 0.5031487113322889, "grad_norm": 0.34383609890937805, "learning_rate": 1.706200620545357e-05, "loss": 0.5847, "step": 16379 }, { "epoch": 0.503179430467238, "grad_norm": 0.3428250551223755, "learning_rate": 1.706166403115856e-05, "loss": 0.6162, "step": 16380 }, { "epoch": 0.5032101496021872, "grad_norm": 0.3963924050331116, "learning_rate": 1.7061321840370538e-05, "loss": 0.6142, "step": 16381 }, { "epoch": 0.5032408687371364, "grad_norm": 0.3643445074558258, "learning_rate": 1.7060979633090308e-05, "loss": 0.6086, "step": 16382 }, { "epoch": 0.5032715878720855, "grad_norm": 0.35429614782333374, "learning_rate": 1.7060637409318665e-05, "loss": 0.5447, "step": 16383 }, { "epoch": 0.5033023070070347, "grad_norm": 0.3518892228603363, "learning_rate": 1.7060295169056412e-05, "loss": 0.5656, "step": 16384 }, { "epoch": 0.5033330261419838, "grad_norm": 0.3703662157058716, "learning_rate": 1.7059952912304345e-05, "loss": 0.5563, "step": 16385 }, { "epoch": 0.503363745276933, "grad_norm": 0.46895894408226013, "learning_rate": 1.7059610639063268e-05, "loss": 0.5786, "step": 16386 }, { "epoch": 0.5033944644118822, "grad_norm": 0.33008161187171936, "learning_rate": 1.7059268349333972e-05, "loss": 0.4909, "step": 16387 }, { "epoch": 0.5034251835468313, "grad_norm": 0.3269476890563965, "learning_rate": 1.7058926043117265e-05, "loss": 0.6055, "step": 16388 }, { "epoch": 0.5034559026817805, "grad_norm": 0.40903931856155396, "learning_rate": 1.7058583720413942e-05, "loss": 0.6764, "step": 16389 }, { "epoch": 0.5034866218167297, "grad_norm": 0.3345228433609009, "learning_rate": 1.7058241381224804e-05, "loss": 0.5237, "step": 16390 }, { "epoch": 0.5035173409516788, "grad_norm": 0.47206416726112366, "learning_rate": 1.7057899025550653e-05, "loss": 0.5167, "step": 16391 }, { "epoch": 0.503548060086628, "grad_norm": 0.3585769236087799, "learning_rate": 1.7057556653392285e-05, "loss": 0.5604, "step": 16392 }, { "epoch": 0.5035787792215771, "grad_norm": 0.35749754309654236, "learning_rate": 1.70572142647505e-05, "loss": 0.5232, "step": 16393 }, { "epoch": 0.5036094983565262, "grad_norm": 0.3526688814163208, "learning_rate": 1.7056871859626098e-05, "loss": 0.5636, "step": 16394 }, { "epoch": 0.5036402174914755, "grad_norm": 0.3424634337425232, "learning_rate": 1.705652943801988e-05, "loss": 0.5107, "step": 16395 }, { "epoch": 0.5036709366264246, "grad_norm": 0.3279346525669098, "learning_rate": 1.7056186999932643e-05, "loss": 0.5991, "step": 16396 }, { "epoch": 0.5037016557613737, "grad_norm": 0.3407623767852783, "learning_rate": 1.705584454536519e-05, "loss": 0.5388, "step": 16397 }, { "epoch": 0.5037323748963229, "grad_norm": 0.3133019506931305, "learning_rate": 1.7055502074318317e-05, "loss": 0.4657, "step": 16398 }, { "epoch": 0.5037630940312721, "grad_norm": 0.4142535924911499, "learning_rate": 1.7055159586792828e-05, "loss": 0.4905, "step": 16399 }, { "epoch": 0.5037938131662213, "grad_norm": 0.3813423216342926, "learning_rate": 1.705481708278952e-05, "loss": 0.6615, "step": 16400 }, { "epoch": 0.5038245323011704, "grad_norm": 0.3572133183479309, "learning_rate": 1.7054474562309195e-05, "loss": 0.5411, "step": 16401 }, { "epoch": 0.5038552514361195, "grad_norm": 0.3718344271183014, "learning_rate": 1.7054132025352652e-05, "loss": 0.5306, "step": 16402 }, { "epoch": 0.5038859705710688, "grad_norm": 0.3569242060184479, "learning_rate": 1.705378947192069e-05, "loss": 0.6128, "step": 16403 }, { "epoch": 0.5039166897060179, "grad_norm": 0.3474127948284149, "learning_rate": 1.705344690201411e-05, "loss": 0.5932, "step": 16404 }, { "epoch": 0.503947408840967, "grad_norm": 0.3522145450115204, "learning_rate": 1.7053104315633714e-05, "loss": 0.5878, "step": 16405 }, { "epoch": 0.5039781279759162, "grad_norm": 0.3605780303478241, "learning_rate": 1.7052761712780296e-05, "loss": 0.4993, "step": 16406 }, { "epoch": 0.5040088471108654, "grad_norm": 0.3623238503932953, "learning_rate": 1.7052419093454668e-05, "loss": 0.5157, "step": 16407 }, { "epoch": 0.5040395662458145, "grad_norm": 0.3330104351043701, "learning_rate": 1.7052076457657617e-05, "loss": 0.5209, "step": 16408 }, { "epoch": 0.5040702853807637, "grad_norm": 0.3678736388683319, "learning_rate": 1.7051733805389946e-05, "loss": 0.5931, "step": 16409 }, { "epoch": 0.5041010045157128, "grad_norm": 0.3685755431652069, "learning_rate": 1.7051391136652465e-05, "loss": 0.7043, "step": 16410 }, { "epoch": 0.504131723650662, "grad_norm": 0.37114784121513367, "learning_rate": 1.7051048451445963e-05, "loss": 0.51, "step": 16411 }, { "epoch": 0.5041624427856112, "grad_norm": 0.3353554606437683, "learning_rate": 1.7050705749771247e-05, "loss": 0.5389, "step": 16412 }, { "epoch": 0.5041931619205603, "grad_norm": 0.3774676024913788, "learning_rate": 1.7050363031629116e-05, "loss": 0.5719, "step": 16413 }, { "epoch": 0.5042238810555095, "grad_norm": 0.3296366035938263, "learning_rate": 1.705002029702037e-05, "loss": 0.5705, "step": 16414 }, { "epoch": 0.5042546001904586, "grad_norm": 0.38162505626678467, "learning_rate": 1.7049677545945807e-05, "loss": 0.6149, "step": 16415 }, { "epoch": 0.5042853193254078, "grad_norm": 0.3594629466533661, "learning_rate": 1.704933477840623e-05, "loss": 0.6046, "step": 16416 }, { "epoch": 0.504316038460357, "grad_norm": 0.3698381781578064, "learning_rate": 1.704899199440244e-05, "loss": 0.5289, "step": 16417 }, { "epoch": 0.5043467575953061, "grad_norm": 0.3399701416492462, "learning_rate": 1.704864919393524e-05, "loss": 0.5217, "step": 16418 }, { "epoch": 0.5043774767302552, "grad_norm": 0.3488036096096039, "learning_rate": 1.704830637700542e-05, "loss": 0.5267, "step": 16419 }, { "epoch": 0.5044081958652045, "grad_norm": 0.35212236642837524, "learning_rate": 1.7047963543613794e-05, "loss": 0.535, "step": 16420 }, { "epoch": 0.5044389150001536, "grad_norm": 0.36171719431877136, "learning_rate": 1.7047620693761157e-05, "loss": 0.5713, "step": 16421 }, { "epoch": 0.5044696341351027, "grad_norm": 0.33248424530029297, "learning_rate": 1.7047277827448308e-05, "loss": 0.5458, "step": 16422 }, { "epoch": 0.5045003532700519, "grad_norm": 0.3476414084434509, "learning_rate": 1.704693494467605e-05, "loss": 0.4922, "step": 16423 }, { "epoch": 0.5045310724050011, "grad_norm": 0.40512871742248535, "learning_rate": 1.7046592045445183e-05, "loss": 0.6601, "step": 16424 }, { "epoch": 0.5045617915399503, "grad_norm": 0.3696553707122803, "learning_rate": 1.7046249129756506e-05, "loss": 0.618, "step": 16425 }, { "epoch": 0.5045925106748994, "grad_norm": 0.33864113688468933, "learning_rate": 1.7045906197610823e-05, "loss": 0.5726, "step": 16426 }, { "epoch": 0.5046232298098485, "grad_norm": 0.36879199743270874, "learning_rate": 1.7045563249008937e-05, "loss": 0.6177, "step": 16427 }, { "epoch": 0.5046539489447978, "grad_norm": 0.37027937173843384, "learning_rate": 1.7045220283951643e-05, "loss": 0.5627, "step": 16428 }, { "epoch": 0.5046846680797469, "grad_norm": 0.38616907596588135, "learning_rate": 1.7044877302439745e-05, "loss": 0.5572, "step": 16429 }, { "epoch": 0.504715387214696, "grad_norm": 0.4441499710083008, "learning_rate": 1.704453430447405e-05, "loss": 0.5796, "step": 16430 }, { "epoch": 0.5047461063496452, "grad_norm": 0.34404057264328003, "learning_rate": 1.7044191290055344e-05, "loss": 0.5914, "step": 16431 }, { "epoch": 0.5047768254845943, "grad_norm": 0.41086524724960327, "learning_rate": 1.7043848259184444e-05, "loss": 0.5631, "step": 16432 }, { "epoch": 0.5048075446195435, "grad_norm": 0.3904730975627899, "learning_rate": 1.704350521186214e-05, "loss": 0.603, "step": 16433 }, { "epoch": 0.5048382637544927, "grad_norm": 0.33796146512031555, "learning_rate": 1.704316214808924e-05, "loss": 0.5691, "step": 16434 }, { "epoch": 0.5048689828894418, "grad_norm": 0.3419289290904999, "learning_rate": 1.7042819067866544e-05, "loss": 0.5779, "step": 16435 }, { "epoch": 0.5048997020243909, "grad_norm": 0.3246111571788788, "learning_rate": 1.704247597119485e-05, "loss": 0.5118, "step": 16436 }, { "epoch": 0.5049304211593402, "grad_norm": 0.3854599595069885, "learning_rate": 1.7042132858074965e-05, "loss": 0.6525, "step": 16437 }, { "epoch": 0.5049611402942893, "grad_norm": 0.3631216287612915, "learning_rate": 1.704178972850768e-05, "loss": 0.6141, "step": 16438 }, { "epoch": 0.5049918594292385, "grad_norm": 0.34499746561050415, "learning_rate": 1.7041446582493812e-05, "loss": 0.5586, "step": 16439 }, { "epoch": 0.5050225785641876, "grad_norm": 0.3533022403717041, "learning_rate": 1.704110342003415e-05, "loss": 0.5963, "step": 16440 }, { "epoch": 0.5050532976991368, "grad_norm": 0.39058423042297363, "learning_rate": 1.7040760241129496e-05, "loss": 0.5693, "step": 16441 }, { "epoch": 0.505084016834086, "grad_norm": 0.35843124985694885, "learning_rate": 1.704041704578066e-05, "loss": 0.5843, "step": 16442 }, { "epoch": 0.5051147359690351, "grad_norm": 0.3417917788028717, "learning_rate": 1.7040073833988432e-05, "loss": 0.6039, "step": 16443 }, { "epoch": 0.5051454551039842, "grad_norm": 0.3495767116546631, "learning_rate": 1.7039730605753628e-05, "loss": 0.5098, "step": 16444 }, { "epoch": 0.5051761742389335, "grad_norm": 0.38381117582321167, "learning_rate": 1.7039387361077033e-05, "loss": 0.4761, "step": 16445 }, { "epoch": 0.5052068933738826, "grad_norm": 0.38330212235450745, "learning_rate": 1.7039044099959464e-05, "loss": 0.4926, "step": 16446 }, { "epoch": 0.5052376125088317, "grad_norm": 0.3325798809528351, "learning_rate": 1.703870082240171e-05, "loss": 0.5459, "step": 16447 }, { "epoch": 0.5052683316437809, "grad_norm": 0.43798038363456726, "learning_rate": 1.7038357528404582e-05, "loss": 0.5128, "step": 16448 }, { "epoch": 0.50529905077873, "grad_norm": 0.3683296740055084, "learning_rate": 1.703801421796888e-05, "loss": 0.5675, "step": 16449 }, { "epoch": 0.5053297699136793, "grad_norm": 0.33793550729751587, "learning_rate": 1.70376708910954e-05, "loss": 0.5519, "step": 16450 }, { "epoch": 0.5053604890486284, "grad_norm": 0.3584577143192291, "learning_rate": 1.7037327547784953e-05, "loss": 0.5808, "step": 16451 }, { "epoch": 0.5053912081835775, "grad_norm": 0.3837217390537262, "learning_rate": 1.7036984188038332e-05, "loss": 0.5666, "step": 16452 }, { "epoch": 0.5054219273185268, "grad_norm": 0.3687877953052521, "learning_rate": 1.7036640811856345e-05, "loss": 0.6112, "step": 16453 }, { "epoch": 0.5054526464534759, "grad_norm": 0.35973477363586426, "learning_rate": 1.703629741923979e-05, "loss": 0.6549, "step": 16454 }, { "epoch": 0.505483365588425, "grad_norm": 0.3848072290420532, "learning_rate": 1.7035954010189476e-05, "loss": 0.524, "step": 16455 }, { "epoch": 0.5055140847233742, "grad_norm": 0.3415275514125824, "learning_rate": 1.7035610584706194e-05, "loss": 0.5611, "step": 16456 }, { "epoch": 0.5055448038583233, "grad_norm": 0.3626692295074463, "learning_rate": 1.7035267142790757e-05, "loss": 0.5475, "step": 16457 }, { "epoch": 0.5055755229932725, "grad_norm": 0.3294049799442291, "learning_rate": 1.7034923684443958e-05, "loss": 0.5732, "step": 16458 }, { "epoch": 0.5056062421282217, "grad_norm": 0.3277556598186493, "learning_rate": 1.7034580209666604e-05, "loss": 0.5703, "step": 16459 }, { "epoch": 0.5056369612631708, "grad_norm": 0.3649975061416626, "learning_rate": 1.70342367184595e-05, "loss": 0.5079, "step": 16460 }, { "epoch": 0.5056676803981199, "grad_norm": 0.3199576735496521, "learning_rate": 1.7033893210823445e-05, "loss": 0.5876, "step": 16461 }, { "epoch": 0.5056983995330692, "grad_norm": 0.3362707495689392, "learning_rate": 1.7033549686759236e-05, "loss": 0.5803, "step": 16462 }, { "epoch": 0.5057291186680183, "grad_norm": 0.38490909337997437, "learning_rate": 1.7033206146267683e-05, "loss": 0.5633, "step": 16463 }, { "epoch": 0.5057598378029675, "grad_norm": 0.3471192419528961, "learning_rate": 1.7032862589349587e-05, "loss": 0.5909, "step": 16464 }, { "epoch": 0.5057905569379166, "grad_norm": 0.3443561792373657, "learning_rate": 1.703251901600575e-05, "loss": 0.6178, "step": 16465 }, { "epoch": 0.5058212760728658, "grad_norm": 0.33409011363983154, "learning_rate": 1.703217542623697e-05, "loss": 0.47, "step": 16466 }, { "epoch": 0.505851995207815, "grad_norm": 0.37545517086982727, "learning_rate": 1.7031831820044055e-05, "loss": 0.556, "step": 16467 }, { "epoch": 0.5058827143427641, "grad_norm": 0.31932613253593445, "learning_rate": 1.7031488197427806e-05, "loss": 0.6073, "step": 16468 }, { "epoch": 0.5059134334777132, "grad_norm": 0.43556147813796997, "learning_rate": 1.703114455838903e-05, "loss": 0.5462, "step": 16469 }, { "epoch": 0.5059441526126625, "grad_norm": 0.3497317135334015, "learning_rate": 1.7030800902928516e-05, "loss": 0.53, "step": 16470 }, { "epoch": 0.5059748717476116, "grad_norm": 0.33984193205833435, "learning_rate": 1.7030457231047083e-05, "loss": 0.568, "step": 16471 }, { "epoch": 0.5060055908825607, "grad_norm": 0.3311612606048584, "learning_rate": 1.703011354274552e-05, "loss": 0.5524, "step": 16472 }, { "epoch": 0.5060363100175099, "grad_norm": 0.3687147796154022, "learning_rate": 1.7029769838024643e-05, "loss": 0.5267, "step": 16473 }, { "epoch": 0.506067029152459, "grad_norm": 0.3366023302078247, "learning_rate": 1.7029426116885242e-05, "loss": 0.5742, "step": 16474 }, { "epoch": 0.5060977482874083, "grad_norm": 0.34957531094551086, "learning_rate": 1.7029082379328127e-05, "loss": 0.5393, "step": 16475 }, { "epoch": 0.5061284674223574, "grad_norm": 0.34707367420196533, "learning_rate": 1.7028738625354097e-05, "loss": 0.5188, "step": 16476 }, { "epoch": 0.5061591865573065, "grad_norm": 0.34074804186820984, "learning_rate": 1.702839485496396e-05, "loss": 0.6297, "step": 16477 }, { "epoch": 0.5061899056922557, "grad_norm": 0.3912140429019928, "learning_rate": 1.7028051068158515e-05, "loss": 0.5263, "step": 16478 }, { "epoch": 0.5062206248272049, "grad_norm": 0.33555325865745544, "learning_rate": 1.7027707264938566e-05, "loss": 0.5269, "step": 16479 }, { "epoch": 0.506251343962154, "grad_norm": 0.33788001537323, "learning_rate": 1.7027363445304913e-05, "loss": 0.5072, "step": 16480 }, { "epoch": 0.5062820630971032, "grad_norm": 0.36641332507133484, "learning_rate": 1.7027019609258365e-05, "loss": 0.6188, "step": 16481 }, { "epoch": 0.5063127822320523, "grad_norm": 0.34242090582847595, "learning_rate": 1.7026675756799722e-05, "loss": 0.6141, "step": 16482 }, { "epoch": 0.5063435013670015, "grad_norm": 0.33258190751075745, "learning_rate": 1.7026331887929786e-05, "loss": 0.5069, "step": 16483 }, { "epoch": 0.5063742205019507, "grad_norm": 0.40084341168403625, "learning_rate": 1.7025988002649363e-05, "loss": 0.5587, "step": 16484 }, { "epoch": 0.5064049396368998, "grad_norm": 0.5045223832130432, "learning_rate": 1.702564410095925e-05, "loss": 0.559, "step": 16485 }, { "epoch": 0.506435658771849, "grad_norm": 0.35669848322868347, "learning_rate": 1.702530018286026e-05, "loss": 0.6015, "step": 16486 }, { "epoch": 0.5064663779067982, "grad_norm": 0.41286545991897583, "learning_rate": 1.7024956248353187e-05, "loss": 0.5481, "step": 16487 }, { "epoch": 0.5064970970417473, "grad_norm": 0.3313335180282593, "learning_rate": 1.702461229743884e-05, "loss": 0.5588, "step": 16488 }, { "epoch": 0.5065278161766965, "grad_norm": 0.3453749120235443, "learning_rate": 1.7024268330118018e-05, "loss": 0.4875, "step": 16489 }, { "epoch": 0.5065585353116456, "grad_norm": 0.40341249108314514, "learning_rate": 1.702392434639153e-05, "loss": 0.4888, "step": 16490 }, { "epoch": 0.5065892544465948, "grad_norm": 0.3324063718318939, "learning_rate": 1.7023580346260177e-05, "loss": 0.5344, "step": 16491 }, { "epoch": 0.506619973581544, "grad_norm": 0.39296022057533264, "learning_rate": 1.7023236329724758e-05, "loss": 0.6248, "step": 16492 }, { "epoch": 0.5066506927164931, "grad_norm": 0.34492865204811096, "learning_rate": 1.702289229678608e-05, "loss": 0.5899, "step": 16493 }, { "epoch": 0.5066814118514422, "grad_norm": 0.34118467569351196, "learning_rate": 1.7022548247444948e-05, "loss": 0.5851, "step": 16494 }, { "epoch": 0.5067121309863915, "grad_norm": 0.35659393668174744, "learning_rate": 1.7022204181702166e-05, "loss": 0.5573, "step": 16495 }, { "epoch": 0.5067428501213406, "grad_norm": 0.3251129686832428, "learning_rate": 1.7021860099558532e-05, "loss": 0.5619, "step": 16496 }, { "epoch": 0.5067735692562897, "grad_norm": 0.33572420477867126, "learning_rate": 1.7021516001014854e-05, "loss": 0.54, "step": 16497 }, { "epoch": 0.5068042883912389, "grad_norm": 0.35621002316474915, "learning_rate": 1.7021171886071936e-05, "loss": 0.5312, "step": 16498 }, { "epoch": 0.506835007526188, "grad_norm": 0.4112642705440521, "learning_rate": 1.702082775473058e-05, "loss": 0.6723, "step": 16499 }, { "epoch": 0.5068657266611373, "grad_norm": 0.38374510407447815, "learning_rate": 1.702048360699159e-05, "loss": 0.629, "step": 16500 }, { "epoch": 0.5068964457960864, "grad_norm": 0.35273492336273193, "learning_rate": 1.7020139442855774e-05, "loss": 0.5724, "step": 16501 }, { "epoch": 0.5069271649310355, "grad_norm": 0.3725540041923523, "learning_rate": 1.7019795262323927e-05, "loss": 0.5231, "step": 16502 }, { "epoch": 0.5069578840659847, "grad_norm": 0.3546208441257477, "learning_rate": 1.701945106539686e-05, "loss": 0.5191, "step": 16503 }, { "epoch": 0.5069886032009339, "grad_norm": 0.3423897922039032, "learning_rate": 1.701910685207538e-05, "loss": 0.5534, "step": 16504 }, { "epoch": 0.507019322335883, "grad_norm": 0.3417624235153198, "learning_rate": 1.7018762622360277e-05, "loss": 0.6085, "step": 16505 }, { "epoch": 0.5070500414708322, "grad_norm": 0.34936320781707764, "learning_rate": 1.701841837625237e-05, "loss": 0.5393, "step": 16506 }, { "epoch": 0.5070807606057813, "grad_norm": 0.36458614468574524, "learning_rate": 1.7018074113752453e-05, "loss": 0.6269, "step": 16507 }, { "epoch": 0.5071114797407305, "grad_norm": 0.3504768908023834, "learning_rate": 1.7017729834861335e-05, "loss": 0.5898, "step": 16508 }, { "epoch": 0.5071421988756797, "grad_norm": 0.3580353856086731, "learning_rate": 1.7017385539579818e-05, "loss": 0.5806, "step": 16509 }, { "epoch": 0.5071729180106288, "grad_norm": 0.3759450316429138, "learning_rate": 1.7017041227908708e-05, "loss": 0.5853, "step": 16510 }, { "epoch": 0.507203637145578, "grad_norm": 0.3542855679988861, "learning_rate": 1.7016696899848807e-05, "loss": 0.5178, "step": 16511 }, { "epoch": 0.5072343562805272, "grad_norm": 0.3236028254032135, "learning_rate": 1.7016352555400924e-05, "loss": 0.5292, "step": 16512 }, { "epoch": 0.5072650754154763, "grad_norm": 0.34643203020095825, "learning_rate": 1.701600819456586e-05, "loss": 0.5693, "step": 16513 }, { "epoch": 0.5072957945504255, "grad_norm": 0.40425342321395874, "learning_rate": 1.701566381734441e-05, "loss": 0.6089, "step": 16514 }, { "epoch": 0.5073265136853746, "grad_norm": 0.33668088912963867, "learning_rate": 1.7015319423737395e-05, "loss": 0.5504, "step": 16515 }, { "epoch": 0.5073572328203237, "grad_norm": 0.3519352376461029, "learning_rate": 1.701497501374561e-05, "loss": 0.6254, "step": 16516 }, { "epoch": 0.507387951955273, "grad_norm": 0.3565995991230011, "learning_rate": 1.701463058736986e-05, "loss": 0.533, "step": 16517 }, { "epoch": 0.5074186710902221, "grad_norm": 0.3550094962120056, "learning_rate": 1.701428614461095e-05, "loss": 0.5279, "step": 16518 }, { "epoch": 0.5074493902251712, "grad_norm": 0.37985143065452576, "learning_rate": 1.7013941685469685e-05, "loss": 0.6099, "step": 16519 }, { "epoch": 0.5074801093601204, "grad_norm": 0.3506864011287689, "learning_rate": 1.701359720994687e-05, "loss": 0.5438, "step": 16520 }, { "epoch": 0.5075108284950696, "grad_norm": 0.34166571497917175, "learning_rate": 1.7013252718043308e-05, "loss": 0.5004, "step": 16521 }, { "epoch": 0.5075415476300187, "grad_norm": 0.4985184669494629, "learning_rate": 1.7012908209759808e-05, "loss": 0.6115, "step": 16522 }, { "epoch": 0.5075722667649679, "grad_norm": 0.36283382773399353, "learning_rate": 1.701256368509717e-05, "loss": 0.6274, "step": 16523 }, { "epoch": 0.507602985899917, "grad_norm": 0.32661452889442444, "learning_rate": 1.7012219144056196e-05, "loss": 0.5975, "step": 16524 }, { "epoch": 0.5076337050348663, "grad_norm": 0.3278598189353943, "learning_rate": 1.7011874586637697e-05, "loss": 0.4983, "step": 16525 }, { "epoch": 0.5076644241698154, "grad_norm": 0.3805789351463318, "learning_rate": 1.7011530012842476e-05, "loss": 0.5972, "step": 16526 }, { "epoch": 0.5076951433047645, "grad_norm": 0.36597609519958496, "learning_rate": 1.7011185422671338e-05, "loss": 0.6036, "step": 16527 }, { "epoch": 0.5077258624397137, "grad_norm": 0.31508883833885193, "learning_rate": 1.7010840816125084e-05, "loss": 0.4418, "step": 16528 }, { "epoch": 0.5077565815746629, "grad_norm": 0.3727176785469055, "learning_rate": 1.701049619320452e-05, "loss": 0.5442, "step": 16529 }, { "epoch": 0.507787300709612, "grad_norm": 0.41083505749702454, "learning_rate": 1.7010151553910458e-05, "loss": 0.644, "step": 16530 }, { "epoch": 0.5078180198445612, "grad_norm": 0.351976215839386, "learning_rate": 1.7009806898243693e-05, "loss": 0.4919, "step": 16531 }, { "epoch": 0.5078487389795103, "grad_norm": 0.3763429522514343, "learning_rate": 1.7009462226205037e-05, "loss": 0.5411, "step": 16532 }, { "epoch": 0.5078794581144594, "grad_norm": 0.3278643488883972, "learning_rate": 1.700911753779529e-05, "loss": 0.5606, "step": 16533 }, { "epoch": 0.5079101772494087, "grad_norm": 0.3524758219718933, "learning_rate": 1.7008772833015262e-05, "loss": 0.5302, "step": 16534 }, { "epoch": 0.5079408963843578, "grad_norm": 0.3800450563430786, "learning_rate": 1.7008428111865752e-05, "loss": 0.6034, "step": 16535 }, { "epoch": 0.507971615519307, "grad_norm": 0.34616830945014954, "learning_rate": 1.700808337434757e-05, "loss": 0.5771, "step": 16536 }, { "epoch": 0.5080023346542561, "grad_norm": 0.3379976153373718, "learning_rate": 1.700773862046152e-05, "loss": 0.6564, "step": 16537 }, { "epoch": 0.5080330537892053, "grad_norm": 0.3312512934207916, "learning_rate": 1.7007393850208413e-05, "loss": 0.5826, "step": 16538 }, { "epoch": 0.5080637729241545, "grad_norm": 0.36482739448547363, "learning_rate": 1.700704906358904e-05, "loss": 0.6349, "step": 16539 }, { "epoch": 0.5080944920591036, "grad_norm": 0.42604655027389526, "learning_rate": 1.700670426060422e-05, "loss": 0.562, "step": 16540 }, { "epoch": 0.5081252111940527, "grad_norm": 0.3218283951282501, "learning_rate": 1.700635944125475e-05, "loss": 0.5664, "step": 16541 }, { "epoch": 0.508155930329002, "grad_norm": 0.3398570716381073, "learning_rate": 1.7006014605541436e-05, "loss": 0.5877, "step": 16542 }, { "epoch": 0.5081866494639511, "grad_norm": 0.3439332842826843, "learning_rate": 1.7005669753465088e-05, "loss": 0.5683, "step": 16543 }, { "epoch": 0.5082173685989002, "grad_norm": 0.3246372938156128, "learning_rate": 1.7005324885026512e-05, "loss": 0.5209, "step": 16544 }, { "epoch": 0.5082480877338494, "grad_norm": 0.30856677889823914, "learning_rate": 1.7004980000226506e-05, "loss": 0.5152, "step": 16545 }, { "epoch": 0.5082788068687986, "grad_norm": 0.3446982502937317, "learning_rate": 1.7004635099065882e-05, "loss": 0.5353, "step": 16546 }, { "epoch": 0.5083095260037477, "grad_norm": 0.3335891366004944, "learning_rate": 1.7004290181545444e-05, "loss": 0.6602, "step": 16547 }, { "epoch": 0.5083402451386969, "grad_norm": 0.5275238752365112, "learning_rate": 1.7003945247665998e-05, "loss": 0.5435, "step": 16548 }, { "epoch": 0.508370964273646, "grad_norm": 0.3776610493659973, "learning_rate": 1.7003600297428346e-05, "loss": 0.5884, "step": 16549 }, { "epoch": 0.5084016834085953, "grad_norm": 0.3592738211154938, "learning_rate": 1.7003255330833296e-05, "loss": 0.5517, "step": 16550 }, { "epoch": 0.5084324025435444, "grad_norm": 0.34127503633499146, "learning_rate": 1.700291034788166e-05, "loss": 0.61, "step": 16551 }, { "epoch": 0.5084631216784935, "grad_norm": 0.3552951514720917, "learning_rate": 1.700256534857423e-05, "loss": 0.5733, "step": 16552 }, { "epoch": 0.5084938408134427, "grad_norm": 0.3729267716407776, "learning_rate": 1.7002220332911825e-05, "loss": 0.6105, "step": 16553 }, { "epoch": 0.5085245599483919, "grad_norm": 0.3680780529975891, "learning_rate": 1.700187530089524e-05, "loss": 0.5125, "step": 16554 }, { "epoch": 0.508555279083341, "grad_norm": 0.3215462267398834, "learning_rate": 1.700153025252529e-05, "loss": 0.563, "step": 16555 }, { "epoch": 0.5085859982182902, "grad_norm": 0.43576905131340027, "learning_rate": 1.7001185187802778e-05, "loss": 0.6403, "step": 16556 }, { "epoch": 0.5086167173532393, "grad_norm": 0.3311092257499695, "learning_rate": 1.7000840106728507e-05, "loss": 0.5606, "step": 16557 }, { "epoch": 0.5086474364881884, "grad_norm": 0.30111387372016907, "learning_rate": 1.7000495009303285e-05, "loss": 0.585, "step": 16558 }, { "epoch": 0.5086781556231377, "grad_norm": 0.3332185447216034, "learning_rate": 1.700014989552792e-05, "loss": 0.5936, "step": 16559 }, { "epoch": 0.5087088747580868, "grad_norm": 0.3910095691680908, "learning_rate": 1.6999804765403217e-05, "loss": 0.6717, "step": 16560 }, { "epoch": 0.508739593893036, "grad_norm": 0.3740534782409668, "learning_rate": 1.6999459618929976e-05, "loss": 0.5542, "step": 16561 }, { "epoch": 0.5087703130279851, "grad_norm": 0.3581983149051666, "learning_rate": 1.699911445610901e-05, "loss": 0.5072, "step": 16562 }, { "epoch": 0.5088010321629343, "grad_norm": 0.3699452877044678, "learning_rate": 1.6998769276941124e-05, "loss": 0.5923, "step": 16563 }, { "epoch": 0.5088317512978835, "grad_norm": 0.3641567528247833, "learning_rate": 1.6998424081427126e-05, "loss": 0.5949, "step": 16564 }, { "epoch": 0.5088624704328326, "grad_norm": 0.3402317762374878, "learning_rate": 1.6998078869567817e-05, "loss": 0.4606, "step": 16565 }, { "epoch": 0.5088931895677817, "grad_norm": 0.3457868695259094, "learning_rate": 1.699773364136401e-05, "loss": 0.5849, "step": 16566 }, { "epoch": 0.508923908702731, "grad_norm": 0.3810103237628937, "learning_rate": 1.6997388396816502e-05, "loss": 0.5326, "step": 16567 }, { "epoch": 0.5089546278376801, "grad_norm": 0.36601266264915466, "learning_rate": 1.699704313592611e-05, "loss": 0.5288, "step": 16568 }, { "epoch": 0.5089853469726292, "grad_norm": 0.3424490690231323, "learning_rate": 1.699669785869363e-05, "loss": 0.5464, "step": 16569 }, { "epoch": 0.5090160661075784, "grad_norm": 0.36186596751213074, "learning_rate": 1.6996352565119875e-05, "loss": 0.5384, "step": 16570 }, { "epoch": 0.5090467852425276, "grad_norm": 0.35276007652282715, "learning_rate": 1.699600725520565e-05, "loss": 0.647, "step": 16571 }, { "epoch": 0.5090775043774767, "grad_norm": 0.32069772481918335, "learning_rate": 1.6995661928951764e-05, "loss": 0.5597, "step": 16572 }, { "epoch": 0.5091082235124259, "grad_norm": 0.3806896209716797, "learning_rate": 1.699531658635902e-05, "loss": 0.5934, "step": 16573 }, { "epoch": 0.509138942647375, "grad_norm": 0.35602137446403503, "learning_rate": 1.6994971227428224e-05, "loss": 0.5884, "step": 16574 }, { "epoch": 0.5091696617823243, "grad_norm": 0.37929409742355347, "learning_rate": 1.6994625852160183e-05, "loss": 0.5003, "step": 16575 }, { "epoch": 0.5092003809172734, "grad_norm": 0.33972299098968506, "learning_rate": 1.6994280460555706e-05, "loss": 0.5715, "step": 16576 }, { "epoch": 0.5092311000522225, "grad_norm": 0.3628424406051636, "learning_rate": 1.69939350526156e-05, "loss": 0.628, "step": 16577 }, { "epoch": 0.5092618191871717, "grad_norm": 0.3933625817298889, "learning_rate": 1.699358962834067e-05, "loss": 0.6152, "step": 16578 }, { "epoch": 0.5092925383221208, "grad_norm": 0.310471773147583, "learning_rate": 1.6993244187731723e-05, "loss": 0.5319, "step": 16579 }, { "epoch": 0.50932325745707, "grad_norm": 0.3259899616241455, "learning_rate": 1.6992898730789563e-05, "loss": 0.5453, "step": 16580 }, { "epoch": 0.5093539765920192, "grad_norm": 0.35241934657096863, "learning_rate": 1.6992553257515003e-05, "loss": 0.592, "step": 16581 }, { "epoch": 0.5093846957269683, "grad_norm": 0.3511350750923157, "learning_rate": 1.6992207767908847e-05, "loss": 0.6085, "step": 16582 }, { "epoch": 0.5094154148619174, "grad_norm": 0.555147111415863, "learning_rate": 1.6991862261971897e-05, "loss": 0.6108, "step": 16583 }, { "epoch": 0.5094461339968667, "grad_norm": 0.37142276763916016, "learning_rate": 1.6991516739704967e-05, "loss": 0.5715, "step": 16584 }, { "epoch": 0.5094768531318158, "grad_norm": 0.5098720192909241, "learning_rate": 1.6991171201108862e-05, "loss": 0.524, "step": 16585 }, { "epoch": 0.509507572266765, "grad_norm": 0.36604177951812744, "learning_rate": 1.6990825646184388e-05, "loss": 0.5551, "step": 16586 }, { "epoch": 0.5095382914017141, "grad_norm": 0.34208497405052185, "learning_rate": 1.699048007493235e-05, "loss": 0.5692, "step": 16587 }, { "epoch": 0.5095690105366633, "grad_norm": 0.5650104880332947, "learning_rate": 1.6990134487353564e-05, "loss": 0.6239, "step": 16588 }, { "epoch": 0.5095997296716125, "grad_norm": 0.4907754063606262, "learning_rate": 1.6989788883448827e-05, "loss": 0.6235, "step": 16589 }, { "epoch": 0.5096304488065616, "grad_norm": 0.39810892939567566, "learning_rate": 1.6989443263218946e-05, "loss": 0.4855, "step": 16590 }, { "epoch": 0.5096611679415107, "grad_norm": 0.41098353266716003, "learning_rate": 1.6989097626664737e-05, "loss": 0.6319, "step": 16591 }, { "epoch": 0.50969188707646, "grad_norm": 0.35269778966903687, "learning_rate": 1.6988751973786998e-05, "loss": 0.5338, "step": 16592 }, { "epoch": 0.5097226062114091, "grad_norm": 0.33485308289527893, "learning_rate": 1.6988406304586544e-05, "loss": 0.4269, "step": 16593 }, { "epoch": 0.5097533253463582, "grad_norm": 0.35952529311180115, "learning_rate": 1.6988060619064182e-05, "loss": 0.4667, "step": 16594 }, { "epoch": 0.5097840444813074, "grad_norm": 0.36237633228302, "learning_rate": 1.698771491722071e-05, "loss": 0.5932, "step": 16595 }, { "epoch": 0.5098147636162565, "grad_norm": 0.3396117389202118, "learning_rate": 1.6987369199056945e-05, "loss": 0.5494, "step": 16596 }, { "epoch": 0.5098454827512058, "grad_norm": 0.3029267191886902, "learning_rate": 1.698702346457369e-05, "loss": 0.5473, "step": 16597 }, { "epoch": 0.5098762018861549, "grad_norm": 0.3591664135456085, "learning_rate": 1.698667771377176e-05, "loss": 0.5307, "step": 16598 }, { "epoch": 0.509906921021104, "grad_norm": 0.34584006667137146, "learning_rate": 1.698633194665195e-05, "loss": 0.493, "step": 16599 }, { "epoch": 0.5099376401560533, "grad_norm": 0.33521080017089844, "learning_rate": 1.6985986163215075e-05, "loss": 0.5111, "step": 16600 }, { "epoch": 0.5099683592910024, "grad_norm": 0.3228515088558197, "learning_rate": 1.698564036346194e-05, "loss": 0.5253, "step": 16601 }, { "epoch": 0.5099990784259515, "grad_norm": 0.3443116843700409, "learning_rate": 1.6985294547393357e-05, "loss": 0.5533, "step": 16602 }, { "epoch": 0.5100297975609007, "grad_norm": 0.30474478006362915, "learning_rate": 1.698494871501013e-05, "loss": 0.5177, "step": 16603 }, { "epoch": 0.5100605166958498, "grad_norm": 0.35691675543785095, "learning_rate": 1.6984602866313066e-05, "loss": 0.5344, "step": 16604 }, { "epoch": 0.510091235830799, "grad_norm": 0.3305722177028656, "learning_rate": 1.6984257001302972e-05, "loss": 0.5095, "step": 16605 }, { "epoch": 0.5101219549657482, "grad_norm": 0.33528611063957214, "learning_rate": 1.698391111998066e-05, "loss": 0.5492, "step": 16606 }, { "epoch": 0.5101526741006973, "grad_norm": 0.32127898931503296, "learning_rate": 1.6983565222346937e-05, "loss": 0.5715, "step": 16607 }, { "epoch": 0.5101833932356464, "grad_norm": 0.40661752223968506, "learning_rate": 1.698321930840261e-05, "loss": 0.6242, "step": 16608 }, { "epoch": 0.5102141123705957, "grad_norm": 0.33474403619766235, "learning_rate": 1.6982873378148487e-05, "loss": 0.4877, "step": 16609 }, { "epoch": 0.5102448315055448, "grad_norm": 0.39441296458244324, "learning_rate": 1.6982527431585373e-05, "loss": 0.5638, "step": 16610 }, { "epoch": 0.510275550640494, "grad_norm": 0.362701952457428, "learning_rate": 1.698218146871408e-05, "loss": 0.6066, "step": 16611 }, { "epoch": 0.5103062697754431, "grad_norm": 0.33609259128570557, "learning_rate": 1.6981835489535417e-05, "loss": 0.5503, "step": 16612 }, { "epoch": 0.5103369889103923, "grad_norm": 0.46907398104667664, "learning_rate": 1.6981489494050184e-05, "loss": 0.5766, "step": 16613 }, { "epoch": 0.5103677080453415, "grad_norm": 0.3569093942642212, "learning_rate": 1.6981143482259198e-05, "loss": 0.6049, "step": 16614 }, { "epoch": 0.5103984271802906, "grad_norm": 0.35034283995628357, "learning_rate": 1.6980797454163264e-05, "loss": 0.5916, "step": 16615 }, { "epoch": 0.5104291463152397, "grad_norm": 0.3477635085582733, "learning_rate": 1.6980451409763193e-05, "loss": 0.5629, "step": 16616 }, { "epoch": 0.510459865450189, "grad_norm": 0.3356991112232208, "learning_rate": 1.6980105349059785e-05, "loss": 0.5819, "step": 16617 }, { "epoch": 0.5104905845851381, "grad_norm": 0.3352585434913635, "learning_rate": 1.6979759272053854e-05, "loss": 0.496, "step": 16618 }, { "epoch": 0.5105213037200872, "grad_norm": 0.5206630825996399, "learning_rate": 1.6979413178746213e-05, "loss": 0.5815, "step": 16619 }, { "epoch": 0.5105520228550364, "grad_norm": 0.44878262281417847, "learning_rate": 1.697906706913766e-05, "loss": 0.6708, "step": 16620 }, { "epoch": 0.5105827419899855, "grad_norm": 0.3649146258831024, "learning_rate": 1.697872094322901e-05, "loss": 0.5386, "step": 16621 }, { "epoch": 0.5106134611249348, "grad_norm": 0.42647942900657654, "learning_rate": 1.697837480102107e-05, "loss": 0.6145, "step": 16622 }, { "epoch": 0.5106441802598839, "grad_norm": 0.34919002652168274, "learning_rate": 1.6978028642514646e-05, "loss": 0.5623, "step": 16623 }, { "epoch": 0.510674899394833, "grad_norm": 0.36301660537719727, "learning_rate": 1.6977682467710552e-05, "loss": 0.6447, "step": 16624 }, { "epoch": 0.5107056185297822, "grad_norm": 0.3542923033237457, "learning_rate": 1.697733627660959e-05, "loss": 0.569, "step": 16625 }, { "epoch": 0.5107363376647314, "grad_norm": 0.3932088017463684, "learning_rate": 1.6976990069212576e-05, "loss": 0.6549, "step": 16626 }, { "epoch": 0.5107670567996805, "grad_norm": 0.35032373666763306, "learning_rate": 1.697664384552031e-05, "loss": 0.5239, "step": 16627 }, { "epoch": 0.5107977759346297, "grad_norm": 0.3904242217540741, "learning_rate": 1.6976297605533605e-05, "loss": 0.5529, "step": 16628 }, { "epoch": 0.5108284950695788, "grad_norm": 0.4251341223716736, "learning_rate": 1.6975951349253274e-05, "loss": 0.5938, "step": 16629 }, { "epoch": 0.510859214204528, "grad_norm": 0.36328569054603577, "learning_rate": 1.6975605076680118e-05, "loss": 0.6062, "step": 16630 }, { "epoch": 0.5108899333394772, "grad_norm": 0.3420049846172333, "learning_rate": 1.697525878781495e-05, "loss": 0.6471, "step": 16631 }, { "epoch": 0.5109206524744263, "grad_norm": 0.3844349682331085, "learning_rate": 1.6974912482658583e-05, "loss": 0.5773, "step": 16632 }, { "epoch": 0.5109513716093754, "grad_norm": 0.3497650921344757, "learning_rate": 1.6974566161211815e-05, "loss": 0.5652, "step": 16633 }, { "epoch": 0.5109820907443247, "grad_norm": 0.3486821949481964, "learning_rate": 1.6974219823475463e-05, "loss": 0.5743, "step": 16634 }, { "epoch": 0.5110128098792738, "grad_norm": 0.34332311153411865, "learning_rate": 1.697387346945033e-05, "loss": 0.4702, "step": 16635 }, { "epoch": 0.511043529014223, "grad_norm": 0.42813539505004883, "learning_rate": 1.697352709913723e-05, "loss": 0.5612, "step": 16636 }, { "epoch": 0.5110742481491721, "grad_norm": 0.35816460847854614, "learning_rate": 1.6973180712536974e-05, "loss": 0.6247, "step": 16637 }, { "epoch": 0.5111049672841212, "grad_norm": 0.35438984632492065, "learning_rate": 1.6972834309650364e-05, "loss": 0.5936, "step": 16638 }, { "epoch": 0.5111356864190705, "grad_norm": 0.3786731958389282, "learning_rate": 1.6972487890478214e-05, "loss": 0.6174, "step": 16639 }, { "epoch": 0.5111664055540196, "grad_norm": 0.34212014079093933, "learning_rate": 1.697214145502133e-05, "loss": 0.5363, "step": 16640 }, { "epoch": 0.5111971246889687, "grad_norm": 0.3320932686328888, "learning_rate": 1.6971795003280523e-05, "loss": 0.5141, "step": 16641 }, { "epoch": 0.511227843823918, "grad_norm": 0.4228461980819702, "learning_rate": 1.6971448535256602e-05, "loss": 0.5524, "step": 16642 }, { "epoch": 0.5112585629588671, "grad_norm": 0.34028616547584534, "learning_rate": 1.6971102050950377e-05, "loss": 0.5677, "step": 16643 }, { "epoch": 0.5112892820938162, "grad_norm": 0.3198191523551941, "learning_rate": 1.6970755550362656e-05, "loss": 0.5258, "step": 16644 }, { "epoch": 0.5113200012287654, "grad_norm": 0.3764500617980957, "learning_rate": 1.6970409033494245e-05, "loss": 0.4929, "step": 16645 }, { "epoch": 0.5113507203637145, "grad_norm": 0.36218371987342834, "learning_rate": 1.697006250034596e-05, "loss": 0.5786, "step": 16646 }, { "epoch": 0.5113814394986638, "grad_norm": 0.4005981981754303, "learning_rate": 1.696971595091861e-05, "loss": 0.57, "step": 16647 }, { "epoch": 0.5114121586336129, "grad_norm": 0.4015752375125885, "learning_rate": 1.6969369385213e-05, "loss": 0.5934, "step": 16648 }, { "epoch": 0.511442877768562, "grad_norm": 0.35343825817108154, "learning_rate": 1.696902280322994e-05, "loss": 0.6164, "step": 16649 }, { "epoch": 0.5114735969035112, "grad_norm": 0.33493572473526, "learning_rate": 1.696867620497024e-05, "loss": 0.4864, "step": 16650 }, { "epoch": 0.5115043160384604, "grad_norm": 0.3339483439922333, "learning_rate": 1.696832959043471e-05, "loss": 0.5568, "step": 16651 }, { "epoch": 0.5115350351734095, "grad_norm": 0.40932753682136536, "learning_rate": 1.6967982959624163e-05, "loss": 0.6107, "step": 16652 }, { "epoch": 0.5115657543083587, "grad_norm": 0.3513936698436737, "learning_rate": 1.69676363125394e-05, "loss": 0.617, "step": 16653 }, { "epoch": 0.5115964734433078, "grad_norm": 0.3761894106864929, "learning_rate": 1.696728964918124e-05, "loss": 0.5426, "step": 16654 }, { "epoch": 0.511627192578257, "grad_norm": 3.8293540477752686, "learning_rate": 1.6966942969550486e-05, "loss": 0.5799, "step": 16655 }, { "epoch": 0.5116579117132062, "grad_norm": 0.3690864145755768, "learning_rate": 1.6966596273647953e-05, "loss": 0.5719, "step": 16656 }, { "epoch": 0.5116886308481553, "grad_norm": 0.37039461731910706, "learning_rate": 1.6966249561474445e-05, "loss": 0.5845, "step": 16657 }, { "epoch": 0.5117193499831044, "grad_norm": 0.3504750728607178, "learning_rate": 1.6965902833030773e-05, "loss": 0.5643, "step": 16658 }, { "epoch": 0.5117500691180537, "grad_norm": 0.3745456635951996, "learning_rate": 1.6965556088317752e-05, "loss": 0.6245, "step": 16659 }, { "epoch": 0.5117807882530028, "grad_norm": 0.3564121127128601, "learning_rate": 1.6965209327336187e-05, "loss": 0.588, "step": 16660 }, { "epoch": 0.511811507387952, "grad_norm": 0.3532083034515381, "learning_rate": 1.696486255008689e-05, "loss": 0.5241, "step": 16661 }, { "epoch": 0.5118422265229011, "grad_norm": 0.3636507987976074, "learning_rate": 1.696451575657067e-05, "loss": 0.4768, "step": 16662 }, { "epoch": 0.5118729456578502, "grad_norm": 0.4212581217288971, "learning_rate": 1.6964168946788334e-05, "loss": 0.6142, "step": 16663 }, { "epoch": 0.5119036647927995, "grad_norm": 0.31851837038993835, "learning_rate": 1.69638221207407e-05, "loss": 0.5458, "step": 16664 }, { "epoch": 0.5119343839277486, "grad_norm": 0.3447488248348236, "learning_rate": 1.696347527842857e-05, "loss": 0.5855, "step": 16665 }, { "epoch": 0.5119651030626977, "grad_norm": 0.3802010118961334, "learning_rate": 1.6963128419852758e-05, "loss": 0.5475, "step": 16666 }, { "epoch": 0.5119958221976469, "grad_norm": 0.383621484041214, "learning_rate": 1.696278154501407e-05, "loss": 0.5464, "step": 16667 }, { "epoch": 0.5120265413325961, "grad_norm": 0.3533349335193634, "learning_rate": 1.6962434653913323e-05, "loss": 0.6001, "step": 16668 }, { "epoch": 0.5120572604675452, "grad_norm": 0.32003799080848694, "learning_rate": 1.6962087746551325e-05, "loss": 0.6459, "step": 16669 }, { "epoch": 0.5120879796024944, "grad_norm": 0.3225225508213043, "learning_rate": 1.6961740822928883e-05, "loss": 0.5144, "step": 16670 }, { "epoch": 0.5121186987374435, "grad_norm": 0.5116333365440369, "learning_rate": 1.6961393883046807e-05, "loss": 0.5362, "step": 16671 }, { "epoch": 0.5121494178723928, "grad_norm": 0.35582539439201355, "learning_rate": 1.6961046926905913e-05, "loss": 0.6131, "step": 16672 }, { "epoch": 0.5121801370073419, "grad_norm": 0.353527307510376, "learning_rate": 1.6960699954507004e-05, "loss": 0.618, "step": 16673 }, { "epoch": 0.512210856142291, "grad_norm": 0.386699378490448, "learning_rate": 1.6960352965850897e-05, "loss": 0.6192, "step": 16674 }, { "epoch": 0.5122415752772402, "grad_norm": 0.363334983587265, "learning_rate": 1.6960005960938398e-05, "loss": 0.5257, "step": 16675 }, { "epoch": 0.5122722944121894, "grad_norm": 0.3261290192604065, "learning_rate": 1.6959658939770324e-05, "loss": 0.6184, "step": 16676 }, { "epoch": 0.5123030135471385, "grad_norm": 0.3607628345489502, "learning_rate": 1.6959311902347474e-05, "loss": 0.5981, "step": 16677 }, { "epoch": 0.5123337326820877, "grad_norm": 0.33964887261390686, "learning_rate": 1.6958964848670668e-05, "loss": 0.5831, "step": 16678 }, { "epoch": 0.5123644518170368, "grad_norm": 0.36008843779563904, "learning_rate": 1.6958617778740713e-05, "loss": 0.558, "step": 16679 }, { "epoch": 0.512395170951986, "grad_norm": 0.34126725792884827, "learning_rate": 1.695827069255842e-05, "loss": 0.5519, "step": 16680 }, { "epoch": 0.5124258900869352, "grad_norm": 0.3816923201084137, "learning_rate": 1.6957923590124597e-05, "loss": 0.6596, "step": 16681 }, { "epoch": 0.5124566092218843, "grad_norm": 0.37047278881073, "learning_rate": 1.695757647144006e-05, "loss": 0.5614, "step": 16682 }, { "epoch": 0.5124873283568335, "grad_norm": 0.3346508741378784, "learning_rate": 1.6957229336505617e-05, "loss": 0.5637, "step": 16683 }, { "epoch": 0.5125180474917826, "grad_norm": 0.34559327363967896, "learning_rate": 1.6956882185322082e-05, "loss": 0.58, "step": 16684 }, { "epoch": 0.5125487666267318, "grad_norm": 1.048160433769226, "learning_rate": 1.6956535017890263e-05, "loss": 0.5769, "step": 16685 }, { "epoch": 0.512579485761681, "grad_norm": 0.3450837731361389, "learning_rate": 1.6956187834210966e-05, "loss": 0.6527, "step": 16686 }, { "epoch": 0.5126102048966301, "grad_norm": 0.37714633345603943, "learning_rate": 1.6955840634285007e-05, "loss": 0.5471, "step": 16687 }, { "epoch": 0.5126409240315792, "grad_norm": 0.3184656798839569, "learning_rate": 1.69554934181132e-05, "loss": 0.5278, "step": 16688 }, { "epoch": 0.5126716431665285, "grad_norm": 0.39198580384254456, "learning_rate": 1.695514618569635e-05, "loss": 0.5426, "step": 16689 }, { "epoch": 0.5127023623014776, "grad_norm": 0.34338462352752686, "learning_rate": 1.695479893703527e-05, "loss": 0.5943, "step": 16690 }, { "epoch": 0.5127330814364267, "grad_norm": 0.3583843410015106, "learning_rate": 1.6954451672130772e-05, "loss": 0.5353, "step": 16691 }, { "epoch": 0.5127638005713759, "grad_norm": 0.3869650363922119, "learning_rate": 1.6954104390983665e-05, "loss": 0.5729, "step": 16692 }, { "epoch": 0.5127945197063251, "grad_norm": 0.3351300060749054, "learning_rate": 1.6953757093594765e-05, "loss": 0.5193, "step": 16693 }, { "epoch": 0.5128252388412742, "grad_norm": 0.4005741775035858, "learning_rate": 1.695340977996488e-05, "loss": 0.6275, "step": 16694 }, { "epoch": 0.5128559579762234, "grad_norm": 0.33736610412597656, "learning_rate": 1.6953062450094817e-05, "loss": 0.5373, "step": 16695 }, { "epoch": 0.5128866771111725, "grad_norm": 0.4764644205570221, "learning_rate": 1.6952715103985392e-05, "loss": 0.6726, "step": 16696 }, { "epoch": 0.5129173962461218, "grad_norm": 0.411628782749176, "learning_rate": 1.6952367741637417e-05, "loss": 0.5026, "step": 16697 }, { "epoch": 0.5129481153810709, "grad_norm": 0.3281288146972656, "learning_rate": 1.69520203630517e-05, "loss": 0.6327, "step": 16698 }, { "epoch": 0.51297883451602, "grad_norm": 0.37892505526542664, "learning_rate": 1.6951672968229053e-05, "loss": 0.6564, "step": 16699 }, { "epoch": 0.5130095536509692, "grad_norm": 0.33556225895881653, "learning_rate": 1.695132555717029e-05, "loss": 0.5855, "step": 16700 }, { "epoch": 0.5130402727859183, "grad_norm": 0.4660538136959076, "learning_rate": 1.695097812987622e-05, "loss": 0.5614, "step": 16701 }, { "epoch": 0.5130709919208675, "grad_norm": 0.33185625076293945, "learning_rate": 1.695063068634765e-05, "loss": 0.5156, "step": 16702 }, { "epoch": 0.5131017110558167, "grad_norm": 0.3260880410671234, "learning_rate": 1.6950283226585406e-05, "loss": 0.6095, "step": 16703 }, { "epoch": 0.5131324301907658, "grad_norm": 0.4438531696796417, "learning_rate": 1.694993575059029e-05, "loss": 0.6293, "step": 16704 }, { "epoch": 0.5131631493257149, "grad_norm": 0.343718558549881, "learning_rate": 1.694958825836311e-05, "loss": 0.5842, "step": 16705 }, { "epoch": 0.5131938684606642, "grad_norm": 0.3888421952724457, "learning_rate": 1.694924074990468e-05, "loss": 0.5594, "step": 16706 }, { "epoch": 0.5132245875956133, "grad_norm": 0.391339510679245, "learning_rate": 1.6948893225215815e-05, "loss": 0.6535, "step": 16707 }, { "epoch": 0.5132553067305625, "grad_norm": 0.36790093779563904, "learning_rate": 1.694854568429732e-05, "loss": 0.4911, "step": 16708 }, { "epoch": 0.5132860258655116, "grad_norm": 0.3338550925254822, "learning_rate": 1.6948198127150016e-05, "loss": 0.4755, "step": 16709 }, { "epoch": 0.5133167450004608, "grad_norm": 0.32875150442123413, "learning_rate": 1.694785055377471e-05, "loss": 0.4978, "step": 16710 }, { "epoch": 0.51334746413541, "grad_norm": 0.3542599678039551, "learning_rate": 1.6947502964172212e-05, "loss": 0.5715, "step": 16711 }, { "epoch": 0.5133781832703591, "grad_norm": 0.3634461760520935, "learning_rate": 1.6947155358343336e-05, "loss": 0.6191, "step": 16712 }, { "epoch": 0.5134089024053082, "grad_norm": 0.38945358991622925, "learning_rate": 1.6946807736288896e-05, "loss": 0.5375, "step": 16713 }, { "epoch": 0.5134396215402575, "grad_norm": 0.3486246168613434, "learning_rate": 1.69464600980097e-05, "loss": 0.574, "step": 16714 }, { "epoch": 0.5134703406752066, "grad_norm": 0.32793331146240234, "learning_rate": 1.694611244350656e-05, "loss": 0.5267, "step": 16715 }, { "epoch": 0.5135010598101557, "grad_norm": 0.35192015767097473, "learning_rate": 1.694576477278029e-05, "loss": 0.5666, "step": 16716 }, { "epoch": 0.5135317789451049, "grad_norm": 0.3883066773414612, "learning_rate": 1.69454170858317e-05, "loss": 0.569, "step": 16717 }, { "epoch": 0.513562498080054, "grad_norm": 0.3757852613925934, "learning_rate": 1.69450693826616e-05, "loss": 0.5849, "step": 16718 }, { "epoch": 0.5135932172150032, "grad_norm": 0.3494272828102112, "learning_rate": 1.694472166327081e-05, "loss": 0.5228, "step": 16719 }, { "epoch": 0.5136239363499524, "grad_norm": 0.391877144575119, "learning_rate": 1.694437392766014e-05, "loss": 0.6339, "step": 16720 }, { "epoch": 0.5136546554849015, "grad_norm": 0.35454073548316956, "learning_rate": 1.6944026175830396e-05, "loss": 0.599, "step": 16721 }, { "epoch": 0.5136853746198508, "grad_norm": 0.3764244019985199, "learning_rate": 1.6943678407782394e-05, "loss": 0.5572, "step": 16722 }, { "epoch": 0.5137160937547999, "grad_norm": 0.37299492955207825, "learning_rate": 1.694333062351695e-05, "loss": 0.5272, "step": 16723 }, { "epoch": 0.513746812889749, "grad_norm": 0.32180583477020264, "learning_rate": 1.694298282303487e-05, "loss": 0.6249, "step": 16724 }, { "epoch": 0.5137775320246982, "grad_norm": 0.36046692728996277, "learning_rate": 1.6942635006336967e-05, "loss": 0.5788, "step": 16725 }, { "epoch": 0.5138082511596473, "grad_norm": 0.31389111280441284, "learning_rate": 1.6942287173424053e-05, "loss": 0.57, "step": 16726 }, { "epoch": 0.5138389702945965, "grad_norm": 0.35290399193763733, "learning_rate": 1.6941939324296946e-05, "loss": 0.5987, "step": 16727 }, { "epoch": 0.5138696894295457, "grad_norm": 0.31776899099349976, "learning_rate": 1.6941591458956457e-05, "loss": 0.5347, "step": 16728 }, { "epoch": 0.5139004085644948, "grad_norm": 0.4290764033794403, "learning_rate": 1.6941243577403394e-05, "loss": 0.5338, "step": 16729 }, { "epoch": 0.5139311276994439, "grad_norm": 0.37578025460243225, "learning_rate": 1.694089567963857e-05, "loss": 0.5828, "step": 16730 }, { "epoch": 0.5139618468343932, "grad_norm": 0.35659554600715637, "learning_rate": 1.6940547765662802e-05, "loss": 0.5372, "step": 16731 }, { "epoch": 0.5139925659693423, "grad_norm": 0.37390056252479553, "learning_rate": 1.6940199835476902e-05, "loss": 0.5973, "step": 16732 }, { "epoch": 0.5140232851042915, "grad_norm": 0.37303999066352844, "learning_rate": 1.6939851889081675e-05, "loss": 0.5576, "step": 16733 }, { "epoch": 0.5140540042392406, "grad_norm": 0.710156261920929, "learning_rate": 1.6939503926477944e-05, "loss": 0.6178, "step": 16734 }, { "epoch": 0.5140847233741898, "grad_norm": 0.7671505808830261, "learning_rate": 1.6939155947666515e-05, "loss": 0.5463, "step": 16735 }, { "epoch": 0.514115442509139, "grad_norm": 0.34734830260276794, "learning_rate": 1.6938807952648198e-05, "loss": 0.5437, "step": 16736 }, { "epoch": 0.5141461616440881, "grad_norm": 0.34151190519332886, "learning_rate": 1.693845994142382e-05, "loss": 0.5139, "step": 16737 }, { "epoch": 0.5141768807790372, "grad_norm": 0.37832415103912354, "learning_rate": 1.6938111913994178e-05, "loss": 0.5613, "step": 16738 }, { "epoch": 0.5142075999139865, "grad_norm": 0.3420490622520447, "learning_rate": 1.6937763870360093e-05, "loss": 0.5601, "step": 16739 }, { "epoch": 0.5142383190489356, "grad_norm": 0.36457395553588867, "learning_rate": 1.6937415810522374e-05, "loss": 0.6239, "step": 16740 }, { "epoch": 0.5142690381838847, "grad_norm": 0.3755052983760834, "learning_rate": 1.693706773448184e-05, "loss": 0.5781, "step": 16741 }, { "epoch": 0.5142997573188339, "grad_norm": 0.37688466906547546, "learning_rate": 1.6936719642239295e-05, "loss": 0.5651, "step": 16742 }, { "epoch": 0.514330476453783, "grad_norm": 0.3332737684249878, "learning_rate": 1.6936371533795556e-05, "loss": 0.5583, "step": 16743 }, { "epoch": 0.5143611955887322, "grad_norm": 0.318699449300766, "learning_rate": 1.693602340915144e-05, "loss": 0.5523, "step": 16744 }, { "epoch": 0.5143919147236814, "grad_norm": 0.34614473581314087, "learning_rate": 1.693567526830776e-05, "loss": 0.5761, "step": 16745 }, { "epoch": 0.5144226338586305, "grad_norm": 0.3639524579048157, "learning_rate": 1.6935327111265324e-05, "loss": 0.6084, "step": 16746 }, { "epoch": 0.5144533529935797, "grad_norm": 0.3081194758415222, "learning_rate": 1.6934978938024943e-05, "loss": 0.5183, "step": 16747 }, { "epoch": 0.5144840721285289, "grad_norm": 0.32443010807037354, "learning_rate": 1.6934630748587438e-05, "loss": 0.5114, "step": 16748 }, { "epoch": 0.514514791263478, "grad_norm": 0.34718477725982666, "learning_rate": 1.6934282542953617e-05, "loss": 0.5563, "step": 16749 }, { "epoch": 0.5145455103984272, "grad_norm": 0.34371715784072876, "learning_rate": 1.6933934321124295e-05, "loss": 0.5972, "step": 16750 }, { "epoch": 0.5145762295333763, "grad_norm": 0.34512755274772644, "learning_rate": 1.6933586083100287e-05, "loss": 0.5174, "step": 16751 }, { "epoch": 0.5146069486683255, "grad_norm": 0.38403502106666565, "learning_rate": 1.6933237828882404e-05, "loss": 0.5674, "step": 16752 }, { "epoch": 0.5146376678032747, "grad_norm": 0.3877411186695099, "learning_rate": 1.693288955847146e-05, "loss": 0.5824, "step": 16753 }, { "epoch": 0.5146683869382238, "grad_norm": 0.3396456837654114, "learning_rate": 1.6932541271868265e-05, "loss": 0.5953, "step": 16754 }, { "epoch": 0.5146991060731729, "grad_norm": 0.33729681372642517, "learning_rate": 1.693219296907364e-05, "loss": 0.5362, "step": 16755 }, { "epoch": 0.5147298252081222, "grad_norm": 0.340252161026001, "learning_rate": 1.6931844650088388e-05, "loss": 0.5738, "step": 16756 }, { "epoch": 0.5147605443430713, "grad_norm": 0.360125869512558, "learning_rate": 1.6931496314913336e-05, "loss": 0.4947, "step": 16757 }, { "epoch": 0.5147912634780205, "grad_norm": 0.3776156008243561, "learning_rate": 1.6931147963549285e-05, "loss": 0.5643, "step": 16758 }, { "epoch": 0.5148219826129696, "grad_norm": 0.35319605469703674, "learning_rate": 1.6930799595997055e-05, "loss": 0.5578, "step": 16759 }, { "epoch": 0.5148527017479188, "grad_norm": 0.4239419996738434, "learning_rate": 1.693045121225746e-05, "loss": 0.5461, "step": 16760 }, { "epoch": 0.514883420882868, "grad_norm": 0.38139548897743225, "learning_rate": 1.6930102812331313e-05, "loss": 0.5857, "step": 16761 }, { "epoch": 0.5149141400178171, "grad_norm": 0.3269401788711548, "learning_rate": 1.6929754396219427e-05, "loss": 0.5766, "step": 16762 }, { "epoch": 0.5149448591527662, "grad_norm": 0.39696016907691956, "learning_rate": 1.692940596392261e-05, "loss": 0.5925, "step": 16763 }, { "epoch": 0.5149755782877155, "grad_norm": 0.364874929189682, "learning_rate": 1.692905751544169e-05, "loss": 0.5621, "step": 16764 }, { "epoch": 0.5150062974226646, "grad_norm": 0.33001261949539185, "learning_rate": 1.6928709050777464e-05, "loss": 0.5268, "step": 16765 }, { "epoch": 0.5150370165576137, "grad_norm": 0.372519850730896, "learning_rate": 1.692836056993076e-05, "loss": 0.5591, "step": 16766 }, { "epoch": 0.5150677356925629, "grad_norm": 0.34122616052627563, "learning_rate": 1.6928012072902385e-05, "loss": 0.5842, "step": 16767 }, { "epoch": 0.515098454827512, "grad_norm": 0.3584970235824585, "learning_rate": 1.6927663559693154e-05, "loss": 0.5364, "step": 16768 }, { "epoch": 0.5151291739624612, "grad_norm": 0.3666256368160248, "learning_rate": 1.692731503030388e-05, "loss": 0.6244, "step": 16769 }, { "epoch": 0.5151598930974104, "grad_norm": 0.3299579918384552, "learning_rate": 1.6926966484735376e-05, "loss": 0.5669, "step": 16770 }, { "epoch": 0.5151906122323595, "grad_norm": 0.34387531876564026, "learning_rate": 1.692661792298846e-05, "loss": 0.5563, "step": 16771 }, { "epoch": 0.5152213313673087, "grad_norm": 0.3681296706199646, "learning_rate": 1.6926269345063943e-05, "loss": 0.5912, "step": 16772 }, { "epoch": 0.5152520505022579, "grad_norm": 0.34074661135673523, "learning_rate": 1.692592075096264e-05, "loss": 0.6132, "step": 16773 }, { "epoch": 0.515282769637207, "grad_norm": 0.35199445486068726, "learning_rate": 1.6925572140685365e-05, "loss": 0.5703, "step": 16774 }, { "epoch": 0.5153134887721562, "grad_norm": 0.3495255410671234, "learning_rate": 1.6925223514232935e-05, "loss": 0.584, "step": 16775 }, { "epoch": 0.5153442079071053, "grad_norm": 0.334765762090683, "learning_rate": 1.6924874871606163e-05, "loss": 0.5612, "step": 16776 }, { "epoch": 0.5153749270420545, "grad_norm": 0.31781908869743347, "learning_rate": 1.6924526212805862e-05, "loss": 0.5565, "step": 16777 }, { "epoch": 0.5154056461770037, "grad_norm": 0.5656083822250366, "learning_rate": 1.6924177537832843e-05, "loss": 0.5516, "step": 16778 }, { "epoch": 0.5154363653119528, "grad_norm": 0.37646326422691345, "learning_rate": 1.6923828846687926e-05, "loss": 0.6374, "step": 16779 }, { "epoch": 0.5154670844469019, "grad_norm": 0.34485021233558655, "learning_rate": 1.692348013937192e-05, "loss": 0.5945, "step": 16780 }, { "epoch": 0.5154978035818512, "grad_norm": 0.3592008352279663, "learning_rate": 1.6923131415885644e-05, "loss": 0.6411, "step": 16781 }, { "epoch": 0.5155285227168003, "grad_norm": 0.3680160343647003, "learning_rate": 1.6922782676229913e-05, "loss": 0.663, "step": 16782 }, { "epoch": 0.5155592418517495, "grad_norm": 0.40018248558044434, "learning_rate": 1.6922433920405543e-05, "loss": 0.574, "step": 16783 }, { "epoch": 0.5155899609866986, "grad_norm": 0.3929399847984314, "learning_rate": 1.692208514841334e-05, "loss": 0.6505, "step": 16784 }, { "epoch": 0.5156206801216477, "grad_norm": 0.34230560064315796, "learning_rate": 1.6921736360254123e-05, "loss": 0.5674, "step": 16785 }, { "epoch": 0.515651399256597, "grad_norm": 0.36263343691825867, "learning_rate": 1.692138755592871e-05, "loss": 0.5167, "step": 16786 }, { "epoch": 0.5156821183915461, "grad_norm": 0.3682510256767273, "learning_rate": 1.692103873543791e-05, "loss": 0.6476, "step": 16787 }, { "epoch": 0.5157128375264952, "grad_norm": 0.45946425199508667, "learning_rate": 1.6920689898782542e-05, "loss": 0.5792, "step": 16788 }, { "epoch": 0.5157435566614444, "grad_norm": 0.38014769554138184, "learning_rate": 1.692034104596342e-05, "loss": 0.5888, "step": 16789 }, { "epoch": 0.5157742757963936, "grad_norm": 0.37714138627052307, "learning_rate": 1.691999217698136e-05, "loss": 0.5969, "step": 16790 }, { "epoch": 0.5158049949313427, "grad_norm": 0.36176612973213196, "learning_rate": 1.691964329183717e-05, "loss": 0.4887, "step": 16791 }, { "epoch": 0.5158357140662919, "grad_norm": 0.35487279295921326, "learning_rate": 1.6919294390531672e-05, "loss": 0.5391, "step": 16792 }, { "epoch": 0.515866433201241, "grad_norm": 0.3985297679901123, "learning_rate": 1.6918945473065683e-05, "loss": 0.5748, "step": 16793 }, { "epoch": 0.5158971523361903, "grad_norm": 0.3560030460357666, "learning_rate": 1.691859653944001e-05, "loss": 0.5643, "step": 16794 }, { "epoch": 0.5159278714711394, "grad_norm": 0.3487459421157837, "learning_rate": 1.691824758965547e-05, "loss": 0.5281, "step": 16795 }, { "epoch": 0.5159585906060885, "grad_norm": 0.3482080399990082, "learning_rate": 1.6917898623712884e-05, "loss": 0.5298, "step": 16796 }, { "epoch": 0.5159893097410377, "grad_norm": 0.45495715737342834, "learning_rate": 1.6917549641613057e-05, "loss": 0.4896, "step": 16797 }, { "epoch": 0.5160200288759869, "grad_norm": 0.4351052939891815, "learning_rate": 1.6917200643356815e-05, "loss": 0.5967, "step": 16798 }, { "epoch": 0.516050748010936, "grad_norm": 0.3340330719947815, "learning_rate": 1.6916851628944965e-05, "loss": 0.6251, "step": 16799 }, { "epoch": 0.5160814671458852, "grad_norm": 0.44640395045280457, "learning_rate": 1.6916502598378323e-05, "loss": 0.588, "step": 16800 }, { "epoch": 0.5161121862808343, "grad_norm": 0.35083943605422974, "learning_rate": 1.6916153551657712e-05, "loss": 0.5447, "step": 16801 }, { "epoch": 0.5161429054157834, "grad_norm": 0.37524840235710144, "learning_rate": 1.691580448878394e-05, "loss": 0.5903, "step": 16802 }, { "epoch": 0.5161736245507327, "grad_norm": 0.34428420662879944, "learning_rate": 1.691545540975782e-05, "loss": 0.5634, "step": 16803 }, { "epoch": 0.5162043436856818, "grad_norm": 0.40962865948677063, "learning_rate": 1.6915106314580173e-05, "loss": 0.6335, "step": 16804 }, { "epoch": 0.5162350628206309, "grad_norm": 0.3637882173061371, "learning_rate": 1.6914757203251812e-05, "loss": 0.6031, "step": 16805 }, { "epoch": 0.5162657819555801, "grad_norm": 0.3537442088127136, "learning_rate": 1.691440807577355e-05, "loss": 0.5618, "step": 16806 }, { "epoch": 0.5162965010905293, "grad_norm": 0.34722432494163513, "learning_rate": 1.6914058932146206e-05, "loss": 0.5969, "step": 16807 }, { "epoch": 0.5163272202254785, "grad_norm": 0.5048168897628784, "learning_rate": 1.69137097723706e-05, "loss": 0.5201, "step": 16808 }, { "epoch": 0.5163579393604276, "grad_norm": 0.3416930139064789, "learning_rate": 1.6913360596447534e-05, "loss": 0.543, "step": 16809 }, { "epoch": 0.5163886584953767, "grad_norm": 0.34718453884124756, "learning_rate": 1.6913011404377838e-05, "loss": 0.6756, "step": 16810 }, { "epoch": 0.516419377630326, "grad_norm": 0.5435742735862732, "learning_rate": 1.6912662196162318e-05, "loss": 0.6037, "step": 16811 }, { "epoch": 0.5164500967652751, "grad_norm": 0.33784911036491394, "learning_rate": 1.691231297180179e-05, "loss": 0.6271, "step": 16812 }, { "epoch": 0.5164808159002242, "grad_norm": 0.3416939079761505, "learning_rate": 1.6911963731297078e-05, "loss": 0.5019, "step": 16813 }, { "epoch": 0.5165115350351734, "grad_norm": 0.3516436517238617, "learning_rate": 1.691161447464899e-05, "loss": 0.5759, "step": 16814 }, { "epoch": 0.5165422541701226, "grad_norm": 0.35791081190109253, "learning_rate": 1.6911265201858342e-05, "loss": 0.5808, "step": 16815 }, { "epoch": 0.5165729733050717, "grad_norm": 0.36817410588264465, "learning_rate": 1.6910915912925953e-05, "loss": 0.5406, "step": 16816 }, { "epoch": 0.5166036924400209, "grad_norm": 0.3548276424407959, "learning_rate": 1.6910566607852636e-05, "loss": 0.6484, "step": 16817 }, { "epoch": 0.51663441157497, "grad_norm": 0.3789390027523041, "learning_rate": 1.691021728663921e-05, "loss": 0.5795, "step": 16818 }, { "epoch": 0.5166651307099193, "grad_norm": 0.3552594780921936, "learning_rate": 1.690986794928649e-05, "loss": 0.6073, "step": 16819 }, { "epoch": 0.5166958498448684, "grad_norm": 0.3495624363422394, "learning_rate": 1.6909518595795288e-05, "loss": 0.5491, "step": 16820 }, { "epoch": 0.5167265689798175, "grad_norm": 0.3555338382720947, "learning_rate": 1.6909169226166422e-05, "loss": 0.5251, "step": 16821 }, { "epoch": 0.5167572881147667, "grad_norm": 0.2991284132003784, "learning_rate": 1.6908819840400708e-05, "loss": 0.5276, "step": 16822 }, { "epoch": 0.5167880072497159, "grad_norm": 0.34769490361213684, "learning_rate": 1.6908470438498966e-05, "loss": 0.494, "step": 16823 }, { "epoch": 0.516818726384665, "grad_norm": 0.35056766867637634, "learning_rate": 1.6908121020462008e-05, "loss": 0.5988, "step": 16824 }, { "epoch": 0.5168494455196142, "grad_norm": 0.44386231899261475, "learning_rate": 1.690777158629065e-05, "loss": 0.6203, "step": 16825 }, { "epoch": 0.5168801646545633, "grad_norm": 0.38168448209762573, "learning_rate": 1.6907422135985712e-05, "loss": 0.5179, "step": 16826 }, { "epoch": 0.5169108837895124, "grad_norm": 0.5198469758033752, "learning_rate": 1.6907072669548004e-05, "loss": 0.5536, "step": 16827 }, { "epoch": 0.5169416029244617, "grad_norm": 0.32893773913383484, "learning_rate": 1.6906723186978347e-05, "loss": 0.5267, "step": 16828 }, { "epoch": 0.5169723220594108, "grad_norm": 0.36757341027259827, "learning_rate": 1.6906373688277554e-05, "loss": 0.5095, "step": 16829 }, { "epoch": 0.5170030411943599, "grad_norm": 0.34942182898521423, "learning_rate": 1.690602417344644e-05, "loss": 0.6221, "step": 16830 }, { "epoch": 0.5170337603293091, "grad_norm": 0.36688265204429626, "learning_rate": 1.690567464248583e-05, "loss": 0.5881, "step": 16831 }, { "epoch": 0.5170644794642583, "grad_norm": 0.3469028174877167, "learning_rate": 1.6905325095396534e-05, "loss": 0.5976, "step": 16832 }, { "epoch": 0.5170951985992075, "grad_norm": 0.4199966788291931, "learning_rate": 1.6904975532179366e-05, "loss": 0.6247, "step": 16833 }, { "epoch": 0.5171259177341566, "grad_norm": 0.39662599563598633, "learning_rate": 1.6904625952835147e-05, "loss": 0.56, "step": 16834 }, { "epoch": 0.5171566368691057, "grad_norm": 0.3766920864582062, "learning_rate": 1.6904276357364692e-05, "loss": 0.5618, "step": 16835 }, { "epoch": 0.517187356004055, "grad_norm": 0.3832724690437317, "learning_rate": 1.690392674576882e-05, "loss": 0.5537, "step": 16836 }, { "epoch": 0.5172180751390041, "grad_norm": 0.3481455445289612, "learning_rate": 1.690357711804834e-05, "loss": 0.5973, "step": 16837 }, { "epoch": 0.5172487942739532, "grad_norm": 0.34409916400909424, "learning_rate": 1.6903227474204075e-05, "loss": 0.5437, "step": 16838 }, { "epoch": 0.5172795134089024, "grad_norm": 0.32271841168403625, "learning_rate": 1.6902877814236842e-05, "loss": 0.5246, "step": 16839 }, { "epoch": 0.5173102325438516, "grad_norm": 0.44427311420440674, "learning_rate": 1.6902528138147455e-05, "loss": 0.6639, "step": 16840 }, { "epoch": 0.5173409516788007, "grad_norm": 0.37140724062919617, "learning_rate": 1.690217844593673e-05, "loss": 0.5616, "step": 16841 }, { "epoch": 0.5173716708137499, "grad_norm": 0.3378331661224365, "learning_rate": 1.6901828737605487e-05, "loss": 0.6175, "step": 16842 }, { "epoch": 0.517402389948699, "grad_norm": 0.3738175928592682, "learning_rate": 1.690147901315454e-05, "loss": 0.5603, "step": 16843 }, { "epoch": 0.5174331090836483, "grad_norm": 0.3359449505805969, "learning_rate": 1.6901129272584705e-05, "loss": 0.5313, "step": 16844 }, { "epoch": 0.5174638282185974, "grad_norm": 0.34995123744010925, "learning_rate": 1.6900779515896806e-05, "loss": 0.5721, "step": 16845 }, { "epoch": 0.5174945473535465, "grad_norm": 0.34810128808021545, "learning_rate": 1.6900429743091648e-05, "loss": 0.5176, "step": 16846 }, { "epoch": 0.5175252664884957, "grad_norm": 0.32599228620529175, "learning_rate": 1.690007995417006e-05, "loss": 0.6169, "step": 16847 }, { "epoch": 0.5175559856234448, "grad_norm": 0.3794202506542206, "learning_rate": 1.6899730149132848e-05, "loss": 0.5977, "step": 16848 }, { "epoch": 0.517586704758394, "grad_norm": 0.3580975830554962, "learning_rate": 1.689938032798084e-05, "loss": 0.6034, "step": 16849 }, { "epoch": 0.5176174238933432, "grad_norm": 0.3803461492061615, "learning_rate": 1.6899030490714844e-05, "loss": 0.5433, "step": 16850 }, { "epoch": 0.5176481430282923, "grad_norm": 0.44161686301231384, "learning_rate": 1.689868063733568e-05, "loss": 0.5659, "step": 16851 }, { "epoch": 0.5176788621632414, "grad_norm": 0.34753090143203735, "learning_rate": 1.6898330767844168e-05, "loss": 0.5813, "step": 16852 }, { "epoch": 0.5177095812981907, "grad_norm": 0.33147913217544556, "learning_rate": 1.689798088224112e-05, "loss": 0.5502, "step": 16853 }, { "epoch": 0.5177403004331398, "grad_norm": 0.32355964183807373, "learning_rate": 1.6897630980527357e-05, "loss": 0.4747, "step": 16854 }, { "epoch": 0.5177710195680889, "grad_norm": 0.36041638255119324, "learning_rate": 1.6897281062703697e-05, "loss": 0.5974, "step": 16855 }, { "epoch": 0.5178017387030381, "grad_norm": 0.3622611165046692, "learning_rate": 1.6896931128770955e-05, "loss": 0.523, "step": 16856 }, { "epoch": 0.5178324578379873, "grad_norm": 0.5533674359321594, "learning_rate": 1.6896581178729947e-05, "loss": 0.5166, "step": 16857 }, { "epoch": 0.5178631769729365, "grad_norm": 0.3452061414718628, "learning_rate": 1.689623121258149e-05, "loss": 0.5574, "step": 16858 }, { "epoch": 0.5178938961078856, "grad_norm": 0.3490562438964844, "learning_rate": 1.689588123032641e-05, "loss": 0.5631, "step": 16859 }, { "epoch": 0.5179246152428347, "grad_norm": 0.36015602946281433, "learning_rate": 1.6895531231965508e-05, "loss": 0.6026, "step": 16860 }, { "epoch": 0.517955334377784, "grad_norm": 0.35246896743774414, "learning_rate": 1.6895181217499618e-05, "loss": 0.5834, "step": 16861 }, { "epoch": 0.5179860535127331, "grad_norm": 0.4592823088169098, "learning_rate": 1.689483118692955e-05, "loss": 0.548, "step": 16862 }, { "epoch": 0.5180167726476822, "grad_norm": 0.3734976649284363, "learning_rate": 1.6894481140256124e-05, "loss": 0.5941, "step": 16863 }, { "epoch": 0.5180474917826314, "grad_norm": 0.3542463481426239, "learning_rate": 1.6894131077480153e-05, "loss": 0.5732, "step": 16864 }, { "epoch": 0.5180782109175806, "grad_norm": 0.4083467423915863, "learning_rate": 1.6893780998602455e-05, "loss": 0.5305, "step": 16865 }, { "epoch": 0.5181089300525297, "grad_norm": 0.37347739934921265, "learning_rate": 1.6893430903623854e-05, "loss": 0.5556, "step": 16866 }, { "epoch": 0.5181396491874789, "grad_norm": 0.3479328155517578, "learning_rate": 1.6893080792545162e-05, "loss": 0.5089, "step": 16867 }, { "epoch": 0.518170368322428, "grad_norm": 0.3486272394657135, "learning_rate": 1.68927306653672e-05, "loss": 0.5815, "step": 16868 }, { "epoch": 0.5182010874573773, "grad_norm": 0.3589964807033539, "learning_rate": 1.6892380522090783e-05, "loss": 0.5839, "step": 16869 }, { "epoch": 0.5182318065923264, "grad_norm": 0.34772592782974243, "learning_rate": 1.689203036271673e-05, "loss": 0.5376, "step": 16870 }, { "epoch": 0.5182625257272755, "grad_norm": 0.39980581402778625, "learning_rate": 1.689168018724586e-05, "loss": 0.6183, "step": 16871 }, { "epoch": 0.5182932448622247, "grad_norm": 0.3472137749195099, "learning_rate": 1.6891329995678988e-05, "loss": 0.5594, "step": 16872 }, { "epoch": 0.5183239639971738, "grad_norm": 0.3448159992694855, "learning_rate": 1.6890979788016934e-05, "loss": 0.5356, "step": 16873 }, { "epoch": 0.518354683132123, "grad_norm": 0.34003347158432007, "learning_rate": 1.689062956426052e-05, "loss": 0.5005, "step": 16874 }, { "epoch": 0.5183854022670722, "grad_norm": 0.3658064305782318, "learning_rate": 1.689027932441055e-05, "loss": 0.4495, "step": 16875 }, { "epoch": 0.5184161214020213, "grad_norm": 0.35784146189689636, "learning_rate": 1.6889929068467857e-05, "loss": 0.5258, "step": 16876 }, { "epoch": 0.5184468405369704, "grad_norm": 0.38624462485313416, "learning_rate": 1.6889578796433254e-05, "loss": 0.5654, "step": 16877 }, { "epoch": 0.5184775596719197, "grad_norm": 0.4351441562175751, "learning_rate": 1.6889228508307556e-05, "loss": 0.5507, "step": 16878 }, { "epoch": 0.5185082788068688, "grad_norm": 0.36362960934638977, "learning_rate": 1.6888878204091582e-05, "loss": 0.6008, "step": 16879 }, { "epoch": 0.5185389979418179, "grad_norm": 0.32950130105018616, "learning_rate": 1.6888527883786157e-05, "loss": 0.5382, "step": 16880 }, { "epoch": 0.5185697170767671, "grad_norm": 0.333372563123703, "learning_rate": 1.6888177547392094e-05, "loss": 0.533, "step": 16881 }, { "epoch": 0.5186004362117163, "grad_norm": 0.40802568197250366, "learning_rate": 1.6887827194910208e-05, "loss": 0.5434, "step": 16882 }, { "epoch": 0.5186311553466655, "grad_norm": 0.3635513484477997, "learning_rate": 1.688747682634132e-05, "loss": 0.5895, "step": 16883 }, { "epoch": 0.5186618744816146, "grad_norm": 0.3572397530078888, "learning_rate": 1.688712644168625e-05, "loss": 0.5948, "step": 16884 }, { "epoch": 0.5186925936165637, "grad_norm": 0.32358619570732117, "learning_rate": 1.6886776040945817e-05, "loss": 0.6186, "step": 16885 }, { "epoch": 0.518723312751513, "grad_norm": 0.3245325982570648, "learning_rate": 1.6886425624120838e-05, "loss": 0.5615, "step": 16886 }, { "epoch": 0.5187540318864621, "grad_norm": 0.38820531964302063, "learning_rate": 1.6886075191212127e-05, "loss": 0.5553, "step": 16887 }, { "epoch": 0.5187847510214112, "grad_norm": 0.3819673955440521, "learning_rate": 1.688572474222051e-05, "loss": 0.5545, "step": 16888 }, { "epoch": 0.5188154701563604, "grad_norm": 0.3826281726360321, "learning_rate": 1.68853742771468e-05, "loss": 0.5542, "step": 16889 }, { "epoch": 0.5188461892913095, "grad_norm": 0.3335113525390625, "learning_rate": 1.688502379599182e-05, "loss": 0.5543, "step": 16890 }, { "epoch": 0.5188769084262587, "grad_norm": 0.34980344772338867, "learning_rate": 1.6884673298756385e-05, "loss": 0.5348, "step": 16891 }, { "epoch": 0.5189076275612079, "grad_norm": 0.4170517921447754, "learning_rate": 1.6884322785441313e-05, "loss": 0.532, "step": 16892 }, { "epoch": 0.518938346696157, "grad_norm": 0.33596232533454895, "learning_rate": 1.6883972256047424e-05, "loss": 0.5464, "step": 16893 }, { "epoch": 0.5189690658311062, "grad_norm": 0.3239094018936157, "learning_rate": 1.688362171057554e-05, "loss": 0.5291, "step": 16894 }, { "epoch": 0.5189997849660554, "grad_norm": 0.332608699798584, "learning_rate": 1.6883271149026477e-05, "loss": 0.6059, "step": 16895 }, { "epoch": 0.5190305041010045, "grad_norm": 0.38629743456840515, "learning_rate": 1.688292057140105e-05, "loss": 0.6373, "step": 16896 }, { "epoch": 0.5190612232359537, "grad_norm": 0.3575364053249359, "learning_rate": 1.6882569977700084e-05, "loss": 0.5755, "step": 16897 }, { "epoch": 0.5190919423709028, "grad_norm": 0.366165429353714, "learning_rate": 1.6882219367924395e-05, "loss": 0.5719, "step": 16898 }, { "epoch": 0.519122661505852, "grad_norm": 0.372437983751297, "learning_rate": 1.68818687420748e-05, "loss": 0.5658, "step": 16899 }, { "epoch": 0.5191533806408012, "grad_norm": 0.338128000497818, "learning_rate": 1.688151810015212e-05, "loss": 0.5593, "step": 16900 }, { "epoch": 0.5191840997757503, "grad_norm": 0.34777066111564636, "learning_rate": 1.6881167442157175e-05, "loss": 0.5448, "step": 16901 }, { "epoch": 0.5192148189106994, "grad_norm": 0.3538271486759186, "learning_rate": 1.688081676809078e-05, "loss": 0.5532, "step": 16902 }, { "epoch": 0.5192455380456487, "grad_norm": 0.3656678795814514, "learning_rate": 1.6880466077953763e-05, "loss": 0.5721, "step": 16903 }, { "epoch": 0.5192762571805978, "grad_norm": 0.34716346859931946, "learning_rate": 1.6880115371746933e-05, "loss": 0.6076, "step": 16904 }, { "epoch": 0.519306976315547, "grad_norm": 0.3391806185245514, "learning_rate": 1.6879764649471117e-05, "loss": 0.4824, "step": 16905 }, { "epoch": 0.5193376954504961, "grad_norm": 0.34734049439430237, "learning_rate": 1.6879413911127126e-05, "loss": 0.5049, "step": 16906 }, { "epoch": 0.5193684145854452, "grad_norm": 0.36875206232070923, "learning_rate": 1.6879063156715784e-05, "loss": 0.5401, "step": 16907 }, { "epoch": 0.5193991337203945, "grad_norm": 0.34010687470436096, "learning_rate": 1.6878712386237908e-05, "loss": 0.545, "step": 16908 }, { "epoch": 0.5194298528553436, "grad_norm": 0.33894413709640503, "learning_rate": 1.687836159969432e-05, "loss": 0.485, "step": 16909 }, { "epoch": 0.5194605719902927, "grad_norm": 0.4353002905845642, "learning_rate": 1.687801079708584e-05, "loss": 0.5587, "step": 16910 }, { "epoch": 0.519491291125242, "grad_norm": 0.3429238796234131, "learning_rate": 1.6877659978413284e-05, "loss": 0.5109, "step": 16911 }, { "epoch": 0.5195220102601911, "grad_norm": 0.714683473110199, "learning_rate": 1.6877309143677472e-05, "loss": 0.5588, "step": 16912 }, { "epoch": 0.5195527293951402, "grad_norm": 0.37196892499923706, "learning_rate": 1.6876958292879226e-05, "loss": 0.6097, "step": 16913 }, { "epoch": 0.5195834485300894, "grad_norm": 0.3704656958580017, "learning_rate": 1.6876607426019364e-05, "loss": 0.537, "step": 16914 }, { "epoch": 0.5196141676650385, "grad_norm": 0.36958956718444824, "learning_rate": 1.68762565430987e-05, "loss": 0.5922, "step": 16915 }, { "epoch": 0.5196448867999877, "grad_norm": 0.3997191786766052, "learning_rate": 1.6875905644118066e-05, "loss": 0.5931, "step": 16916 }, { "epoch": 0.5196756059349369, "grad_norm": 0.3780432343482971, "learning_rate": 1.687555472907827e-05, "loss": 0.5446, "step": 16917 }, { "epoch": 0.519706325069886, "grad_norm": 0.4208686053752899, "learning_rate": 1.6875203797980136e-05, "loss": 0.5318, "step": 16918 }, { "epoch": 0.5197370442048352, "grad_norm": 0.3829532563686371, "learning_rate": 1.6874852850824482e-05, "loss": 0.5402, "step": 16919 }, { "epoch": 0.5197677633397844, "grad_norm": 0.3220531642436981, "learning_rate": 1.6874501887612133e-05, "loss": 0.513, "step": 16920 }, { "epoch": 0.5197984824747335, "grad_norm": 0.4278983473777771, "learning_rate": 1.68741509083439e-05, "loss": 0.6001, "step": 16921 }, { "epoch": 0.5198292016096827, "grad_norm": 0.4044554829597473, "learning_rate": 1.6873799913020614e-05, "loss": 0.6131, "step": 16922 }, { "epoch": 0.5198599207446318, "grad_norm": 0.36180683970451355, "learning_rate": 1.687344890164308e-05, "loss": 0.6583, "step": 16923 }, { "epoch": 0.519890639879581, "grad_norm": 0.3557491600513458, "learning_rate": 1.6873097874212132e-05, "loss": 0.5606, "step": 16924 }, { "epoch": 0.5199213590145302, "grad_norm": 0.38648468255996704, "learning_rate": 1.6872746830728584e-05, "loss": 0.5832, "step": 16925 }, { "epoch": 0.5199520781494793, "grad_norm": 0.3683575689792633, "learning_rate": 1.6872395771193254e-05, "loss": 0.6611, "step": 16926 }, { "epoch": 0.5199827972844284, "grad_norm": 0.3857043981552124, "learning_rate": 1.687204469560696e-05, "loss": 0.6165, "step": 16927 }, { "epoch": 0.5200135164193777, "grad_norm": 0.32639026641845703, "learning_rate": 1.687169360397053e-05, "loss": 0.6023, "step": 16928 }, { "epoch": 0.5200442355543268, "grad_norm": 0.3478180766105652, "learning_rate": 1.6871342496284783e-05, "loss": 0.5539, "step": 16929 }, { "epoch": 0.520074954689276, "grad_norm": 0.36171942949295044, "learning_rate": 1.6870991372550532e-05, "loss": 0.5591, "step": 16930 }, { "epoch": 0.5201056738242251, "grad_norm": 0.35941675305366516, "learning_rate": 1.68706402327686e-05, "loss": 0.5325, "step": 16931 }, { "epoch": 0.5201363929591742, "grad_norm": 0.3493078649044037, "learning_rate": 1.6870289076939807e-05, "loss": 0.5939, "step": 16932 }, { "epoch": 0.5201671120941235, "grad_norm": 0.347795695066452, "learning_rate": 1.6869937905064977e-05, "loss": 0.5756, "step": 16933 }, { "epoch": 0.5201978312290726, "grad_norm": 0.34277352690696716, "learning_rate": 1.6869586717144924e-05, "loss": 0.5312, "step": 16934 }, { "epoch": 0.5202285503640217, "grad_norm": 0.4316260814666748, "learning_rate": 1.6869235513180475e-05, "loss": 0.5882, "step": 16935 }, { "epoch": 0.5202592694989709, "grad_norm": 0.3243025541305542, "learning_rate": 1.686888429317244e-05, "loss": 0.558, "step": 16936 }, { "epoch": 0.5202899886339201, "grad_norm": 0.3828491270542145, "learning_rate": 1.6868533057121652e-05, "loss": 0.6157, "step": 16937 }, { "epoch": 0.5203207077688692, "grad_norm": 0.3846838176250458, "learning_rate": 1.6868181805028926e-05, "loss": 0.5072, "step": 16938 }, { "epoch": 0.5203514269038184, "grad_norm": 0.34277665615081787, "learning_rate": 1.686783053689508e-05, "loss": 0.6285, "step": 16939 }, { "epoch": 0.5203821460387675, "grad_norm": 0.36519262194633484, "learning_rate": 1.6867479252720936e-05, "loss": 0.5326, "step": 16940 }, { "epoch": 0.5204128651737167, "grad_norm": 0.3713577389717102, "learning_rate": 1.6867127952507316e-05, "loss": 0.5544, "step": 16941 }, { "epoch": 0.5204435843086659, "grad_norm": 0.33492225408554077, "learning_rate": 1.686677663625504e-05, "loss": 0.5611, "step": 16942 }, { "epoch": 0.520474303443615, "grad_norm": 0.3499731123447418, "learning_rate": 1.6866425303964922e-05, "loss": 0.5617, "step": 16943 }, { "epoch": 0.5205050225785642, "grad_norm": 0.3846389949321747, "learning_rate": 1.686607395563779e-05, "loss": 0.5702, "step": 16944 }, { "epoch": 0.5205357417135134, "grad_norm": 0.3085474371910095, "learning_rate": 1.6865722591274466e-05, "loss": 0.4783, "step": 16945 }, { "epoch": 0.5205664608484625, "grad_norm": 0.3684687614440918, "learning_rate": 1.6865371210875764e-05, "loss": 0.5818, "step": 16946 }, { "epoch": 0.5205971799834117, "grad_norm": 0.3433871865272522, "learning_rate": 1.686501981444251e-05, "loss": 0.5765, "step": 16947 }, { "epoch": 0.5206278991183608, "grad_norm": 0.40262746810913086, "learning_rate": 1.6864668401975525e-05, "loss": 0.5775, "step": 16948 }, { "epoch": 0.52065861825331, "grad_norm": 0.3261570334434509, "learning_rate": 1.6864316973475624e-05, "loss": 0.551, "step": 16949 }, { "epoch": 0.5206893373882592, "grad_norm": 0.35952991247177124, "learning_rate": 1.6863965528943633e-05, "loss": 0.5682, "step": 16950 }, { "epoch": 0.5207200565232083, "grad_norm": 0.36623886227607727, "learning_rate": 1.686361406838037e-05, "loss": 0.6039, "step": 16951 }, { "epoch": 0.5207507756581574, "grad_norm": 0.42833971977233887, "learning_rate": 1.686326259178666e-05, "loss": 0.4913, "step": 16952 }, { "epoch": 0.5207814947931066, "grad_norm": 0.3281848132610321, "learning_rate": 1.6862911099163317e-05, "loss": 0.5293, "step": 16953 }, { "epoch": 0.5208122139280558, "grad_norm": 0.3349824845790863, "learning_rate": 1.686255959051117e-05, "loss": 0.5572, "step": 16954 }, { "epoch": 0.520842933063005, "grad_norm": 0.35325729846954346, "learning_rate": 1.686220806583103e-05, "loss": 0.5125, "step": 16955 }, { "epoch": 0.5208736521979541, "grad_norm": 0.33830541372299194, "learning_rate": 1.6861856525123725e-05, "loss": 0.5141, "step": 16956 }, { "epoch": 0.5209043713329032, "grad_norm": 0.42788803577423096, "learning_rate": 1.686150496839008e-05, "loss": 0.6117, "step": 16957 }, { "epoch": 0.5209350904678525, "grad_norm": 0.31900009512901306, "learning_rate": 1.6861153395630907e-05, "loss": 0.5688, "step": 16958 }, { "epoch": 0.5209658096028016, "grad_norm": 0.38910242915153503, "learning_rate": 1.686080180684703e-05, "loss": 0.5301, "step": 16959 }, { "epoch": 0.5209965287377507, "grad_norm": 0.3732011914253235, "learning_rate": 1.6860450202039275e-05, "loss": 0.5644, "step": 16960 }, { "epoch": 0.5210272478726999, "grad_norm": 0.36476150155067444, "learning_rate": 1.6860098581208454e-05, "loss": 0.5467, "step": 16961 }, { "epoch": 0.5210579670076491, "grad_norm": 0.40151071548461914, "learning_rate": 1.68597469443554e-05, "loss": 0.6213, "step": 16962 }, { "epoch": 0.5210886861425982, "grad_norm": 0.35775765776634216, "learning_rate": 1.6859395291480923e-05, "loss": 0.5589, "step": 16963 }, { "epoch": 0.5211194052775474, "grad_norm": 0.3478066623210907, "learning_rate": 1.6859043622585852e-05, "loss": 0.5746, "step": 16964 }, { "epoch": 0.5211501244124965, "grad_norm": 0.3766174018383026, "learning_rate": 1.6858691937671006e-05, "loss": 0.4787, "step": 16965 }, { "epoch": 0.5211808435474456, "grad_norm": 0.3188716173171997, "learning_rate": 1.68583402367372e-05, "loss": 0.5876, "step": 16966 }, { "epoch": 0.5212115626823949, "grad_norm": 0.37398219108581543, "learning_rate": 1.6857988519785272e-05, "loss": 0.6133, "step": 16967 }, { "epoch": 0.521242281817344, "grad_norm": 0.3600437641143799, "learning_rate": 1.6857636786816028e-05, "loss": 0.5337, "step": 16968 }, { "epoch": 0.5212730009522932, "grad_norm": 0.36625558137893677, "learning_rate": 1.685728503783029e-05, "loss": 0.6704, "step": 16969 }, { "epoch": 0.5213037200872424, "grad_norm": 0.3559701144695282, "learning_rate": 1.6856933272828887e-05, "loss": 0.5653, "step": 16970 }, { "epoch": 0.5213344392221915, "grad_norm": 0.3403119444847107, "learning_rate": 1.685658149181264e-05, "loss": 0.5825, "step": 16971 }, { "epoch": 0.5213651583571407, "grad_norm": 0.3746093809604645, "learning_rate": 1.6856229694782365e-05, "loss": 0.6002, "step": 16972 }, { "epoch": 0.5213958774920898, "grad_norm": 0.3462832570075989, "learning_rate": 1.6855877881738885e-05, "loss": 0.6065, "step": 16973 }, { "epoch": 0.5214265966270389, "grad_norm": 0.3605060577392578, "learning_rate": 1.6855526052683026e-05, "loss": 0.5834, "step": 16974 }, { "epoch": 0.5214573157619882, "grad_norm": 0.3825274109840393, "learning_rate": 1.685517420761561e-05, "loss": 0.5964, "step": 16975 }, { "epoch": 0.5214880348969373, "grad_norm": 0.3755330741405487, "learning_rate": 1.6854822346537452e-05, "loss": 0.6465, "step": 16976 }, { "epoch": 0.5215187540318864, "grad_norm": 0.34237492084503174, "learning_rate": 1.6854470469449377e-05, "loss": 0.6334, "step": 16977 }, { "epoch": 0.5215494731668356, "grad_norm": 0.35686227679252625, "learning_rate": 1.685411857635221e-05, "loss": 0.6071, "step": 16978 }, { "epoch": 0.5215801923017848, "grad_norm": 0.3531419038772583, "learning_rate": 1.685376666724677e-05, "loss": 0.5165, "step": 16979 }, { "epoch": 0.521610911436734, "grad_norm": 0.3582116961479187, "learning_rate": 1.685341474213388e-05, "loss": 0.5823, "step": 16980 }, { "epoch": 0.5216416305716831, "grad_norm": 0.34741565585136414, "learning_rate": 1.6853062801014358e-05, "loss": 0.499, "step": 16981 }, { "epoch": 0.5216723497066322, "grad_norm": 0.42128199338912964, "learning_rate": 1.685271084388903e-05, "loss": 0.6327, "step": 16982 }, { "epoch": 0.5217030688415815, "grad_norm": 0.44916656613349915, "learning_rate": 1.685235887075872e-05, "loss": 0.5346, "step": 16983 }, { "epoch": 0.5217337879765306, "grad_norm": 0.3657461106777191, "learning_rate": 1.6852006881624245e-05, "loss": 0.5707, "step": 16984 }, { "epoch": 0.5217645071114797, "grad_norm": 0.38636189699172974, "learning_rate": 1.685165487648643e-05, "loss": 0.5808, "step": 16985 }, { "epoch": 0.5217952262464289, "grad_norm": 0.3958852291107178, "learning_rate": 1.68513028553461e-05, "loss": 0.6171, "step": 16986 }, { "epoch": 0.521825945381378, "grad_norm": 0.3313107490539551, "learning_rate": 1.6850950818204067e-05, "loss": 0.5803, "step": 16987 }, { "epoch": 0.5218566645163272, "grad_norm": 0.42999449372291565, "learning_rate": 1.6850598765061167e-05, "loss": 0.5738, "step": 16988 }, { "epoch": 0.5218873836512764, "grad_norm": 0.3561694920063019, "learning_rate": 1.685024669591821e-05, "loss": 0.5463, "step": 16989 }, { "epoch": 0.5219181027862255, "grad_norm": 0.3587033450603485, "learning_rate": 1.6849894610776024e-05, "loss": 0.599, "step": 16990 }, { "epoch": 0.5219488219211748, "grad_norm": 0.37614309787750244, "learning_rate": 1.684954250963543e-05, "loss": 0.6714, "step": 16991 }, { "epoch": 0.5219795410561239, "grad_norm": 0.34565868973731995, "learning_rate": 1.6849190392497254e-05, "loss": 0.5676, "step": 16992 }, { "epoch": 0.522010260191073, "grad_norm": 0.35557326674461365, "learning_rate": 1.6848838259362313e-05, "loss": 0.537, "step": 16993 }, { "epoch": 0.5220409793260222, "grad_norm": 0.40558934211730957, "learning_rate": 1.6848486110231436e-05, "loss": 0.5685, "step": 16994 }, { "epoch": 0.5220716984609713, "grad_norm": 0.6206150650978088, "learning_rate": 1.6848133945105436e-05, "loss": 0.5792, "step": 16995 }, { "epoch": 0.5221024175959205, "grad_norm": 0.316119909286499, "learning_rate": 1.6847781763985144e-05, "loss": 0.545, "step": 16996 }, { "epoch": 0.5221331367308697, "grad_norm": 0.39058175683021545, "learning_rate": 1.6847429566871376e-05, "loss": 0.6173, "step": 16997 }, { "epoch": 0.5221638558658188, "grad_norm": 0.3297182321548462, "learning_rate": 1.684707735376496e-05, "loss": 0.5269, "step": 16998 }, { "epoch": 0.5221945750007679, "grad_norm": 0.3731611669063568, "learning_rate": 1.684672512466672e-05, "loss": 0.6077, "step": 16999 }, { "epoch": 0.5222252941357172, "grad_norm": 0.34547197818756104, "learning_rate": 1.6846372879577473e-05, "loss": 0.5396, "step": 17000 }, { "epoch": 0.5222560132706663, "grad_norm": 0.34885069727897644, "learning_rate": 1.6846020618498046e-05, "loss": 0.6104, "step": 17001 }, { "epoch": 0.5222867324056154, "grad_norm": 0.3566843271255493, "learning_rate": 1.6845668341429257e-05, "loss": 0.5738, "step": 17002 }, { "epoch": 0.5223174515405646, "grad_norm": 0.36021339893341064, "learning_rate": 1.684531604837193e-05, "loss": 0.5716, "step": 17003 }, { "epoch": 0.5223481706755138, "grad_norm": 0.3277444839477539, "learning_rate": 1.684496373932689e-05, "loss": 0.5328, "step": 17004 }, { "epoch": 0.522378889810463, "grad_norm": 0.3278258442878723, "learning_rate": 1.6844611414294963e-05, "loss": 0.5896, "step": 17005 }, { "epoch": 0.5224096089454121, "grad_norm": 0.331222265958786, "learning_rate": 1.6844259073276964e-05, "loss": 0.4809, "step": 17006 }, { "epoch": 0.5224403280803612, "grad_norm": 0.3197587728500366, "learning_rate": 1.6843906716273723e-05, "loss": 0.5716, "step": 17007 }, { "epoch": 0.5224710472153105, "grad_norm": 0.3345125615596771, "learning_rate": 1.684355434328606e-05, "loss": 0.5017, "step": 17008 }, { "epoch": 0.5225017663502596, "grad_norm": 0.43626952171325684, "learning_rate": 1.6843201954314794e-05, "loss": 0.5622, "step": 17009 }, { "epoch": 0.5225324854852087, "grad_norm": 0.34691140055656433, "learning_rate": 1.6842849549360753e-05, "loss": 0.5633, "step": 17010 }, { "epoch": 0.5225632046201579, "grad_norm": 0.3570665717124939, "learning_rate": 1.6842497128424762e-05, "loss": 0.5644, "step": 17011 }, { "epoch": 0.522593923755107, "grad_norm": 0.33646267652511597, "learning_rate": 1.6842144691507635e-05, "loss": 0.5615, "step": 17012 }, { "epoch": 0.5226246428900562, "grad_norm": 0.424877792596817, "learning_rate": 1.6841792238610208e-05, "loss": 0.5486, "step": 17013 }, { "epoch": 0.5226553620250054, "grad_norm": 0.33947062492370605, "learning_rate": 1.6841439769733294e-05, "loss": 0.4839, "step": 17014 }, { "epoch": 0.5226860811599545, "grad_norm": 0.3338967561721802, "learning_rate": 1.684108728487772e-05, "loss": 0.5302, "step": 17015 }, { "epoch": 0.5227168002949037, "grad_norm": 0.3788968026638031, "learning_rate": 1.684073478404431e-05, "loss": 0.4468, "step": 17016 }, { "epoch": 0.5227475194298529, "grad_norm": 0.35224372148513794, "learning_rate": 1.6840382267233887e-05, "loss": 0.5635, "step": 17017 }, { "epoch": 0.522778238564802, "grad_norm": 0.32737284898757935, "learning_rate": 1.6840029734447273e-05, "loss": 0.5019, "step": 17018 }, { "epoch": 0.5228089576997512, "grad_norm": 0.3843417465686798, "learning_rate": 1.6839677185685287e-05, "loss": 0.5262, "step": 17019 }, { "epoch": 0.5228396768347003, "grad_norm": 0.4036490321159363, "learning_rate": 1.6839324620948763e-05, "loss": 0.6196, "step": 17020 }, { "epoch": 0.5228703959696495, "grad_norm": 0.3500337600708008, "learning_rate": 1.6838972040238516e-05, "loss": 0.4893, "step": 17021 }, { "epoch": 0.5229011151045987, "grad_norm": 0.33354461193084717, "learning_rate": 1.6838619443555373e-05, "loss": 0.507, "step": 17022 }, { "epoch": 0.5229318342395478, "grad_norm": 0.3402877151966095, "learning_rate": 1.6838266830900156e-05, "loss": 0.5305, "step": 17023 }, { "epoch": 0.5229625533744969, "grad_norm": 0.3978286385536194, "learning_rate": 1.6837914202273692e-05, "loss": 0.5574, "step": 17024 }, { "epoch": 0.5229932725094462, "grad_norm": 0.37840959429740906, "learning_rate": 1.68375615576768e-05, "loss": 0.5919, "step": 17025 }, { "epoch": 0.5230239916443953, "grad_norm": 0.33036983013153076, "learning_rate": 1.6837208897110305e-05, "loss": 0.563, "step": 17026 }, { "epoch": 0.5230547107793444, "grad_norm": 0.4242558181285858, "learning_rate": 1.683685622057503e-05, "loss": 0.5913, "step": 17027 }, { "epoch": 0.5230854299142936, "grad_norm": 0.34995847940444946, "learning_rate": 1.68365035280718e-05, "loss": 0.5389, "step": 17028 }, { "epoch": 0.5231161490492428, "grad_norm": 0.36726057529449463, "learning_rate": 1.683615081960144e-05, "loss": 0.68, "step": 17029 }, { "epoch": 0.523146868184192, "grad_norm": 0.3720272481441498, "learning_rate": 1.6835798095164772e-05, "loss": 0.6107, "step": 17030 }, { "epoch": 0.5231775873191411, "grad_norm": 0.3911034166812897, "learning_rate": 1.683544535476262e-05, "loss": 0.6119, "step": 17031 }, { "epoch": 0.5232083064540902, "grad_norm": 0.34101951122283936, "learning_rate": 1.6835092598395806e-05, "loss": 0.5264, "step": 17032 }, { "epoch": 0.5232390255890395, "grad_norm": 0.3836565315723419, "learning_rate": 1.683473982606516e-05, "loss": 0.6001, "step": 17033 }, { "epoch": 0.5232697447239886, "grad_norm": 0.33784154057502747, "learning_rate": 1.6834387037771497e-05, "loss": 0.5281, "step": 17034 }, { "epoch": 0.5233004638589377, "grad_norm": 0.4979386627674103, "learning_rate": 1.6834034233515652e-05, "loss": 0.5818, "step": 17035 }, { "epoch": 0.5233311829938869, "grad_norm": 0.3309781849384308, "learning_rate": 1.6833681413298436e-05, "loss": 0.5717, "step": 17036 }, { "epoch": 0.523361902128836, "grad_norm": 0.4058310091495514, "learning_rate": 1.6833328577120684e-05, "loss": 0.5795, "step": 17037 }, { "epoch": 0.5233926212637852, "grad_norm": 0.36721697449684143, "learning_rate": 1.6832975724983216e-05, "loss": 0.6181, "step": 17038 }, { "epoch": 0.5234233403987344, "grad_norm": 0.3339841365814209, "learning_rate": 1.6832622856886857e-05, "loss": 0.4881, "step": 17039 }, { "epoch": 0.5234540595336835, "grad_norm": 0.3896107077598572, "learning_rate": 1.6832269972832427e-05, "loss": 0.6401, "step": 17040 }, { "epoch": 0.5234847786686327, "grad_norm": 0.381781667470932, "learning_rate": 1.6831917072820754e-05, "loss": 0.5913, "step": 17041 }, { "epoch": 0.5235154978035819, "grad_norm": 0.3288511037826538, "learning_rate": 1.683156415685266e-05, "loss": 0.5902, "step": 17042 }, { "epoch": 0.523546216938531, "grad_norm": 0.3789781630039215, "learning_rate": 1.6831211224928975e-05, "loss": 0.5117, "step": 17043 }, { "epoch": 0.5235769360734802, "grad_norm": 0.3657667338848114, "learning_rate": 1.6830858277050515e-05, "loss": 0.542, "step": 17044 }, { "epoch": 0.5236076552084293, "grad_norm": 0.34633079171180725, "learning_rate": 1.683050531321811e-05, "loss": 0.5978, "step": 17045 }, { "epoch": 0.5236383743433785, "grad_norm": 0.3415851294994354, "learning_rate": 1.6830152333432584e-05, "loss": 0.6379, "step": 17046 }, { "epoch": 0.5236690934783277, "grad_norm": 0.3510061204433441, "learning_rate": 1.682979933769476e-05, "loss": 0.5593, "step": 17047 }, { "epoch": 0.5236998126132768, "grad_norm": 0.3762488067150116, "learning_rate": 1.6829446326005462e-05, "loss": 0.5288, "step": 17048 }, { "epoch": 0.5237305317482259, "grad_norm": 0.35678631067276, "learning_rate": 1.6829093298365515e-05, "loss": 0.6431, "step": 17049 }, { "epoch": 0.5237612508831752, "grad_norm": 0.34033167362213135, "learning_rate": 1.6828740254775743e-05, "loss": 0.5396, "step": 17050 }, { "epoch": 0.5237919700181243, "grad_norm": 0.35504046082496643, "learning_rate": 1.6828387195236972e-05, "loss": 0.5073, "step": 17051 }, { "epoch": 0.5238226891530734, "grad_norm": 0.40559282898902893, "learning_rate": 1.6828034119750027e-05, "loss": 0.5516, "step": 17052 }, { "epoch": 0.5238534082880226, "grad_norm": 0.33925294876098633, "learning_rate": 1.6827681028315728e-05, "loss": 0.5396, "step": 17053 }, { "epoch": 0.5238841274229717, "grad_norm": 0.39518648386001587, "learning_rate": 1.6827327920934906e-05, "loss": 0.5252, "step": 17054 }, { "epoch": 0.523914846557921, "grad_norm": 0.8231386542320251, "learning_rate": 1.682697479760838e-05, "loss": 0.4944, "step": 17055 }, { "epoch": 0.5239455656928701, "grad_norm": 0.3694082498550415, "learning_rate": 1.682662165833698e-05, "loss": 0.64, "step": 17056 }, { "epoch": 0.5239762848278192, "grad_norm": 0.33403003215789795, "learning_rate": 1.682626850312153e-05, "loss": 0.5866, "step": 17057 }, { "epoch": 0.5240070039627684, "grad_norm": 0.34937113523483276, "learning_rate": 1.682591533196285e-05, "loss": 0.5931, "step": 17058 }, { "epoch": 0.5240377230977176, "grad_norm": 0.34221670031547546, "learning_rate": 1.682556214486177e-05, "loss": 0.5528, "step": 17059 }, { "epoch": 0.5240684422326667, "grad_norm": 0.41811633110046387, "learning_rate": 1.682520894181911e-05, "loss": 0.6055, "step": 17060 }, { "epoch": 0.5240991613676159, "grad_norm": 0.3672974109649658, "learning_rate": 1.6824855722835704e-05, "loss": 0.5434, "step": 17061 }, { "epoch": 0.524129880502565, "grad_norm": 0.3748646676540375, "learning_rate": 1.6824502487912364e-05, "loss": 0.5889, "step": 17062 }, { "epoch": 0.5241605996375142, "grad_norm": 0.3802797794342041, "learning_rate": 1.6824149237049927e-05, "loss": 0.5637, "step": 17063 }, { "epoch": 0.5241913187724634, "grad_norm": 0.3865649998188019, "learning_rate": 1.6823795970249207e-05, "loss": 0.6136, "step": 17064 }, { "epoch": 0.5242220379074125, "grad_norm": 0.35641229152679443, "learning_rate": 1.682344268751104e-05, "loss": 0.5515, "step": 17065 }, { "epoch": 0.5242527570423617, "grad_norm": 0.3443400263786316, "learning_rate": 1.6823089388836246e-05, "loss": 0.549, "step": 17066 }, { "epoch": 0.5242834761773109, "grad_norm": 0.35518044233322144, "learning_rate": 1.682273607422565e-05, "loss": 0.5435, "step": 17067 }, { "epoch": 0.52431419531226, "grad_norm": 0.38310033082962036, "learning_rate": 1.6822382743680075e-05, "loss": 0.6578, "step": 17068 }, { "epoch": 0.5243449144472092, "grad_norm": 0.3200690746307373, "learning_rate": 1.6822029397200348e-05, "loss": 0.5587, "step": 17069 }, { "epoch": 0.5243756335821583, "grad_norm": 0.36321598291397095, "learning_rate": 1.6821676034787294e-05, "loss": 0.487, "step": 17070 }, { "epoch": 0.5244063527171074, "grad_norm": 0.3308059871196747, "learning_rate": 1.6821322656441747e-05, "loss": 0.5262, "step": 17071 }, { "epoch": 0.5244370718520567, "grad_norm": 0.6699589490890503, "learning_rate": 1.6820969262164516e-05, "loss": 0.5159, "step": 17072 }, { "epoch": 0.5244677909870058, "grad_norm": 0.32767120003700256, "learning_rate": 1.6820615851956442e-05, "loss": 0.4905, "step": 17073 }, { "epoch": 0.5244985101219549, "grad_norm": 0.3290756046772003, "learning_rate": 1.6820262425818338e-05, "loss": 0.4843, "step": 17074 }, { "epoch": 0.5245292292569042, "grad_norm": 0.3248766362667084, "learning_rate": 1.6819908983751038e-05, "loss": 0.5175, "step": 17075 }, { "epoch": 0.5245599483918533, "grad_norm": 0.39445191621780396, "learning_rate": 1.681955552575536e-05, "loss": 0.7507, "step": 17076 }, { "epoch": 0.5245906675268024, "grad_norm": 0.3601611256599426, "learning_rate": 1.6819202051832137e-05, "loss": 0.5639, "step": 17077 }, { "epoch": 0.5246213866617516, "grad_norm": 0.35583359003067017, "learning_rate": 1.6818848561982193e-05, "loss": 0.6172, "step": 17078 }, { "epoch": 0.5246521057967007, "grad_norm": 0.33599722385406494, "learning_rate": 1.681849505620635e-05, "loss": 0.4503, "step": 17079 }, { "epoch": 0.52468282493165, "grad_norm": 0.42901018261909485, "learning_rate": 1.6818141534505433e-05, "loss": 0.6479, "step": 17080 }, { "epoch": 0.5247135440665991, "grad_norm": 0.3691801130771637, "learning_rate": 1.6817787996880273e-05, "loss": 0.5486, "step": 17081 }, { "epoch": 0.5247442632015482, "grad_norm": 0.35644280910491943, "learning_rate": 1.6817434443331694e-05, "loss": 0.5424, "step": 17082 }, { "epoch": 0.5247749823364974, "grad_norm": 0.3439689874649048, "learning_rate": 1.6817080873860518e-05, "loss": 0.5422, "step": 17083 }, { "epoch": 0.5248057014714466, "grad_norm": 0.35254383087158203, "learning_rate": 1.6816727288467575e-05, "loss": 0.5866, "step": 17084 }, { "epoch": 0.5248364206063957, "grad_norm": 0.329396516084671, "learning_rate": 1.681637368715369e-05, "loss": 0.5482, "step": 17085 }, { "epoch": 0.5248671397413449, "grad_norm": 0.37809279561042786, "learning_rate": 1.6816020069919687e-05, "loss": 0.5528, "step": 17086 }, { "epoch": 0.524897858876294, "grad_norm": 0.33065804839134216, "learning_rate": 1.6815666436766392e-05, "loss": 0.5902, "step": 17087 }, { "epoch": 0.5249285780112432, "grad_norm": 0.3368861973285675, "learning_rate": 1.6815312787694634e-05, "loss": 0.4963, "step": 17088 }, { "epoch": 0.5249592971461924, "grad_norm": 0.3674563467502594, "learning_rate": 1.6814959122705237e-05, "loss": 0.6464, "step": 17089 }, { "epoch": 0.5249900162811415, "grad_norm": 0.7560603022575378, "learning_rate": 1.681460544179903e-05, "loss": 0.592, "step": 17090 }, { "epoch": 0.5250207354160907, "grad_norm": 0.3467411398887634, "learning_rate": 1.681425174497683e-05, "loss": 0.572, "step": 17091 }, { "epoch": 0.5250514545510399, "grad_norm": 0.3432554006576538, "learning_rate": 1.6813898032239473e-05, "loss": 0.5942, "step": 17092 }, { "epoch": 0.525082173685989, "grad_norm": 0.41013067960739136, "learning_rate": 1.681354430358778e-05, "loss": 0.5462, "step": 17093 }, { "epoch": 0.5251128928209382, "grad_norm": 0.3512915372848511, "learning_rate": 1.6813190559022578e-05, "loss": 0.5992, "step": 17094 }, { "epoch": 0.5251436119558873, "grad_norm": 0.37173476815223694, "learning_rate": 1.6812836798544693e-05, "loss": 0.4904, "step": 17095 }, { "epoch": 0.5251743310908364, "grad_norm": 0.3629993796348572, "learning_rate": 1.6812483022154955e-05, "loss": 0.5263, "step": 17096 }, { "epoch": 0.5252050502257857, "grad_norm": 0.35085248947143555, "learning_rate": 1.6812129229854185e-05, "loss": 0.5631, "step": 17097 }, { "epoch": 0.5252357693607348, "grad_norm": 0.3560388684272766, "learning_rate": 1.681177542164321e-05, "loss": 0.5807, "step": 17098 }, { "epoch": 0.5252664884956839, "grad_norm": 0.36230018734931946, "learning_rate": 1.6811421597522862e-05, "loss": 0.5876, "step": 17099 }, { "epoch": 0.5252972076306331, "grad_norm": 0.3489912450313568, "learning_rate": 1.6811067757493957e-05, "loss": 0.5989, "step": 17100 }, { "epoch": 0.5253279267655823, "grad_norm": 0.3433593511581421, "learning_rate": 1.6810713901557333e-05, "loss": 0.5358, "step": 17101 }, { "epoch": 0.5253586459005315, "grad_norm": 0.383026659488678, "learning_rate": 1.681036002971381e-05, "loss": 0.5591, "step": 17102 }, { "epoch": 0.5253893650354806, "grad_norm": 0.31588584184646606, "learning_rate": 1.6810006141964212e-05, "loss": 0.564, "step": 17103 }, { "epoch": 0.5254200841704297, "grad_norm": 0.3804260194301605, "learning_rate": 1.680965223830937e-05, "loss": 0.6288, "step": 17104 }, { "epoch": 0.525450803305379, "grad_norm": 0.3919837772846222, "learning_rate": 1.6809298318750112e-05, "loss": 0.6319, "step": 17105 }, { "epoch": 0.5254815224403281, "grad_norm": 0.3651268482208252, "learning_rate": 1.6808944383287262e-05, "loss": 0.5533, "step": 17106 }, { "epoch": 0.5255122415752772, "grad_norm": 0.32521557807922363, "learning_rate": 1.680859043192165e-05, "loss": 0.653, "step": 17107 }, { "epoch": 0.5255429607102264, "grad_norm": 0.4215928912162781, "learning_rate": 1.6808236464654094e-05, "loss": 0.5441, "step": 17108 }, { "epoch": 0.5255736798451756, "grad_norm": 0.35680341720581055, "learning_rate": 1.680788248148543e-05, "loss": 0.5528, "step": 17109 }, { "epoch": 0.5256043989801247, "grad_norm": 0.3291541337966919, "learning_rate": 1.6807528482416476e-05, "loss": 0.5102, "step": 17110 }, { "epoch": 0.5256351181150739, "grad_norm": 0.3145633935928345, "learning_rate": 1.6807174467448067e-05, "loss": 0.5461, "step": 17111 }, { "epoch": 0.525665837250023, "grad_norm": 0.34818947315216064, "learning_rate": 1.680682043658103e-05, "loss": 0.5259, "step": 17112 }, { "epoch": 0.5256965563849721, "grad_norm": 0.38091984391212463, "learning_rate": 1.6806466389816183e-05, "loss": 0.5797, "step": 17113 }, { "epoch": 0.5257272755199214, "grad_norm": 0.3784688115119934, "learning_rate": 1.680611232715436e-05, "loss": 0.532, "step": 17114 }, { "epoch": 0.5257579946548705, "grad_norm": 0.33728086948394775, "learning_rate": 1.6805758248596387e-05, "loss": 0.6208, "step": 17115 }, { "epoch": 0.5257887137898197, "grad_norm": 0.36286550760269165, "learning_rate": 1.680540415414309e-05, "loss": 0.4759, "step": 17116 }, { "epoch": 0.5258194329247688, "grad_norm": 0.36745908856391907, "learning_rate": 1.6805050043795296e-05, "loss": 0.4678, "step": 17117 }, { "epoch": 0.525850152059718, "grad_norm": 0.4414336681365967, "learning_rate": 1.6804695917553836e-05, "loss": 0.492, "step": 17118 }, { "epoch": 0.5258808711946672, "grad_norm": 0.40542227029800415, "learning_rate": 1.6804341775419527e-05, "loss": 0.5805, "step": 17119 }, { "epoch": 0.5259115903296163, "grad_norm": 0.36446017026901245, "learning_rate": 1.6803987617393206e-05, "loss": 0.5484, "step": 17120 }, { "epoch": 0.5259423094645654, "grad_norm": 0.3984276056289673, "learning_rate": 1.6803633443475697e-05, "loss": 0.529, "step": 17121 }, { "epoch": 0.5259730285995147, "grad_norm": 0.3370259702205658, "learning_rate": 1.6803279253667826e-05, "loss": 0.5649, "step": 17122 }, { "epoch": 0.5260037477344638, "grad_norm": 0.39394018054008484, "learning_rate": 1.6802925047970423e-05, "loss": 0.6288, "step": 17123 }, { "epoch": 0.5260344668694129, "grad_norm": 0.41116461157798767, "learning_rate": 1.680257082638431e-05, "loss": 0.6123, "step": 17124 }, { "epoch": 0.5260651860043621, "grad_norm": 0.3367462158203125, "learning_rate": 1.680221658891032e-05, "loss": 0.6059, "step": 17125 }, { "epoch": 0.5260959051393113, "grad_norm": 0.31517907977104187, "learning_rate": 1.6801862335549278e-05, "loss": 0.5631, "step": 17126 }, { "epoch": 0.5261266242742605, "grad_norm": 0.33315861225128174, "learning_rate": 1.6801508066302012e-05, "loss": 0.5814, "step": 17127 }, { "epoch": 0.5261573434092096, "grad_norm": 0.363689124584198, "learning_rate": 1.680115378116935e-05, "loss": 0.6403, "step": 17128 }, { "epoch": 0.5261880625441587, "grad_norm": 0.35932451486587524, "learning_rate": 1.6800799480152116e-05, "loss": 0.5567, "step": 17129 }, { "epoch": 0.526218781679108, "grad_norm": 0.3761639893054962, "learning_rate": 1.680044516325114e-05, "loss": 0.5427, "step": 17130 }, { "epoch": 0.5262495008140571, "grad_norm": 0.3722704350948334, "learning_rate": 1.680009083046725e-05, "loss": 0.5947, "step": 17131 }, { "epoch": 0.5262802199490062, "grad_norm": 0.3661996126174927, "learning_rate": 1.6799736481801274e-05, "loss": 0.5687, "step": 17132 }, { "epoch": 0.5263109390839554, "grad_norm": 0.36639007925987244, "learning_rate": 1.6799382117254034e-05, "loss": 0.5593, "step": 17133 }, { "epoch": 0.5263416582189046, "grad_norm": 0.3621130883693695, "learning_rate": 1.6799027736826368e-05, "loss": 0.6137, "step": 17134 }, { "epoch": 0.5263723773538537, "grad_norm": 0.3885907828807831, "learning_rate": 1.6798673340519095e-05, "loss": 0.568, "step": 17135 }, { "epoch": 0.5264030964888029, "grad_norm": 0.372795432806015, "learning_rate": 1.6798318928333047e-05, "loss": 0.6053, "step": 17136 }, { "epoch": 0.526433815623752, "grad_norm": 0.36231786012649536, "learning_rate": 1.679796450026905e-05, "loss": 0.5695, "step": 17137 }, { "epoch": 0.5264645347587011, "grad_norm": 0.414518803358078, "learning_rate": 1.6797610056327932e-05, "loss": 0.5727, "step": 17138 }, { "epoch": 0.5264952538936504, "grad_norm": 0.3515462577342987, "learning_rate": 1.6797255596510518e-05, "loss": 0.6362, "step": 17139 }, { "epoch": 0.5265259730285995, "grad_norm": 0.4642738997936249, "learning_rate": 1.679690112081764e-05, "loss": 0.6037, "step": 17140 }, { "epoch": 0.5265566921635487, "grad_norm": 0.5408658981323242, "learning_rate": 1.6796546629250128e-05, "loss": 0.4395, "step": 17141 }, { "epoch": 0.5265874112984978, "grad_norm": 0.36373570561408997, "learning_rate": 1.6796192121808805e-05, "loss": 0.563, "step": 17142 }, { "epoch": 0.526618130433447, "grad_norm": 0.37874433398246765, "learning_rate": 1.67958375984945e-05, "loss": 0.5595, "step": 17143 }, { "epoch": 0.5266488495683962, "grad_norm": 0.3226058781147003, "learning_rate": 1.6795483059308043e-05, "loss": 0.5904, "step": 17144 }, { "epoch": 0.5266795687033453, "grad_norm": 0.3525058329105377, "learning_rate": 1.679512850425026e-05, "loss": 0.5854, "step": 17145 }, { "epoch": 0.5267102878382944, "grad_norm": 0.5663449764251709, "learning_rate": 1.679477393332198e-05, "loss": 0.5683, "step": 17146 }, { "epoch": 0.5267410069732437, "grad_norm": 0.35064855217933655, "learning_rate": 1.679441934652403e-05, "loss": 0.5706, "step": 17147 }, { "epoch": 0.5267717261081928, "grad_norm": 0.3384571671485901, "learning_rate": 1.679406474385724e-05, "loss": 0.5562, "step": 17148 }, { "epoch": 0.5268024452431419, "grad_norm": 0.3863227367401123, "learning_rate": 1.6793710125322437e-05, "loss": 0.506, "step": 17149 }, { "epoch": 0.5268331643780911, "grad_norm": 0.3727254569530487, "learning_rate": 1.679335549092045e-05, "loss": 0.605, "step": 17150 }, { "epoch": 0.5268638835130403, "grad_norm": 0.350010484457016, "learning_rate": 1.6793000840652106e-05, "loss": 0.5494, "step": 17151 }, { "epoch": 0.5268946026479895, "grad_norm": 0.35676905512809753, "learning_rate": 1.6792646174518235e-05, "loss": 0.569, "step": 17152 }, { "epoch": 0.5269253217829386, "grad_norm": 0.36581313610076904, "learning_rate": 1.6792291492519664e-05, "loss": 0.5224, "step": 17153 }, { "epoch": 0.5269560409178877, "grad_norm": 0.3375585675239563, "learning_rate": 1.6791936794657223e-05, "loss": 0.6125, "step": 17154 }, { "epoch": 0.526986760052837, "grad_norm": 0.354994535446167, "learning_rate": 1.6791582080931736e-05, "loss": 0.547, "step": 17155 }, { "epoch": 0.5270174791877861, "grad_norm": 0.33246690034866333, "learning_rate": 1.679122735134404e-05, "loss": 0.578, "step": 17156 }, { "epoch": 0.5270481983227352, "grad_norm": 0.3281761109828949, "learning_rate": 1.679087260589495e-05, "loss": 0.4781, "step": 17157 }, { "epoch": 0.5270789174576844, "grad_norm": 0.41937902569770813, "learning_rate": 1.679051784458531e-05, "loss": 0.5262, "step": 17158 }, { "epoch": 0.5271096365926335, "grad_norm": 0.3445090651512146, "learning_rate": 1.6790163067415938e-05, "loss": 0.585, "step": 17159 }, { "epoch": 0.5271403557275827, "grad_norm": 0.5087087750434875, "learning_rate": 1.6789808274387668e-05, "loss": 0.5709, "step": 17160 }, { "epoch": 0.5271710748625319, "grad_norm": 0.36174920201301575, "learning_rate": 1.6789453465501326e-05, "loss": 0.4982, "step": 17161 }, { "epoch": 0.527201793997481, "grad_norm": 0.3528594970703125, "learning_rate": 1.678909864075774e-05, "loss": 0.609, "step": 17162 }, { "epoch": 0.5272325131324301, "grad_norm": 0.3272199332714081, "learning_rate": 1.678874380015774e-05, "loss": 0.474, "step": 17163 }, { "epoch": 0.5272632322673794, "grad_norm": 0.3676362931728363, "learning_rate": 1.6788388943702155e-05, "loss": 0.5694, "step": 17164 }, { "epoch": 0.5272939514023285, "grad_norm": 0.3519970178604126, "learning_rate": 1.6788034071391815e-05, "loss": 0.5714, "step": 17165 }, { "epoch": 0.5273246705372777, "grad_norm": 0.39924296736717224, "learning_rate": 1.678767918322755e-05, "loss": 0.65, "step": 17166 }, { "epoch": 0.5273553896722268, "grad_norm": 0.35354843735694885, "learning_rate": 1.678732427921018e-05, "loss": 0.5457, "step": 17167 }, { "epoch": 0.527386108807176, "grad_norm": 0.32218921184539795, "learning_rate": 1.6786969359340544e-05, "loss": 0.5536, "step": 17168 }, { "epoch": 0.5274168279421252, "grad_norm": 0.3908005356788635, "learning_rate": 1.6786614423619464e-05, "loss": 0.503, "step": 17169 }, { "epoch": 0.5274475470770743, "grad_norm": 0.373963326215744, "learning_rate": 1.6786259472047775e-05, "loss": 0.7026, "step": 17170 }, { "epoch": 0.5274782662120234, "grad_norm": 0.3467302620410919, "learning_rate": 1.67859045046263e-05, "loss": 0.5639, "step": 17171 }, { "epoch": 0.5275089853469727, "grad_norm": 0.44806304574012756, "learning_rate": 1.6785549521355875e-05, "loss": 0.5422, "step": 17172 }, { "epoch": 0.5275397044819218, "grad_norm": 0.41792675852775574, "learning_rate": 1.6785194522237323e-05, "loss": 0.5978, "step": 17173 }, { "epoch": 0.5275704236168709, "grad_norm": 0.32205939292907715, "learning_rate": 1.6784839507271477e-05, "loss": 0.4508, "step": 17174 }, { "epoch": 0.5276011427518201, "grad_norm": 0.4237225651741028, "learning_rate": 1.678448447645916e-05, "loss": 0.6218, "step": 17175 }, { "epoch": 0.5276318618867692, "grad_norm": 0.3637480139732361, "learning_rate": 1.678412942980121e-05, "loss": 0.5659, "step": 17176 }, { "epoch": 0.5276625810217185, "grad_norm": 0.33216169476509094, "learning_rate": 1.6783774367298452e-05, "loss": 0.4662, "step": 17177 }, { "epoch": 0.5276933001566676, "grad_norm": 0.35738667845726013, "learning_rate": 1.678341928895171e-05, "loss": 0.5922, "step": 17178 }, { "epoch": 0.5277240192916167, "grad_norm": 0.38096049427986145, "learning_rate": 1.6783064194761826e-05, "loss": 0.546, "step": 17179 }, { "epoch": 0.527754738426566, "grad_norm": 0.3508017957210541, "learning_rate": 1.678270908472962e-05, "loss": 0.5715, "step": 17180 }, { "epoch": 0.5277854575615151, "grad_norm": 0.370608389377594, "learning_rate": 1.6782353958855918e-05, "loss": 0.5696, "step": 17181 }, { "epoch": 0.5278161766964642, "grad_norm": 0.3483356833457947, "learning_rate": 1.6781998817141562e-05, "loss": 0.5114, "step": 17182 }, { "epoch": 0.5278468958314134, "grad_norm": 0.3458460867404938, "learning_rate": 1.678164365958737e-05, "loss": 0.5616, "step": 17183 }, { "epoch": 0.5278776149663625, "grad_norm": 0.35562390089035034, "learning_rate": 1.6781288486194173e-05, "loss": 0.5978, "step": 17184 }, { "epoch": 0.5279083341013117, "grad_norm": 0.346993088722229, "learning_rate": 1.6780933296962806e-05, "loss": 0.6147, "step": 17185 }, { "epoch": 0.5279390532362609, "grad_norm": 0.33619773387908936, "learning_rate": 1.6780578091894096e-05, "loss": 0.6066, "step": 17186 }, { "epoch": 0.52796977237121, "grad_norm": 0.33275073766708374, "learning_rate": 1.6780222870988875e-05, "loss": 0.5516, "step": 17187 }, { "epoch": 0.5280004915061592, "grad_norm": 0.383171409368515, "learning_rate": 1.6779867634247966e-05, "loss": 0.544, "step": 17188 }, { "epoch": 0.5280312106411084, "grad_norm": 0.36273008584976196, "learning_rate": 1.6779512381672206e-05, "loss": 0.6582, "step": 17189 }, { "epoch": 0.5280619297760575, "grad_norm": 0.33993959426879883, "learning_rate": 1.6779157113262416e-05, "loss": 0.6521, "step": 17190 }, { "epoch": 0.5280926489110067, "grad_norm": 0.3776061534881592, "learning_rate": 1.6778801829019435e-05, "loss": 0.5655, "step": 17191 }, { "epoch": 0.5281233680459558, "grad_norm": 0.3637588918209076, "learning_rate": 1.6778446528944087e-05, "loss": 0.5753, "step": 17192 }, { "epoch": 0.528154087180905, "grad_norm": 0.3536960184574127, "learning_rate": 1.6778091213037206e-05, "loss": 0.6567, "step": 17193 }, { "epoch": 0.5281848063158542, "grad_norm": 0.38684070110321045, "learning_rate": 1.6777735881299617e-05, "loss": 0.6059, "step": 17194 }, { "epoch": 0.5282155254508033, "grad_norm": 0.37449198961257935, "learning_rate": 1.6777380533732155e-05, "loss": 0.6539, "step": 17195 }, { "epoch": 0.5282462445857524, "grad_norm": 0.30988872051239014, "learning_rate": 1.6777025170335646e-05, "loss": 0.528, "step": 17196 }, { "epoch": 0.5282769637207017, "grad_norm": 0.38072454929351807, "learning_rate": 1.677666979111092e-05, "loss": 0.613, "step": 17197 }, { "epoch": 0.5283076828556508, "grad_norm": 0.3558870255947113, "learning_rate": 1.6776314396058812e-05, "loss": 0.6197, "step": 17198 }, { "epoch": 0.5283384019905999, "grad_norm": 0.34503185749053955, "learning_rate": 1.6775958985180147e-05, "loss": 0.5401, "step": 17199 }, { "epoch": 0.5283691211255491, "grad_norm": 0.3353354036808014, "learning_rate": 1.6775603558475755e-05, "loss": 0.55, "step": 17200 }, { "epoch": 0.5283998402604982, "grad_norm": 0.47175419330596924, "learning_rate": 1.6775248115946467e-05, "loss": 0.5725, "step": 17201 }, { "epoch": 0.5284305593954475, "grad_norm": 0.35500669479370117, "learning_rate": 1.6774892657593116e-05, "loss": 0.6688, "step": 17202 }, { "epoch": 0.5284612785303966, "grad_norm": 0.3812509775161743, "learning_rate": 1.677453718341653e-05, "loss": 0.664, "step": 17203 }, { "epoch": 0.5284919976653457, "grad_norm": 0.3242207467556, "learning_rate": 1.677418169341754e-05, "loss": 0.4736, "step": 17204 }, { "epoch": 0.528522716800295, "grad_norm": 0.35710349678993225, "learning_rate": 1.6773826187596974e-05, "loss": 0.4571, "step": 17205 }, { "epoch": 0.5285534359352441, "grad_norm": 0.31059250235557556, "learning_rate": 1.6773470665955666e-05, "loss": 0.5243, "step": 17206 }, { "epoch": 0.5285841550701932, "grad_norm": 0.33832621574401855, "learning_rate": 1.6773115128494442e-05, "loss": 0.5305, "step": 17207 }, { "epoch": 0.5286148742051424, "grad_norm": 0.3478144109249115, "learning_rate": 1.6772759575214136e-05, "loss": 0.6311, "step": 17208 }, { "epoch": 0.5286455933400915, "grad_norm": 0.3306460976600647, "learning_rate": 1.6772404006115578e-05, "loss": 0.5135, "step": 17209 }, { "epoch": 0.5286763124750407, "grad_norm": 0.368017315864563, "learning_rate": 1.6772048421199594e-05, "loss": 0.5473, "step": 17210 }, { "epoch": 0.5287070316099899, "grad_norm": 0.343172550201416, "learning_rate": 1.6771692820467018e-05, "loss": 0.5949, "step": 17211 }, { "epoch": 0.528737750744939, "grad_norm": 0.36456000804901123, "learning_rate": 1.677133720391868e-05, "loss": 0.5375, "step": 17212 }, { "epoch": 0.5287684698798882, "grad_norm": 0.32627618312835693, "learning_rate": 1.6770981571555417e-05, "loss": 0.607, "step": 17213 }, { "epoch": 0.5287991890148374, "grad_norm": 0.39072856307029724, "learning_rate": 1.6770625923378046e-05, "loss": 0.6264, "step": 17214 }, { "epoch": 0.5288299081497865, "grad_norm": 0.35281720757484436, "learning_rate": 1.677027025938741e-05, "loss": 0.6063, "step": 17215 }, { "epoch": 0.5288606272847357, "grad_norm": 0.3935064673423767, "learning_rate": 1.6769914579584336e-05, "loss": 0.598, "step": 17216 }, { "epoch": 0.5288913464196848, "grad_norm": 0.4175238609313965, "learning_rate": 1.676955888396965e-05, "loss": 0.5549, "step": 17217 }, { "epoch": 0.528922065554634, "grad_norm": 0.321268767118454, "learning_rate": 1.676920317254419e-05, "loss": 0.5368, "step": 17218 }, { "epoch": 0.5289527846895832, "grad_norm": 0.36270490288734436, "learning_rate": 1.676884744530878e-05, "loss": 0.5783, "step": 17219 }, { "epoch": 0.5289835038245323, "grad_norm": 0.3529336750507355, "learning_rate": 1.676849170226426e-05, "loss": 0.6008, "step": 17220 }, { "epoch": 0.5290142229594814, "grad_norm": 0.36654698848724365, "learning_rate": 1.6768135943411445e-05, "loss": 0.5495, "step": 17221 }, { "epoch": 0.5290449420944306, "grad_norm": 0.35879987478256226, "learning_rate": 1.6767780168751183e-05, "loss": 0.5969, "step": 17222 }, { "epoch": 0.5290756612293798, "grad_norm": 0.43403923511505127, "learning_rate": 1.6767424378284295e-05, "loss": 0.4011, "step": 17223 }, { "epoch": 0.5291063803643289, "grad_norm": 0.3810604512691498, "learning_rate": 1.676706857201162e-05, "loss": 0.5711, "step": 17224 }, { "epoch": 0.5291370994992781, "grad_norm": 0.3474650979042053, "learning_rate": 1.676671274993398e-05, "loss": 0.5608, "step": 17225 }, { "epoch": 0.5291678186342272, "grad_norm": 0.3809007406234741, "learning_rate": 1.6766356912052208e-05, "loss": 0.5728, "step": 17226 }, { "epoch": 0.5291985377691765, "grad_norm": 0.3907526433467865, "learning_rate": 1.6766001058367137e-05, "loss": 0.5858, "step": 17227 }, { "epoch": 0.5292292569041256, "grad_norm": 0.3540938198566437, "learning_rate": 1.6765645188879596e-05, "loss": 0.5301, "step": 17228 }, { "epoch": 0.5292599760390747, "grad_norm": 0.37664374709129333, "learning_rate": 1.6765289303590425e-05, "loss": 0.5033, "step": 17229 }, { "epoch": 0.5292906951740239, "grad_norm": 0.3645837604999542, "learning_rate": 1.6764933402500444e-05, "loss": 0.5716, "step": 17230 }, { "epoch": 0.5293214143089731, "grad_norm": 0.3708764910697937, "learning_rate": 1.676457748561049e-05, "loss": 0.5062, "step": 17231 }, { "epoch": 0.5293521334439222, "grad_norm": 0.33386680483818054, "learning_rate": 1.6764221552921392e-05, "loss": 0.521, "step": 17232 }, { "epoch": 0.5293828525788714, "grad_norm": 0.38108786940574646, "learning_rate": 1.676386560443398e-05, "loss": 0.5899, "step": 17233 }, { "epoch": 0.5294135717138205, "grad_norm": 0.33968815207481384, "learning_rate": 1.676350964014909e-05, "loss": 0.5367, "step": 17234 }, { "epoch": 0.5294442908487696, "grad_norm": 0.3630366027355194, "learning_rate": 1.676315366006755e-05, "loss": 0.5171, "step": 17235 }, { "epoch": 0.5294750099837189, "grad_norm": 0.3717157244682312, "learning_rate": 1.6762797664190196e-05, "loss": 0.6048, "step": 17236 }, { "epoch": 0.529505729118668, "grad_norm": 0.4420836865901947, "learning_rate": 1.676244165251785e-05, "loss": 0.6137, "step": 17237 }, { "epoch": 0.5295364482536172, "grad_norm": 0.36189961433410645, "learning_rate": 1.6762085625051354e-05, "loss": 0.6351, "step": 17238 }, { "epoch": 0.5295671673885664, "grad_norm": 0.34608232975006104, "learning_rate": 1.6761729581791534e-05, "loss": 0.6374, "step": 17239 }, { "epoch": 0.5295978865235155, "grad_norm": 0.3361964821815491, "learning_rate": 1.676137352273922e-05, "loss": 0.5305, "step": 17240 }, { "epoch": 0.5296286056584647, "grad_norm": 0.4377133250236511, "learning_rate": 1.6761017447895248e-05, "loss": 0.6076, "step": 17241 }, { "epoch": 0.5296593247934138, "grad_norm": 0.3038196563720703, "learning_rate": 1.6760661357260442e-05, "loss": 0.5404, "step": 17242 }, { "epoch": 0.5296900439283629, "grad_norm": 0.3467174172401428, "learning_rate": 1.6760305250835648e-05, "loss": 0.5812, "step": 17243 }, { "epoch": 0.5297207630633122, "grad_norm": 0.33301252126693726, "learning_rate": 1.6759949128621685e-05, "loss": 0.5702, "step": 17244 }, { "epoch": 0.5297514821982613, "grad_norm": 0.6793193221092224, "learning_rate": 1.6759592990619387e-05, "loss": 0.502, "step": 17245 }, { "epoch": 0.5297822013332104, "grad_norm": 0.43540146946907043, "learning_rate": 1.6759236836829587e-05, "loss": 0.5768, "step": 17246 }, { "epoch": 0.5298129204681596, "grad_norm": 0.34679651260375977, "learning_rate": 1.675888066725312e-05, "loss": 0.5685, "step": 17247 }, { "epoch": 0.5298436396031088, "grad_norm": 0.34440675377845764, "learning_rate": 1.6758524481890814e-05, "loss": 0.5676, "step": 17248 }, { "epoch": 0.5298743587380579, "grad_norm": 0.35088053345680237, "learning_rate": 1.6758168280743504e-05, "loss": 0.5987, "step": 17249 }, { "epoch": 0.5299050778730071, "grad_norm": 0.34457749128341675, "learning_rate": 1.6757812063812017e-05, "loss": 0.4646, "step": 17250 }, { "epoch": 0.5299357970079562, "grad_norm": 0.3695155382156372, "learning_rate": 1.6757455831097188e-05, "loss": 0.6013, "step": 17251 }, { "epoch": 0.5299665161429055, "grad_norm": 0.34640997648239136, "learning_rate": 1.6757099582599852e-05, "loss": 0.6374, "step": 17252 }, { "epoch": 0.5299972352778546, "grad_norm": 0.3437354266643524, "learning_rate": 1.6756743318320836e-05, "loss": 0.5743, "step": 17253 }, { "epoch": 0.5300279544128037, "grad_norm": 0.4554313123226166, "learning_rate": 1.6756387038260975e-05, "loss": 0.5717, "step": 17254 }, { "epoch": 0.5300586735477529, "grad_norm": 0.3293493986129761, "learning_rate": 1.67560307424211e-05, "loss": 0.4945, "step": 17255 }, { "epoch": 0.530089392682702, "grad_norm": 0.3792603313922882, "learning_rate": 1.6755674430802043e-05, "loss": 0.6984, "step": 17256 }, { "epoch": 0.5301201118176512, "grad_norm": 0.36618632078170776, "learning_rate": 1.6755318103404635e-05, "loss": 0.5724, "step": 17257 }, { "epoch": 0.5301508309526004, "grad_norm": 0.32332542538642883, "learning_rate": 1.6754961760229714e-05, "loss": 0.561, "step": 17258 }, { "epoch": 0.5301815500875495, "grad_norm": 0.3638613522052765, "learning_rate": 1.6754605401278106e-05, "loss": 0.5153, "step": 17259 }, { "epoch": 0.5302122692224986, "grad_norm": 0.3504585027694702, "learning_rate": 1.6754249026550644e-05, "loss": 0.5625, "step": 17260 }, { "epoch": 0.5302429883574479, "grad_norm": 0.3430297076702118, "learning_rate": 1.6753892636048164e-05, "loss": 0.5458, "step": 17261 }, { "epoch": 0.530273707492397, "grad_norm": 0.33634695410728455, "learning_rate": 1.6753536229771493e-05, "loss": 0.5092, "step": 17262 }, { "epoch": 0.5303044266273462, "grad_norm": 0.3930884003639221, "learning_rate": 1.675317980772147e-05, "loss": 0.6019, "step": 17263 }, { "epoch": 0.5303351457622953, "grad_norm": 0.4015962481498718, "learning_rate": 1.675282336989892e-05, "loss": 0.5244, "step": 17264 }, { "epoch": 0.5303658648972445, "grad_norm": 0.4321385622024536, "learning_rate": 1.675246691630468e-05, "loss": 0.5352, "step": 17265 }, { "epoch": 0.5303965840321937, "grad_norm": 0.3691467344760895, "learning_rate": 1.6752110446939583e-05, "loss": 0.5187, "step": 17266 }, { "epoch": 0.5304273031671428, "grad_norm": 0.34569254517555237, "learning_rate": 1.675175396180446e-05, "loss": 0.5548, "step": 17267 }, { "epoch": 0.5304580223020919, "grad_norm": 0.3645572066307068, "learning_rate": 1.6751397460900146e-05, "loss": 0.5538, "step": 17268 }, { "epoch": 0.5304887414370412, "grad_norm": 0.33556920289993286, "learning_rate": 1.6751040944227473e-05, "loss": 0.5351, "step": 17269 }, { "epoch": 0.5305194605719903, "grad_norm": 0.3323725461959839, "learning_rate": 1.6750684411787268e-05, "loss": 0.543, "step": 17270 }, { "epoch": 0.5305501797069394, "grad_norm": 0.36526861786842346, "learning_rate": 1.6750327863580368e-05, "loss": 0.6461, "step": 17271 }, { "epoch": 0.5305808988418886, "grad_norm": 0.37965187430381775, "learning_rate": 1.674997129960761e-05, "loss": 0.6308, "step": 17272 }, { "epoch": 0.5306116179768378, "grad_norm": 0.3410053849220276, "learning_rate": 1.6749614719869823e-05, "loss": 0.5559, "step": 17273 }, { "epoch": 0.5306423371117869, "grad_norm": 0.4290660321712494, "learning_rate": 1.6749258124367833e-05, "loss": 0.5977, "step": 17274 }, { "epoch": 0.5306730562467361, "grad_norm": 0.32674309611320496, "learning_rate": 1.6748901513102484e-05, "loss": 0.5163, "step": 17275 }, { "epoch": 0.5307037753816852, "grad_norm": 0.3867798447608948, "learning_rate": 1.6748544886074604e-05, "loss": 0.5755, "step": 17276 }, { "epoch": 0.5307344945166345, "grad_norm": 0.33389073610305786, "learning_rate": 1.6748188243285027e-05, "loss": 0.5582, "step": 17277 }, { "epoch": 0.5307652136515836, "grad_norm": 0.3941335082054138, "learning_rate": 1.6747831584734583e-05, "loss": 0.6333, "step": 17278 }, { "epoch": 0.5307959327865327, "grad_norm": 0.36234936118125916, "learning_rate": 1.6747474910424108e-05, "loss": 0.5888, "step": 17279 }, { "epoch": 0.5308266519214819, "grad_norm": 0.354898065328598, "learning_rate": 1.6747118220354432e-05, "loss": 0.5881, "step": 17280 }, { "epoch": 0.530857371056431, "grad_norm": 0.4172767400741577, "learning_rate": 1.6746761514526393e-05, "loss": 0.4912, "step": 17281 }, { "epoch": 0.5308880901913802, "grad_norm": 0.374140202999115, "learning_rate": 1.674640479294082e-05, "loss": 0.5177, "step": 17282 }, { "epoch": 0.5309188093263294, "grad_norm": 0.30899038910865784, "learning_rate": 1.6746048055598548e-05, "loss": 0.4708, "step": 17283 }, { "epoch": 0.5309495284612785, "grad_norm": 0.3286052644252777, "learning_rate": 1.674569130250041e-05, "loss": 0.5595, "step": 17284 }, { "epoch": 0.5309802475962276, "grad_norm": 0.3817119300365448, "learning_rate": 1.6745334533647237e-05, "loss": 0.6325, "step": 17285 }, { "epoch": 0.5310109667311769, "grad_norm": 0.43117228150367737, "learning_rate": 1.6744977749039865e-05, "loss": 0.6356, "step": 17286 }, { "epoch": 0.531041685866126, "grad_norm": 0.3622561991214752, "learning_rate": 1.6744620948679127e-05, "loss": 0.5582, "step": 17287 }, { "epoch": 0.5310724050010752, "grad_norm": 0.43353569507598877, "learning_rate": 1.6744264132565853e-05, "loss": 0.5387, "step": 17288 }, { "epoch": 0.5311031241360243, "grad_norm": 0.4620560109615326, "learning_rate": 1.6743907300700884e-05, "loss": 0.5068, "step": 17289 }, { "epoch": 0.5311338432709735, "grad_norm": 0.35169586539268494, "learning_rate": 1.6743550453085045e-05, "loss": 0.5344, "step": 17290 }, { "epoch": 0.5311645624059227, "grad_norm": 0.37219780683517456, "learning_rate": 1.6743193589719175e-05, "loss": 0.5771, "step": 17291 }, { "epoch": 0.5311952815408718, "grad_norm": 0.39778387546539307, "learning_rate": 1.6742836710604105e-05, "loss": 0.5542, "step": 17292 }, { "epoch": 0.5312260006758209, "grad_norm": 0.37781190872192383, "learning_rate": 1.6742479815740667e-05, "loss": 0.5783, "step": 17293 }, { "epoch": 0.5312567198107702, "grad_norm": 0.35347846150398254, "learning_rate": 1.67421229051297e-05, "loss": 0.6053, "step": 17294 }, { "epoch": 0.5312874389457193, "grad_norm": 0.37462326884269714, "learning_rate": 1.674176597877203e-05, "loss": 0.5432, "step": 17295 }, { "epoch": 0.5313181580806684, "grad_norm": 0.3665309250354767, "learning_rate": 1.67414090366685e-05, "loss": 0.6412, "step": 17296 }, { "epoch": 0.5313488772156176, "grad_norm": 0.34918949007987976, "learning_rate": 1.6741052078819936e-05, "loss": 0.5878, "step": 17297 }, { "epoch": 0.5313795963505668, "grad_norm": 0.3538622260093689, "learning_rate": 1.674069510522717e-05, "loss": 0.5097, "step": 17298 }, { "epoch": 0.531410315485516, "grad_norm": 0.3396088182926178, "learning_rate": 1.6740338115891045e-05, "loss": 0.5415, "step": 17299 }, { "epoch": 0.5314410346204651, "grad_norm": 0.3678450882434845, "learning_rate": 1.6739981110812388e-05, "loss": 0.6431, "step": 17300 }, { "epoch": 0.5314717537554142, "grad_norm": 0.3718957006931305, "learning_rate": 1.673962408999203e-05, "loss": 0.559, "step": 17301 }, { "epoch": 0.5315024728903635, "grad_norm": 0.35013312101364136, "learning_rate": 1.6739267053430815e-05, "loss": 0.5796, "step": 17302 }, { "epoch": 0.5315331920253126, "grad_norm": 0.3530600070953369, "learning_rate": 1.6738910001129567e-05, "loss": 0.5251, "step": 17303 }, { "epoch": 0.5315639111602617, "grad_norm": 0.3594443202018738, "learning_rate": 1.6738552933089128e-05, "loss": 0.6741, "step": 17304 }, { "epoch": 0.5315946302952109, "grad_norm": 0.3446805775165558, "learning_rate": 1.6738195849310326e-05, "loss": 0.6204, "step": 17305 }, { "epoch": 0.53162534943016, "grad_norm": 0.35248494148254395, "learning_rate": 1.6737838749793997e-05, "loss": 0.6445, "step": 17306 }, { "epoch": 0.5316560685651092, "grad_norm": 0.34697791934013367, "learning_rate": 1.6737481634540978e-05, "loss": 0.6351, "step": 17307 }, { "epoch": 0.5316867877000584, "grad_norm": 0.36272725462913513, "learning_rate": 1.6737124503552097e-05, "loss": 0.577, "step": 17308 }, { "epoch": 0.5317175068350075, "grad_norm": 0.3346388041973114, "learning_rate": 1.6736767356828193e-05, "loss": 0.5919, "step": 17309 }, { "epoch": 0.5317482259699566, "grad_norm": 0.3537120223045349, "learning_rate": 1.6736410194370096e-05, "loss": 0.5379, "step": 17310 }, { "epoch": 0.5317789451049059, "grad_norm": 0.3835258185863495, "learning_rate": 1.6736053016178643e-05, "loss": 0.6287, "step": 17311 }, { "epoch": 0.531809664239855, "grad_norm": 0.36293575167655945, "learning_rate": 1.6735695822254668e-05, "loss": 0.6345, "step": 17312 }, { "epoch": 0.5318403833748042, "grad_norm": 0.4310011863708496, "learning_rate": 1.6735338612599004e-05, "loss": 0.6199, "step": 17313 }, { "epoch": 0.5318711025097533, "grad_norm": 0.32151293754577637, "learning_rate": 1.673498138721249e-05, "loss": 0.4659, "step": 17314 }, { "epoch": 0.5319018216447025, "grad_norm": 0.3638944923877716, "learning_rate": 1.6734624146095954e-05, "loss": 0.6306, "step": 17315 }, { "epoch": 0.5319325407796517, "grad_norm": 0.36964356899261475, "learning_rate": 1.6734266889250232e-05, "loss": 0.5329, "step": 17316 }, { "epoch": 0.5319632599146008, "grad_norm": 0.34486687183380127, "learning_rate": 1.673390961667616e-05, "loss": 0.5728, "step": 17317 }, { "epoch": 0.5319939790495499, "grad_norm": 0.5779333710670471, "learning_rate": 1.6733552328374572e-05, "loss": 0.5817, "step": 17318 }, { "epoch": 0.5320246981844992, "grad_norm": 0.33269450068473816, "learning_rate": 1.6733195024346303e-05, "loss": 0.5292, "step": 17319 }, { "epoch": 0.5320554173194483, "grad_norm": 0.48717200756073, "learning_rate": 1.6732837704592185e-05, "loss": 0.635, "step": 17320 }, { "epoch": 0.5320861364543974, "grad_norm": 0.39030301570892334, "learning_rate": 1.6732480369113057e-05, "loss": 0.6454, "step": 17321 }, { "epoch": 0.5321168555893466, "grad_norm": 0.42183101177215576, "learning_rate": 1.673212301790975e-05, "loss": 0.7039, "step": 17322 }, { "epoch": 0.5321475747242957, "grad_norm": 0.3666004240512848, "learning_rate": 1.6731765650983098e-05, "loss": 0.6086, "step": 17323 }, { "epoch": 0.532178293859245, "grad_norm": 0.3422590494155884, "learning_rate": 1.6731408268333938e-05, "loss": 0.523, "step": 17324 }, { "epoch": 0.5322090129941941, "grad_norm": 1.6792553663253784, "learning_rate": 1.6731050869963105e-05, "loss": 0.5859, "step": 17325 }, { "epoch": 0.5322397321291432, "grad_norm": 0.3990066349506378, "learning_rate": 1.673069345587143e-05, "loss": 0.6208, "step": 17326 }, { "epoch": 0.5322704512640924, "grad_norm": 0.34617915749549866, "learning_rate": 1.6730336026059755e-05, "loss": 0.6322, "step": 17327 }, { "epoch": 0.5323011703990416, "grad_norm": 0.32512417435646057, "learning_rate": 1.6729978580528908e-05, "loss": 0.5075, "step": 17328 }, { "epoch": 0.5323318895339907, "grad_norm": 0.3258800208568573, "learning_rate": 1.6729621119279726e-05, "loss": 0.5371, "step": 17329 }, { "epoch": 0.5323626086689399, "grad_norm": 0.3864729404449463, "learning_rate": 1.6729263642313044e-05, "loss": 0.5984, "step": 17330 }, { "epoch": 0.532393327803889, "grad_norm": 0.3683992922306061, "learning_rate": 1.6728906149629693e-05, "loss": 0.5555, "step": 17331 }, { "epoch": 0.5324240469388382, "grad_norm": 0.34229734539985657, "learning_rate": 1.6728548641230516e-05, "loss": 0.5962, "step": 17332 }, { "epoch": 0.5324547660737874, "grad_norm": 0.3372699022293091, "learning_rate": 1.6728191117116344e-05, "loss": 0.5534, "step": 17333 }, { "epoch": 0.5324854852087365, "grad_norm": 0.3725792169570923, "learning_rate": 1.672783357728801e-05, "loss": 0.629, "step": 17334 }, { "epoch": 0.5325162043436856, "grad_norm": 0.36258333921432495, "learning_rate": 1.672747602174635e-05, "loss": 0.4956, "step": 17335 }, { "epoch": 0.5325469234786349, "grad_norm": 0.34343135356903076, "learning_rate": 1.67271184504922e-05, "loss": 0.5175, "step": 17336 }, { "epoch": 0.532577642613584, "grad_norm": 0.4418194591999054, "learning_rate": 1.6726760863526398e-05, "loss": 0.5264, "step": 17337 }, { "epoch": 0.5326083617485332, "grad_norm": 0.34815558791160583, "learning_rate": 1.6726403260849774e-05, "loss": 0.4914, "step": 17338 }, { "epoch": 0.5326390808834823, "grad_norm": 0.37168148159980774, "learning_rate": 1.6726045642463162e-05, "loss": 0.5499, "step": 17339 }, { "epoch": 0.5326698000184314, "grad_norm": 0.4095971882343292, "learning_rate": 1.6725688008367407e-05, "loss": 0.5947, "step": 17340 }, { "epoch": 0.5327005191533807, "grad_norm": 0.4383649230003357, "learning_rate": 1.6725330358563334e-05, "loss": 0.5271, "step": 17341 }, { "epoch": 0.5327312382883298, "grad_norm": 0.40236011147499084, "learning_rate": 1.6724972693051783e-05, "loss": 0.5704, "step": 17342 }, { "epoch": 0.5327619574232789, "grad_norm": 0.35946041345596313, "learning_rate": 1.6724615011833593e-05, "loss": 0.5714, "step": 17343 }, { "epoch": 0.5327926765582282, "grad_norm": 0.38009390234947205, "learning_rate": 1.6724257314909588e-05, "loss": 0.6199, "step": 17344 }, { "epoch": 0.5328233956931773, "grad_norm": 0.34332039952278137, "learning_rate": 1.6723899602280617e-05, "loss": 0.5795, "step": 17345 }, { "epoch": 0.5328541148281264, "grad_norm": 0.402311235666275, "learning_rate": 1.6723541873947503e-05, "loss": 0.5834, "step": 17346 }, { "epoch": 0.5328848339630756, "grad_norm": 0.4167317748069763, "learning_rate": 1.672318412991109e-05, "loss": 0.6181, "step": 17347 }, { "epoch": 0.5329155530980247, "grad_norm": 0.40620142221450806, "learning_rate": 1.672282637017221e-05, "loss": 0.6049, "step": 17348 }, { "epoch": 0.532946272232974, "grad_norm": 0.38681524991989136, "learning_rate": 1.67224685947317e-05, "loss": 0.4987, "step": 17349 }, { "epoch": 0.5329769913679231, "grad_norm": 0.35705694556236267, "learning_rate": 1.6722110803590397e-05, "loss": 0.5056, "step": 17350 }, { "epoch": 0.5330077105028722, "grad_norm": 0.31814438104629517, "learning_rate": 1.6721752996749133e-05, "loss": 0.596, "step": 17351 }, { "epoch": 0.5330384296378214, "grad_norm": 0.4494929909706116, "learning_rate": 1.6721395174208743e-05, "loss": 0.7133, "step": 17352 }, { "epoch": 0.5330691487727706, "grad_norm": 0.38571465015411377, "learning_rate": 1.6721037335970068e-05, "loss": 0.5558, "step": 17353 }, { "epoch": 0.5330998679077197, "grad_norm": 0.35353201627731323, "learning_rate": 1.6720679482033945e-05, "loss": 0.5312, "step": 17354 }, { "epoch": 0.5331305870426689, "grad_norm": 0.361712783575058, "learning_rate": 1.67203216124012e-05, "loss": 0.586, "step": 17355 }, { "epoch": 0.533161306177618, "grad_norm": 0.3359939157962799, "learning_rate": 1.6719963727072673e-05, "loss": 0.5412, "step": 17356 }, { "epoch": 0.5331920253125672, "grad_norm": 0.40301960706710815, "learning_rate": 1.6719605826049202e-05, "loss": 0.5697, "step": 17357 }, { "epoch": 0.5332227444475164, "grad_norm": 0.36025378108024597, "learning_rate": 1.6719247909331627e-05, "loss": 0.5077, "step": 17358 }, { "epoch": 0.5332534635824655, "grad_norm": 0.39578837156295776, "learning_rate": 1.6718889976920774e-05, "loss": 0.6615, "step": 17359 }, { "epoch": 0.5332841827174146, "grad_norm": 0.33421778678894043, "learning_rate": 1.6718532028817487e-05, "loss": 0.5702, "step": 17360 }, { "epoch": 0.5333149018523639, "grad_norm": 0.38005849719047546, "learning_rate": 1.6718174065022604e-05, "loss": 0.6207, "step": 17361 }, { "epoch": 0.533345620987313, "grad_norm": 0.3307769000530243, "learning_rate": 1.671781608553695e-05, "loss": 0.4898, "step": 17362 }, { "epoch": 0.5333763401222622, "grad_norm": 0.34856104850769043, "learning_rate": 1.671745809036137e-05, "loss": 0.4946, "step": 17363 }, { "epoch": 0.5334070592572113, "grad_norm": 0.33593735098838806, "learning_rate": 1.6717100079496695e-05, "loss": 0.528, "step": 17364 }, { "epoch": 0.5334377783921604, "grad_norm": 0.3310040235519409, "learning_rate": 1.6716742052943768e-05, "loss": 0.4961, "step": 17365 }, { "epoch": 0.5334684975271097, "grad_norm": 0.35519418120384216, "learning_rate": 1.6716384010703418e-05, "loss": 0.4964, "step": 17366 }, { "epoch": 0.5334992166620588, "grad_norm": 0.3398650288581848, "learning_rate": 1.6716025952776482e-05, "loss": 0.5031, "step": 17367 }, { "epoch": 0.5335299357970079, "grad_norm": 0.32115644216537476, "learning_rate": 1.67156678791638e-05, "loss": 0.4853, "step": 17368 }, { "epoch": 0.5335606549319571, "grad_norm": 0.3403930366039276, "learning_rate": 1.6715309789866206e-05, "loss": 0.5816, "step": 17369 }, { "epoch": 0.5335913740669063, "grad_norm": 0.37326541543006897, "learning_rate": 1.6714951684884537e-05, "loss": 0.5489, "step": 17370 }, { "epoch": 0.5336220932018554, "grad_norm": 0.3665692210197449, "learning_rate": 1.6714593564219632e-05, "loss": 0.6713, "step": 17371 }, { "epoch": 0.5336528123368046, "grad_norm": 0.3653124272823334, "learning_rate": 1.6714235427872323e-05, "loss": 0.5717, "step": 17372 }, { "epoch": 0.5336835314717537, "grad_norm": 0.4197884798049927, "learning_rate": 1.671387727584345e-05, "loss": 0.5873, "step": 17373 }, { "epoch": 0.533714250606703, "grad_norm": 0.3606233596801758, "learning_rate": 1.6713519108133846e-05, "loss": 0.5823, "step": 17374 }, { "epoch": 0.5337449697416521, "grad_norm": 0.361645370721817, "learning_rate": 1.6713160924744346e-05, "loss": 0.5362, "step": 17375 }, { "epoch": 0.5337756888766012, "grad_norm": 0.37010568380355835, "learning_rate": 1.6712802725675795e-05, "loss": 0.6036, "step": 17376 }, { "epoch": 0.5338064080115504, "grad_norm": 0.3705853819847107, "learning_rate": 1.671244451092902e-05, "loss": 0.5532, "step": 17377 }, { "epoch": 0.5338371271464996, "grad_norm": 0.36408179998397827, "learning_rate": 1.6712086280504865e-05, "loss": 0.5617, "step": 17378 }, { "epoch": 0.5338678462814487, "grad_norm": 0.35713061690330505, "learning_rate": 1.6711728034404165e-05, "loss": 0.6148, "step": 17379 }, { "epoch": 0.5338985654163979, "grad_norm": 0.3740946650505066, "learning_rate": 1.6711369772627756e-05, "loss": 0.5139, "step": 17380 }, { "epoch": 0.533929284551347, "grad_norm": 0.38650232553482056, "learning_rate": 1.671101149517647e-05, "loss": 0.497, "step": 17381 }, { "epoch": 0.5339600036862961, "grad_norm": 0.3612997531890869, "learning_rate": 1.6710653202051152e-05, "loss": 0.6006, "step": 17382 }, { "epoch": 0.5339907228212454, "grad_norm": 0.6732011437416077, "learning_rate": 1.671029489325263e-05, "loss": 0.548, "step": 17383 }, { "epoch": 0.5340214419561945, "grad_norm": 0.314237117767334, "learning_rate": 1.670993656878175e-05, "loss": 0.4935, "step": 17384 }, { "epoch": 0.5340521610911436, "grad_norm": 0.34440383315086365, "learning_rate": 1.670957822863934e-05, "loss": 0.5296, "step": 17385 }, { "epoch": 0.5340828802260928, "grad_norm": 0.33470332622528076, "learning_rate": 1.6709219872826247e-05, "loss": 0.5841, "step": 17386 }, { "epoch": 0.534113599361042, "grad_norm": 0.4175652861595154, "learning_rate": 1.67088615013433e-05, "loss": 0.5627, "step": 17387 }, { "epoch": 0.5341443184959912, "grad_norm": 0.36672502756118774, "learning_rate": 1.6708503114191337e-05, "loss": 0.583, "step": 17388 }, { "epoch": 0.5341750376309403, "grad_norm": 0.35693374276161194, "learning_rate": 1.6708144711371196e-05, "loss": 0.5674, "step": 17389 }, { "epoch": 0.5342057567658894, "grad_norm": 0.40570878982543945, "learning_rate": 1.670778629288372e-05, "loss": 0.5438, "step": 17390 }, { "epoch": 0.5342364759008387, "grad_norm": 0.37775933742523193, "learning_rate": 1.6707427858729733e-05, "loss": 0.5909, "step": 17391 }, { "epoch": 0.5342671950357878, "grad_norm": 0.4309457242488861, "learning_rate": 1.6707069408910085e-05, "loss": 0.5804, "step": 17392 }, { "epoch": 0.5342979141707369, "grad_norm": 0.3182259500026703, "learning_rate": 1.6706710943425606e-05, "loss": 0.5845, "step": 17393 }, { "epoch": 0.5343286333056861, "grad_norm": 0.4706096351146698, "learning_rate": 1.6706352462277137e-05, "loss": 0.6041, "step": 17394 }, { "epoch": 0.5343593524406353, "grad_norm": 0.3578329384326935, "learning_rate": 1.670599396546551e-05, "loss": 0.5897, "step": 17395 }, { "epoch": 0.5343900715755844, "grad_norm": 0.31993716955184937, "learning_rate": 1.6705635452991567e-05, "loss": 0.5523, "step": 17396 }, { "epoch": 0.5344207907105336, "grad_norm": 0.3375912308692932, "learning_rate": 1.6705276924856148e-05, "loss": 0.5023, "step": 17397 }, { "epoch": 0.5344515098454827, "grad_norm": 0.3382474482059479, "learning_rate": 1.670491838106008e-05, "loss": 0.5565, "step": 17398 }, { "epoch": 0.534482228980432, "grad_norm": 0.3502212166786194, "learning_rate": 1.6704559821604212e-05, "loss": 0.6227, "step": 17399 }, { "epoch": 0.5345129481153811, "grad_norm": 0.3692356050014496, "learning_rate": 1.6704201246489374e-05, "loss": 0.5695, "step": 17400 }, { "epoch": 0.5345436672503302, "grad_norm": 0.3879462778568268, "learning_rate": 1.6703842655716406e-05, "loss": 0.6345, "step": 17401 }, { "epoch": 0.5345743863852794, "grad_norm": 0.3533160090446472, "learning_rate": 1.670348404928615e-05, "loss": 0.5336, "step": 17402 }, { "epoch": 0.5346051055202286, "grad_norm": 0.32270899415016174, "learning_rate": 1.6703125427199435e-05, "loss": 0.5899, "step": 17403 }, { "epoch": 0.5346358246551777, "grad_norm": 0.3640224039554596, "learning_rate": 1.6702766789457098e-05, "loss": 0.5888, "step": 17404 }, { "epoch": 0.5346665437901269, "grad_norm": 0.34903961420059204, "learning_rate": 1.6702408136059986e-05, "loss": 0.5984, "step": 17405 }, { "epoch": 0.534697262925076, "grad_norm": 0.3514820337295532, "learning_rate": 1.670204946700893e-05, "loss": 0.5821, "step": 17406 }, { "epoch": 0.5347279820600251, "grad_norm": 0.3930802643299103, "learning_rate": 1.6701690782304772e-05, "loss": 0.542, "step": 17407 }, { "epoch": 0.5347587011949744, "grad_norm": 0.32450130581855774, "learning_rate": 1.6701332081948343e-05, "loss": 0.5566, "step": 17408 }, { "epoch": 0.5347894203299235, "grad_norm": 0.4071666896343231, "learning_rate": 1.670097336594049e-05, "loss": 0.5304, "step": 17409 }, { "epoch": 0.5348201394648727, "grad_norm": 0.33368319272994995, "learning_rate": 1.670061463428204e-05, "loss": 0.5804, "step": 17410 }, { "epoch": 0.5348508585998218, "grad_norm": 0.40081658959388733, "learning_rate": 1.670025588697384e-05, "loss": 0.6111, "step": 17411 }, { "epoch": 0.534881577734771, "grad_norm": 0.31729602813720703, "learning_rate": 1.6699897124016724e-05, "loss": 0.5012, "step": 17412 }, { "epoch": 0.5349122968697202, "grad_norm": 0.3350316882133484, "learning_rate": 1.669953834541153e-05, "loss": 0.5719, "step": 17413 }, { "epoch": 0.5349430160046693, "grad_norm": 0.32096046209335327, "learning_rate": 1.6699179551159098e-05, "loss": 0.4632, "step": 17414 }, { "epoch": 0.5349737351396184, "grad_norm": 0.3821983337402344, "learning_rate": 1.6698820741260262e-05, "loss": 0.523, "step": 17415 }, { "epoch": 0.5350044542745677, "grad_norm": 0.3619576096534729, "learning_rate": 1.6698461915715863e-05, "loss": 0.6406, "step": 17416 }, { "epoch": 0.5350351734095168, "grad_norm": 0.30501607060432434, "learning_rate": 1.669810307452674e-05, "loss": 0.515, "step": 17417 }, { "epoch": 0.5350658925444659, "grad_norm": 0.362920343875885, "learning_rate": 1.6697744217693727e-05, "loss": 0.5741, "step": 17418 }, { "epoch": 0.5350966116794151, "grad_norm": 0.36480507254600525, "learning_rate": 1.6697385345217666e-05, "loss": 0.561, "step": 17419 }, { "epoch": 0.5351273308143643, "grad_norm": 0.34038621187210083, "learning_rate": 1.6697026457099396e-05, "loss": 0.4985, "step": 17420 }, { "epoch": 0.5351580499493134, "grad_norm": 0.34532275795936584, "learning_rate": 1.669666755333975e-05, "loss": 0.5668, "step": 17421 }, { "epoch": 0.5351887690842626, "grad_norm": 0.3461824059486389, "learning_rate": 1.6696308633939566e-05, "loss": 0.5896, "step": 17422 }, { "epoch": 0.5352194882192117, "grad_norm": 0.31141337752342224, "learning_rate": 1.669594969889969e-05, "loss": 0.5509, "step": 17423 }, { "epoch": 0.535250207354161, "grad_norm": 0.3643421232700348, "learning_rate": 1.6695590748220953e-05, "loss": 0.5943, "step": 17424 }, { "epoch": 0.5352809264891101, "grad_norm": 0.35991278290748596, "learning_rate": 1.66952317819042e-05, "loss": 0.6218, "step": 17425 }, { "epoch": 0.5353116456240592, "grad_norm": 0.3475857973098755, "learning_rate": 1.6694872799950267e-05, "loss": 0.5424, "step": 17426 }, { "epoch": 0.5353423647590084, "grad_norm": 0.34299635887145996, "learning_rate": 1.6694513802359985e-05, "loss": 0.5528, "step": 17427 }, { "epoch": 0.5353730838939575, "grad_norm": 0.3556605577468872, "learning_rate": 1.66941547891342e-05, "loss": 0.5862, "step": 17428 }, { "epoch": 0.5354038030289067, "grad_norm": 0.35763034224510193, "learning_rate": 1.6693795760273753e-05, "loss": 0.504, "step": 17429 }, { "epoch": 0.5354345221638559, "grad_norm": 0.3220345377922058, "learning_rate": 1.6693436715779473e-05, "loss": 0.5525, "step": 17430 }, { "epoch": 0.535465241298805, "grad_norm": 0.36392125487327576, "learning_rate": 1.6693077655652207e-05, "loss": 0.6029, "step": 17431 }, { "epoch": 0.5354959604337541, "grad_norm": 0.36986324191093445, "learning_rate": 1.669271857989279e-05, "loss": 0.6288, "step": 17432 }, { "epoch": 0.5355266795687034, "grad_norm": 0.34235048294067383, "learning_rate": 1.6692359488502062e-05, "loss": 0.4214, "step": 17433 }, { "epoch": 0.5355573987036525, "grad_norm": 0.32794204354286194, "learning_rate": 1.6692000381480863e-05, "loss": 0.5649, "step": 17434 }, { "epoch": 0.5355881178386017, "grad_norm": 0.3520791530609131, "learning_rate": 1.6691641258830026e-05, "loss": 0.5238, "step": 17435 }, { "epoch": 0.5356188369735508, "grad_norm": 0.31993839144706726, "learning_rate": 1.6691282120550395e-05, "loss": 0.4927, "step": 17436 }, { "epoch": 0.5356495561085, "grad_norm": 0.3604786992073059, "learning_rate": 1.669092296664281e-05, "loss": 0.5346, "step": 17437 }, { "epoch": 0.5356802752434492, "grad_norm": 0.42127901315689087, "learning_rate": 1.6690563797108105e-05, "loss": 0.5127, "step": 17438 }, { "epoch": 0.5357109943783983, "grad_norm": 0.3421512246131897, "learning_rate": 1.6690204611947117e-05, "loss": 0.6, "step": 17439 }, { "epoch": 0.5357417135133474, "grad_norm": 0.32104647159576416, "learning_rate": 1.6689845411160696e-05, "loss": 0.5328, "step": 17440 }, { "epoch": 0.5357724326482967, "grad_norm": 0.3563134968280792, "learning_rate": 1.6689486194749668e-05, "loss": 0.6145, "step": 17441 }, { "epoch": 0.5358031517832458, "grad_norm": 0.3526926338672638, "learning_rate": 1.668912696271488e-05, "loss": 0.6114, "step": 17442 }, { "epoch": 0.5358338709181949, "grad_norm": 0.33706140518188477, "learning_rate": 1.668876771505717e-05, "loss": 0.6081, "step": 17443 }, { "epoch": 0.5358645900531441, "grad_norm": 0.4655609726905823, "learning_rate": 1.6688408451777375e-05, "loss": 0.5956, "step": 17444 }, { "epoch": 0.5358953091880932, "grad_norm": 0.3340607285499573, "learning_rate": 1.6688049172876332e-05, "loss": 0.5971, "step": 17445 }, { "epoch": 0.5359260283230424, "grad_norm": 0.34313178062438965, "learning_rate": 1.6687689878354887e-05, "loss": 0.5317, "step": 17446 }, { "epoch": 0.5359567474579916, "grad_norm": 0.3619050681591034, "learning_rate": 1.6687330568213876e-05, "loss": 0.6113, "step": 17447 }, { "epoch": 0.5359874665929407, "grad_norm": 0.38130950927734375, "learning_rate": 1.6686971242454133e-05, "loss": 0.6176, "step": 17448 }, { "epoch": 0.53601818572789, "grad_norm": 0.34545356035232544, "learning_rate": 1.6686611901076503e-05, "loss": 0.5687, "step": 17449 }, { "epoch": 0.5360489048628391, "grad_norm": 0.4210030436515808, "learning_rate": 1.6686252544081827e-05, "loss": 0.5981, "step": 17450 }, { "epoch": 0.5360796239977882, "grad_norm": 0.3470647633075714, "learning_rate": 1.668589317147094e-05, "loss": 0.5464, "step": 17451 }, { "epoch": 0.5361103431327374, "grad_norm": 0.37976348400115967, "learning_rate": 1.668553378324468e-05, "loss": 0.5376, "step": 17452 }, { "epoch": 0.5361410622676865, "grad_norm": 0.32590770721435547, "learning_rate": 1.668517437940389e-05, "loss": 0.5874, "step": 17453 }, { "epoch": 0.5361717814026357, "grad_norm": 0.3775072991847992, "learning_rate": 1.668481495994941e-05, "loss": 0.5365, "step": 17454 }, { "epoch": 0.5362025005375849, "grad_norm": 0.39779552817344666, "learning_rate": 1.6684455524882078e-05, "loss": 0.6436, "step": 17455 }, { "epoch": 0.536233219672534, "grad_norm": 0.3469092845916748, "learning_rate": 1.668409607420273e-05, "loss": 0.5604, "step": 17456 }, { "epoch": 0.5362639388074831, "grad_norm": 0.3661600649356842, "learning_rate": 1.668373660791221e-05, "loss": 0.5934, "step": 17457 }, { "epoch": 0.5362946579424324, "grad_norm": 0.34565863013267517, "learning_rate": 1.668337712601136e-05, "loss": 0.5968, "step": 17458 }, { "epoch": 0.5363253770773815, "grad_norm": 0.33999308943748474, "learning_rate": 1.6683017628501014e-05, "loss": 0.5569, "step": 17459 }, { "epoch": 0.5363560962123307, "grad_norm": 0.34559324383735657, "learning_rate": 1.6682658115382012e-05, "loss": 0.5448, "step": 17460 }, { "epoch": 0.5363868153472798, "grad_norm": 0.3643296957015991, "learning_rate": 1.6682298586655198e-05, "loss": 0.631, "step": 17461 }, { "epoch": 0.536417534482229, "grad_norm": 0.33204761147499084, "learning_rate": 1.6681939042321405e-05, "loss": 0.5426, "step": 17462 }, { "epoch": 0.5364482536171782, "grad_norm": 0.3624633550643921, "learning_rate": 1.668157948238148e-05, "loss": 0.5699, "step": 17463 }, { "epoch": 0.5364789727521273, "grad_norm": 0.3800508379936218, "learning_rate": 1.6681219906836256e-05, "loss": 0.5481, "step": 17464 }, { "epoch": 0.5365096918870764, "grad_norm": 0.3647020161151886, "learning_rate": 1.668086031568658e-05, "loss": 0.5046, "step": 17465 }, { "epoch": 0.5365404110220257, "grad_norm": 0.37407469749450684, "learning_rate": 1.6680500708933286e-05, "loss": 0.5804, "step": 17466 }, { "epoch": 0.5365711301569748, "grad_norm": 0.3517841398715973, "learning_rate": 1.6680141086577215e-05, "loss": 0.501, "step": 17467 }, { "epoch": 0.5366018492919239, "grad_norm": 0.35199084877967834, "learning_rate": 1.667978144861921e-05, "loss": 0.5736, "step": 17468 }, { "epoch": 0.5366325684268731, "grad_norm": 0.37125641107559204, "learning_rate": 1.6679421795060113e-05, "loss": 0.4839, "step": 17469 }, { "epoch": 0.5366632875618222, "grad_norm": 0.32552993297576904, "learning_rate": 1.6679062125900755e-05, "loss": 0.5487, "step": 17470 }, { "epoch": 0.5366940066967714, "grad_norm": 0.34504759311676025, "learning_rate": 1.667870244114198e-05, "loss": 0.5211, "step": 17471 }, { "epoch": 0.5367247258317206, "grad_norm": 0.3499925136566162, "learning_rate": 1.6678342740784628e-05, "loss": 0.5728, "step": 17472 }, { "epoch": 0.5367554449666697, "grad_norm": 0.34153813123703003, "learning_rate": 1.6677983024829543e-05, "loss": 0.5588, "step": 17473 }, { "epoch": 0.536786164101619, "grad_norm": 0.34554675221443176, "learning_rate": 1.6677623293277562e-05, "loss": 0.6038, "step": 17474 }, { "epoch": 0.5368168832365681, "grad_norm": 0.4533258378505707, "learning_rate": 1.6677263546129527e-05, "loss": 0.5079, "step": 17475 }, { "epoch": 0.5368476023715172, "grad_norm": 0.7510029673576355, "learning_rate": 1.6676903783386276e-05, "loss": 0.5578, "step": 17476 }, { "epoch": 0.5368783215064664, "grad_norm": 0.34514036774635315, "learning_rate": 1.6676544005048647e-05, "loss": 0.5506, "step": 17477 }, { "epoch": 0.5369090406414155, "grad_norm": 0.3369775712490082, "learning_rate": 1.6676184211117484e-05, "loss": 0.602, "step": 17478 }, { "epoch": 0.5369397597763647, "grad_norm": 0.4107666611671448, "learning_rate": 1.667582440159363e-05, "loss": 0.5674, "step": 17479 }, { "epoch": 0.5369704789113139, "grad_norm": 0.3495771884918213, "learning_rate": 1.667546457647792e-05, "loss": 0.5497, "step": 17480 }, { "epoch": 0.537001198046263, "grad_norm": 0.42126885056495667, "learning_rate": 1.6675104735771195e-05, "loss": 0.626, "step": 17481 }, { "epoch": 0.5370319171812121, "grad_norm": 0.33243638277053833, "learning_rate": 1.6674744879474297e-05, "loss": 0.5235, "step": 17482 }, { "epoch": 0.5370626363161614, "grad_norm": 0.3541898727416992, "learning_rate": 1.6674385007588066e-05, "loss": 0.553, "step": 17483 }, { "epoch": 0.5370933554511105, "grad_norm": 0.5091285109519958, "learning_rate": 1.6674025120113343e-05, "loss": 0.647, "step": 17484 }, { "epoch": 0.5371240745860597, "grad_norm": 0.35725879669189453, "learning_rate": 1.667366521705097e-05, "loss": 0.4944, "step": 17485 }, { "epoch": 0.5371547937210088, "grad_norm": 0.3168172538280487, "learning_rate": 1.667330529840178e-05, "loss": 0.5639, "step": 17486 }, { "epoch": 0.537185512855958, "grad_norm": 0.4096253216266632, "learning_rate": 1.6672945364166627e-05, "loss": 0.4721, "step": 17487 }, { "epoch": 0.5372162319909072, "grad_norm": 0.34546902775764465, "learning_rate": 1.667258541434634e-05, "loss": 0.5996, "step": 17488 }, { "epoch": 0.5372469511258563, "grad_norm": 0.35846972465515137, "learning_rate": 1.6672225448941764e-05, "loss": 0.6337, "step": 17489 }, { "epoch": 0.5372776702608054, "grad_norm": 0.4331517517566681, "learning_rate": 1.667186546795374e-05, "loss": 0.5017, "step": 17490 }, { "epoch": 0.5373083893957546, "grad_norm": 0.33076971769332886, "learning_rate": 1.667150547138311e-05, "loss": 0.6222, "step": 17491 }, { "epoch": 0.5373391085307038, "grad_norm": 0.3383631110191345, "learning_rate": 1.667114545923071e-05, "loss": 0.614, "step": 17492 }, { "epoch": 0.5373698276656529, "grad_norm": 0.32763010263442993, "learning_rate": 1.6670785431497386e-05, "loss": 0.5599, "step": 17493 }, { "epoch": 0.5374005468006021, "grad_norm": 0.7026676535606384, "learning_rate": 1.6670425388183975e-05, "loss": 0.6055, "step": 17494 }, { "epoch": 0.5374312659355512, "grad_norm": 0.346204549074173, "learning_rate": 1.667006532929132e-05, "loss": 0.4517, "step": 17495 }, { "epoch": 0.5374619850705005, "grad_norm": 0.3904145658016205, "learning_rate": 1.6669705254820265e-05, "loss": 0.5446, "step": 17496 }, { "epoch": 0.5374927042054496, "grad_norm": 0.3680364787578583, "learning_rate": 1.6669345164771645e-05, "loss": 0.5923, "step": 17497 }, { "epoch": 0.5375234233403987, "grad_norm": 0.3575509190559387, "learning_rate": 1.66689850591463e-05, "loss": 0.5699, "step": 17498 }, { "epoch": 0.5375541424753479, "grad_norm": 0.3986164331436157, "learning_rate": 1.6668624937945082e-05, "loss": 0.5695, "step": 17499 }, { "epoch": 0.5375848616102971, "grad_norm": 0.43428489565849304, "learning_rate": 1.666826480116882e-05, "loss": 0.5459, "step": 17500 }, { "epoch": 0.5376155807452462, "grad_norm": 0.48966655135154724, "learning_rate": 1.6667904648818362e-05, "loss": 0.554, "step": 17501 }, { "epoch": 0.5376462998801954, "grad_norm": 0.4934186339378357, "learning_rate": 1.6667544480894543e-05, "loss": 0.4671, "step": 17502 }, { "epoch": 0.5376770190151445, "grad_norm": 0.33992844820022583, "learning_rate": 1.6667184297398213e-05, "loss": 0.5838, "step": 17503 }, { "epoch": 0.5377077381500937, "grad_norm": 0.3237183392047882, "learning_rate": 1.66668240983302e-05, "loss": 0.6286, "step": 17504 }, { "epoch": 0.5377384572850429, "grad_norm": 0.3196732699871063, "learning_rate": 1.666646388369136e-05, "loss": 0.4761, "step": 17505 }, { "epoch": 0.537769176419992, "grad_norm": 0.3409876823425293, "learning_rate": 1.6666103653482532e-05, "loss": 0.5837, "step": 17506 }, { "epoch": 0.5377998955549411, "grad_norm": 0.36396101117134094, "learning_rate": 1.666574340770455e-05, "loss": 0.4895, "step": 17507 }, { "epoch": 0.5378306146898904, "grad_norm": 0.34790855646133423, "learning_rate": 1.6665383146358255e-05, "loss": 0.5399, "step": 17508 }, { "epoch": 0.5378613338248395, "grad_norm": 0.3589848577976227, "learning_rate": 1.6665022869444494e-05, "loss": 0.5682, "step": 17509 }, { "epoch": 0.5378920529597887, "grad_norm": 0.3531658947467804, "learning_rate": 1.666466257696411e-05, "loss": 0.5612, "step": 17510 }, { "epoch": 0.5379227720947378, "grad_norm": 0.33020463585853577, "learning_rate": 1.666430226891794e-05, "loss": 0.4518, "step": 17511 }, { "epoch": 0.5379534912296869, "grad_norm": 0.3554821014404297, "learning_rate": 1.6663941945306825e-05, "loss": 0.5995, "step": 17512 }, { "epoch": 0.5379842103646362, "grad_norm": 0.3487411439418793, "learning_rate": 1.666358160613161e-05, "loss": 0.6267, "step": 17513 }, { "epoch": 0.5380149294995853, "grad_norm": 0.524196445941925, "learning_rate": 1.6663221251393135e-05, "loss": 0.6397, "step": 17514 }, { "epoch": 0.5380456486345344, "grad_norm": 0.3321971893310547, "learning_rate": 1.6662860881092237e-05, "loss": 0.5723, "step": 17515 }, { "epoch": 0.5380763677694836, "grad_norm": 0.3662353754043579, "learning_rate": 1.6662500495229764e-05, "loss": 0.5746, "step": 17516 }, { "epoch": 0.5381070869044328, "grad_norm": 0.39048105478286743, "learning_rate": 1.6662140093806558e-05, "loss": 0.5846, "step": 17517 }, { "epoch": 0.5381378060393819, "grad_norm": 0.3405389189720154, "learning_rate": 1.6661779676823458e-05, "loss": 0.5822, "step": 17518 }, { "epoch": 0.5381685251743311, "grad_norm": 0.36540213227272034, "learning_rate": 1.6661419244281305e-05, "loss": 0.5826, "step": 17519 }, { "epoch": 0.5381992443092802, "grad_norm": 0.4035775363445282, "learning_rate": 1.6661058796180945e-05, "loss": 0.5623, "step": 17520 }, { "epoch": 0.5382299634442295, "grad_norm": 0.3582201600074768, "learning_rate": 1.6660698332523213e-05, "loss": 0.4953, "step": 17521 }, { "epoch": 0.5382606825791786, "grad_norm": 0.35023367404937744, "learning_rate": 1.6660337853308958e-05, "loss": 0.5562, "step": 17522 }, { "epoch": 0.5382914017141277, "grad_norm": 0.33650290966033936, "learning_rate": 1.6659977358539014e-05, "loss": 0.5495, "step": 17523 }, { "epoch": 0.5383221208490769, "grad_norm": 0.39224186539649963, "learning_rate": 1.6659616848214232e-05, "loss": 0.5281, "step": 17524 }, { "epoch": 0.538352839984026, "grad_norm": 0.35957908630371094, "learning_rate": 1.665925632233545e-05, "loss": 0.5671, "step": 17525 }, { "epoch": 0.5383835591189752, "grad_norm": 0.4133070707321167, "learning_rate": 1.6658895780903507e-05, "loss": 0.4645, "step": 17526 }, { "epoch": 0.5384142782539244, "grad_norm": 0.4309268891811371, "learning_rate": 1.665853522391925e-05, "loss": 0.6573, "step": 17527 }, { "epoch": 0.5384449973888735, "grad_norm": 0.3621964454650879, "learning_rate": 1.665817465138352e-05, "loss": 0.5604, "step": 17528 }, { "epoch": 0.5384757165238226, "grad_norm": 0.4107750952243805, "learning_rate": 1.6657814063297156e-05, "loss": 0.5961, "step": 17529 }, { "epoch": 0.5385064356587719, "grad_norm": 0.3455018699169159, "learning_rate": 1.6657453459661e-05, "loss": 0.5542, "step": 17530 }, { "epoch": 0.538537154793721, "grad_norm": 0.4200392961502075, "learning_rate": 1.66570928404759e-05, "loss": 0.5926, "step": 17531 }, { "epoch": 0.5385678739286701, "grad_norm": 0.35290437936782837, "learning_rate": 1.6656732205742697e-05, "loss": 0.5816, "step": 17532 }, { "epoch": 0.5385985930636193, "grad_norm": 0.34654656052589417, "learning_rate": 1.6656371555462227e-05, "loss": 0.5928, "step": 17533 }, { "epoch": 0.5386293121985685, "grad_norm": 2.0377073287963867, "learning_rate": 1.6656010889635335e-05, "loss": 0.5839, "step": 17534 }, { "epoch": 0.5386600313335177, "grad_norm": 0.37239354848861694, "learning_rate": 1.665565020826287e-05, "loss": 0.4912, "step": 17535 }, { "epoch": 0.5386907504684668, "grad_norm": 0.3409408926963806, "learning_rate": 1.6655289511345665e-05, "loss": 0.471, "step": 17536 }, { "epoch": 0.5387214696034159, "grad_norm": 0.3470422327518463, "learning_rate": 1.6654928798884568e-05, "loss": 0.5386, "step": 17537 }, { "epoch": 0.5387521887383652, "grad_norm": 0.4229872226715088, "learning_rate": 1.6654568070880422e-05, "loss": 0.5376, "step": 17538 }, { "epoch": 0.5387829078733143, "grad_norm": 0.4485396444797516, "learning_rate": 1.6654207327334063e-05, "loss": 0.5555, "step": 17539 }, { "epoch": 0.5388136270082634, "grad_norm": 0.351500004529953, "learning_rate": 1.665384656824634e-05, "loss": 0.5435, "step": 17540 }, { "epoch": 0.5388443461432126, "grad_norm": 0.38810205459594727, "learning_rate": 1.6653485793618096e-05, "loss": 0.4844, "step": 17541 }, { "epoch": 0.5388750652781618, "grad_norm": 0.36079293489456177, "learning_rate": 1.665312500345017e-05, "loss": 0.5946, "step": 17542 }, { "epoch": 0.5389057844131109, "grad_norm": 0.34850555658340454, "learning_rate": 1.66527641977434e-05, "loss": 0.5332, "step": 17543 }, { "epoch": 0.5389365035480601, "grad_norm": 0.40027981996536255, "learning_rate": 1.6652403376498642e-05, "loss": 0.5993, "step": 17544 }, { "epoch": 0.5389672226830092, "grad_norm": 0.40111109614372253, "learning_rate": 1.665204253971673e-05, "loss": 0.6055, "step": 17545 }, { "epoch": 0.5389979418179585, "grad_norm": 0.3748756945133209, "learning_rate": 1.665168168739851e-05, "loss": 0.5875, "step": 17546 }, { "epoch": 0.5390286609529076, "grad_norm": 0.3782269358634949, "learning_rate": 1.665132081954482e-05, "loss": 0.5531, "step": 17547 }, { "epoch": 0.5390593800878567, "grad_norm": 0.32752782106399536, "learning_rate": 1.6650959936156502e-05, "loss": 0.5649, "step": 17548 }, { "epoch": 0.5390900992228059, "grad_norm": 0.32788509130477905, "learning_rate": 1.665059903723441e-05, "loss": 0.5965, "step": 17549 }, { "epoch": 0.539120818357755, "grad_norm": 0.37002843618392944, "learning_rate": 1.6650238122779373e-05, "loss": 0.6153, "step": 17550 }, { "epoch": 0.5391515374927042, "grad_norm": 0.3457208275794983, "learning_rate": 1.6649877192792243e-05, "loss": 0.5623, "step": 17551 }, { "epoch": 0.5391822566276534, "grad_norm": 0.35760167241096497, "learning_rate": 1.664951624727386e-05, "loss": 0.545, "step": 17552 }, { "epoch": 0.5392129757626025, "grad_norm": 0.37083151936531067, "learning_rate": 1.664915528622507e-05, "loss": 0.5938, "step": 17553 }, { "epoch": 0.5392436948975516, "grad_norm": 0.3456827998161316, "learning_rate": 1.6648794309646713e-05, "loss": 0.5543, "step": 17554 }, { "epoch": 0.5392744140325009, "grad_norm": 0.34073343873023987, "learning_rate": 1.664843331753963e-05, "loss": 0.5595, "step": 17555 }, { "epoch": 0.53930513316745, "grad_norm": 0.35285404324531555, "learning_rate": 1.6648072309904667e-05, "loss": 0.5677, "step": 17556 }, { "epoch": 0.5393358523023991, "grad_norm": 0.366821825504303, "learning_rate": 1.664771128674267e-05, "loss": 0.6053, "step": 17557 }, { "epoch": 0.5393665714373483, "grad_norm": 0.41547510027885437, "learning_rate": 1.6647350248054475e-05, "loss": 0.664, "step": 17558 }, { "epoch": 0.5393972905722975, "grad_norm": 0.38729190826416016, "learning_rate": 1.664698919384093e-05, "loss": 0.569, "step": 17559 }, { "epoch": 0.5394280097072467, "grad_norm": 0.34708932042121887, "learning_rate": 1.664662812410288e-05, "loss": 0.567, "step": 17560 }, { "epoch": 0.5394587288421958, "grad_norm": 0.3449181020259857, "learning_rate": 1.6646267038841163e-05, "loss": 0.5653, "step": 17561 }, { "epoch": 0.5394894479771449, "grad_norm": 0.35874566435813904, "learning_rate": 1.6645905938056625e-05, "loss": 0.5953, "step": 17562 }, { "epoch": 0.5395201671120942, "grad_norm": 0.44349902868270874, "learning_rate": 1.664554482175011e-05, "loss": 0.5746, "step": 17563 }, { "epoch": 0.5395508862470433, "grad_norm": 0.3381945788860321, "learning_rate": 1.6645183689922465e-05, "loss": 0.6157, "step": 17564 }, { "epoch": 0.5395816053819924, "grad_norm": 0.3923344910144806, "learning_rate": 1.6644822542574527e-05, "loss": 0.5819, "step": 17565 }, { "epoch": 0.5396123245169416, "grad_norm": 0.3672839105129242, "learning_rate": 1.664446137970714e-05, "loss": 0.4886, "step": 17566 }, { "epoch": 0.5396430436518908, "grad_norm": 0.4246094524860382, "learning_rate": 1.664410020132115e-05, "loss": 0.5741, "step": 17567 }, { "epoch": 0.5396737627868399, "grad_norm": 0.35157305002212524, "learning_rate": 1.6643739007417398e-05, "loss": 0.53, "step": 17568 }, { "epoch": 0.5397044819217891, "grad_norm": 0.34374549984931946, "learning_rate": 1.6643377797996737e-05, "loss": 0.5002, "step": 17569 }, { "epoch": 0.5397352010567382, "grad_norm": 0.3689754605293274, "learning_rate": 1.6643016573059995e-05, "loss": 0.4104, "step": 17570 }, { "epoch": 0.5397659201916875, "grad_norm": 0.3624507784843445, "learning_rate": 1.6642655332608025e-05, "loss": 0.6544, "step": 17571 }, { "epoch": 0.5397966393266366, "grad_norm": 0.4240308701992035, "learning_rate": 1.6642294076641674e-05, "loss": 0.5501, "step": 17572 }, { "epoch": 0.5398273584615857, "grad_norm": 0.32863783836364746, "learning_rate": 1.664193280516178e-05, "loss": 0.5269, "step": 17573 }, { "epoch": 0.5398580775965349, "grad_norm": 0.34537166357040405, "learning_rate": 1.6641571518169187e-05, "loss": 0.4673, "step": 17574 }, { "epoch": 0.539888796731484, "grad_norm": 0.36700010299682617, "learning_rate": 1.664121021566474e-05, "loss": 0.6223, "step": 17575 }, { "epoch": 0.5399195158664332, "grad_norm": 0.402723491191864, "learning_rate": 1.664084889764928e-05, "loss": 0.5851, "step": 17576 }, { "epoch": 0.5399502350013824, "grad_norm": 0.3402990996837616, "learning_rate": 1.6640487564123654e-05, "loss": 0.6047, "step": 17577 }, { "epoch": 0.5399809541363315, "grad_norm": 0.37228086590766907, "learning_rate": 1.664012621508871e-05, "loss": 0.5643, "step": 17578 }, { "epoch": 0.5400116732712806, "grad_norm": 0.3415141999721527, "learning_rate": 1.6639764850545286e-05, "loss": 0.549, "step": 17579 }, { "epoch": 0.5400423924062299, "grad_norm": 0.34738689661026, "learning_rate": 1.6639403470494225e-05, "loss": 0.5962, "step": 17580 }, { "epoch": 0.540073111541179, "grad_norm": 0.42042967677116394, "learning_rate": 1.6639042074936377e-05, "loss": 0.6317, "step": 17581 }, { "epoch": 0.5401038306761281, "grad_norm": 0.37628498673439026, "learning_rate": 1.6638680663872578e-05, "loss": 0.6049, "step": 17582 }, { "epoch": 0.5401345498110773, "grad_norm": 0.33361613750457764, "learning_rate": 1.6638319237303685e-05, "loss": 0.5497, "step": 17583 }, { "epoch": 0.5401652689460265, "grad_norm": 0.37521812319755554, "learning_rate": 1.6637957795230524e-05, "loss": 0.6509, "step": 17584 }, { "epoch": 0.5401959880809757, "grad_norm": 0.34697389602661133, "learning_rate": 1.6637596337653957e-05, "loss": 0.5503, "step": 17585 }, { "epoch": 0.5402267072159248, "grad_norm": 0.38327091932296753, "learning_rate": 1.6637234864574813e-05, "loss": 0.5747, "step": 17586 }, { "epoch": 0.5402574263508739, "grad_norm": 0.35492706298828125, "learning_rate": 1.6636873375993947e-05, "loss": 0.5662, "step": 17587 }, { "epoch": 0.5402881454858232, "grad_norm": 0.3484227657318115, "learning_rate": 1.66365118719122e-05, "loss": 0.4732, "step": 17588 }, { "epoch": 0.5403188646207723, "grad_norm": 0.36518797278404236, "learning_rate": 1.6636150352330417e-05, "loss": 0.6116, "step": 17589 }, { "epoch": 0.5403495837557214, "grad_norm": 0.38551533222198486, "learning_rate": 1.6635788817249437e-05, "loss": 0.663, "step": 17590 }, { "epoch": 0.5403803028906706, "grad_norm": 0.351563036441803, "learning_rate": 1.6635427266670113e-05, "loss": 0.5146, "step": 17591 }, { "epoch": 0.5404110220256197, "grad_norm": 0.3550795018672943, "learning_rate": 1.6635065700593287e-05, "loss": 0.5542, "step": 17592 }, { "epoch": 0.5404417411605689, "grad_norm": 0.3744320273399353, "learning_rate": 1.6634704119019794e-05, "loss": 0.6057, "step": 17593 }, { "epoch": 0.5404724602955181, "grad_norm": 0.36780059337615967, "learning_rate": 1.663434252195049e-05, "loss": 0.64, "step": 17594 }, { "epoch": 0.5405031794304672, "grad_norm": 0.40136444568634033, "learning_rate": 1.663398090938622e-05, "loss": 0.5192, "step": 17595 }, { "epoch": 0.5405338985654164, "grad_norm": 0.3708445131778717, "learning_rate": 1.663361928132782e-05, "loss": 0.5596, "step": 17596 }, { "epoch": 0.5405646177003656, "grad_norm": 0.325363427400589, "learning_rate": 1.663325763777614e-05, "loss": 0.5462, "step": 17597 }, { "epoch": 0.5405953368353147, "grad_norm": 0.3905683755874634, "learning_rate": 1.6632895978732023e-05, "loss": 0.6222, "step": 17598 }, { "epoch": 0.5406260559702639, "grad_norm": 0.36278441548347473, "learning_rate": 1.663253430419631e-05, "loss": 0.5249, "step": 17599 }, { "epoch": 0.540656775105213, "grad_norm": 0.514239490032196, "learning_rate": 1.6632172614169858e-05, "loss": 0.6539, "step": 17600 }, { "epoch": 0.5406874942401622, "grad_norm": 0.366150826215744, "learning_rate": 1.66318109086535e-05, "loss": 0.5673, "step": 17601 }, { "epoch": 0.5407182133751114, "grad_norm": 0.34579068422317505, "learning_rate": 1.6631449187648086e-05, "loss": 0.5865, "step": 17602 }, { "epoch": 0.5407489325100605, "grad_norm": 0.38088536262512207, "learning_rate": 1.6631087451154456e-05, "loss": 0.567, "step": 17603 }, { "epoch": 0.5407796516450096, "grad_norm": 0.4064445495605469, "learning_rate": 1.663072569917346e-05, "loss": 0.5694, "step": 17604 }, { "epoch": 0.5408103707799589, "grad_norm": 0.34901660680770874, "learning_rate": 1.6630363931705943e-05, "loss": 0.5515, "step": 17605 }, { "epoch": 0.540841089914908, "grad_norm": 0.38060522079467773, "learning_rate": 1.6630002148752745e-05, "loss": 0.4988, "step": 17606 }, { "epoch": 0.5408718090498572, "grad_norm": 0.3706704080104828, "learning_rate": 1.6629640350314714e-05, "loss": 0.5031, "step": 17607 }, { "epoch": 0.5409025281848063, "grad_norm": 0.3617754578590393, "learning_rate": 1.6629278536392694e-05, "loss": 0.5439, "step": 17608 }, { "epoch": 0.5409332473197555, "grad_norm": 0.4844091534614563, "learning_rate": 1.662891670698754e-05, "loss": 0.6969, "step": 17609 }, { "epoch": 0.5409639664547047, "grad_norm": 0.3364875018596649, "learning_rate": 1.6628554862100074e-05, "loss": 0.5, "step": 17610 }, { "epoch": 0.5409946855896538, "grad_norm": 0.34483766555786133, "learning_rate": 1.6628193001731165e-05, "loss": 0.57, "step": 17611 }, { "epoch": 0.5410254047246029, "grad_norm": 0.39760875701904297, "learning_rate": 1.6627831125881644e-05, "loss": 0.6504, "step": 17612 }, { "epoch": 0.5410561238595522, "grad_norm": 0.33580029010772705, "learning_rate": 1.6627469234552363e-05, "loss": 0.5689, "step": 17613 }, { "epoch": 0.5410868429945013, "grad_norm": 0.34415000677108765, "learning_rate": 1.662710732774416e-05, "loss": 0.5784, "step": 17614 }, { "epoch": 0.5411175621294504, "grad_norm": 0.35400399565696716, "learning_rate": 1.662674540545789e-05, "loss": 0.6165, "step": 17615 }, { "epoch": 0.5411482812643996, "grad_norm": 0.3292425274848938, "learning_rate": 1.6626383467694393e-05, "loss": 0.5407, "step": 17616 }, { "epoch": 0.5411790003993487, "grad_norm": 0.3426227867603302, "learning_rate": 1.6626021514454514e-05, "loss": 0.4984, "step": 17617 }, { "epoch": 0.5412097195342979, "grad_norm": 0.35087811946868896, "learning_rate": 1.6625659545739096e-05, "loss": 0.4484, "step": 17618 }, { "epoch": 0.5412404386692471, "grad_norm": 0.34681200981140137, "learning_rate": 1.6625297561548993e-05, "loss": 0.5283, "step": 17619 }, { "epoch": 0.5412711578041962, "grad_norm": 0.3534158170223236, "learning_rate": 1.662493556188504e-05, "loss": 0.5916, "step": 17620 }, { "epoch": 0.5413018769391454, "grad_norm": 0.3580128848552704, "learning_rate": 1.662457354674809e-05, "loss": 0.5068, "step": 17621 }, { "epoch": 0.5413325960740946, "grad_norm": 0.37682992219924927, "learning_rate": 1.6624211516138987e-05, "loss": 0.5656, "step": 17622 }, { "epoch": 0.5413633152090437, "grad_norm": 0.3845697343349457, "learning_rate": 1.662384947005857e-05, "loss": 0.5761, "step": 17623 }, { "epoch": 0.5413940343439929, "grad_norm": 0.397361695766449, "learning_rate": 1.6623487408507696e-05, "loss": 0.5401, "step": 17624 }, { "epoch": 0.541424753478942, "grad_norm": 0.39270031452178955, "learning_rate": 1.66231253314872e-05, "loss": 0.4891, "step": 17625 }, { "epoch": 0.5414554726138912, "grad_norm": 0.37781867384910583, "learning_rate": 1.6622763238997934e-05, "loss": 0.5915, "step": 17626 }, { "epoch": 0.5414861917488404, "grad_norm": 0.3806750774383545, "learning_rate": 1.6622401131040745e-05, "loss": 0.4639, "step": 17627 }, { "epoch": 0.5415169108837895, "grad_norm": 0.3516782224178314, "learning_rate": 1.662203900761647e-05, "loss": 0.6189, "step": 17628 }, { "epoch": 0.5415476300187386, "grad_norm": 0.36958298087120056, "learning_rate": 1.6621676868725964e-05, "loss": 0.6009, "step": 17629 }, { "epoch": 0.5415783491536879, "grad_norm": 0.3412788510322571, "learning_rate": 1.6621314714370066e-05, "loss": 0.5161, "step": 17630 }, { "epoch": 0.541609068288637, "grad_norm": 0.3584352135658264, "learning_rate": 1.662095254454963e-05, "loss": 0.6555, "step": 17631 }, { "epoch": 0.5416397874235862, "grad_norm": 0.3615007698535919, "learning_rate": 1.662059035926549e-05, "loss": 0.5179, "step": 17632 }, { "epoch": 0.5416705065585353, "grad_norm": 0.3270239531993866, "learning_rate": 1.6620228158518507e-05, "loss": 0.5346, "step": 17633 }, { "epoch": 0.5417012256934844, "grad_norm": 0.3632640838623047, "learning_rate": 1.6619865942309515e-05, "loss": 0.5556, "step": 17634 }, { "epoch": 0.5417319448284337, "grad_norm": 0.36114057898521423, "learning_rate": 1.6619503710639362e-05, "loss": 0.5544, "step": 17635 }, { "epoch": 0.5417626639633828, "grad_norm": 0.3875245749950409, "learning_rate": 1.6619141463508897e-05, "loss": 0.6187, "step": 17636 }, { "epoch": 0.5417933830983319, "grad_norm": 0.38266149163246155, "learning_rate": 1.6618779200918966e-05, "loss": 0.5354, "step": 17637 }, { "epoch": 0.5418241022332811, "grad_norm": 0.34363865852355957, "learning_rate": 1.6618416922870416e-05, "loss": 0.5564, "step": 17638 }, { "epoch": 0.5418548213682303, "grad_norm": 0.3867649734020233, "learning_rate": 1.6618054629364088e-05, "loss": 0.616, "step": 17639 }, { "epoch": 0.5418855405031794, "grad_norm": 0.34328681230545044, "learning_rate": 1.6617692320400827e-05, "loss": 0.5566, "step": 17640 }, { "epoch": 0.5419162596381286, "grad_norm": 0.3303239643573761, "learning_rate": 1.661732999598149e-05, "loss": 0.5923, "step": 17641 }, { "epoch": 0.5419469787730777, "grad_norm": 0.31894564628601074, "learning_rate": 1.6616967656106913e-05, "loss": 0.4898, "step": 17642 }, { "epoch": 0.5419776979080269, "grad_norm": 0.3708004951477051, "learning_rate": 1.661660530077795e-05, "loss": 0.5373, "step": 17643 }, { "epoch": 0.5420084170429761, "grad_norm": 0.365610808134079, "learning_rate": 1.661624292999544e-05, "loss": 0.5819, "step": 17644 }, { "epoch": 0.5420391361779252, "grad_norm": 0.40150147676467896, "learning_rate": 1.6615880543760234e-05, "loss": 0.6135, "step": 17645 }, { "epoch": 0.5420698553128744, "grad_norm": 0.3388419449329376, "learning_rate": 1.6615518142073178e-05, "loss": 0.5355, "step": 17646 }, { "epoch": 0.5421005744478236, "grad_norm": 0.34792622923851013, "learning_rate": 1.6615155724935116e-05, "loss": 0.531, "step": 17647 }, { "epoch": 0.5421312935827727, "grad_norm": 0.3535044491291046, "learning_rate": 1.6614793292346893e-05, "loss": 0.547, "step": 17648 }, { "epoch": 0.5421620127177219, "grad_norm": 0.3867039978504181, "learning_rate": 1.661443084430936e-05, "loss": 0.5928, "step": 17649 }, { "epoch": 0.542192731852671, "grad_norm": 0.7360907196998596, "learning_rate": 1.661406838082336e-05, "loss": 0.4711, "step": 17650 }, { "epoch": 0.5422234509876201, "grad_norm": 0.3535214066505432, "learning_rate": 1.6613705901889745e-05, "loss": 0.573, "step": 17651 }, { "epoch": 0.5422541701225694, "grad_norm": 0.33429551124572754, "learning_rate": 1.6613343407509357e-05, "loss": 0.598, "step": 17652 }, { "epoch": 0.5422848892575185, "grad_norm": 0.39646944403648376, "learning_rate": 1.6612980897683044e-05, "loss": 0.5936, "step": 17653 }, { "epoch": 0.5423156083924676, "grad_norm": 0.36002078652381897, "learning_rate": 1.6612618372411652e-05, "loss": 0.5592, "step": 17654 }, { "epoch": 0.5423463275274168, "grad_norm": 0.3753989338874817, "learning_rate": 1.661225583169603e-05, "loss": 0.5446, "step": 17655 }, { "epoch": 0.542377046662366, "grad_norm": 0.33321306109428406, "learning_rate": 1.661189327553702e-05, "loss": 0.4801, "step": 17656 }, { "epoch": 0.5424077657973152, "grad_norm": 0.36795276403427124, "learning_rate": 1.661153070393547e-05, "loss": 0.5984, "step": 17657 }, { "epoch": 0.5424384849322643, "grad_norm": 0.35949185490608215, "learning_rate": 1.661116811689223e-05, "loss": 0.5911, "step": 17658 }, { "epoch": 0.5424692040672134, "grad_norm": 0.3341417908668518, "learning_rate": 1.6610805514408144e-05, "loss": 0.5, "step": 17659 }, { "epoch": 0.5424999232021627, "grad_norm": 0.3760957717895508, "learning_rate": 1.6610442896484063e-05, "loss": 0.4989, "step": 17660 }, { "epoch": 0.5425306423371118, "grad_norm": 0.3503108024597168, "learning_rate": 1.6610080263120828e-05, "loss": 0.5492, "step": 17661 }, { "epoch": 0.5425613614720609, "grad_norm": 0.3976658582687378, "learning_rate": 1.6609717614319293e-05, "loss": 0.6234, "step": 17662 }, { "epoch": 0.5425920806070101, "grad_norm": 0.3299378752708435, "learning_rate": 1.6609354950080297e-05, "loss": 0.5452, "step": 17663 }, { "epoch": 0.5426227997419593, "grad_norm": 0.35478919744491577, "learning_rate": 1.6608992270404692e-05, "loss": 0.5582, "step": 17664 }, { "epoch": 0.5426535188769084, "grad_norm": 0.420798122882843, "learning_rate": 1.6608629575293324e-05, "loss": 0.5389, "step": 17665 }, { "epoch": 0.5426842380118576, "grad_norm": 0.35608747601509094, "learning_rate": 1.660826686474704e-05, "loss": 0.4795, "step": 17666 }, { "epoch": 0.5427149571468067, "grad_norm": 0.43351176381111145, "learning_rate": 1.6607904138766685e-05, "loss": 0.6064, "step": 17667 }, { "epoch": 0.5427456762817559, "grad_norm": 0.3550818860530853, "learning_rate": 1.6607541397353112e-05, "loss": 0.5738, "step": 17668 }, { "epoch": 0.5427763954167051, "grad_norm": 0.39188385009765625, "learning_rate": 1.6607178640507164e-05, "loss": 0.5003, "step": 17669 }, { "epoch": 0.5428071145516542, "grad_norm": 0.5626742243766785, "learning_rate": 1.660681586822969e-05, "loss": 0.5521, "step": 17670 }, { "epoch": 0.5428378336866034, "grad_norm": 0.374032586812973, "learning_rate": 1.6606453080521533e-05, "loss": 0.5349, "step": 17671 }, { "epoch": 0.5428685528215526, "grad_norm": 0.34837019443511963, "learning_rate": 1.6606090277383544e-05, "loss": 0.5547, "step": 17672 }, { "epoch": 0.5428992719565017, "grad_norm": 0.32095804810523987, "learning_rate": 1.660572745881657e-05, "loss": 0.5579, "step": 17673 }, { "epoch": 0.5429299910914509, "grad_norm": 0.3471071124076843, "learning_rate": 1.660536462482146e-05, "loss": 0.5704, "step": 17674 }, { "epoch": 0.5429607102264, "grad_norm": 0.3762083947658539, "learning_rate": 1.660500177539906e-05, "loss": 0.4882, "step": 17675 }, { "epoch": 0.5429914293613491, "grad_norm": 0.37276262044906616, "learning_rate": 1.6604638910550214e-05, "loss": 0.4726, "step": 17676 }, { "epoch": 0.5430221484962984, "grad_norm": 0.3400457799434662, "learning_rate": 1.6604276030275774e-05, "loss": 0.5341, "step": 17677 }, { "epoch": 0.5430528676312475, "grad_norm": 0.3349197208881378, "learning_rate": 1.6603913134576582e-05, "loss": 0.5823, "step": 17678 }, { "epoch": 0.5430835867661966, "grad_norm": 0.38008198142051697, "learning_rate": 1.6603550223453496e-05, "loss": 0.4911, "step": 17679 }, { "epoch": 0.5431143059011458, "grad_norm": 0.36963891983032227, "learning_rate": 1.6603187296907357e-05, "loss": 0.6017, "step": 17680 }, { "epoch": 0.543145025036095, "grad_norm": 0.327671080827713, "learning_rate": 1.660282435493901e-05, "loss": 0.5574, "step": 17681 }, { "epoch": 0.5431757441710442, "grad_norm": 0.4918424189090729, "learning_rate": 1.660246139754931e-05, "loss": 0.5626, "step": 17682 }, { "epoch": 0.5432064633059933, "grad_norm": 0.3699384331703186, "learning_rate": 1.6602098424739094e-05, "loss": 0.5825, "step": 17683 }, { "epoch": 0.5432371824409424, "grad_norm": 0.3416949510574341, "learning_rate": 1.6601735436509217e-05, "loss": 0.556, "step": 17684 }, { "epoch": 0.5432679015758917, "grad_norm": 0.3889096677303314, "learning_rate": 1.6601372432860528e-05, "loss": 0.598, "step": 17685 }, { "epoch": 0.5432986207108408, "grad_norm": 0.3543693721294403, "learning_rate": 1.660100941379387e-05, "loss": 0.5521, "step": 17686 }, { "epoch": 0.5433293398457899, "grad_norm": 0.3668806552886963, "learning_rate": 1.6600646379310096e-05, "loss": 0.5614, "step": 17687 }, { "epoch": 0.5433600589807391, "grad_norm": 0.3333626985549927, "learning_rate": 1.660028332941005e-05, "loss": 0.5118, "step": 17688 }, { "epoch": 0.5433907781156883, "grad_norm": 0.35499387979507446, "learning_rate": 1.6599920264094588e-05, "loss": 0.5679, "step": 17689 }, { "epoch": 0.5434214972506374, "grad_norm": 0.35176828503608704, "learning_rate": 1.6599557183364542e-05, "loss": 0.5689, "step": 17690 }, { "epoch": 0.5434522163855866, "grad_norm": 0.3457180857658386, "learning_rate": 1.6599194087220774e-05, "loss": 0.5644, "step": 17691 }, { "epoch": 0.5434829355205357, "grad_norm": 0.3658696711063385, "learning_rate": 1.6598830975664126e-05, "loss": 0.574, "step": 17692 }, { "epoch": 0.543513654655485, "grad_norm": 0.34223872423171997, "learning_rate": 1.6598467848695446e-05, "loss": 0.5918, "step": 17693 }, { "epoch": 0.5435443737904341, "grad_norm": 0.3816506266593933, "learning_rate": 1.6598104706315588e-05, "loss": 0.5527, "step": 17694 }, { "epoch": 0.5435750929253832, "grad_norm": 0.3838176131248474, "learning_rate": 1.659774154852539e-05, "loss": 0.6843, "step": 17695 }, { "epoch": 0.5436058120603324, "grad_norm": 0.3773840069770813, "learning_rate": 1.659737837532571e-05, "loss": 0.5488, "step": 17696 }, { "epoch": 0.5436365311952815, "grad_norm": 0.3666813373565674, "learning_rate": 1.659701518671739e-05, "loss": 0.5196, "step": 17697 }, { "epoch": 0.5436672503302307, "grad_norm": 0.3734922409057617, "learning_rate": 1.6596651982701285e-05, "loss": 0.6018, "step": 17698 }, { "epoch": 0.5436979694651799, "grad_norm": 0.3823114037513733, "learning_rate": 1.6596288763278234e-05, "loss": 0.5604, "step": 17699 }, { "epoch": 0.543728688600129, "grad_norm": 0.32001426815986633, "learning_rate": 1.659592552844909e-05, "loss": 0.5254, "step": 17700 }, { "epoch": 0.5437594077350781, "grad_norm": 0.3313305675983429, "learning_rate": 1.65955622782147e-05, "loss": 0.5026, "step": 17701 }, { "epoch": 0.5437901268700274, "grad_norm": 0.39203909039497375, "learning_rate": 1.659519901257592e-05, "loss": 0.5642, "step": 17702 }, { "epoch": 0.5438208460049765, "grad_norm": 0.34227433800697327, "learning_rate": 1.6594835731533586e-05, "loss": 0.5424, "step": 17703 }, { "epoch": 0.5438515651399256, "grad_norm": 0.35318708419799805, "learning_rate": 1.659447243508855e-05, "loss": 0.5879, "step": 17704 }, { "epoch": 0.5438822842748748, "grad_norm": 0.38673678040504456, "learning_rate": 1.659410912324167e-05, "loss": 0.538, "step": 17705 }, { "epoch": 0.543913003409824, "grad_norm": 0.35573431849479675, "learning_rate": 1.6593745795993784e-05, "loss": 0.5107, "step": 17706 }, { "epoch": 0.5439437225447732, "grad_norm": 0.35469236969947815, "learning_rate": 1.6593382453345745e-05, "loss": 0.6649, "step": 17707 }, { "epoch": 0.5439744416797223, "grad_norm": 0.5138862133026123, "learning_rate": 1.65930190952984e-05, "loss": 0.5278, "step": 17708 }, { "epoch": 0.5440051608146714, "grad_norm": 0.3462706506252289, "learning_rate": 1.6592655721852602e-05, "loss": 0.6077, "step": 17709 }, { "epoch": 0.5440358799496207, "grad_norm": 0.3589460551738739, "learning_rate": 1.6592292333009192e-05, "loss": 0.5494, "step": 17710 }, { "epoch": 0.5440665990845698, "grad_norm": 0.33556926250457764, "learning_rate": 1.6591928928769024e-05, "loss": 0.6503, "step": 17711 }, { "epoch": 0.5440973182195189, "grad_norm": 0.39089855551719666, "learning_rate": 1.6591565509132944e-05, "loss": 0.5511, "step": 17712 }, { "epoch": 0.5441280373544681, "grad_norm": 0.4295934736728668, "learning_rate": 1.659120207410181e-05, "loss": 0.5983, "step": 17713 }, { "epoch": 0.5441587564894173, "grad_norm": 0.37466973066329956, "learning_rate": 1.6590838623676455e-05, "loss": 0.6408, "step": 17714 }, { "epoch": 0.5441894756243664, "grad_norm": 0.33733561635017395, "learning_rate": 1.659047515785774e-05, "loss": 0.5591, "step": 17715 }, { "epoch": 0.5442201947593156, "grad_norm": 0.387408047914505, "learning_rate": 1.659011167664651e-05, "loss": 0.6131, "step": 17716 }, { "epoch": 0.5442509138942647, "grad_norm": 0.37652677297592163, "learning_rate": 1.658974818004361e-05, "loss": 0.4631, "step": 17717 }, { "epoch": 0.544281633029214, "grad_norm": 0.33413195610046387, "learning_rate": 1.6589384668049895e-05, "loss": 0.5817, "step": 17718 }, { "epoch": 0.5443123521641631, "grad_norm": 0.35954052209854126, "learning_rate": 1.6589021140666213e-05, "loss": 0.5504, "step": 17719 }, { "epoch": 0.5443430712991122, "grad_norm": 0.33819445967674255, "learning_rate": 1.658865759789341e-05, "loss": 0.5699, "step": 17720 }, { "epoch": 0.5443737904340614, "grad_norm": 0.3741539716720581, "learning_rate": 1.658829403973234e-05, "loss": 0.5774, "step": 17721 }, { "epoch": 0.5444045095690105, "grad_norm": 0.3450431823730469, "learning_rate": 1.6587930466183847e-05, "loss": 0.4765, "step": 17722 }, { "epoch": 0.5444352287039597, "grad_norm": 0.39633411169052124, "learning_rate": 1.6587566877248785e-05, "loss": 0.6436, "step": 17723 }, { "epoch": 0.5444659478389089, "grad_norm": 0.35573041439056396, "learning_rate": 1.6587203272927996e-05, "loss": 0.6095, "step": 17724 }, { "epoch": 0.544496666973858, "grad_norm": 0.35813626646995544, "learning_rate": 1.6586839653222338e-05, "loss": 0.5942, "step": 17725 }, { "epoch": 0.5445273861088071, "grad_norm": 0.33719462156295776, "learning_rate": 1.6586476018132653e-05, "loss": 0.5664, "step": 17726 }, { "epoch": 0.5445581052437564, "grad_norm": 0.3577592968940735, "learning_rate": 1.6586112367659794e-05, "loss": 0.5293, "step": 17727 }, { "epoch": 0.5445888243787055, "grad_norm": 0.32969745993614197, "learning_rate": 1.658574870180461e-05, "loss": 0.5069, "step": 17728 }, { "epoch": 0.5446195435136546, "grad_norm": 0.34583529829978943, "learning_rate": 1.658538502056795e-05, "loss": 0.5372, "step": 17729 }, { "epoch": 0.5446502626486038, "grad_norm": 0.8097785711288452, "learning_rate": 1.6585021323950663e-05, "loss": 0.6521, "step": 17730 }, { "epoch": 0.544680981783553, "grad_norm": 0.312434583902359, "learning_rate": 1.65846576119536e-05, "loss": 0.5352, "step": 17731 }, { "epoch": 0.5447117009185022, "grad_norm": 0.34326574206352234, "learning_rate": 1.6584293884577605e-05, "loss": 0.5044, "step": 17732 }, { "epoch": 0.5447424200534513, "grad_norm": 0.3642909824848175, "learning_rate": 1.658393014182354e-05, "loss": 0.5921, "step": 17733 }, { "epoch": 0.5447731391884004, "grad_norm": 0.40556713938713074, "learning_rate": 1.658356638369224e-05, "loss": 0.5428, "step": 17734 }, { "epoch": 0.5448038583233497, "grad_norm": 0.4519590735435486, "learning_rate": 1.658320261018456e-05, "loss": 0.6814, "step": 17735 }, { "epoch": 0.5448345774582988, "grad_norm": 0.34088268876075745, "learning_rate": 1.6582838821301354e-05, "loss": 0.538, "step": 17736 }, { "epoch": 0.5448652965932479, "grad_norm": 0.3594852685928345, "learning_rate": 1.658247501704347e-05, "loss": 0.6328, "step": 17737 }, { "epoch": 0.5448960157281971, "grad_norm": 0.35510939359664917, "learning_rate": 1.658211119741175e-05, "loss": 0.553, "step": 17738 }, { "epoch": 0.5449267348631462, "grad_norm": 0.40275833010673523, "learning_rate": 1.6581747362407054e-05, "loss": 0.5699, "step": 17739 }, { "epoch": 0.5449574539980954, "grad_norm": 0.38424280285835266, "learning_rate": 1.6581383512030224e-05, "loss": 0.633, "step": 17740 }, { "epoch": 0.5449881731330446, "grad_norm": 0.324897438287735, "learning_rate": 1.6581019646282115e-05, "loss": 0.5069, "step": 17741 }, { "epoch": 0.5450188922679937, "grad_norm": 0.3672373294830322, "learning_rate": 1.6580655765163576e-05, "loss": 0.5625, "step": 17742 }, { "epoch": 0.545049611402943, "grad_norm": 0.3235056400299072, "learning_rate": 1.6580291868675455e-05, "loss": 0.5871, "step": 17743 }, { "epoch": 0.5450803305378921, "grad_norm": 0.3985832929611206, "learning_rate": 1.6579927956818603e-05, "loss": 0.5648, "step": 17744 }, { "epoch": 0.5451110496728412, "grad_norm": 0.41424521803855896, "learning_rate": 1.657956402959387e-05, "loss": 0.6031, "step": 17745 }, { "epoch": 0.5451417688077904, "grad_norm": 0.4279395639896393, "learning_rate": 1.6579200087002104e-05, "loss": 0.564, "step": 17746 }, { "epoch": 0.5451724879427395, "grad_norm": 0.38417840003967285, "learning_rate": 1.6578836129044158e-05, "loss": 0.574, "step": 17747 }, { "epoch": 0.5452032070776887, "grad_norm": 0.3529137074947357, "learning_rate": 1.6578472155720882e-05, "loss": 0.6298, "step": 17748 }, { "epoch": 0.5452339262126379, "grad_norm": 0.34810489416122437, "learning_rate": 1.6578108167033123e-05, "loss": 0.5248, "step": 17749 }, { "epoch": 0.545264645347587, "grad_norm": 0.36843615770339966, "learning_rate": 1.6577744162981732e-05, "loss": 0.5091, "step": 17750 }, { "epoch": 0.5452953644825361, "grad_norm": 0.4092557430267334, "learning_rate": 1.657738014356756e-05, "loss": 0.5608, "step": 17751 }, { "epoch": 0.5453260836174854, "grad_norm": 0.366543173789978, "learning_rate": 1.6577016108791463e-05, "loss": 0.5577, "step": 17752 }, { "epoch": 0.5453568027524345, "grad_norm": 0.36111441254615784, "learning_rate": 1.6576652058654283e-05, "loss": 0.5915, "step": 17753 }, { "epoch": 0.5453875218873836, "grad_norm": 0.4382053315639496, "learning_rate": 1.657628799315687e-05, "loss": 0.553, "step": 17754 }, { "epoch": 0.5454182410223328, "grad_norm": 0.438309907913208, "learning_rate": 1.657592391230008e-05, "loss": 0.5703, "step": 17755 }, { "epoch": 0.545448960157282, "grad_norm": 0.3420596718788147, "learning_rate": 1.6575559816084758e-05, "loss": 0.536, "step": 17756 }, { "epoch": 0.5454796792922312, "grad_norm": 0.38722237944602966, "learning_rate": 1.6575195704511756e-05, "loss": 0.607, "step": 17757 }, { "epoch": 0.5455103984271803, "grad_norm": 0.4006425440311432, "learning_rate": 1.6574831577581925e-05, "loss": 0.5089, "step": 17758 }, { "epoch": 0.5455411175621294, "grad_norm": 0.34805455803871155, "learning_rate": 1.657446743529612e-05, "loss": 0.5862, "step": 17759 }, { "epoch": 0.5455718366970786, "grad_norm": 0.39740148186683655, "learning_rate": 1.6574103277655184e-05, "loss": 0.6019, "step": 17760 }, { "epoch": 0.5456025558320278, "grad_norm": 0.35048773884773254, "learning_rate": 1.657373910465997e-05, "loss": 0.5948, "step": 17761 }, { "epoch": 0.5456332749669769, "grad_norm": 0.3629768192768097, "learning_rate": 1.657337491631133e-05, "loss": 0.5447, "step": 17762 }, { "epoch": 0.5456639941019261, "grad_norm": 0.3450331389904022, "learning_rate": 1.6573010712610116e-05, "loss": 0.6083, "step": 17763 }, { "epoch": 0.5456947132368752, "grad_norm": 0.33820632100105286, "learning_rate": 1.6572646493557174e-05, "loss": 0.6043, "step": 17764 }, { "epoch": 0.5457254323718244, "grad_norm": 0.35275179147720337, "learning_rate": 1.6572282259153357e-05, "loss": 0.4757, "step": 17765 }, { "epoch": 0.5457561515067736, "grad_norm": 0.37016719579696655, "learning_rate": 1.657191800939952e-05, "loss": 0.5427, "step": 17766 }, { "epoch": 0.5457868706417227, "grad_norm": 0.35285046696662903, "learning_rate": 1.6571553744296505e-05, "loss": 0.5212, "step": 17767 }, { "epoch": 0.5458175897766719, "grad_norm": 0.34526240825653076, "learning_rate": 1.6571189463845167e-05, "loss": 0.5141, "step": 17768 }, { "epoch": 0.5458483089116211, "grad_norm": 0.3409665822982788, "learning_rate": 1.6570825168046362e-05, "loss": 0.5838, "step": 17769 }, { "epoch": 0.5458790280465702, "grad_norm": 0.3328840732574463, "learning_rate": 1.6570460856900932e-05, "loss": 0.5362, "step": 17770 }, { "epoch": 0.5459097471815194, "grad_norm": 0.36654531955718994, "learning_rate": 1.657009653040973e-05, "loss": 0.6168, "step": 17771 }, { "epoch": 0.5459404663164685, "grad_norm": 0.3876456022262573, "learning_rate": 1.656973218857361e-05, "loss": 0.6167, "step": 17772 }, { "epoch": 0.5459711854514177, "grad_norm": 0.5172316431999207, "learning_rate": 1.6569367831393423e-05, "loss": 0.5795, "step": 17773 }, { "epoch": 0.5460019045863669, "grad_norm": 0.3343391418457031, "learning_rate": 1.6569003458870018e-05, "loss": 0.6207, "step": 17774 }, { "epoch": 0.546032623721316, "grad_norm": 0.31789860129356384, "learning_rate": 1.6568639071004247e-05, "loss": 0.5345, "step": 17775 }, { "epoch": 0.5460633428562651, "grad_norm": 0.34435585141181946, "learning_rate": 1.656827466779696e-05, "loss": 0.5905, "step": 17776 }, { "epoch": 0.5460940619912144, "grad_norm": 0.5723513960838318, "learning_rate": 1.656791024924901e-05, "loss": 0.5979, "step": 17777 }, { "epoch": 0.5461247811261635, "grad_norm": 0.4134824872016907, "learning_rate": 1.6567545815361247e-05, "loss": 0.5565, "step": 17778 }, { "epoch": 0.5461555002611126, "grad_norm": 0.5177611112594604, "learning_rate": 1.656718136613452e-05, "loss": 0.5211, "step": 17779 }, { "epoch": 0.5461862193960618, "grad_norm": 0.4397880733013153, "learning_rate": 1.6566816901569684e-05, "loss": 0.6033, "step": 17780 }, { "epoch": 0.5462169385310109, "grad_norm": 0.33246564865112305, "learning_rate": 1.656645242166759e-05, "loss": 0.5332, "step": 17781 }, { "epoch": 0.5462476576659602, "grad_norm": 0.4321861267089844, "learning_rate": 1.6566087926429084e-05, "loss": 0.6416, "step": 17782 }, { "epoch": 0.5462783768009093, "grad_norm": 0.33753177523612976, "learning_rate": 1.6565723415855023e-05, "loss": 0.5415, "step": 17783 }, { "epoch": 0.5463090959358584, "grad_norm": 0.3512771725654602, "learning_rate": 1.656535888994626e-05, "loss": 0.58, "step": 17784 }, { "epoch": 0.5463398150708076, "grad_norm": 0.36695167422294617, "learning_rate": 1.6564994348703635e-05, "loss": 0.5052, "step": 17785 }, { "epoch": 0.5463705342057568, "grad_norm": 0.34553104639053345, "learning_rate": 1.6564629792128014e-05, "loss": 0.5513, "step": 17786 }, { "epoch": 0.5464012533407059, "grad_norm": 0.381276935338974, "learning_rate": 1.6564265220220236e-05, "loss": 0.5865, "step": 17787 }, { "epoch": 0.5464319724756551, "grad_norm": 0.4641372263431549, "learning_rate": 1.656390063298116e-05, "loss": 0.55, "step": 17788 }, { "epoch": 0.5464626916106042, "grad_norm": 0.4088456928730011, "learning_rate": 1.656353603041164e-05, "loss": 0.5478, "step": 17789 }, { "epoch": 0.5464934107455534, "grad_norm": 0.3531017601490021, "learning_rate": 1.656317141251252e-05, "loss": 0.5759, "step": 17790 }, { "epoch": 0.5465241298805026, "grad_norm": 0.343704491853714, "learning_rate": 1.6562806779284657e-05, "loss": 0.6104, "step": 17791 }, { "epoch": 0.5465548490154517, "grad_norm": 0.42058125138282776, "learning_rate": 1.6562442130728897e-05, "loss": 0.533, "step": 17792 }, { "epoch": 0.5465855681504009, "grad_norm": 0.3446334898471832, "learning_rate": 1.6562077466846097e-05, "loss": 0.5998, "step": 17793 }, { "epoch": 0.54661628728535, "grad_norm": 0.33991238474845886, "learning_rate": 1.6561712787637107e-05, "loss": 0.537, "step": 17794 }, { "epoch": 0.5466470064202992, "grad_norm": 0.4118923246860504, "learning_rate": 1.6561348093102773e-05, "loss": 0.5436, "step": 17795 }, { "epoch": 0.5466777255552484, "grad_norm": 0.8692225813865662, "learning_rate": 1.656098338324396e-05, "loss": 0.5485, "step": 17796 }, { "epoch": 0.5467084446901975, "grad_norm": 0.32533422112464905, "learning_rate": 1.6560618658061507e-05, "loss": 0.5098, "step": 17797 }, { "epoch": 0.5467391638251466, "grad_norm": 0.34816521406173706, "learning_rate": 1.6560253917556273e-05, "loss": 0.5524, "step": 17798 }, { "epoch": 0.5467698829600959, "grad_norm": 0.32880067825317383, "learning_rate": 1.6559889161729107e-05, "loss": 0.5804, "step": 17799 }, { "epoch": 0.546800602095045, "grad_norm": 0.47061675786972046, "learning_rate": 1.6559524390580867e-05, "loss": 0.5791, "step": 17800 }, { "epoch": 0.5468313212299941, "grad_norm": 0.33728039264678955, "learning_rate": 1.6559159604112392e-05, "loss": 0.5861, "step": 17801 }, { "epoch": 0.5468620403649433, "grad_norm": 0.33636409044265747, "learning_rate": 1.6558794802324544e-05, "loss": 0.5244, "step": 17802 }, { "epoch": 0.5468927594998925, "grad_norm": 0.38391658663749695, "learning_rate": 1.6558429985218177e-05, "loss": 0.5896, "step": 17803 }, { "epoch": 0.5469234786348417, "grad_norm": 0.37696588039398193, "learning_rate": 1.6558065152794133e-05, "loss": 0.6145, "step": 17804 }, { "epoch": 0.5469541977697908, "grad_norm": 0.4099494218826294, "learning_rate": 1.6557700305053274e-05, "loss": 0.6259, "step": 17805 }, { "epoch": 0.5469849169047399, "grad_norm": 0.3631151616573334, "learning_rate": 1.6557335441996446e-05, "loss": 0.5859, "step": 17806 }, { "epoch": 0.5470156360396892, "grad_norm": 0.34597212076187134, "learning_rate": 1.6556970563624504e-05, "loss": 0.598, "step": 17807 }, { "epoch": 0.5470463551746383, "grad_norm": 0.3382669687271118, "learning_rate": 1.6556605669938298e-05, "loss": 0.5866, "step": 17808 }, { "epoch": 0.5470770743095874, "grad_norm": 0.31504184007644653, "learning_rate": 1.655624076093868e-05, "loss": 0.5533, "step": 17809 }, { "epoch": 0.5471077934445366, "grad_norm": 1.3177824020385742, "learning_rate": 1.6555875836626507e-05, "loss": 0.5463, "step": 17810 }, { "epoch": 0.5471385125794858, "grad_norm": 0.32278066873550415, "learning_rate": 1.6555510897002628e-05, "loss": 0.5443, "step": 17811 }, { "epoch": 0.5471692317144349, "grad_norm": 0.3601195216178894, "learning_rate": 1.6555145942067892e-05, "loss": 0.5563, "step": 17812 }, { "epoch": 0.5471999508493841, "grad_norm": 0.35071420669555664, "learning_rate": 1.6554780971823155e-05, "loss": 0.5379, "step": 17813 }, { "epoch": 0.5472306699843332, "grad_norm": 0.38742586970329285, "learning_rate": 1.6554415986269273e-05, "loss": 0.5732, "step": 17814 }, { "epoch": 0.5472613891192823, "grad_norm": 0.3576367199420929, "learning_rate": 1.6554050985407097e-05, "loss": 0.5502, "step": 17815 }, { "epoch": 0.5472921082542316, "grad_norm": 0.3470524251461029, "learning_rate": 1.6553685969237473e-05, "loss": 0.5694, "step": 17816 }, { "epoch": 0.5473228273891807, "grad_norm": 0.32977530360221863, "learning_rate": 1.6553320937761256e-05, "loss": 0.5671, "step": 17817 }, { "epoch": 0.5473535465241299, "grad_norm": 0.34287333488464355, "learning_rate": 1.6552955890979303e-05, "loss": 0.5198, "step": 17818 }, { "epoch": 0.547384265659079, "grad_norm": 0.3206973075866699, "learning_rate": 1.6552590828892463e-05, "loss": 0.5377, "step": 17819 }, { "epoch": 0.5474149847940282, "grad_norm": 0.31568801403045654, "learning_rate": 1.655222575150159e-05, "loss": 0.5543, "step": 17820 }, { "epoch": 0.5474457039289774, "grad_norm": 0.3582233190536499, "learning_rate": 1.6551860658807536e-05, "loss": 0.5912, "step": 17821 }, { "epoch": 0.5474764230639265, "grad_norm": 0.4754795432090759, "learning_rate": 1.6551495550811155e-05, "loss": 0.5989, "step": 17822 }, { "epoch": 0.5475071421988756, "grad_norm": 0.3693186938762665, "learning_rate": 1.6551130427513297e-05, "loss": 0.6958, "step": 17823 }, { "epoch": 0.5475378613338249, "grad_norm": 0.3545594811439514, "learning_rate": 1.6550765288914815e-05, "loss": 0.6579, "step": 17824 }, { "epoch": 0.547568580468774, "grad_norm": 0.34710490703582764, "learning_rate": 1.6550400135016567e-05, "loss": 0.5311, "step": 17825 }, { "epoch": 0.5475992996037231, "grad_norm": 0.40857669711112976, "learning_rate": 1.65500349658194e-05, "loss": 0.5931, "step": 17826 }, { "epoch": 0.5476300187386723, "grad_norm": 0.3268378674983978, "learning_rate": 1.654966978132417e-05, "loss": 0.5878, "step": 17827 }, { "epoch": 0.5476607378736215, "grad_norm": 0.3292548656463623, "learning_rate": 1.6549304581531725e-05, "loss": 0.4826, "step": 17828 }, { "epoch": 0.5476914570085707, "grad_norm": 0.34110596776008606, "learning_rate": 1.6548939366442926e-05, "loss": 0.5498, "step": 17829 }, { "epoch": 0.5477221761435198, "grad_norm": 0.3000625967979431, "learning_rate": 1.654857413605862e-05, "loss": 0.5418, "step": 17830 }, { "epoch": 0.5477528952784689, "grad_norm": 0.3351806402206421, "learning_rate": 1.654820889037966e-05, "loss": 0.632, "step": 17831 }, { "epoch": 0.5477836144134182, "grad_norm": 0.33575963973999023, "learning_rate": 1.6547843629406902e-05, "loss": 0.4862, "step": 17832 }, { "epoch": 0.5478143335483673, "grad_norm": 0.3593653738498688, "learning_rate": 1.65474783531412e-05, "loss": 0.5274, "step": 17833 }, { "epoch": 0.5478450526833164, "grad_norm": 0.339646577835083, "learning_rate": 1.6547113061583403e-05, "loss": 0.5285, "step": 17834 }, { "epoch": 0.5478757718182656, "grad_norm": 0.36409562826156616, "learning_rate": 1.6546747754734366e-05, "loss": 0.5754, "step": 17835 }, { "epoch": 0.5479064909532148, "grad_norm": 0.39288946986198425, "learning_rate": 1.6546382432594944e-05, "loss": 0.5877, "step": 17836 }, { "epoch": 0.5479372100881639, "grad_norm": 0.3896755576133728, "learning_rate": 1.654601709516599e-05, "loss": 0.6159, "step": 17837 }, { "epoch": 0.5479679292231131, "grad_norm": 0.37969663739204407, "learning_rate": 1.6545651742448355e-05, "loss": 0.6028, "step": 17838 }, { "epoch": 0.5479986483580622, "grad_norm": 0.3571425974369049, "learning_rate": 1.6545286374442892e-05, "loss": 0.5943, "step": 17839 }, { "epoch": 0.5480293674930113, "grad_norm": 0.3501509726047516, "learning_rate": 1.6544920991150456e-05, "loss": 0.5218, "step": 17840 }, { "epoch": 0.5480600866279606, "grad_norm": 0.40619730949401855, "learning_rate": 1.6544555592571894e-05, "loss": 0.5512, "step": 17841 }, { "epoch": 0.5480908057629097, "grad_norm": 0.3890797197818756, "learning_rate": 1.6544190178708074e-05, "loss": 0.6059, "step": 17842 }, { "epoch": 0.5481215248978589, "grad_norm": 0.3417879641056061, "learning_rate": 1.6543824749559836e-05, "loss": 0.584, "step": 17843 }, { "epoch": 0.548152244032808, "grad_norm": 0.45473459362983704, "learning_rate": 1.654345930512804e-05, "loss": 0.5691, "step": 17844 }, { "epoch": 0.5481829631677572, "grad_norm": 0.3158077597618103, "learning_rate": 1.654309384541354e-05, "loss": 0.532, "step": 17845 }, { "epoch": 0.5482136823027064, "grad_norm": 0.3588064908981323, "learning_rate": 1.6542728370417184e-05, "loss": 0.574, "step": 17846 }, { "epoch": 0.5482444014376555, "grad_norm": 0.3702642619609833, "learning_rate": 1.6542362880139832e-05, "loss": 0.5914, "step": 17847 }, { "epoch": 0.5482751205726046, "grad_norm": 0.3724825084209442, "learning_rate": 1.654199737458233e-05, "loss": 0.5673, "step": 17848 }, { "epoch": 0.5483058397075539, "grad_norm": 0.37141385674476624, "learning_rate": 1.654163185374554e-05, "loss": 0.5671, "step": 17849 }, { "epoch": 0.548336558842503, "grad_norm": 0.3366224765777588, "learning_rate": 1.654126631763031e-05, "loss": 0.6299, "step": 17850 }, { "epoch": 0.5483672779774521, "grad_norm": 0.3561117649078369, "learning_rate": 1.6540900766237496e-05, "loss": 0.6305, "step": 17851 }, { "epoch": 0.5483979971124013, "grad_norm": 0.31687578558921814, "learning_rate": 1.6540535199567953e-05, "loss": 0.6041, "step": 17852 }, { "epoch": 0.5484287162473505, "grad_norm": 0.3243749439716339, "learning_rate": 1.6540169617622533e-05, "loss": 0.6098, "step": 17853 }, { "epoch": 0.5484594353822997, "grad_norm": 0.34132450819015503, "learning_rate": 1.6539804020402088e-05, "loss": 0.5112, "step": 17854 }, { "epoch": 0.5484901545172488, "grad_norm": 0.3644148111343384, "learning_rate": 1.6539438407907476e-05, "loss": 0.5671, "step": 17855 }, { "epoch": 0.5485208736521979, "grad_norm": 0.33963823318481445, "learning_rate": 1.653907278013955e-05, "loss": 0.6374, "step": 17856 }, { "epoch": 0.5485515927871472, "grad_norm": 0.33696845173835754, "learning_rate": 1.6538707137099158e-05, "loss": 0.4949, "step": 17857 }, { "epoch": 0.5485823119220963, "grad_norm": 0.6348381638526917, "learning_rate": 1.6538341478787162e-05, "loss": 0.5855, "step": 17858 }, { "epoch": 0.5486130310570454, "grad_norm": 0.3773930072784424, "learning_rate": 1.6537975805204412e-05, "loss": 0.5527, "step": 17859 }, { "epoch": 0.5486437501919946, "grad_norm": 0.4112347662448883, "learning_rate": 1.6537610116351762e-05, "loss": 0.5406, "step": 17860 }, { "epoch": 0.5486744693269437, "grad_norm": 0.33259260654449463, "learning_rate": 1.653724441223007e-05, "loss": 0.4508, "step": 17861 }, { "epoch": 0.5487051884618929, "grad_norm": 0.3755553960800171, "learning_rate": 1.6536878692840188e-05, "loss": 0.518, "step": 17862 }, { "epoch": 0.5487359075968421, "grad_norm": 0.3710494637489319, "learning_rate": 1.6536512958182965e-05, "loss": 0.6035, "step": 17863 }, { "epoch": 0.5487666267317912, "grad_norm": 0.45756807923316956, "learning_rate": 1.6536147208259263e-05, "loss": 0.6148, "step": 17864 }, { "epoch": 0.5487973458667403, "grad_norm": 0.3563537001609802, "learning_rate": 1.653578144306993e-05, "loss": 0.6049, "step": 17865 }, { "epoch": 0.5488280650016896, "grad_norm": 0.3613763153553009, "learning_rate": 1.6535415662615825e-05, "loss": 0.5418, "step": 17866 }, { "epoch": 0.5488587841366387, "grad_norm": 0.3676871359348297, "learning_rate": 1.6535049866897802e-05, "loss": 0.5338, "step": 17867 }, { "epoch": 0.5488895032715879, "grad_norm": 0.3553885817527771, "learning_rate": 1.6534684055916706e-05, "loss": 0.5386, "step": 17868 }, { "epoch": 0.548920222406537, "grad_norm": 0.42028558254241943, "learning_rate": 1.6534318229673405e-05, "loss": 0.5, "step": 17869 }, { "epoch": 0.5489509415414862, "grad_norm": 0.3791741132736206, "learning_rate": 1.6533952388168745e-05, "loss": 0.6775, "step": 17870 }, { "epoch": 0.5489816606764354, "grad_norm": 0.36319777369499207, "learning_rate": 1.6533586531403587e-05, "loss": 0.623, "step": 17871 }, { "epoch": 0.5490123798113845, "grad_norm": 0.3763711154460907, "learning_rate": 1.6533220659378775e-05, "loss": 0.565, "step": 17872 }, { "epoch": 0.5490430989463336, "grad_norm": 0.3383457362651825, "learning_rate": 1.6532854772095176e-05, "loss": 0.5733, "step": 17873 }, { "epoch": 0.5490738180812829, "grad_norm": 0.3896760642528534, "learning_rate": 1.6532488869553634e-05, "loss": 0.5032, "step": 17874 }, { "epoch": 0.549104537216232, "grad_norm": 0.34761855006217957, "learning_rate": 1.6532122951755013e-05, "loss": 0.6048, "step": 17875 }, { "epoch": 0.5491352563511811, "grad_norm": 0.3611413836479187, "learning_rate": 1.6531757018700162e-05, "loss": 0.5993, "step": 17876 }, { "epoch": 0.5491659754861303, "grad_norm": 0.36839333176612854, "learning_rate": 1.6531391070389933e-05, "loss": 0.5411, "step": 17877 }, { "epoch": 0.5491966946210795, "grad_norm": 0.3564799726009369, "learning_rate": 1.6531025106825183e-05, "loss": 0.5473, "step": 17878 }, { "epoch": 0.5492274137560287, "grad_norm": 0.3468804359436035, "learning_rate": 1.653065912800677e-05, "loss": 0.5977, "step": 17879 }, { "epoch": 0.5492581328909778, "grad_norm": 0.3907046914100647, "learning_rate": 1.6530293133935547e-05, "loss": 0.6376, "step": 17880 }, { "epoch": 0.5492888520259269, "grad_norm": 0.34149959683418274, "learning_rate": 1.652992712461237e-05, "loss": 0.5975, "step": 17881 }, { "epoch": 0.5493195711608762, "grad_norm": 0.33303728699684143, "learning_rate": 1.6529561100038086e-05, "loss": 0.545, "step": 17882 }, { "epoch": 0.5493502902958253, "grad_norm": 0.3381478786468506, "learning_rate": 1.652919506021356e-05, "loss": 0.5697, "step": 17883 }, { "epoch": 0.5493810094307744, "grad_norm": 0.3225345015525818, "learning_rate": 1.6528829005139643e-05, "loss": 0.5712, "step": 17884 }, { "epoch": 0.5494117285657236, "grad_norm": 0.3625480532646179, "learning_rate": 1.652846293481719e-05, "loss": 0.5357, "step": 17885 }, { "epoch": 0.5494424477006727, "grad_norm": 0.33722615242004395, "learning_rate": 1.6528096849247055e-05, "loss": 0.5659, "step": 17886 }, { "epoch": 0.5494731668356219, "grad_norm": 0.3693940341472626, "learning_rate": 1.6527730748430096e-05, "loss": 0.5532, "step": 17887 }, { "epoch": 0.5495038859705711, "grad_norm": 0.3462417423725128, "learning_rate": 1.6527364632367163e-05, "loss": 0.6064, "step": 17888 }, { "epoch": 0.5495346051055202, "grad_norm": 0.3416949212551117, "learning_rate": 1.6526998501059115e-05, "loss": 0.599, "step": 17889 }, { "epoch": 0.5495653242404694, "grad_norm": 0.4237491488456726, "learning_rate": 1.6526632354506805e-05, "loss": 0.5892, "step": 17890 }, { "epoch": 0.5495960433754186, "grad_norm": 0.35294491052627563, "learning_rate": 1.6526266192711093e-05, "loss": 0.5713, "step": 17891 }, { "epoch": 0.5496267625103677, "grad_norm": 0.8422199487686157, "learning_rate": 1.6525900015672827e-05, "loss": 0.6202, "step": 17892 }, { "epoch": 0.5496574816453169, "grad_norm": 0.3258680999279022, "learning_rate": 1.6525533823392867e-05, "loss": 0.4557, "step": 17893 }, { "epoch": 0.549688200780266, "grad_norm": 0.3699376583099365, "learning_rate": 1.6525167615872068e-05, "loss": 0.6167, "step": 17894 }, { "epoch": 0.5497189199152152, "grad_norm": 0.36217713356018066, "learning_rate": 1.652480139311128e-05, "loss": 0.5665, "step": 17895 }, { "epoch": 0.5497496390501644, "grad_norm": 0.3964667022228241, "learning_rate": 1.652443515511137e-05, "loss": 0.5195, "step": 17896 }, { "epoch": 0.5497803581851135, "grad_norm": 0.4052218794822693, "learning_rate": 1.6524068901873183e-05, "loss": 0.571, "step": 17897 }, { "epoch": 0.5498110773200626, "grad_norm": 0.5183062553405762, "learning_rate": 1.6523702633397575e-05, "loss": 0.5704, "step": 17898 }, { "epoch": 0.5498417964550119, "grad_norm": 0.38971054553985596, "learning_rate": 1.6523336349685406e-05, "loss": 0.5719, "step": 17899 }, { "epoch": 0.549872515589961, "grad_norm": 0.36778315901756287, "learning_rate": 1.652297005073753e-05, "loss": 0.536, "step": 17900 }, { "epoch": 0.5499032347249101, "grad_norm": 0.36975592374801636, "learning_rate": 1.65226037365548e-05, "loss": 0.5444, "step": 17901 }, { "epoch": 0.5499339538598593, "grad_norm": 0.4615325927734375, "learning_rate": 1.6522237407138076e-05, "loss": 0.612, "step": 17902 }, { "epoch": 0.5499646729948084, "grad_norm": 0.38011297583580017, "learning_rate": 1.6521871062488205e-05, "loss": 0.6006, "step": 17903 }, { "epoch": 0.5499953921297577, "grad_norm": 0.4380514621734619, "learning_rate": 1.6521504702606053e-05, "loss": 0.4949, "step": 17904 }, { "epoch": 0.5500261112647068, "grad_norm": 0.37127354741096497, "learning_rate": 1.6521138327492472e-05, "loss": 0.5691, "step": 17905 }, { "epoch": 0.5500568303996559, "grad_norm": 0.36025527119636536, "learning_rate": 1.6520771937148315e-05, "loss": 0.5497, "step": 17906 }, { "epoch": 0.5500875495346051, "grad_norm": 0.3407958745956421, "learning_rate": 1.652040553157444e-05, "loss": 0.5305, "step": 17907 }, { "epoch": 0.5501182686695543, "grad_norm": 0.3159032166004181, "learning_rate": 1.6520039110771704e-05, "loss": 0.5124, "step": 17908 }, { "epoch": 0.5501489878045034, "grad_norm": 0.3294544219970703, "learning_rate": 1.6519672674740964e-05, "loss": 0.4605, "step": 17909 }, { "epoch": 0.5501797069394526, "grad_norm": 0.36503270268440247, "learning_rate": 1.651930622348307e-05, "loss": 0.5466, "step": 17910 }, { "epoch": 0.5502104260744017, "grad_norm": 0.36371394991874695, "learning_rate": 1.651893975699888e-05, "loss": 0.5324, "step": 17911 }, { "epoch": 0.5502411452093509, "grad_norm": 0.3435885012149811, "learning_rate": 1.6518573275289253e-05, "loss": 0.5789, "step": 17912 }, { "epoch": 0.5502718643443001, "grad_norm": 0.4175232946872711, "learning_rate": 1.651820677835504e-05, "loss": 0.4827, "step": 17913 }, { "epoch": 0.5503025834792492, "grad_norm": 0.7273060083389282, "learning_rate": 1.65178402661971e-05, "loss": 0.5125, "step": 17914 }, { "epoch": 0.5503333026141984, "grad_norm": 0.45545849204063416, "learning_rate": 1.651747373881629e-05, "loss": 0.482, "step": 17915 }, { "epoch": 0.5503640217491476, "grad_norm": 0.4573293626308441, "learning_rate": 1.6517107196213465e-05, "loss": 0.5604, "step": 17916 }, { "epoch": 0.5503947408840967, "grad_norm": 0.3910965919494629, "learning_rate": 1.6516740638389484e-05, "loss": 0.4967, "step": 17917 }, { "epoch": 0.5504254600190459, "grad_norm": 0.337507426738739, "learning_rate": 1.6516374065345196e-05, "loss": 0.5635, "step": 17918 }, { "epoch": 0.550456179153995, "grad_norm": 0.3612883985042572, "learning_rate": 1.6516007477081463e-05, "loss": 0.5303, "step": 17919 }, { "epoch": 0.5504868982889441, "grad_norm": 0.3509572446346283, "learning_rate": 1.651564087359914e-05, "loss": 0.4575, "step": 17920 }, { "epoch": 0.5505176174238934, "grad_norm": 0.3373236358165741, "learning_rate": 1.651527425489908e-05, "loss": 0.5164, "step": 17921 }, { "epoch": 0.5505483365588425, "grad_norm": 0.41804465651512146, "learning_rate": 1.6514907620982143e-05, "loss": 0.5658, "step": 17922 }, { "epoch": 0.5505790556937916, "grad_norm": 0.3752017617225647, "learning_rate": 1.6514540971849187e-05, "loss": 0.5438, "step": 17923 }, { "epoch": 0.5506097748287408, "grad_norm": 0.37419381737709045, "learning_rate": 1.6514174307501062e-05, "loss": 0.5966, "step": 17924 }, { "epoch": 0.55064049396369, "grad_norm": 0.3528817892074585, "learning_rate": 1.651380762793863e-05, "loss": 0.5934, "step": 17925 }, { "epoch": 0.5506712130986391, "grad_norm": 0.3814813494682312, "learning_rate": 1.6513440933162743e-05, "loss": 0.6219, "step": 17926 }, { "epoch": 0.5507019322335883, "grad_norm": 0.3692583441734314, "learning_rate": 1.6513074223174264e-05, "loss": 0.5543, "step": 17927 }, { "epoch": 0.5507326513685374, "grad_norm": 0.3530328869819641, "learning_rate": 1.6512707497974042e-05, "loss": 0.599, "step": 17928 }, { "epoch": 0.5507633705034867, "grad_norm": 0.3384045958518982, "learning_rate": 1.6512340757562938e-05, "loss": 0.5718, "step": 17929 }, { "epoch": 0.5507940896384358, "grad_norm": 0.3112022578716278, "learning_rate": 1.651197400194181e-05, "loss": 0.5521, "step": 17930 }, { "epoch": 0.5508248087733849, "grad_norm": 0.3793368637561798, "learning_rate": 1.6511607231111505e-05, "loss": 0.5327, "step": 17931 }, { "epoch": 0.5508555279083341, "grad_norm": 0.3697839677333832, "learning_rate": 1.651124044507289e-05, "loss": 0.5127, "step": 17932 }, { "epoch": 0.5508862470432833, "grad_norm": 0.3692989945411682, "learning_rate": 1.651087364382682e-05, "loss": 0.5774, "step": 17933 }, { "epoch": 0.5509169661782324, "grad_norm": 0.45082300901412964, "learning_rate": 1.651050682737415e-05, "loss": 0.5764, "step": 17934 }, { "epoch": 0.5509476853131816, "grad_norm": 0.36269861459732056, "learning_rate": 1.6510139995715734e-05, "loss": 0.5354, "step": 17935 }, { "epoch": 0.5509784044481307, "grad_norm": 0.36267802119255066, "learning_rate": 1.6509773148852436e-05, "loss": 0.5406, "step": 17936 }, { "epoch": 0.5510091235830799, "grad_norm": 0.35712772607803345, "learning_rate": 1.65094062867851e-05, "loss": 0.6389, "step": 17937 }, { "epoch": 0.5510398427180291, "grad_norm": 0.32744428515434265, "learning_rate": 1.65090394095146e-05, "loss": 0.5717, "step": 17938 }, { "epoch": 0.5510705618529782, "grad_norm": 0.35781407356262207, "learning_rate": 1.650867251704178e-05, "loss": 0.6213, "step": 17939 }, { "epoch": 0.5511012809879274, "grad_norm": 0.34238412976264954, "learning_rate": 1.65083056093675e-05, "loss": 0.594, "step": 17940 }, { "epoch": 0.5511320001228766, "grad_norm": 0.5031593441963196, "learning_rate": 1.6507938686492616e-05, "loss": 0.5486, "step": 17941 }, { "epoch": 0.5511627192578257, "grad_norm": 0.3509288728237152, "learning_rate": 1.650757174841799e-05, "loss": 0.5623, "step": 17942 }, { "epoch": 0.5511934383927749, "grad_norm": 0.3697925806045532, "learning_rate": 1.6507204795144472e-05, "loss": 0.5478, "step": 17943 }, { "epoch": 0.551224157527724, "grad_norm": 0.47042933106422424, "learning_rate": 1.650683782667293e-05, "loss": 0.5423, "step": 17944 }, { "epoch": 0.5512548766626731, "grad_norm": 0.3620114326477051, "learning_rate": 1.6506470843004208e-05, "loss": 0.5438, "step": 17945 }, { "epoch": 0.5512855957976224, "grad_norm": 0.3470032513141632, "learning_rate": 1.6506103844139167e-05, "loss": 0.6023, "step": 17946 }, { "epoch": 0.5513163149325715, "grad_norm": 0.3540189862251282, "learning_rate": 1.650573683007867e-05, "loss": 0.5408, "step": 17947 }, { "epoch": 0.5513470340675206, "grad_norm": 0.3788972496986389, "learning_rate": 1.650536980082357e-05, "loss": 0.5886, "step": 17948 }, { "epoch": 0.5513777532024698, "grad_norm": 0.3602831959724426, "learning_rate": 1.6505002756374724e-05, "loss": 0.5746, "step": 17949 }, { "epoch": 0.551408472337419, "grad_norm": 0.502042829990387, "learning_rate": 1.6504635696732992e-05, "loss": 0.5573, "step": 17950 }, { "epoch": 0.5514391914723681, "grad_norm": 0.35111406445503235, "learning_rate": 1.6504268621899226e-05, "loss": 0.5843, "step": 17951 }, { "epoch": 0.5514699106073173, "grad_norm": 0.35634422302246094, "learning_rate": 1.6503901531874287e-05, "loss": 0.6438, "step": 17952 }, { "epoch": 0.5515006297422664, "grad_norm": 0.3504487872123718, "learning_rate": 1.6503534426659032e-05, "loss": 0.5555, "step": 17953 }, { "epoch": 0.5515313488772157, "grad_norm": 0.35313940048217773, "learning_rate": 1.6503167306254318e-05, "loss": 0.554, "step": 17954 }, { "epoch": 0.5515620680121648, "grad_norm": 0.3352429270744324, "learning_rate": 1.6502800170661002e-05, "loss": 0.5558, "step": 17955 }, { "epoch": 0.5515927871471139, "grad_norm": 0.35531893372535706, "learning_rate": 1.6502433019879945e-05, "loss": 0.5585, "step": 17956 }, { "epoch": 0.5516235062820631, "grad_norm": 0.33617955446243286, "learning_rate": 1.6502065853911997e-05, "loss": 0.5634, "step": 17957 }, { "epoch": 0.5516542254170123, "grad_norm": 0.32530298829078674, "learning_rate": 1.6501698672758022e-05, "loss": 0.5145, "step": 17958 }, { "epoch": 0.5516849445519614, "grad_norm": 0.37184685468673706, "learning_rate": 1.6501331476418877e-05, "loss": 0.6321, "step": 17959 }, { "epoch": 0.5517156636869106, "grad_norm": 0.3637303411960602, "learning_rate": 1.6500964264895417e-05, "loss": 0.6084, "step": 17960 }, { "epoch": 0.5517463828218597, "grad_norm": 0.3975439965724945, "learning_rate": 1.6500597038188502e-05, "loss": 0.6418, "step": 17961 }, { "epoch": 0.5517771019568088, "grad_norm": 0.3827633261680603, "learning_rate": 1.6500229796298985e-05, "loss": 0.5514, "step": 17962 }, { "epoch": 0.5518078210917581, "grad_norm": 0.385158509016037, "learning_rate": 1.649986253922773e-05, "loss": 0.5725, "step": 17963 }, { "epoch": 0.5518385402267072, "grad_norm": 0.37808895111083984, "learning_rate": 1.6499495266975593e-05, "loss": 0.5948, "step": 17964 }, { "epoch": 0.5518692593616564, "grad_norm": 0.4545128047466278, "learning_rate": 1.649912797954343e-05, "loss": 0.6164, "step": 17965 }, { "epoch": 0.5518999784966055, "grad_norm": 0.3740076720714569, "learning_rate": 1.6498760676932102e-05, "loss": 0.6176, "step": 17966 }, { "epoch": 0.5519306976315547, "grad_norm": 0.38371187448501587, "learning_rate": 1.649839335914246e-05, "loss": 0.5716, "step": 17967 }, { "epoch": 0.5519614167665039, "grad_norm": 0.34170669317245483, "learning_rate": 1.6498026026175373e-05, "loss": 0.5933, "step": 17968 }, { "epoch": 0.551992135901453, "grad_norm": 0.3537026345729828, "learning_rate": 1.6497658678031685e-05, "loss": 0.5636, "step": 17969 }, { "epoch": 0.5520228550364021, "grad_norm": 0.43965739011764526, "learning_rate": 1.6497291314712267e-05, "loss": 0.6337, "step": 17970 }, { "epoch": 0.5520535741713514, "grad_norm": 0.3640703856945038, "learning_rate": 1.649692393621797e-05, "loss": 0.5745, "step": 17971 }, { "epoch": 0.5520842933063005, "grad_norm": 0.4630148708820343, "learning_rate": 1.649655654254965e-05, "loss": 0.6414, "step": 17972 }, { "epoch": 0.5521150124412496, "grad_norm": 0.36114972829818726, "learning_rate": 1.6496189133708172e-05, "loss": 0.6063, "step": 17973 }, { "epoch": 0.5521457315761988, "grad_norm": 0.36431121826171875, "learning_rate": 1.6495821709694388e-05, "loss": 0.5808, "step": 17974 }, { "epoch": 0.552176450711148, "grad_norm": 0.338574081659317, "learning_rate": 1.6495454270509163e-05, "loss": 0.5853, "step": 17975 }, { "epoch": 0.5522071698460971, "grad_norm": 0.42442944645881653, "learning_rate": 1.6495086816153344e-05, "loss": 0.5447, "step": 17976 }, { "epoch": 0.5522378889810463, "grad_norm": 0.3278902769088745, "learning_rate": 1.64947193466278e-05, "loss": 0.5532, "step": 17977 }, { "epoch": 0.5522686081159954, "grad_norm": 0.37699246406555176, "learning_rate": 1.6494351861933387e-05, "loss": 0.5902, "step": 17978 }, { "epoch": 0.5522993272509447, "grad_norm": 0.36531922221183777, "learning_rate": 1.649398436207096e-05, "loss": 0.5684, "step": 17979 }, { "epoch": 0.5523300463858938, "grad_norm": 0.3895452320575714, "learning_rate": 1.649361684704138e-05, "loss": 0.5522, "step": 17980 }, { "epoch": 0.5523607655208429, "grad_norm": 0.37074753642082214, "learning_rate": 1.6493249316845502e-05, "loss": 0.599, "step": 17981 }, { "epoch": 0.5523914846557921, "grad_norm": 0.353380411863327, "learning_rate": 1.6492881771484192e-05, "loss": 0.542, "step": 17982 }, { "epoch": 0.5524222037907413, "grad_norm": 0.34879082441329956, "learning_rate": 1.64925142109583e-05, "loss": 0.5332, "step": 17983 }, { "epoch": 0.5524529229256904, "grad_norm": 0.3733726739883423, "learning_rate": 1.6492146635268683e-05, "loss": 0.5556, "step": 17984 }, { "epoch": 0.5524836420606396, "grad_norm": 0.4090191721916199, "learning_rate": 1.649177904441621e-05, "loss": 0.6297, "step": 17985 }, { "epoch": 0.5525143611955887, "grad_norm": 0.3826545774936676, "learning_rate": 1.6491411438401735e-05, "loss": 0.671, "step": 17986 }, { "epoch": 0.5525450803305378, "grad_norm": 0.4285891056060791, "learning_rate": 1.6491043817226108e-05, "loss": 0.5623, "step": 17987 }, { "epoch": 0.5525757994654871, "grad_norm": 0.3469213545322418, "learning_rate": 1.64906761808902e-05, "loss": 0.5899, "step": 17988 }, { "epoch": 0.5526065186004362, "grad_norm": 0.3562980890274048, "learning_rate": 1.6490308529394866e-05, "loss": 0.549, "step": 17989 }, { "epoch": 0.5526372377353854, "grad_norm": 0.34952667355537415, "learning_rate": 1.6489940862740962e-05, "loss": 0.6062, "step": 17990 }, { "epoch": 0.5526679568703345, "grad_norm": 0.370668888092041, "learning_rate": 1.6489573180929346e-05, "loss": 0.5621, "step": 17991 }, { "epoch": 0.5526986760052837, "grad_norm": 0.36219051480293274, "learning_rate": 1.648920548396088e-05, "loss": 0.5397, "step": 17992 }, { "epoch": 0.5527293951402329, "grad_norm": 0.3857770264148712, "learning_rate": 1.6488837771836417e-05, "loss": 0.5713, "step": 17993 }, { "epoch": 0.552760114275182, "grad_norm": 0.3422977328300476, "learning_rate": 1.6488470044556824e-05, "loss": 0.5745, "step": 17994 }, { "epoch": 0.5527908334101311, "grad_norm": 0.414061576128006, "learning_rate": 1.648810230212296e-05, "loss": 0.5861, "step": 17995 }, { "epoch": 0.5528215525450804, "grad_norm": 0.3965770900249481, "learning_rate": 1.6487734544535673e-05, "loss": 0.5479, "step": 17996 }, { "epoch": 0.5528522716800295, "grad_norm": 0.3686218857765198, "learning_rate": 1.6487366771795833e-05, "loss": 0.5543, "step": 17997 }, { "epoch": 0.5528829908149786, "grad_norm": 0.3542101979255676, "learning_rate": 1.648699898390429e-05, "loss": 0.5423, "step": 17998 }, { "epoch": 0.5529137099499278, "grad_norm": 0.34118229150772095, "learning_rate": 1.648663118086191e-05, "loss": 0.5695, "step": 17999 }, { "epoch": 0.552944429084877, "grad_norm": 0.370038777589798, "learning_rate": 1.6486263362669554e-05, "loss": 0.5708, "step": 18000 }, { "epoch": 0.5529751482198262, "grad_norm": 0.34807291626930237, "learning_rate": 1.6485895529328074e-05, "loss": 0.61, "step": 18001 }, { "epoch": 0.5530058673547753, "grad_norm": 0.33801597356796265, "learning_rate": 1.648552768083833e-05, "loss": 0.534, "step": 18002 }, { "epoch": 0.5530365864897244, "grad_norm": 0.31639260053634644, "learning_rate": 1.648515981720119e-05, "loss": 0.6056, "step": 18003 }, { "epoch": 0.5530673056246737, "grad_norm": 0.342925488948822, "learning_rate": 1.64847919384175e-05, "loss": 0.5402, "step": 18004 }, { "epoch": 0.5530980247596228, "grad_norm": 0.3372143805027008, "learning_rate": 1.6484424044488125e-05, "loss": 0.6199, "step": 18005 }, { "epoch": 0.5531287438945719, "grad_norm": 0.4069660007953644, "learning_rate": 1.6484056135413927e-05, "loss": 0.6368, "step": 18006 }, { "epoch": 0.5531594630295211, "grad_norm": 0.3762266933917999, "learning_rate": 1.6483688211195766e-05, "loss": 0.6625, "step": 18007 }, { "epoch": 0.5531901821644702, "grad_norm": 0.3324330449104309, "learning_rate": 1.6483320271834492e-05, "loss": 0.5715, "step": 18008 }, { "epoch": 0.5532209012994194, "grad_norm": 0.38232681155204773, "learning_rate": 1.6482952317330977e-05, "loss": 0.5526, "step": 18009 }, { "epoch": 0.5532516204343686, "grad_norm": 0.3661487102508545, "learning_rate": 1.648258434768607e-05, "loss": 0.6058, "step": 18010 }, { "epoch": 0.5532823395693177, "grad_norm": 0.4109456539154053, "learning_rate": 1.6482216362900638e-05, "loss": 0.5695, "step": 18011 }, { "epoch": 0.5533130587042668, "grad_norm": 0.33979228138923645, "learning_rate": 1.6481848362975532e-05, "loss": 0.6009, "step": 18012 }, { "epoch": 0.5533437778392161, "grad_norm": 0.49084925651550293, "learning_rate": 1.648148034791162e-05, "loss": 0.5544, "step": 18013 }, { "epoch": 0.5533744969741652, "grad_norm": 0.3432353436946869, "learning_rate": 1.6481112317709756e-05, "loss": 0.5838, "step": 18014 }, { "epoch": 0.5534052161091144, "grad_norm": 0.4049654006958008, "learning_rate": 1.6480744272370803e-05, "loss": 0.5214, "step": 18015 }, { "epoch": 0.5534359352440635, "grad_norm": 0.35836079716682434, "learning_rate": 1.6480376211895615e-05, "loss": 0.5907, "step": 18016 }, { "epoch": 0.5534666543790127, "grad_norm": 0.8291093111038208, "learning_rate": 1.648000813628506e-05, "loss": 0.6375, "step": 18017 }, { "epoch": 0.5534973735139619, "grad_norm": 0.3273581564426422, "learning_rate": 1.6479640045539995e-05, "loss": 0.5299, "step": 18018 }, { "epoch": 0.553528092648911, "grad_norm": 0.5420558452606201, "learning_rate": 1.6479271939661276e-05, "loss": 0.5353, "step": 18019 }, { "epoch": 0.5535588117838601, "grad_norm": 0.43158918619155884, "learning_rate": 1.6478903818649765e-05, "loss": 0.5551, "step": 18020 }, { "epoch": 0.5535895309188094, "grad_norm": 0.37662720680236816, "learning_rate": 1.6478535682506325e-05, "loss": 0.5393, "step": 18021 }, { "epoch": 0.5536202500537585, "grad_norm": 0.3826872706413269, "learning_rate": 1.6478167531231804e-05, "loss": 0.6277, "step": 18022 }, { "epoch": 0.5536509691887076, "grad_norm": 0.3610899746417999, "learning_rate": 1.647779936482708e-05, "loss": 0.5334, "step": 18023 }, { "epoch": 0.5536816883236568, "grad_norm": 0.3746196925640106, "learning_rate": 1.6477431183292997e-05, "loss": 0.5622, "step": 18024 }, { "epoch": 0.553712407458606, "grad_norm": 0.33936890959739685, "learning_rate": 1.6477062986630422e-05, "loss": 0.5927, "step": 18025 }, { "epoch": 0.5537431265935552, "grad_norm": 0.32986265420913696, "learning_rate": 1.6476694774840215e-05, "loss": 0.5847, "step": 18026 }, { "epoch": 0.5537738457285043, "grad_norm": 0.37801727652549744, "learning_rate": 1.6476326547923236e-05, "loss": 0.5217, "step": 18027 }, { "epoch": 0.5538045648634534, "grad_norm": 0.4452407956123352, "learning_rate": 1.6475958305880343e-05, "loss": 0.6024, "step": 18028 }, { "epoch": 0.5538352839984026, "grad_norm": 0.44289934635162354, "learning_rate": 1.64755900487124e-05, "loss": 0.6098, "step": 18029 }, { "epoch": 0.5538660031333518, "grad_norm": 0.382323294878006, "learning_rate": 1.6475221776420258e-05, "loss": 0.5926, "step": 18030 }, { "epoch": 0.5538967222683009, "grad_norm": 0.3203197419643402, "learning_rate": 1.6474853489004784e-05, "loss": 0.6111, "step": 18031 }, { "epoch": 0.5539274414032501, "grad_norm": 0.36737388372421265, "learning_rate": 1.647448518646684e-05, "loss": 0.6032, "step": 18032 }, { "epoch": 0.5539581605381992, "grad_norm": 0.34248343110084534, "learning_rate": 1.6474116868807284e-05, "loss": 0.5362, "step": 18033 }, { "epoch": 0.5539888796731484, "grad_norm": 0.36715924739837646, "learning_rate": 1.6473748536026978e-05, "loss": 0.537, "step": 18034 }, { "epoch": 0.5540195988080976, "grad_norm": 0.4749939739704132, "learning_rate": 1.6473380188126776e-05, "loss": 0.5891, "step": 18035 }, { "epoch": 0.5540503179430467, "grad_norm": 0.3676639199256897, "learning_rate": 1.6473011825107543e-05, "loss": 0.5485, "step": 18036 }, { "epoch": 0.5540810370779958, "grad_norm": 0.3352046608924866, "learning_rate": 1.647264344697014e-05, "loss": 0.557, "step": 18037 }, { "epoch": 0.5541117562129451, "grad_norm": 0.3171288073062897, "learning_rate": 1.6472275053715428e-05, "loss": 0.4696, "step": 18038 }, { "epoch": 0.5541424753478942, "grad_norm": 0.3696851134300232, "learning_rate": 1.6471906645344266e-05, "loss": 0.553, "step": 18039 }, { "epoch": 0.5541731944828434, "grad_norm": 0.32081520557403564, "learning_rate": 1.647153822185751e-05, "loss": 0.5258, "step": 18040 }, { "epoch": 0.5542039136177925, "grad_norm": 0.3922933340072632, "learning_rate": 1.6471169783256026e-05, "loss": 0.5341, "step": 18041 }, { "epoch": 0.5542346327527417, "grad_norm": 0.42852094769477844, "learning_rate": 1.6470801329540673e-05, "loss": 0.4933, "step": 18042 }, { "epoch": 0.5542653518876909, "grad_norm": 0.3755708932876587, "learning_rate": 1.647043286071231e-05, "loss": 0.56, "step": 18043 }, { "epoch": 0.55429607102264, "grad_norm": 0.34481075406074524, "learning_rate": 1.64700643767718e-05, "loss": 0.5747, "step": 18044 }, { "epoch": 0.5543267901575891, "grad_norm": 0.33457884192466736, "learning_rate": 1.6469695877720008e-05, "loss": 0.5925, "step": 18045 }, { "epoch": 0.5543575092925384, "grad_norm": 0.39829161763191223, "learning_rate": 1.646932736355778e-05, "loss": 0.5388, "step": 18046 }, { "epoch": 0.5543882284274875, "grad_norm": 0.38678058981895447, "learning_rate": 1.6468958834285995e-05, "loss": 0.6458, "step": 18047 }, { "epoch": 0.5544189475624366, "grad_norm": 0.3805958330631256, "learning_rate": 1.6468590289905503e-05, "loss": 0.6031, "step": 18048 }, { "epoch": 0.5544496666973858, "grad_norm": 0.3324504792690277, "learning_rate": 1.6468221730417165e-05, "loss": 0.6399, "step": 18049 }, { "epoch": 0.5544803858323349, "grad_norm": 0.31636732816696167, "learning_rate": 1.646785315582184e-05, "loss": 0.5028, "step": 18050 }, { "epoch": 0.5545111049672842, "grad_norm": 0.3241938054561615, "learning_rate": 1.6467484566120397e-05, "loss": 0.5594, "step": 18051 }, { "epoch": 0.5545418241022333, "grad_norm": 0.36225005984306335, "learning_rate": 1.6467115961313692e-05, "loss": 0.5298, "step": 18052 }, { "epoch": 0.5545725432371824, "grad_norm": 0.36613234877586365, "learning_rate": 1.6466747341402584e-05, "loss": 0.5231, "step": 18053 }, { "epoch": 0.5546032623721316, "grad_norm": 0.30728545784950256, "learning_rate": 1.6466378706387933e-05, "loss": 0.4806, "step": 18054 }, { "epoch": 0.5546339815070808, "grad_norm": 0.3686978816986084, "learning_rate": 1.646601005627061e-05, "loss": 0.5617, "step": 18055 }, { "epoch": 0.5546647006420299, "grad_norm": 0.3388842046260834, "learning_rate": 1.6465641391051462e-05, "loss": 0.5935, "step": 18056 }, { "epoch": 0.5546954197769791, "grad_norm": 0.39687085151672363, "learning_rate": 1.6465272710731363e-05, "loss": 0.6073, "step": 18057 }, { "epoch": 0.5547261389119282, "grad_norm": 0.3743288218975067, "learning_rate": 1.6464904015311164e-05, "loss": 0.5516, "step": 18058 }, { "epoch": 0.5547568580468774, "grad_norm": 0.3594745099544525, "learning_rate": 1.646453530479173e-05, "loss": 0.4765, "step": 18059 }, { "epoch": 0.5547875771818266, "grad_norm": 0.38174283504486084, "learning_rate": 1.6464166579173927e-05, "loss": 0.5998, "step": 18060 }, { "epoch": 0.5548182963167757, "grad_norm": 0.3640986979007721, "learning_rate": 1.6463797838458607e-05, "loss": 0.6256, "step": 18061 }, { "epoch": 0.5548490154517248, "grad_norm": 0.36679190397262573, "learning_rate": 1.646342908264664e-05, "loss": 0.6886, "step": 18062 }, { "epoch": 0.5548797345866741, "grad_norm": 0.37264320254325867, "learning_rate": 1.6463060311738878e-05, "loss": 0.5261, "step": 18063 }, { "epoch": 0.5549104537216232, "grad_norm": 0.4239799380302429, "learning_rate": 1.646269152573619e-05, "loss": 0.5879, "step": 18064 }, { "epoch": 0.5549411728565724, "grad_norm": 0.4692404568195343, "learning_rate": 1.6462322724639434e-05, "loss": 0.4979, "step": 18065 }, { "epoch": 0.5549718919915215, "grad_norm": 0.34827038645744324, "learning_rate": 1.6461953908449474e-05, "loss": 0.4961, "step": 18066 }, { "epoch": 0.5550026111264706, "grad_norm": 0.3667001724243164, "learning_rate": 1.6461585077167165e-05, "loss": 0.5956, "step": 18067 }, { "epoch": 0.5550333302614199, "grad_norm": 0.31638917326927185, "learning_rate": 1.6461216230793376e-05, "loss": 0.5447, "step": 18068 }, { "epoch": 0.555064049396369, "grad_norm": 0.3821481168270111, "learning_rate": 1.6460847369328962e-05, "loss": 0.6417, "step": 18069 }, { "epoch": 0.5550947685313181, "grad_norm": 0.3495928943157196, "learning_rate": 1.6460478492774793e-05, "loss": 0.5113, "step": 18070 }, { "epoch": 0.5551254876662673, "grad_norm": 0.35703957080841064, "learning_rate": 1.6460109601131724e-05, "loss": 0.5108, "step": 18071 }, { "epoch": 0.5551562068012165, "grad_norm": 0.361086905002594, "learning_rate": 1.6459740694400616e-05, "loss": 0.4977, "step": 18072 }, { "epoch": 0.5551869259361656, "grad_norm": 0.3744036555290222, "learning_rate": 1.645937177258233e-05, "loss": 0.5426, "step": 18073 }, { "epoch": 0.5552176450711148, "grad_norm": 0.36260131001472473, "learning_rate": 1.6459002835677734e-05, "loss": 0.4426, "step": 18074 }, { "epoch": 0.5552483642060639, "grad_norm": 0.3505665957927704, "learning_rate": 1.6458633883687684e-05, "loss": 0.5504, "step": 18075 }, { "epoch": 0.5552790833410132, "grad_norm": 0.3240140974521637, "learning_rate": 1.6458264916613044e-05, "loss": 0.6133, "step": 18076 }, { "epoch": 0.5553098024759623, "grad_norm": 0.3162482678890228, "learning_rate": 1.6457895934454676e-05, "loss": 0.5551, "step": 18077 }, { "epoch": 0.5553405216109114, "grad_norm": 0.3891441822052002, "learning_rate": 1.645752693721344e-05, "loss": 0.6449, "step": 18078 }, { "epoch": 0.5553712407458606, "grad_norm": 0.32309237122535706, "learning_rate": 1.6457157924890198e-05, "loss": 0.5754, "step": 18079 }, { "epoch": 0.5554019598808098, "grad_norm": 0.3514380156993866, "learning_rate": 1.6456788897485813e-05, "loss": 0.5328, "step": 18080 }, { "epoch": 0.5554326790157589, "grad_norm": 0.3570708632469177, "learning_rate": 1.6456419855001148e-05, "loss": 0.6132, "step": 18081 }, { "epoch": 0.5554633981507081, "grad_norm": 0.3780149221420288, "learning_rate": 1.6456050797437064e-05, "loss": 0.658, "step": 18082 }, { "epoch": 0.5554941172856572, "grad_norm": 0.32998964190483093, "learning_rate": 1.6455681724794425e-05, "loss": 0.4901, "step": 18083 }, { "epoch": 0.5555248364206063, "grad_norm": 0.3374941647052765, "learning_rate": 1.6455312637074087e-05, "loss": 0.519, "step": 18084 }, { "epoch": 0.5555555555555556, "grad_norm": 0.37378427386283875, "learning_rate": 1.6454943534276916e-05, "loss": 0.588, "step": 18085 }, { "epoch": 0.5555862746905047, "grad_norm": 0.3448309898376465, "learning_rate": 1.645457441640377e-05, "loss": 0.5139, "step": 18086 }, { "epoch": 0.5556169938254538, "grad_norm": 0.3666008412837982, "learning_rate": 1.6454205283455516e-05, "loss": 0.6277, "step": 18087 }, { "epoch": 0.555647712960403, "grad_norm": 0.37824520468711853, "learning_rate": 1.6453836135433016e-05, "loss": 0.5886, "step": 18088 }, { "epoch": 0.5556784320953522, "grad_norm": 0.3514859080314636, "learning_rate": 1.6453466972337135e-05, "loss": 0.4816, "step": 18089 }, { "epoch": 0.5557091512303014, "grad_norm": 0.414920449256897, "learning_rate": 1.6453097794168725e-05, "loss": 0.5651, "step": 18090 }, { "epoch": 0.5557398703652505, "grad_norm": 0.4304389953613281, "learning_rate": 1.645272860092866e-05, "loss": 0.5541, "step": 18091 }, { "epoch": 0.5557705895001996, "grad_norm": 0.3533284366130829, "learning_rate": 1.6452359392617792e-05, "loss": 0.4494, "step": 18092 }, { "epoch": 0.5558013086351489, "grad_norm": 0.4192480444908142, "learning_rate": 1.645199016923699e-05, "loss": 0.5975, "step": 18093 }, { "epoch": 0.555832027770098, "grad_norm": 0.4062107503414154, "learning_rate": 1.6451620930787116e-05, "loss": 0.5245, "step": 18094 }, { "epoch": 0.5558627469050471, "grad_norm": 0.3516167998313904, "learning_rate": 1.6451251677269027e-05, "loss": 0.5843, "step": 18095 }, { "epoch": 0.5558934660399963, "grad_norm": 0.3633265793323517, "learning_rate": 1.645088240868359e-05, "loss": 0.5449, "step": 18096 }, { "epoch": 0.5559241851749455, "grad_norm": 0.3878440260887146, "learning_rate": 1.645051312503167e-05, "loss": 0.5007, "step": 18097 }, { "epoch": 0.5559549043098946, "grad_norm": 0.33819565176963806, "learning_rate": 1.6450143826314123e-05, "loss": 0.6283, "step": 18098 }, { "epoch": 0.5559856234448438, "grad_norm": 0.4877866804599762, "learning_rate": 1.6449774512531815e-05, "loss": 0.6247, "step": 18099 }, { "epoch": 0.5560163425797929, "grad_norm": 0.37979239225387573, "learning_rate": 1.6449405183685606e-05, "loss": 0.613, "step": 18100 }, { "epoch": 0.5560470617147422, "grad_norm": 0.34859177470207214, "learning_rate": 1.644903583977637e-05, "loss": 0.5699, "step": 18101 }, { "epoch": 0.5560777808496913, "grad_norm": 0.3174418807029724, "learning_rate": 1.644866648080495e-05, "loss": 0.6464, "step": 18102 }, { "epoch": 0.5561084999846404, "grad_norm": 0.3643397390842438, "learning_rate": 1.6448297106772225e-05, "loss": 0.5244, "step": 18103 }, { "epoch": 0.5561392191195896, "grad_norm": 0.33690184354782104, "learning_rate": 1.644792771767905e-05, "loss": 0.6037, "step": 18104 }, { "epoch": 0.5561699382545388, "grad_norm": 0.45900076627731323, "learning_rate": 1.6447558313526288e-05, "loss": 0.5602, "step": 18105 }, { "epoch": 0.5562006573894879, "grad_norm": 0.33045345544815063, "learning_rate": 1.6447188894314802e-05, "loss": 0.5654, "step": 18106 }, { "epoch": 0.5562313765244371, "grad_norm": 0.37989968061447144, "learning_rate": 1.644681946004546e-05, "loss": 0.6272, "step": 18107 }, { "epoch": 0.5562620956593862, "grad_norm": 0.3749091327190399, "learning_rate": 1.6446450010719116e-05, "loss": 0.5666, "step": 18108 }, { "epoch": 0.5562928147943353, "grad_norm": 0.3636021316051483, "learning_rate": 1.644608054633664e-05, "loss": 0.5309, "step": 18109 }, { "epoch": 0.5563235339292846, "grad_norm": 0.34137076139450073, "learning_rate": 1.6445711066898897e-05, "loss": 0.4802, "step": 18110 }, { "epoch": 0.5563542530642337, "grad_norm": 0.3303718566894531, "learning_rate": 1.6445341572406745e-05, "loss": 0.5244, "step": 18111 }, { "epoch": 0.5563849721991829, "grad_norm": 0.3544222414493561, "learning_rate": 1.6444972062861042e-05, "loss": 0.5407, "step": 18112 }, { "epoch": 0.556415691334132, "grad_norm": 0.3696824908256531, "learning_rate": 1.644460253826266e-05, "loss": 0.6244, "step": 18113 }, { "epoch": 0.5564464104690812, "grad_norm": 0.3947255313396454, "learning_rate": 1.644423299861246e-05, "loss": 0.6131, "step": 18114 }, { "epoch": 0.5564771296040304, "grad_norm": 0.3669195771217346, "learning_rate": 1.64438634439113e-05, "loss": 0.5853, "step": 18115 }, { "epoch": 0.5565078487389795, "grad_norm": 0.36843934655189514, "learning_rate": 1.6443493874160052e-05, "loss": 0.5899, "step": 18116 }, { "epoch": 0.5565385678739286, "grad_norm": 0.3622927665710449, "learning_rate": 1.6443124289359568e-05, "loss": 0.5526, "step": 18117 }, { "epoch": 0.5565692870088779, "grad_norm": 0.36338430643081665, "learning_rate": 1.6442754689510723e-05, "loss": 0.5883, "step": 18118 }, { "epoch": 0.556600006143827, "grad_norm": 0.34312906861305237, "learning_rate": 1.644238507461437e-05, "loss": 0.6484, "step": 18119 }, { "epoch": 0.5566307252787761, "grad_norm": 0.37810587882995605, "learning_rate": 1.644201544467138e-05, "loss": 0.5762, "step": 18120 }, { "epoch": 0.5566614444137253, "grad_norm": 0.34684544801712036, "learning_rate": 1.644164579968261e-05, "loss": 0.5435, "step": 18121 }, { "epoch": 0.5566921635486745, "grad_norm": 0.36880791187286377, "learning_rate": 1.644127613964893e-05, "loss": 0.5331, "step": 18122 }, { "epoch": 0.5567228826836236, "grad_norm": 0.3571726083755493, "learning_rate": 1.6440906464571195e-05, "loss": 0.5974, "step": 18123 }, { "epoch": 0.5567536018185728, "grad_norm": 0.3532472848892212, "learning_rate": 1.644053677445028e-05, "loss": 0.5871, "step": 18124 }, { "epoch": 0.5567843209535219, "grad_norm": 0.3901354670524597, "learning_rate": 1.6440167069287033e-05, "loss": 0.563, "step": 18125 }, { "epoch": 0.5568150400884712, "grad_norm": 0.3383614718914032, "learning_rate": 1.6439797349082332e-05, "loss": 0.5223, "step": 18126 }, { "epoch": 0.5568457592234203, "grad_norm": 0.3414274752140045, "learning_rate": 1.6439427613837033e-05, "loss": 0.4746, "step": 18127 }, { "epoch": 0.5568764783583694, "grad_norm": 0.3449627757072449, "learning_rate": 1.6439057863552e-05, "loss": 0.6166, "step": 18128 }, { "epoch": 0.5569071974933186, "grad_norm": 0.36452627182006836, "learning_rate": 1.64386880982281e-05, "loss": 0.582, "step": 18129 }, { "epoch": 0.5569379166282677, "grad_norm": 0.34187355637550354, "learning_rate": 1.6438318317866194e-05, "loss": 0.5516, "step": 18130 }, { "epoch": 0.5569686357632169, "grad_norm": 0.34251707792282104, "learning_rate": 1.6437948522467146e-05, "loss": 0.517, "step": 18131 }, { "epoch": 0.5569993548981661, "grad_norm": 0.3362055718898773, "learning_rate": 1.6437578712031816e-05, "loss": 0.5141, "step": 18132 }, { "epoch": 0.5570300740331152, "grad_norm": 0.3587389886379242, "learning_rate": 1.6437208886561072e-05, "loss": 0.6346, "step": 18133 }, { "epoch": 0.5570607931680643, "grad_norm": 0.3970523178577423, "learning_rate": 1.6436839046055778e-05, "loss": 0.6119, "step": 18134 }, { "epoch": 0.5570915123030136, "grad_norm": 0.35949409008026123, "learning_rate": 1.64364691905168e-05, "loss": 0.5855, "step": 18135 }, { "epoch": 0.5571222314379627, "grad_norm": 0.36410772800445557, "learning_rate": 1.6436099319944996e-05, "loss": 0.6194, "step": 18136 }, { "epoch": 0.5571529505729119, "grad_norm": 0.32069629430770874, "learning_rate": 1.6435729434341235e-05, "loss": 0.5219, "step": 18137 }, { "epoch": 0.557183669707861, "grad_norm": 0.36733824014663696, "learning_rate": 1.6435359533706376e-05, "loss": 0.5605, "step": 18138 }, { "epoch": 0.5572143888428102, "grad_norm": 0.328702449798584, "learning_rate": 1.6434989618041287e-05, "loss": 0.5886, "step": 18139 }, { "epoch": 0.5572451079777594, "grad_norm": 0.3309843838214874, "learning_rate": 1.6434619687346825e-05, "loss": 0.57, "step": 18140 }, { "epoch": 0.5572758271127085, "grad_norm": 0.3251156210899353, "learning_rate": 1.6434249741623864e-05, "loss": 0.5651, "step": 18141 }, { "epoch": 0.5573065462476576, "grad_norm": 0.3991723656654358, "learning_rate": 1.6433879780873265e-05, "loss": 0.5802, "step": 18142 }, { "epoch": 0.5573372653826069, "grad_norm": 0.3539898693561554, "learning_rate": 1.6433509805095888e-05, "loss": 0.4768, "step": 18143 }, { "epoch": 0.557367984517556, "grad_norm": 0.36886066198349, "learning_rate": 1.6433139814292597e-05, "loss": 0.6009, "step": 18144 }, { "epoch": 0.5573987036525051, "grad_norm": 0.3185298442840576, "learning_rate": 1.6432769808464262e-05, "loss": 0.6039, "step": 18145 }, { "epoch": 0.5574294227874543, "grad_norm": 0.37571555376052856, "learning_rate": 1.6432399787611742e-05, "loss": 0.5296, "step": 18146 }, { "epoch": 0.5574601419224035, "grad_norm": 0.38762977719306946, "learning_rate": 1.6432029751735907e-05, "loss": 0.6063, "step": 18147 }, { "epoch": 0.5574908610573526, "grad_norm": 0.3807118833065033, "learning_rate": 1.6431659700837614e-05, "loss": 0.6131, "step": 18148 }, { "epoch": 0.5575215801923018, "grad_norm": 0.344454824924469, "learning_rate": 1.643128963491773e-05, "loss": 0.5795, "step": 18149 }, { "epoch": 0.5575522993272509, "grad_norm": 0.36965569853782654, "learning_rate": 1.6430919553977123e-05, "loss": 0.466, "step": 18150 }, { "epoch": 0.5575830184622002, "grad_norm": 0.3372235596179962, "learning_rate": 1.6430549458016653e-05, "loss": 0.5424, "step": 18151 }, { "epoch": 0.5576137375971493, "grad_norm": 0.34868597984313965, "learning_rate": 1.6430179347037184e-05, "loss": 0.5525, "step": 18152 }, { "epoch": 0.5576444567320984, "grad_norm": 0.3582879602909088, "learning_rate": 1.6429809221039585e-05, "loss": 0.5281, "step": 18153 }, { "epoch": 0.5576751758670476, "grad_norm": 0.4008615016937256, "learning_rate": 1.6429439080024714e-05, "loss": 0.5341, "step": 18154 }, { "epoch": 0.5577058950019967, "grad_norm": 0.35551801323890686, "learning_rate": 1.6429068923993443e-05, "loss": 0.5328, "step": 18155 }, { "epoch": 0.5577366141369459, "grad_norm": 0.42726704478263855, "learning_rate": 1.6428698752946627e-05, "loss": 0.5126, "step": 18156 }, { "epoch": 0.5577673332718951, "grad_norm": 0.4363044202327728, "learning_rate": 1.642832856688514e-05, "loss": 0.4937, "step": 18157 }, { "epoch": 0.5577980524068442, "grad_norm": 0.3768385350704193, "learning_rate": 1.642795836580984e-05, "loss": 0.5943, "step": 18158 }, { "epoch": 0.5578287715417933, "grad_norm": 0.3888286054134369, "learning_rate": 1.6427588149721597e-05, "loss": 0.6453, "step": 18159 }, { "epoch": 0.5578594906767426, "grad_norm": 0.3555689752101898, "learning_rate": 1.6427217918621276e-05, "loss": 0.4996, "step": 18160 }, { "epoch": 0.5578902098116917, "grad_norm": 0.4131220579147339, "learning_rate": 1.6426847672509732e-05, "loss": 0.5601, "step": 18161 }, { "epoch": 0.5579209289466409, "grad_norm": 0.3446046710014343, "learning_rate": 1.642647741138784e-05, "loss": 0.6065, "step": 18162 }, { "epoch": 0.55795164808159, "grad_norm": 0.38040891289711, "learning_rate": 1.642610713525646e-05, "loss": 0.6107, "step": 18163 }, { "epoch": 0.5579823672165392, "grad_norm": 0.39905956387519836, "learning_rate": 1.6425736844116456e-05, "loss": 0.4956, "step": 18164 }, { "epoch": 0.5580130863514884, "grad_norm": 0.3566291928291321, "learning_rate": 1.6425366537968698e-05, "loss": 0.5593, "step": 18165 }, { "epoch": 0.5580438054864375, "grad_norm": 0.41945379972457886, "learning_rate": 1.642499621681405e-05, "loss": 0.5395, "step": 18166 }, { "epoch": 0.5580745246213866, "grad_norm": 0.4910784661769867, "learning_rate": 1.6424625880653373e-05, "loss": 0.6251, "step": 18167 }, { "epoch": 0.5581052437563359, "grad_norm": 0.3586128354072571, "learning_rate": 1.642425552948753e-05, "loss": 0.5674, "step": 18168 }, { "epoch": 0.558135962891285, "grad_norm": 0.34144243597984314, "learning_rate": 1.642388516331739e-05, "loss": 0.5413, "step": 18169 }, { "epoch": 0.5581666820262341, "grad_norm": 0.33174246549606323, "learning_rate": 1.642351478214382e-05, "loss": 0.5836, "step": 18170 }, { "epoch": 0.5581974011611833, "grad_norm": 0.3666834235191345, "learning_rate": 1.642314438596768e-05, "loss": 0.631, "step": 18171 }, { "epoch": 0.5582281202961324, "grad_norm": 0.3504158556461334, "learning_rate": 1.6422773974789843e-05, "loss": 0.6092, "step": 18172 }, { "epoch": 0.5582588394310816, "grad_norm": 0.3692774176597595, "learning_rate": 1.6422403548611167e-05, "loss": 0.5086, "step": 18173 }, { "epoch": 0.5582895585660308, "grad_norm": 0.39314061403274536, "learning_rate": 1.6422033107432516e-05, "loss": 0.5678, "step": 18174 }, { "epoch": 0.5583202777009799, "grad_norm": 0.3757001757621765, "learning_rate": 1.6421662651254758e-05, "loss": 0.6384, "step": 18175 }, { "epoch": 0.5583509968359291, "grad_norm": 0.38963937759399414, "learning_rate": 1.642129218007876e-05, "loss": 0.6559, "step": 18176 }, { "epoch": 0.5583817159708783, "grad_norm": 0.35618260502815247, "learning_rate": 1.6420921693905387e-05, "loss": 0.5789, "step": 18177 }, { "epoch": 0.5584124351058274, "grad_norm": 0.3812629282474518, "learning_rate": 1.6420551192735502e-05, "loss": 0.6191, "step": 18178 }, { "epoch": 0.5584431542407766, "grad_norm": 0.3535442352294922, "learning_rate": 1.642018067656997e-05, "loss": 0.5636, "step": 18179 }, { "epoch": 0.5584738733757257, "grad_norm": 0.41114184260368347, "learning_rate": 1.6419810145409657e-05, "loss": 0.6276, "step": 18180 }, { "epoch": 0.5585045925106749, "grad_norm": 0.37413156032562256, "learning_rate": 1.641943959925543e-05, "loss": 0.5398, "step": 18181 }, { "epoch": 0.5585353116456241, "grad_norm": 0.3655870258808136, "learning_rate": 1.6419069038108157e-05, "loss": 0.5466, "step": 18182 }, { "epoch": 0.5585660307805732, "grad_norm": 0.3902820348739624, "learning_rate": 1.6418698461968697e-05, "loss": 0.5538, "step": 18183 }, { "epoch": 0.5585967499155223, "grad_norm": 0.4353218376636505, "learning_rate": 1.6418327870837916e-05, "loss": 0.6711, "step": 18184 }, { "epoch": 0.5586274690504716, "grad_norm": 0.36239686608314514, "learning_rate": 1.6417957264716685e-05, "loss": 0.5433, "step": 18185 }, { "epoch": 0.5586581881854207, "grad_norm": 0.3538099229335785, "learning_rate": 1.6417586643605868e-05, "loss": 0.5504, "step": 18186 }, { "epoch": 0.5586889073203699, "grad_norm": 0.323838472366333, "learning_rate": 1.6417216007506324e-05, "loss": 0.5601, "step": 18187 }, { "epoch": 0.558719626455319, "grad_norm": 0.36410993337631226, "learning_rate": 1.641684535641893e-05, "loss": 0.5533, "step": 18188 }, { "epoch": 0.5587503455902681, "grad_norm": 0.35782501101493835, "learning_rate": 1.6416474690344544e-05, "loss": 0.6808, "step": 18189 }, { "epoch": 0.5587810647252174, "grad_norm": 0.5148554444313049, "learning_rate": 1.641610400928403e-05, "loss": 0.5293, "step": 18190 }, { "epoch": 0.5588117838601665, "grad_norm": 0.3458652198314667, "learning_rate": 1.641573331323826e-05, "loss": 0.5483, "step": 18191 }, { "epoch": 0.5588425029951156, "grad_norm": 0.3390131890773773, "learning_rate": 1.6415362602208094e-05, "loss": 0.5424, "step": 18192 }, { "epoch": 0.5588732221300649, "grad_norm": 0.347152441740036, "learning_rate": 1.64149918761944e-05, "loss": 0.5581, "step": 18193 }, { "epoch": 0.558903941265014, "grad_norm": 0.39561134576797485, "learning_rate": 1.641462113519805e-05, "loss": 0.5492, "step": 18194 }, { "epoch": 0.5589346603999631, "grad_norm": 0.3736646771430969, "learning_rate": 1.64142503792199e-05, "loss": 0.599, "step": 18195 }, { "epoch": 0.5589653795349123, "grad_norm": 0.40045326948165894, "learning_rate": 1.6413879608260822e-05, "loss": 0.5662, "step": 18196 }, { "epoch": 0.5589960986698614, "grad_norm": 0.335266649723053, "learning_rate": 1.641350882232168e-05, "loss": 0.5537, "step": 18197 }, { "epoch": 0.5590268178048107, "grad_norm": 0.3653803765773773, "learning_rate": 1.6413138021403338e-05, "loss": 0.5705, "step": 18198 }, { "epoch": 0.5590575369397598, "grad_norm": 0.32527318596839905, "learning_rate": 1.641276720550667e-05, "loss": 0.55, "step": 18199 }, { "epoch": 0.5590882560747089, "grad_norm": 0.34172680974006653, "learning_rate": 1.641239637463253e-05, "loss": 0.5657, "step": 18200 }, { "epoch": 0.5591189752096581, "grad_norm": 0.4386487901210785, "learning_rate": 1.641202552878179e-05, "loss": 0.5677, "step": 18201 }, { "epoch": 0.5591496943446073, "grad_norm": 0.36914634704589844, "learning_rate": 1.641165466795532e-05, "loss": 0.6215, "step": 18202 }, { "epoch": 0.5591804134795564, "grad_norm": 0.3624131381511688, "learning_rate": 1.6411283792153984e-05, "loss": 0.5296, "step": 18203 }, { "epoch": 0.5592111326145056, "grad_norm": 0.3696311116218567, "learning_rate": 1.6410912901378644e-05, "loss": 0.6148, "step": 18204 }, { "epoch": 0.5592418517494547, "grad_norm": 0.37843847274780273, "learning_rate": 1.641054199563017e-05, "loss": 0.5699, "step": 18205 }, { "epoch": 0.5592725708844039, "grad_norm": 0.3409684896469116, "learning_rate": 1.641017107490943e-05, "loss": 0.4868, "step": 18206 }, { "epoch": 0.5593032900193531, "grad_norm": 0.3667132258415222, "learning_rate": 1.6409800139217285e-05, "loss": 0.5583, "step": 18207 }, { "epoch": 0.5593340091543022, "grad_norm": 0.3611859977245331, "learning_rate": 1.6409429188554605e-05, "loss": 0.5325, "step": 18208 }, { "epoch": 0.5593647282892513, "grad_norm": 0.36242538690567017, "learning_rate": 1.6409058222922253e-05, "loss": 0.5743, "step": 18209 }, { "epoch": 0.5593954474242006, "grad_norm": 0.3478143811225891, "learning_rate": 1.6408687242321102e-05, "loss": 0.5363, "step": 18210 }, { "epoch": 0.5594261665591497, "grad_norm": 0.3512100875377655, "learning_rate": 1.640831624675201e-05, "loss": 0.4839, "step": 18211 }, { "epoch": 0.5594568856940989, "grad_norm": 0.32411709427833557, "learning_rate": 1.640794523621585e-05, "loss": 0.4938, "step": 18212 }, { "epoch": 0.559487604829048, "grad_norm": 0.3498245179653168, "learning_rate": 1.6407574210713487e-05, "loss": 0.5151, "step": 18213 }, { "epoch": 0.5595183239639971, "grad_norm": 0.4278414845466614, "learning_rate": 1.6407203170245788e-05, "loss": 0.5924, "step": 18214 }, { "epoch": 0.5595490430989464, "grad_norm": 0.3493061065673828, "learning_rate": 1.6406832114813616e-05, "loss": 0.5136, "step": 18215 }, { "epoch": 0.5595797622338955, "grad_norm": 0.3669366240501404, "learning_rate": 1.640646104441784e-05, "loss": 0.6091, "step": 18216 }, { "epoch": 0.5596104813688446, "grad_norm": 0.3359493911266327, "learning_rate": 1.6406089959059328e-05, "loss": 0.5568, "step": 18217 }, { "epoch": 0.5596412005037938, "grad_norm": 0.46759530901908875, "learning_rate": 1.640571885873895e-05, "loss": 0.5256, "step": 18218 }, { "epoch": 0.559671919638743, "grad_norm": 0.5210440158843994, "learning_rate": 1.640534774345756e-05, "loss": 0.4767, "step": 18219 }, { "epoch": 0.5597026387736921, "grad_norm": 0.3605891466140747, "learning_rate": 1.640497661321604e-05, "loss": 0.5753, "step": 18220 }, { "epoch": 0.5597333579086413, "grad_norm": 0.33855172991752625, "learning_rate": 1.6404605468015244e-05, "loss": 0.5327, "step": 18221 }, { "epoch": 0.5597640770435904, "grad_norm": 0.32920607924461365, "learning_rate": 1.640423430785605e-05, "loss": 0.5929, "step": 18222 }, { "epoch": 0.5597947961785397, "grad_norm": 0.39223313331604004, "learning_rate": 1.6403863132739315e-05, "loss": 0.6098, "step": 18223 }, { "epoch": 0.5598255153134888, "grad_norm": 1.3575190305709839, "learning_rate": 1.6403491942665913e-05, "loss": 0.6937, "step": 18224 }, { "epoch": 0.5598562344484379, "grad_norm": 0.3290095925331116, "learning_rate": 1.6403120737636708e-05, "loss": 0.5255, "step": 18225 }, { "epoch": 0.5598869535833871, "grad_norm": 0.42775648832321167, "learning_rate": 1.6402749517652567e-05, "loss": 0.528, "step": 18226 }, { "epoch": 0.5599176727183363, "grad_norm": 0.3124074637889862, "learning_rate": 1.6402378282714357e-05, "loss": 0.5176, "step": 18227 }, { "epoch": 0.5599483918532854, "grad_norm": 0.37547728419303894, "learning_rate": 1.6402007032822946e-05, "loss": 0.5624, "step": 18228 }, { "epoch": 0.5599791109882346, "grad_norm": 0.4067326784133911, "learning_rate": 1.6401635767979198e-05, "loss": 0.5871, "step": 18229 }, { "epoch": 0.5600098301231837, "grad_norm": 0.32499584555625916, "learning_rate": 1.6401264488183986e-05, "loss": 0.4691, "step": 18230 }, { "epoch": 0.5600405492581328, "grad_norm": 0.33055633306503296, "learning_rate": 1.640089319343817e-05, "loss": 0.5161, "step": 18231 }, { "epoch": 0.5600712683930821, "grad_norm": 0.33913561701774597, "learning_rate": 1.6400521883742624e-05, "loss": 0.5544, "step": 18232 }, { "epoch": 0.5601019875280312, "grad_norm": 0.3364281952381134, "learning_rate": 1.6400150559098208e-05, "loss": 0.4905, "step": 18233 }, { "epoch": 0.5601327066629803, "grad_norm": 0.32342082262039185, "learning_rate": 1.63997792195058e-05, "loss": 0.5809, "step": 18234 }, { "epoch": 0.5601634257979295, "grad_norm": 0.34555554389953613, "learning_rate": 1.6399407864966254e-05, "loss": 0.5726, "step": 18235 }, { "epoch": 0.5601941449328787, "grad_norm": 0.39152103662490845, "learning_rate": 1.639903649548045e-05, "loss": 0.5785, "step": 18236 }, { "epoch": 0.5602248640678279, "grad_norm": 0.3774889409542084, "learning_rate": 1.639866511104925e-05, "loss": 0.4759, "step": 18237 }, { "epoch": 0.560255583202777, "grad_norm": 0.34826338291168213, "learning_rate": 1.6398293711673514e-05, "loss": 0.5455, "step": 18238 }, { "epoch": 0.5602863023377261, "grad_norm": 0.3539230227470398, "learning_rate": 1.639792229735412e-05, "loss": 0.6053, "step": 18239 }, { "epoch": 0.5603170214726754, "grad_norm": 0.3673689067363739, "learning_rate": 1.639755086809193e-05, "loss": 0.5566, "step": 18240 }, { "epoch": 0.5603477406076245, "grad_norm": 0.34748345613479614, "learning_rate": 1.639717942388782e-05, "loss": 0.5952, "step": 18241 }, { "epoch": 0.5603784597425736, "grad_norm": 0.323214590549469, "learning_rate": 1.6396807964742644e-05, "loss": 0.537, "step": 18242 }, { "epoch": 0.5604091788775228, "grad_norm": 0.3709036409854889, "learning_rate": 1.6396436490657275e-05, "loss": 0.611, "step": 18243 }, { "epoch": 0.560439898012472, "grad_norm": 0.6865405440330505, "learning_rate": 1.6396065001632583e-05, "loss": 0.5612, "step": 18244 }, { "epoch": 0.5604706171474211, "grad_norm": 0.3535831868648529, "learning_rate": 1.6395693497669438e-05, "loss": 0.6486, "step": 18245 }, { "epoch": 0.5605013362823703, "grad_norm": 0.36926111578941345, "learning_rate": 1.63953219787687e-05, "loss": 0.617, "step": 18246 }, { "epoch": 0.5605320554173194, "grad_norm": 0.3667214810848236, "learning_rate": 1.6394950444931243e-05, "loss": 0.6052, "step": 18247 }, { "epoch": 0.5605627745522687, "grad_norm": 0.3207518458366394, "learning_rate": 1.6394578896157935e-05, "loss": 0.6041, "step": 18248 }, { "epoch": 0.5605934936872178, "grad_norm": 0.39291703701019287, "learning_rate": 1.6394207332449636e-05, "loss": 0.5558, "step": 18249 }, { "epoch": 0.5606242128221669, "grad_norm": 0.38127240538597107, "learning_rate": 1.6393835753807225e-05, "loss": 0.4745, "step": 18250 }, { "epoch": 0.5606549319571161, "grad_norm": 0.3397308886051178, "learning_rate": 1.639346416023156e-05, "loss": 0.6014, "step": 18251 }, { "epoch": 0.5606856510920653, "grad_norm": 0.35461941361427307, "learning_rate": 1.639309255172351e-05, "loss": 0.5679, "step": 18252 }, { "epoch": 0.5607163702270144, "grad_norm": 0.37306392192840576, "learning_rate": 1.639272092828395e-05, "loss": 0.5295, "step": 18253 }, { "epoch": 0.5607470893619636, "grad_norm": 0.34354424476623535, "learning_rate": 1.6392349289913746e-05, "loss": 0.5114, "step": 18254 }, { "epoch": 0.5607778084969127, "grad_norm": 0.3535819351673126, "learning_rate": 1.6391977636613763e-05, "loss": 0.5737, "step": 18255 }, { "epoch": 0.5608085276318618, "grad_norm": 0.3559393584728241, "learning_rate": 1.639160596838487e-05, "loss": 0.6027, "step": 18256 }, { "epoch": 0.5608392467668111, "grad_norm": 0.39871057868003845, "learning_rate": 1.639123428522793e-05, "loss": 0.5607, "step": 18257 }, { "epoch": 0.5608699659017602, "grad_norm": 0.36241641640663147, "learning_rate": 1.639086258714382e-05, "loss": 0.4846, "step": 18258 }, { "epoch": 0.5609006850367093, "grad_norm": 0.35196876525878906, "learning_rate": 1.63904908741334e-05, "loss": 0.6541, "step": 18259 }, { "epoch": 0.5609314041716585, "grad_norm": 0.39547646045684814, "learning_rate": 1.6390119146197547e-05, "loss": 0.5826, "step": 18260 }, { "epoch": 0.5609621233066077, "grad_norm": 0.3569241166114807, "learning_rate": 1.6389747403337125e-05, "loss": 0.5899, "step": 18261 }, { "epoch": 0.5609928424415569, "grad_norm": 0.34032267332077026, "learning_rate": 1.6389375645553e-05, "loss": 0.5511, "step": 18262 }, { "epoch": 0.561023561576506, "grad_norm": 0.33130836486816406, "learning_rate": 1.6389003872846043e-05, "loss": 0.533, "step": 18263 }, { "epoch": 0.5610542807114551, "grad_norm": 0.3252285420894623, "learning_rate": 1.6388632085217117e-05, "loss": 0.5985, "step": 18264 }, { "epoch": 0.5610849998464044, "grad_norm": 0.49088340997695923, "learning_rate": 1.63882602826671e-05, "loss": 0.5152, "step": 18265 }, { "epoch": 0.5611157189813535, "grad_norm": 0.34332042932510376, "learning_rate": 1.638788846519685e-05, "loss": 0.5864, "step": 18266 }, { "epoch": 0.5611464381163026, "grad_norm": 0.35154619812965393, "learning_rate": 1.6387516632807245e-05, "loss": 0.6297, "step": 18267 }, { "epoch": 0.5611771572512518, "grad_norm": 0.32828062772750854, "learning_rate": 1.6387144785499146e-05, "loss": 0.5539, "step": 18268 }, { "epoch": 0.561207876386201, "grad_norm": 0.37525415420532227, "learning_rate": 1.6386772923273424e-05, "loss": 0.5017, "step": 18269 }, { "epoch": 0.5612385955211501, "grad_norm": 0.3517823815345764, "learning_rate": 1.6386401046130948e-05, "loss": 0.5508, "step": 18270 }, { "epoch": 0.5612693146560993, "grad_norm": 0.33707189559936523, "learning_rate": 1.6386029154072587e-05, "loss": 0.5631, "step": 18271 }, { "epoch": 0.5613000337910484, "grad_norm": 0.3309095501899719, "learning_rate": 1.638565724709921e-05, "loss": 0.4992, "step": 18272 }, { "epoch": 0.5613307529259977, "grad_norm": 0.32127001881599426, "learning_rate": 1.6385285325211683e-05, "loss": 0.4751, "step": 18273 }, { "epoch": 0.5613614720609468, "grad_norm": 0.5863350033760071, "learning_rate": 1.6384913388410873e-05, "loss": 0.6004, "step": 18274 }, { "epoch": 0.5613921911958959, "grad_norm": 0.41985172033309937, "learning_rate": 1.638454143669766e-05, "loss": 0.5394, "step": 18275 }, { "epoch": 0.5614229103308451, "grad_norm": 0.4216601550579071, "learning_rate": 1.63841694700729e-05, "loss": 0.5651, "step": 18276 }, { "epoch": 0.5614536294657942, "grad_norm": 0.5192256569862366, "learning_rate": 1.6383797488537466e-05, "loss": 0.5273, "step": 18277 }, { "epoch": 0.5614843486007434, "grad_norm": 0.3898274898529053, "learning_rate": 1.6383425492092225e-05, "loss": 0.5111, "step": 18278 }, { "epoch": 0.5615150677356926, "grad_norm": 0.3738938868045807, "learning_rate": 1.6383053480738053e-05, "loss": 0.5814, "step": 18279 }, { "epoch": 0.5615457868706417, "grad_norm": 0.40917128324508667, "learning_rate": 1.6382681454475812e-05, "loss": 0.574, "step": 18280 }, { "epoch": 0.5615765060055908, "grad_norm": 0.38891077041625977, "learning_rate": 1.638230941330637e-05, "loss": 0.5714, "step": 18281 }, { "epoch": 0.5616072251405401, "grad_norm": 0.3454788327217102, "learning_rate": 1.63819373572306e-05, "loss": 0.4987, "step": 18282 }, { "epoch": 0.5616379442754892, "grad_norm": 0.3409501910209656, "learning_rate": 1.6381565286249374e-05, "loss": 0.4977, "step": 18283 }, { "epoch": 0.5616686634104383, "grad_norm": 0.36741289496421814, "learning_rate": 1.638119320036355e-05, "loss": 0.5559, "step": 18284 }, { "epoch": 0.5616993825453875, "grad_norm": 0.4043771028518677, "learning_rate": 1.6380821099574003e-05, "loss": 0.6091, "step": 18285 }, { "epoch": 0.5617301016803367, "grad_norm": 0.3646406829357147, "learning_rate": 1.638044898388161e-05, "loss": 0.5962, "step": 18286 }, { "epoch": 0.5617608208152859, "grad_norm": 0.33014020323753357, "learning_rate": 1.638007685328723e-05, "loss": 0.5619, "step": 18287 }, { "epoch": 0.561791539950235, "grad_norm": 0.36028778553009033, "learning_rate": 1.637970470779173e-05, "loss": 0.6179, "step": 18288 }, { "epoch": 0.5618222590851841, "grad_norm": 0.3770008087158203, "learning_rate": 1.637933254739599e-05, "loss": 0.5381, "step": 18289 }, { "epoch": 0.5618529782201334, "grad_norm": 0.34158238768577576, "learning_rate": 1.6378960372100873e-05, "loss": 0.5514, "step": 18290 }, { "epoch": 0.5618836973550825, "grad_norm": 0.37825217843055725, "learning_rate": 1.6378588181907244e-05, "loss": 0.566, "step": 18291 }, { "epoch": 0.5619144164900316, "grad_norm": 0.3243420720100403, "learning_rate": 1.6378215976815982e-05, "loss": 0.5483, "step": 18292 }, { "epoch": 0.5619451356249808, "grad_norm": 0.3592986464500427, "learning_rate": 1.637784375682795e-05, "loss": 0.5846, "step": 18293 }, { "epoch": 0.56197585475993, "grad_norm": 0.6025839447975159, "learning_rate": 1.6377471521944014e-05, "loss": 0.4927, "step": 18294 }, { "epoch": 0.5620065738948791, "grad_norm": 0.4292632043361664, "learning_rate": 1.6377099272165052e-05, "loss": 0.5326, "step": 18295 }, { "epoch": 0.5620372930298283, "grad_norm": 0.804132342338562, "learning_rate": 1.6376727007491928e-05, "loss": 0.497, "step": 18296 }, { "epoch": 0.5620680121647774, "grad_norm": 0.38239943981170654, "learning_rate": 1.6376354727925514e-05, "loss": 0.5781, "step": 18297 }, { "epoch": 0.5620987312997267, "grad_norm": 0.36316099762916565, "learning_rate": 1.6375982433466676e-05, "loss": 0.6191, "step": 18298 }, { "epoch": 0.5621294504346758, "grad_norm": 0.3502183258533478, "learning_rate": 1.6375610124116287e-05, "loss": 0.4801, "step": 18299 }, { "epoch": 0.5621601695696249, "grad_norm": 0.35420987010002136, "learning_rate": 1.6375237799875218e-05, "loss": 0.5412, "step": 18300 }, { "epoch": 0.5621908887045741, "grad_norm": 0.3302285373210907, "learning_rate": 1.6374865460744333e-05, "loss": 0.5919, "step": 18301 }, { "epoch": 0.5622216078395232, "grad_norm": 0.4001822769641876, "learning_rate": 1.6374493106724503e-05, "loss": 0.5605, "step": 18302 }, { "epoch": 0.5622523269744724, "grad_norm": 0.3733326196670532, "learning_rate": 1.63741207378166e-05, "loss": 0.5395, "step": 18303 }, { "epoch": 0.5622830461094216, "grad_norm": 0.35609152913093567, "learning_rate": 1.6373748354021496e-05, "loss": 0.5867, "step": 18304 }, { "epoch": 0.5623137652443707, "grad_norm": 0.3737259805202484, "learning_rate": 1.6373375955340055e-05, "loss": 0.5433, "step": 18305 }, { "epoch": 0.5623444843793198, "grad_norm": 0.43974149227142334, "learning_rate": 1.6373003541773148e-05, "loss": 0.5299, "step": 18306 }, { "epoch": 0.5623752035142691, "grad_norm": 0.39898762106895447, "learning_rate": 1.637263111332165e-05, "loss": 0.5153, "step": 18307 }, { "epoch": 0.5624059226492182, "grad_norm": 0.37221604585647583, "learning_rate": 1.6372258669986423e-05, "loss": 0.5965, "step": 18308 }, { "epoch": 0.5624366417841674, "grad_norm": 0.36592745780944824, "learning_rate": 1.6371886211768344e-05, "loss": 0.4937, "step": 18309 }, { "epoch": 0.5624673609191165, "grad_norm": 0.38017022609710693, "learning_rate": 1.6371513738668278e-05, "loss": 0.5082, "step": 18310 }, { "epoch": 0.5624980800540657, "grad_norm": 0.3409700393676758, "learning_rate": 1.6371141250687097e-05, "loss": 0.5166, "step": 18311 }, { "epoch": 0.5625287991890149, "grad_norm": 0.32671815156936646, "learning_rate": 1.637076874782567e-05, "loss": 0.5226, "step": 18312 }, { "epoch": 0.562559518323964, "grad_norm": 0.3445340394973755, "learning_rate": 1.637039623008487e-05, "loss": 0.616, "step": 18313 }, { "epoch": 0.5625902374589131, "grad_norm": 0.4900410771369934, "learning_rate": 1.637002369746556e-05, "loss": 0.5577, "step": 18314 }, { "epoch": 0.5626209565938624, "grad_norm": 0.3900837302207947, "learning_rate": 1.636965114996862e-05, "loss": 0.5768, "step": 18315 }, { "epoch": 0.5626516757288115, "grad_norm": 0.36559098958969116, "learning_rate": 1.6369278587594913e-05, "loss": 0.4857, "step": 18316 }, { "epoch": 0.5626823948637606, "grad_norm": 0.33089950680732727, "learning_rate": 1.636890601034531e-05, "loss": 0.5643, "step": 18317 }, { "epoch": 0.5627131139987098, "grad_norm": 0.36032840609550476, "learning_rate": 1.6368533418220683e-05, "loss": 0.6311, "step": 18318 }, { "epoch": 0.5627438331336589, "grad_norm": 0.3527620732784271, "learning_rate": 1.63681608112219e-05, "loss": 0.5433, "step": 18319 }, { "epoch": 0.5627745522686081, "grad_norm": 0.35205933451652527, "learning_rate": 1.6367788189349834e-05, "loss": 0.5494, "step": 18320 }, { "epoch": 0.5628052714035573, "grad_norm": 0.4122970700263977, "learning_rate": 1.6367415552605358e-05, "loss": 0.5542, "step": 18321 }, { "epoch": 0.5628359905385064, "grad_norm": 0.3394835293292999, "learning_rate": 1.636704290098933e-05, "loss": 0.6103, "step": 18322 }, { "epoch": 0.5628667096734556, "grad_norm": 0.3474261164665222, "learning_rate": 1.6366670234502635e-05, "loss": 0.587, "step": 18323 }, { "epoch": 0.5628974288084048, "grad_norm": 0.35180899500846863, "learning_rate": 1.6366297553146133e-05, "loss": 0.5867, "step": 18324 }, { "epoch": 0.5629281479433539, "grad_norm": 0.34738799929618835, "learning_rate": 1.63659248569207e-05, "loss": 0.4909, "step": 18325 }, { "epoch": 0.5629588670783031, "grad_norm": 0.40920522809028625, "learning_rate": 1.6365552145827205e-05, "loss": 0.6172, "step": 18326 }, { "epoch": 0.5629895862132522, "grad_norm": 0.3677544891834259, "learning_rate": 1.636517941986652e-05, "loss": 0.5793, "step": 18327 }, { "epoch": 0.5630203053482014, "grad_norm": 0.36154717206954956, "learning_rate": 1.6364806679039513e-05, "loss": 0.5286, "step": 18328 }, { "epoch": 0.5630510244831506, "grad_norm": 0.339495986700058, "learning_rate": 1.6364433923347057e-05, "loss": 0.5833, "step": 18329 }, { "epoch": 0.5630817436180997, "grad_norm": 0.33638235926628113, "learning_rate": 1.6364061152790017e-05, "loss": 0.5753, "step": 18330 }, { "epoch": 0.5631124627530488, "grad_norm": 0.34079185128211975, "learning_rate": 1.636368836736927e-05, "loss": 0.6193, "step": 18331 }, { "epoch": 0.5631431818879981, "grad_norm": 0.3555673360824585, "learning_rate": 1.6363315567085686e-05, "loss": 0.5249, "step": 18332 }, { "epoch": 0.5631739010229472, "grad_norm": 0.3546343147754669, "learning_rate": 1.636294275194013e-05, "loss": 0.5579, "step": 18333 }, { "epoch": 0.5632046201578964, "grad_norm": 0.36554425954818726, "learning_rate": 1.636256992193348e-05, "loss": 0.5696, "step": 18334 }, { "epoch": 0.5632353392928455, "grad_norm": 0.37100115418434143, "learning_rate": 1.63621970770666e-05, "loss": 0.5673, "step": 18335 }, { "epoch": 0.5632660584277946, "grad_norm": 0.370244562625885, "learning_rate": 1.636182421734037e-05, "loss": 0.5673, "step": 18336 }, { "epoch": 0.5632967775627439, "grad_norm": 0.41678300499916077, "learning_rate": 1.6361451342755655e-05, "loss": 0.6675, "step": 18337 }, { "epoch": 0.563327496697693, "grad_norm": 0.43331196904182434, "learning_rate": 1.6361078453313322e-05, "loss": 0.4927, "step": 18338 }, { "epoch": 0.5633582158326421, "grad_norm": 0.3736661970615387, "learning_rate": 1.6360705549014245e-05, "loss": 0.5859, "step": 18339 }, { "epoch": 0.5633889349675913, "grad_norm": 0.3862769901752472, "learning_rate": 1.63603326298593e-05, "loss": 0.5801, "step": 18340 }, { "epoch": 0.5634196541025405, "grad_norm": 0.3941037952899933, "learning_rate": 1.6359959695849354e-05, "loss": 0.5996, "step": 18341 }, { "epoch": 0.5634503732374896, "grad_norm": 0.3958321809768677, "learning_rate": 1.6359586746985275e-05, "loss": 0.5008, "step": 18342 }, { "epoch": 0.5634810923724388, "grad_norm": 0.3775223195552826, "learning_rate": 1.6359213783267937e-05, "loss": 0.5812, "step": 18343 }, { "epoch": 0.5635118115073879, "grad_norm": 0.3158891499042511, "learning_rate": 1.6358840804698216e-05, "loss": 0.5439, "step": 18344 }, { "epoch": 0.5635425306423371, "grad_norm": 0.3401861786842346, "learning_rate": 1.6358467811276972e-05, "loss": 0.5752, "step": 18345 }, { "epoch": 0.5635732497772863, "grad_norm": 0.3778954744338989, "learning_rate": 1.6358094803005085e-05, "loss": 0.5906, "step": 18346 }, { "epoch": 0.5636039689122354, "grad_norm": 0.36840522289276123, "learning_rate": 1.6357721779883423e-05, "loss": 0.6347, "step": 18347 }, { "epoch": 0.5636346880471846, "grad_norm": 0.33897602558135986, "learning_rate": 1.635734874191286e-05, "loss": 0.5288, "step": 18348 }, { "epoch": 0.5636654071821338, "grad_norm": 0.35562437772750854, "learning_rate": 1.6356975689094263e-05, "loss": 0.6115, "step": 18349 }, { "epoch": 0.5636961263170829, "grad_norm": 0.35669034719467163, "learning_rate": 1.6356602621428508e-05, "loss": 0.5604, "step": 18350 }, { "epoch": 0.5637268454520321, "grad_norm": 0.3694695830345154, "learning_rate": 1.635622953891646e-05, "loss": 0.6527, "step": 18351 }, { "epoch": 0.5637575645869812, "grad_norm": 0.38557761907577515, "learning_rate": 1.6355856441558994e-05, "loss": 0.5612, "step": 18352 }, { "epoch": 0.5637882837219304, "grad_norm": 0.3418016731739044, "learning_rate": 1.6355483329356984e-05, "loss": 0.5952, "step": 18353 }, { "epoch": 0.5638190028568796, "grad_norm": 0.34491029381752014, "learning_rate": 1.6355110202311298e-05, "loss": 0.5773, "step": 18354 }, { "epoch": 0.5638497219918287, "grad_norm": 0.3440345227718353, "learning_rate": 1.635473706042281e-05, "loss": 0.5565, "step": 18355 }, { "epoch": 0.5638804411267778, "grad_norm": 0.32607266306877136, "learning_rate": 1.6354363903692386e-05, "loss": 0.538, "step": 18356 }, { "epoch": 0.563911160261727, "grad_norm": 0.35243359208106995, "learning_rate": 1.6353990732120902e-05, "loss": 0.4848, "step": 18357 }, { "epoch": 0.5639418793966762, "grad_norm": 0.35149046778678894, "learning_rate": 1.6353617545709232e-05, "loss": 0.6614, "step": 18358 }, { "epoch": 0.5639725985316254, "grad_norm": 0.3383082449436188, "learning_rate": 1.6353244344458242e-05, "loss": 0.5484, "step": 18359 }, { "epoch": 0.5640033176665745, "grad_norm": 0.3731682598590851, "learning_rate": 1.6352871128368803e-05, "loss": 0.6652, "step": 18360 }, { "epoch": 0.5640340368015236, "grad_norm": 0.3385595381259918, "learning_rate": 1.6352497897441794e-05, "loss": 0.5815, "step": 18361 }, { "epoch": 0.5640647559364729, "grad_norm": 0.36295071244239807, "learning_rate": 1.6352124651678084e-05, "loss": 0.6232, "step": 18362 }, { "epoch": 0.564095475071422, "grad_norm": 0.3658522963523865, "learning_rate": 1.635175139107854e-05, "loss": 0.5892, "step": 18363 }, { "epoch": 0.5641261942063711, "grad_norm": 0.3504900336265564, "learning_rate": 1.6351378115644037e-05, "loss": 0.5935, "step": 18364 }, { "epoch": 0.5641569133413203, "grad_norm": 0.33465462923049927, "learning_rate": 1.6351004825375447e-05, "loss": 0.5118, "step": 18365 }, { "epoch": 0.5641876324762695, "grad_norm": 0.4116058647632599, "learning_rate": 1.6350631520273644e-05, "loss": 0.6338, "step": 18366 }, { "epoch": 0.5642183516112186, "grad_norm": 0.36104387044906616, "learning_rate": 1.6350258200339494e-05, "loss": 0.5465, "step": 18367 }, { "epoch": 0.5642490707461678, "grad_norm": 0.33789175748825073, "learning_rate": 1.634988486557388e-05, "loss": 0.5409, "step": 18368 }, { "epoch": 0.5642797898811169, "grad_norm": 0.4201701283454895, "learning_rate": 1.6349511515977655e-05, "loss": 0.6504, "step": 18369 }, { "epoch": 0.564310509016066, "grad_norm": 0.3484180271625519, "learning_rate": 1.634913815155171e-05, "loss": 0.58, "step": 18370 }, { "epoch": 0.5643412281510153, "grad_norm": 0.34339022636413574, "learning_rate": 1.634876477229691e-05, "loss": 0.5763, "step": 18371 }, { "epoch": 0.5643719472859644, "grad_norm": 0.37331631779670715, "learning_rate": 1.6348391378214126e-05, "loss": 0.5555, "step": 18372 }, { "epoch": 0.5644026664209136, "grad_norm": 0.39279964566230774, "learning_rate": 1.6348017969304227e-05, "loss": 0.6137, "step": 18373 }, { "epoch": 0.5644333855558628, "grad_norm": 0.3233543932437897, "learning_rate": 1.634764454556809e-05, "loss": 0.5569, "step": 18374 }, { "epoch": 0.5644641046908119, "grad_norm": 0.328966349363327, "learning_rate": 1.634727110700659e-05, "loss": 0.5078, "step": 18375 }, { "epoch": 0.5644948238257611, "grad_norm": 0.3366200625896454, "learning_rate": 1.634689765362059e-05, "loss": 0.5665, "step": 18376 }, { "epoch": 0.5645255429607102, "grad_norm": 0.37178662419319153, "learning_rate": 1.6346524185410968e-05, "loss": 0.5209, "step": 18377 }, { "epoch": 0.5645562620956593, "grad_norm": 0.3387007713317871, "learning_rate": 1.6346150702378594e-05, "loss": 0.5593, "step": 18378 }, { "epoch": 0.5645869812306086, "grad_norm": 0.3427676558494568, "learning_rate": 1.6345777204524344e-05, "loss": 0.5426, "step": 18379 }, { "epoch": 0.5646177003655577, "grad_norm": 0.37182164192199707, "learning_rate": 1.6345403691849087e-05, "loss": 0.6068, "step": 18380 }, { "epoch": 0.5646484195005068, "grad_norm": 0.35725677013397217, "learning_rate": 1.63450301643537e-05, "loss": 0.5999, "step": 18381 }, { "epoch": 0.564679138635456, "grad_norm": 0.3324580788612366, "learning_rate": 1.634465662203905e-05, "loss": 0.5622, "step": 18382 }, { "epoch": 0.5647098577704052, "grad_norm": 0.40253207087516785, "learning_rate": 1.6344283064906013e-05, "loss": 0.6138, "step": 18383 }, { "epoch": 0.5647405769053544, "grad_norm": 0.3496708869934082, "learning_rate": 1.6343909492955458e-05, "loss": 0.5449, "step": 18384 }, { "epoch": 0.5647712960403035, "grad_norm": 0.36960190534591675, "learning_rate": 1.6343535906188257e-05, "loss": 0.5909, "step": 18385 }, { "epoch": 0.5648020151752526, "grad_norm": 0.3361191153526306, "learning_rate": 1.6343162304605288e-05, "loss": 0.5413, "step": 18386 }, { "epoch": 0.5648327343102019, "grad_norm": 0.3286413550376892, "learning_rate": 1.6342788688207418e-05, "loss": 0.5596, "step": 18387 }, { "epoch": 0.564863453445151, "grad_norm": 0.3907976746559143, "learning_rate": 1.6342415056995525e-05, "loss": 0.5269, "step": 18388 }, { "epoch": 0.5648941725801001, "grad_norm": 0.3687365651130676, "learning_rate": 1.634204141097048e-05, "loss": 0.5715, "step": 18389 }, { "epoch": 0.5649248917150493, "grad_norm": 0.34356755018234253, "learning_rate": 1.634166775013315e-05, "loss": 0.5431, "step": 18390 }, { "epoch": 0.5649556108499985, "grad_norm": 0.42226478457450867, "learning_rate": 1.6341294074484414e-05, "loss": 0.5838, "step": 18391 }, { "epoch": 0.5649863299849476, "grad_norm": 0.42827296257019043, "learning_rate": 1.634092038402514e-05, "loss": 0.5552, "step": 18392 }, { "epoch": 0.5650170491198968, "grad_norm": 0.3516594469547272, "learning_rate": 1.6340546678756205e-05, "loss": 0.5925, "step": 18393 }, { "epoch": 0.5650477682548459, "grad_norm": 0.39151760935783386, "learning_rate": 1.6340172958678483e-05, "loss": 0.6711, "step": 18394 }, { "epoch": 0.5650784873897952, "grad_norm": 0.36479905247688293, "learning_rate": 1.6339799223792843e-05, "loss": 0.5024, "step": 18395 }, { "epoch": 0.5651092065247443, "grad_norm": 0.3435215950012207, "learning_rate": 1.633942547410016e-05, "loss": 0.5305, "step": 18396 }, { "epoch": 0.5651399256596934, "grad_norm": 0.3453041911125183, "learning_rate": 1.6339051709601306e-05, "loss": 0.5228, "step": 18397 }, { "epoch": 0.5651706447946426, "grad_norm": 0.3844829499721527, "learning_rate": 1.633867793029715e-05, "loss": 0.5208, "step": 18398 }, { "epoch": 0.5652013639295917, "grad_norm": 0.3488744795322418, "learning_rate": 1.6338304136188573e-05, "loss": 0.5562, "step": 18399 }, { "epoch": 0.5652320830645409, "grad_norm": 0.34757906198501587, "learning_rate": 1.6337930327276445e-05, "loss": 0.586, "step": 18400 }, { "epoch": 0.5652628021994901, "grad_norm": 0.34940725564956665, "learning_rate": 1.6337556503561637e-05, "loss": 0.6193, "step": 18401 }, { "epoch": 0.5652935213344392, "grad_norm": 0.32208865880966187, "learning_rate": 1.633718266504502e-05, "loss": 0.5025, "step": 18402 }, { "epoch": 0.5653242404693883, "grad_norm": 0.3581107258796692, "learning_rate": 1.6336808811727473e-05, "loss": 0.5356, "step": 18403 }, { "epoch": 0.5653549596043376, "grad_norm": 0.3596915304660797, "learning_rate": 1.6336434943609866e-05, "loss": 0.5661, "step": 18404 }, { "epoch": 0.5653856787392867, "grad_norm": 0.426533579826355, "learning_rate": 1.6336061060693073e-05, "loss": 0.5823, "step": 18405 }, { "epoch": 0.5654163978742358, "grad_norm": 0.37538960576057434, "learning_rate": 1.6335687162977966e-05, "loss": 0.5956, "step": 18406 }, { "epoch": 0.565447117009185, "grad_norm": 0.41970929503440857, "learning_rate": 1.6335313250465418e-05, "loss": 0.6228, "step": 18407 }, { "epoch": 0.5654778361441342, "grad_norm": 0.3565661609172821, "learning_rate": 1.633493932315631e-05, "loss": 0.6332, "step": 18408 }, { "epoch": 0.5655085552790834, "grad_norm": 0.4081978499889374, "learning_rate": 1.6334565381051502e-05, "loss": 0.5699, "step": 18409 }, { "epoch": 0.5655392744140325, "grad_norm": 0.3515836000442505, "learning_rate": 1.6334191424151876e-05, "loss": 0.4904, "step": 18410 }, { "epoch": 0.5655699935489816, "grad_norm": 0.3288431763648987, "learning_rate": 1.6333817452458305e-05, "loss": 0.5829, "step": 18411 }, { "epoch": 0.5656007126839309, "grad_norm": 0.334730863571167, "learning_rate": 1.633344346597166e-05, "loss": 0.6002, "step": 18412 }, { "epoch": 0.56563143181888, "grad_norm": 0.4157460033893585, "learning_rate": 1.6333069464692815e-05, "loss": 0.561, "step": 18413 }, { "epoch": 0.5656621509538291, "grad_norm": 0.3340742290019989, "learning_rate": 1.6332695448622642e-05, "loss": 0.5803, "step": 18414 }, { "epoch": 0.5656928700887783, "grad_norm": 0.35722947120666504, "learning_rate": 1.6332321417762017e-05, "loss": 0.5602, "step": 18415 }, { "epoch": 0.5657235892237275, "grad_norm": 0.3258823752403259, "learning_rate": 1.6331947372111817e-05, "loss": 0.4593, "step": 18416 }, { "epoch": 0.5657543083586766, "grad_norm": 0.34310483932495117, "learning_rate": 1.633157331167291e-05, "loss": 0.6172, "step": 18417 }, { "epoch": 0.5657850274936258, "grad_norm": 0.33784595131874084, "learning_rate": 1.633119923644617e-05, "loss": 0.6088, "step": 18418 }, { "epoch": 0.5658157466285749, "grad_norm": 0.3596440553665161, "learning_rate": 1.6330825146432472e-05, "loss": 0.5769, "step": 18419 }, { "epoch": 0.5658464657635242, "grad_norm": 0.356621652841568, "learning_rate": 1.6330451041632687e-05, "loss": 0.5925, "step": 18420 }, { "epoch": 0.5658771848984733, "grad_norm": 0.3570275604724884, "learning_rate": 1.63300769220477e-05, "loss": 0.4696, "step": 18421 }, { "epoch": 0.5659079040334224, "grad_norm": 0.34841397404670715, "learning_rate": 1.632970278767837e-05, "loss": 0.4879, "step": 18422 }, { "epoch": 0.5659386231683716, "grad_norm": 0.37062734365463257, "learning_rate": 1.632932863852558e-05, "loss": 0.5602, "step": 18423 }, { "epoch": 0.5659693423033207, "grad_norm": 0.367774099111557, "learning_rate": 1.63289544745902e-05, "loss": 0.6062, "step": 18424 }, { "epoch": 0.5660000614382699, "grad_norm": 0.34777167439460754, "learning_rate": 1.63285802958731e-05, "loss": 0.5084, "step": 18425 }, { "epoch": 0.5660307805732191, "grad_norm": 0.3855191171169281, "learning_rate": 1.6328206102375164e-05, "loss": 0.5173, "step": 18426 }, { "epoch": 0.5660614997081682, "grad_norm": 0.32774490118026733, "learning_rate": 1.632783189409726e-05, "loss": 0.5427, "step": 18427 }, { "epoch": 0.5660922188431173, "grad_norm": 0.38868188858032227, "learning_rate": 1.632745767104026e-05, "loss": 0.5903, "step": 18428 }, { "epoch": 0.5661229379780666, "grad_norm": 0.33221951127052307, "learning_rate": 1.6327083433205045e-05, "loss": 0.5784, "step": 18429 }, { "epoch": 0.5661536571130157, "grad_norm": 0.37033548951148987, "learning_rate": 1.6326709180592485e-05, "loss": 0.5541, "step": 18430 }, { "epoch": 0.5661843762479648, "grad_norm": 0.36146485805511475, "learning_rate": 1.632633491320345e-05, "loss": 0.5426, "step": 18431 }, { "epoch": 0.566215095382914, "grad_norm": 0.3077166676521301, "learning_rate": 1.6325960631038817e-05, "loss": 0.4943, "step": 18432 }, { "epoch": 0.5662458145178632, "grad_norm": 0.531947910785675, "learning_rate": 1.6325586334099464e-05, "loss": 0.5367, "step": 18433 }, { "epoch": 0.5662765336528124, "grad_norm": 0.4820996820926666, "learning_rate": 1.6325212022386265e-05, "loss": 0.5558, "step": 18434 }, { "epoch": 0.5663072527877615, "grad_norm": 0.35819360613822937, "learning_rate": 1.6324837695900085e-05, "loss": 0.5809, "step": 18435 }, { "epoch": 0.5663379719227106, "grad_norm": 0.590408205986023, "learning_rate": 1.6324463354641807e-05, "loss": 0.5705, "step": 18436 }, { "epoch": 0.5663686910576599, "grad_norm": 0.4039843678474426, "learning_rate": 1.6324088998612308e-05, "loss": 0.5515, "step": 18437 }, { "epoch": 0.566399410192609, "grad_norm": 0.3265235722064972, "learning_rate": 1.6323714627812454e-05, "loss": 0.5663, "step": 18438 }, { "epoch": 0.5664301293275581, "grad_norm": 0.4028032422065735, "learning_rate": 1.632334024224312e-05, "loss": 0.562, "step": 18439 }, { "epoch": 0.5664608484625073, "grad_norm": 0.4061766564846039, "learning_rate": 1.632296584190519e-05, "loss": 0.5949, "step": 18440 }, { "epoch": 0.5664915675974564, "grad_norm": 0.3689662516117096, "learning_rate": 1.6322591426799527e-05, "loss": 0.5792, "step": 18441 }, { "epoch": 0.5665222867324056, "grad_norm": 0.3227826654911041, "learning_rate": 1.6322216996927013e-05, "loss": 0.4884, "step": 18442 }, { "epoch": 0.5665530058673548, "grad_norm": 0.3354160189628601, "learning_rate": 1.6321842552288516e-05, "loss": 0.5648, "step": 18443 }, { "epoch": 0.5665837250023039, "grad_norm": 0.3569202721118927, "learning_rate": 1.6321468092884917e-05, "loss": 0.5141, "step": 18444 }, { "epoch": 0.5666144441372531, "grad_norm": 0.38462719321250916, "learning_rate": 1.6321093618717086e-05, "loss": 0.6428, "step": 18445 }, { "epoch": 0.5666451632722023, "grad_norm": 0.37963882088661194, "learning_rate": 1.63207191297859e-05, "loss": 0.5792, "step": 18446 }, { "epoch": 0.5666758824071514, "grad_norm": 0.3348693251609802, "learning_rate": 1.632034462609223e-05, "loss": 0.5604, "step": 18447 }, { "epoch": 0.5667066015421006, "grad_norm": 0.3506665527820587, "learning_rate": 1.631997010763696e-05, "loss": 0.5687, "step": 18448 }, { "epoch": 0.5667373206770497, "grad_norm": 0.36115312576293945, "learning_rate": 1.6319595574420955e-05, "loss": 0.5692, "step": 18449 }, { "epoch": 0.5667680398119989, "grad_norm": 0.3541623651981354, "learning_rate": 1.6319221026445092e-05, "loss": 0.5391, "step": 18450 }, { "epoch": 0.5667987589469481, "grad_norm": 0.4037734866142273, "learning_rate": 1.6318846463710246e-05, "loss": 0.6475, "step": 18451 }, { "epoch": 0.5668294780818972, "grad_norm": 0.31899842619895935, "learning_rate": 1.6318471886217295e-05, "loss": 0.5237, "step": 18452 }, { "epoch": 0.5668601972168463, "grad_norm": 0.3606996238231659, "learning_rate": 1.631809729396711e-05, "loss": 0.593, "step": 18453 }, { "epoch": 0.5668909163517956, "grad_norm": 0.35009342432022095, "learning_rate": 1.6317722686960574e-05, "loss": 0.4769, "step": 18454 }, { "epoch": 0.5669216354867447, "grad_norm": 0.38225266337394714, "learning_rate": 1.6317348065198547e-05, "loss": 0.5687, "step": 18455 }, { "epoch": 0.5669523546216938, "grad_norm": 0.3832692503929138, "learning_rate": 1.6316973428681917e-05, "loss": 0.6208, "step": 18456 }, { "epoch": 0.566983073756643, "grad_norm": 0.3487228453159332, "learning_rate": 1.6316598777411555e-05, "loss": 0.5153, "step": 18457 }, { "epoch": 0.5670137928915921, "grad_norm": 0.3436364531517029, "learning_rate": 1.6316224111388336e-05, "loss": 0.5684, "step": 18458 }, { "epoch": 0.5670445120265414, "grad_norm": 0.3434727191925049, "learning_rate": 1.631584943061313e-05, "loss": 0.5181, "step": 18459 }, { "epoch": 0.5670752311614905, "grad_norm": 0.38151484727859497, "learning_rate": 1.6315474735086822e-05, "loss": 0.6317, "step": 18460 }, { "epoch": 0.5671059502964396, "grad_norm": 0.4732713997364044, "learning_rate": 1.6315100024810275e-05, "loss": 0.4998, "step": 18461 }, { "epoch": 0.5671366694313889, "grad_norm": 0.49807262420654297, "learning_rate": 1.6314725299784374e-05, "loss": 0.5766, "step": 18462 }, { "epoch": 0.567167388566338, "grad_norm": 0.34161123633384705, "learning_rate": 1.6314350560009993e-05, "loss": 0.5975, "step": 18463 }, { "epoch": 0.5671981077012871, "grad_norm": 0.350522518157959, "learning_rate": 1.6313975805488006e-05, "loss": 0.5525, "step": 18464 }, { "epoch": 0.5672288268362363, "grad_norm": 0.35436901450157166, "learning_rate": 1.6313601036219283e-05, "loss": 0.598, "step": 18465 }, { "epoch": 0.5672595459711854, "grad_norm": 0.3457684814929962, "learning_rate": 1.6313226252204707e-05, "loss": 0.5244, "step": 18466 }, { "epoch": 0.5672902651061346, "grad_norm": 0.3453342914581299, "learning_rate": 1.631285145344515e-05, "loss": 0.6043, "step": 18467 }, { "epoch": 0.5673209842410838, "grad_norm": 0.46679455041885376, "learning_rate": 1.6312476639941487e-05, "loss": 0.6115, "step": 18468 }, { "epoch": 0.5673517033760329, "grad_norm": 0.3173770010471344, "learning_rate": 1.6312101811694592e-05, "loss": 0.626, "step": 18469 }, { "epoch": 0.5673824225109821, "grad_norm": 0.43651318550109863, "learning_rate": 1.6311726968705343e-05, "loss": 0.5398, "step": 18470 }, { "epoch": 0.5674131416459313, "grad_norm": 0.34936270117759705, "learning_rate": 1.6311352110974618e-05, "loss": 0.6677, "step": 18471 }, { "epoch": 0.5674438607808804, "grad_norm": 0.3361052870750427, "learning_rate": 1.6310977238503286e-05, "loss": 0.5578, "step": 18472 }, { "epoch": 0.5674745799158296, "grad_norm": 0.366376668214798, "learning_rate": 1.6310602351292225e-05, "loss": 0.651, "step": 18473 }, { "epoch": 0.5675052990507787, "grad_norm": 0.35087132453918457, "learning_rate": 1.6310227449342314e-05, "loss": 0.5255, "step": 18474 }, { "epoch": 0.5675360181857279, "grad_norm": 0.3548397123813629, "learning_rate": 1.630985253265443e-05, "loss": 0.5424, "step": 18475 }, { "epoch": 0.5675667373206771, "grad_norm": 0.3995503783226013, "learning_rate": 1.6309477601229438e-05, "loss": 0.5747, "step": 18476 }, { "epoch": 0.5675974564556262, "grad_norm": 0.34618204832077026, "learning_rate": 1.6309102655068225e-05, "loss": 0.4252, "step": 18477 }, { "epoch": 0.5676281755905753, "grad_norm": 0.35286128520965576, "learning_rate": 1.630872769417166e-05, "loss": 0.4756, "step": 18478 }, { "epoch": 0.5676588947255246, "grad_norm": 0.34012794494628906, "learning_rate": 1.630835271854062e-05, "loss": 0.4895, "step": 18479 }, { "epoch": 0.5676896138604737, "grad_norm": 0.3305014967918396, "learning_rate": 1.6307977728175984e-05, "loss": 0.5569, "step": 18480 }, { "epoch": 0.5677203329954228, "grad_norm": 0.34344324469566345, "learning_rate": 1.6307602723078625e-05, "loss": 0.5125, "step": 18481 }, { "epoch": 0.567751052130372, "grad_norm": 0.3233770728111267, "learning_rate": 1.630722770324942e-05, "loss": 0.4381, "step": 18482 }, { "epoch": 0.5677817712653211, "grad_norm": 0.3676961064338684, "learning_rate": 1.6306852668689242e-05, "loss": 0.5046, "step": 18483 }, { "epoch": 0.5678124904002704, "grad_norm": 0.3465898931026459, "learning_rate": 1.630647761939897e-05, "loss": 0.5488, "step": 18484 }, { "epoch": 0.5678432095352195, "grad_norm": 0.35473549365997314, "learning_rate": 1.6306102555379478e-05, "loss": 0.5528, "step": 18485 }, { "epoch": 0.5678739286701686, "grad_norm": 0.3850688636302948, "learning_rate": 1.6305727476631642e-05, "loss": 0.556, "step": 18486 }, { "epoch": 0.5679046478051178, "grad_norm": 0.3968769609928131, "learning_rate": 1.630535238315634e-05, "loss": 0.595, "step": 18487 }, { "epoch": 0.567935366940067, "grad_norm": 0.33974820375442505, "learning_rate": 1.630497727495445e-05, "loss": 0.4989, "step": 18488 }, { "epoch": 0.5679660860750161, "grad_norm": 0.3499108850955963, "learning_rate": 1.6304602152026844e-05, "loss": 0.5624, "step": 18489 }, { "epoch": 0.5679968052099653, "grad_norm": 0.34994038939476013, "learning_rate": 1.63042270143744e-05, "loss": 0.5463, "step": 18490 }, { "epoch": 0.5680275243449144, "grad_norm": 0.3708800673484802, "learning_rate": 1.630385186199799e-05, "loss": 0.5748, "step": 18491 }, { "epoch": 0.5680582434798636, "grad_norm": 0.383782297372818, "learning_rate": 1.6303476694898496e-05, "loss": 0.5595, "step": 18492 }, { "epoch": 0.5680889626148128, "grad_norm": 0.3683859407901764, "learning_rate": 1.630310151307679e-05, "loss": 0.5968, "step": 18493 }, { "epoch": 0.5681196817497619, "grad_norm": 0.3788955807685852, "learning_rate": 1.6302726316533756e-05, "loss": 0.5349, "step": 18494 }, { "epoch": 0.5681504008847111, "grad_norm": 0.33946800231933594, "learning_rate": 1.6302351105270262e-05, "loss": 0.5312, "step": 18495 }, { "epoch": 0.5681811200196603, "grad_norm": 0.3686537742614746, "learning_rate": 1.6301975879287185e-05, "loss": 0.5923, "step": 18496 }, { "epoch": 0.5682118391546094, "grad_norm": 0.3537122309207916, "learning_rate": 1.6301600638585403e-05, "loss": 0.5909, "step": 18497 }, { "epoch": 0.5682425582895586, "grad_norm": 0.35387542843818665, "learning_rate": 1.6301225383165793e-05, "loss": 0.5887, "step": 18498 }, { "epoch": 0.5682732774245077, "grad_norm": 0.39186903834342957, "learning_rate": 1.6300850113029233e-05, "loss": 0.5979, "step": 18499 }, { "epoch": 0.5683039965594568, "grad_norm": 0.3483748137950897, "learning_rate": 1.6300474828176594e-05, "loss": 0.5533, "step": 18500 }, { "epoch": 0.5683347156944061, "grad_norm": 0.40103352069854736, "learning_rate": 1.630009952860876e-05, "loss": 0.5895, "step": 18501 }, { "epoch": 0.5683654348293552, "grad_norm": 0.36064159870147705, "learning_rate": 1.62997242143266e-05, "loss": 0.5883, "step": 18502 }, { "epoch": 0.5683961539643043, "grad_norm": 0.33095458149909973, "learning_rate": 1.6299348885330996e-05, "loss": 0.4987, "step": 18503 }, { "epoch": 0.5684268730992535, "grad_norm": 0.3890676498413086, "learning_rate": 1.629897354162282e-05, "loss": 0.5462, "step": 18504 }, { "epoch": 0.5684575922342027, "grad_norm": 0.37567824125289917, "learning_rate": 1.629859818320296e-05, "loss": 0.5356, "step": 18505 }, { "epoch": 0.5684883113691519, "grad_norm": 0.3888941705226898, "learning_rate": 1.6298222810072274e-05, "loss": 0.5359, "step": 18506 }, { "epoch": 0.568519030504101, "grad_norm": 0.367683470249176, "learning_rate": 1.629784742223165e-05, "loss": 0.572, "step": 18507 }, { "epoch": 0.5685497496390501, "grad_norm": 0.3650810420513153, "learning_rate": 1.6297472019681967e-05, "loss": 0.5629, "step": 18508 }, { "epoch": 0.5685804687739994, "grad_norm": 0.4467988610267639, "learning_rate": 1.62970966024241e-05, "loss": 0.6019, "step": 18509 }, { "epoch": 0.5686111879089485, "grad_norm": 0.35212597250938416, "learning_rate": 1.629672117045892e-05, "loss": 0.5495, "step": 18510 }, { "epoch": 0.5686419070438976, "grad_norm": 0.35216352343559265, "learning_rate": 1.629634572378731e-05, "loss": 0.4968, "step": 18511 }, { "epoch": 0.5686726261788468, "grad_norm": 0.38319772481918335, "learning_rate": 1.6295970262410142e-05, "loss": 0.5514, "step": 18512 }, { "epoch": 0.568703345313796, "grad_norm": 0.3395565450191498, "learning_rate": 1.62955947863283e-05, "loss": 0.6262, "step": 18513 }, { "epoch": 0.5687340644487451, "grad_norm": 0.5375403761863708, "learning_rate": 1.6295219295542658e-05, "loss": 0.6019, "step": 18514 }, { "epoch": 0.5687647835836943, "grad_norm": 0.36044496297836304, "learning_rate": 1.6294843790054087e-05, "loss": 0.5326, "step": 18515 }, { "epoch": 0.5687955027186434, "grad_norm": 0.3723022937774658, "learning_rate": 1.6294468269863468e-05, "loss": 0.5478, "step": 18516 }, { "epoch": 0.5688262218535926, "grad_norm": 0.33779534697532654, "learning_rate": 1.629409273497168e-05, "loss": 0.5759, "step": 18517 }, { "epoch": 0.5688569409885418, "grad_norm": 0.3429974615573883, "learning_rate": 1.6293717185379604e-05, "loss": 0.5911, "step": 18518 }, { "epoch": 0.5688876601234909, "grad_norm": 0.3394705653190613, "learning_rate": 1.6293341621088105e-05, "loss": 0.562, "step": 18519 }, { "epoch": 0.5689183792584401, "grad_norm": 0.3492768704891205, "learning_rate": 1.6292966042098068e-05, "loss": 0.6331, "step": 18520 }, { "epoch": 0.5689490983933893, "grad_norm": 0.3389914333820343, "learning_rate": 1.6292590448410375e-05, "loss": 0.5719, "step": 18521 }, { "epoch": 0.5689798175283384, "grad_norm": 0.3727606534957886, "learning_rate": 1.6292214840025892e-05, "loss": 0.6005, "step": 18522 }, { "epoch": 0.5690105366632876, "grad_norm": 0.38132616877555847, "learning_rate": 1.6291839216945503e-05, "loss": 0.5711, "step": 18523 }, { "epoch": 0.5690412557982367, "grad_norm": 0.36159706115722656, "learning_rate": 1.6291463579170088e-05, "loss": 0.5209, "step": 18524 }, { "epoch": 0.5690719749331858, "grad_norm": 0.32526007294654846, "learning_rate": 1.6291087926700514e-05, "loss": 0.5329, "step": 18525 }, { "epoch": 0.5691026940681351, "grad_norm": 0.3696666657924652, "learning_rate": 1.6290712259537667e-05, "loss": 0.5601, "step": 18526 }, { "epoch": 0.5691334132030842, "grad_norm": 0.3396947383880615, "learning_rate": 1.6290336577682424e-05, "loss": 0.5452, "step": 18527 }, { "epoch": 0.5691641323380333, "grad_norm": 0.33747339248657227, "learning_rate": 1.6289960881135658e-05, "loss": 0.5602, "step": 18528 }, { "epoch": 0.5691948514729825, "grad_norm": 0.3500919044017792, "learning_rate": 1.628958516989825e-05, "loss": 0.6491, "step": 18529 }, { "epoch": 0.5692255706079317, "grad_norm": 0.4276183843612671, "learning_rate": 1.628920944397108e-05, "loss": 0.5568, "step": 18530 }, { "epoch": 0.5692562897428809, "grad_norm": 0.3758138418197632, "learning_rate": 1.6288833703355017e-05, "loss": 0.5442, "step": 18531 }, { "epoch": 0.56928700887783, "grad_norm": 0.32477331161499023, "learning_rate": 1.6288457948050948e-05, "loss": 0.4658, "step": 18532 }, { "epoch": 0.5693177280127791, "grad_norm": 0.33165568113327026, "learning_rate": 1.6288082178059744e-05, "loss": 0.5865, "step": 18533 }, { "epoch": 0.5693484471477284, "grad_norm": 0.39695221185684204, "learning_rate": 1.6287706393382287e-05, "loss": 0.6534, "step": 18534 }, { "epoch": 0.5693791662826775, "grad_norm": 0.5125012397766113, "learning_rate": 1.6287330594019447e-05, "loss": 0.5076, "step": 18535 }, { "epoch": 0.5694098854176266, "grad_norm": 0.4320628046989441, "learning_rate": 1.628695477997211e-05, "loss": 0.5799, "step": 18536 }, { "epoch": 0.5694406045525758, "grad_norm": 0.3617112338542938, "learning_rate": 1.6286578951241155e-05, "loss": 0.6002, "step": 18537 }, { "epoch": 0.569471323687525, "grad_norm": 0.3900090754032135, "learning_rate": 1.628620310782745e-05, "loss": 0.6448, "step": 18538 }, { "epoch": 0.5695020428224741, "grad_norm": 0.3588487505912781, "learning_rate": 1.6285827249731886e-05, "loss": 0.6179, "step": 18539 }, { "epoch": 0.5695327619574233, "grad_norm": 0.31975531578063965, "learning_rate": 1.6285451376955328e-05, "loss": 0.5393, "step": 18540 }, { "epoch": 0.5695634810923724, "grad_norm": 0.3461657762527466, "learning_rate": 1.6285075489498662e-05, "loss": 0.4784, "step": 18541 }, { "epoch": 0.5695942002273215, "grad_norm": 0.3526533246040344, "learning_rate": 1.628469958736276e-05, "loss": 0.5421, "step": 18542 }, { "epoch": 0.5696249193622708, "grad_norm": 0.3546167314052582, "learning_rate": 1.6284323670548502e-05, "loss": 0.5595, "step": 18543 }, { "epoch": 0.5696556384972199, "grad_norm": 0.3444017469882965, "learning_rate": 1.6283947739056772e-05, "loss": 0.5238, "step": 18544 }, { "epoch": 0.5696863576321691, "grad_norm": 0.3595964014530182, "learning_rate": 1.6283571792888438e-05, "loss": 0.562, "step": 18545 }, { "epoch": 0.5697170767671182, "grad_norm": 0.35225722193717957, "learning_rate": 1.628319583204439e-05, "loss": 0.5159, "step": 18546 }, { "epoch": 0.5697477959020674, "grad_norm": 0.5331439971923828, "learning_rate": 1.6282819856525494e-05, "loss": 0.5318, "step": 18547 }, { "epoch": 0.5697785150370166, "grad_norm": 0.34449800848960876, "learning_rate": 1.6282443866332636e-05, "loss": 0.5612, "step": 18548 }, { "epoch": 0.5698092341719657, "grad_norm": 0.36048829555511475, "learning_rate": 1.628206786146669e-05, "loss": 0.4965, "step": 18549 }, { "epoch": 0.5698399533069148, "grad_norm": 0.3393063545227051, "learning_rate": 1.6281691841928536e-05, "loss": 0.5524, "step": 18550 }, { "epoch": 0.5698706724418641, "grad_norm": 0.5216580629348755, "learning_rate": 1.6281315807719055e-05, "loss": 0.5348, "step": 18551 }, { "epoch": 0.5699013915768132, "grad_norm": 0.3404894769191742, "learning_rate": 1.628093975883912e-05, "loss": 0.5204, "step": 18552 }, { "epoch": 0.5699321107117623, "grad_norm": 0.41619744896888733, "learning_rate": 1.6280563695289607e-05, "loss": 0.5614, "step": 18553 }, { "epoch": 0.5699628298467115, "grad_norm": 0.5130864381790161, "learning_rate": 1.6280187617071405e-05, "loss": 0.5956, "step": 18554 }, { "epoch": 0.5699935489816607, "grad_norm": 0.3500940203666687, "learning_rate": 1.6279811524185384e-05, "loss": 0.6278, "step": 18555 }, { "epoch": 0.5700242681166099, "grad_norm": 0.36983123421669006, "learning_rate": 1.6279435416632425e-05, "loss": 0.5444, "step": 18556 }, { "epoch": 0.570054987251559, "grad_norm": 0.32242050766944885, "learning_rate": 1.627905929441341e-05, "loss": 0.603, "step": 18557 }, { "epoch": 0.5700857063865081, "grad_norm": 0.3627101480960846, "learning_rate": 1.6278683157529206e-05, "loss": 0.5846, "step": 18558 }, { "epoch": 0.5701164255214574, "grad_norm": 0.32383015751838684, "learning_rate": 1.6278307005980702e-05, "loss": 0.5512, "step": 18559 }, { "epoch": 0.5701471446564065, "grad_norm": 0.31931072473526, "learning_rate": 1.6277930839768777e-05, "loss": 0.503, "step": 18560 }, { "epoch": 0.5701778637913556, "grad_norm": 0.3385072648525238, "learning_rate": 1.6277554658894298e-05, "loss": 0.5248, "step": 18561 }, { "epoch": 0.5702085829263048, "grad_norm": 0.362024188041687, "learning_rate": 1.627717846335816e-05, "loss": 0.5756, "step": 18562 }, { "epoch": 0.570239302061254, "grad_norm": 0.3571580648422241, "learning_rate": 1.627680225316123e-05, "loss": 0.5751, "step": 18563 }, { "epoch": 0.5702700211962031, "grad_norm": 0.4122377932071686, "learning_rate": 1.6276426028304392e-05, "loss": 0.5543, "step": 18564 }, { "epoch": 0.5703007403311523, "grad_norm": 0.37549063563346863, "learning_rate": 1.627604978878852e-05, "loss": 0.5346, "step": 18565 }, { "epoch": 0.5703314594661014, "grad_norm": 0.345047265291214, "learning_rate": 1.6275673534614494e-05, "loss": 0.5643, "step": 18566 }, { "epoch": 0.5703621786010505, "grad_norm": 0.34737274050712585, "learning_rate": 1.62752972657832e-05, "loss": 0.5286, "step": 18567 }, { "epoch": 0.5703928977359998, "grad_norm": 0.3173574209213257, "learning_rate": 1.6274920982295507e-05, "loss": 0.6034, "step": 18568 }, { "epoch": 0.5704236168709489, "grad_norm": 0.4336109459400177, "learning_rate": 1.6274544684152294e-05, "loss": 0.587, "step": 18569 }, { "epoch": 0.5704543360058981, "grad_norm": 0.36054527759552, "learning_rate": 1.627416837135445e-05, "loss": 0.526, "step": 18570 }, { "epoch": 0.5704850551408472, "grad_norm": 0.3328697979450226, "learning_rate": 1.6273792043902844e-05, "loss": 0.5755, "step": 18571 }, { "epoch": 0.5705157742757964, "grad_norm": 0.3387073874473572, "learning_rate": 1.627341570179836e-05, "loss": 0.5652, "step": 18572 }, { "epoch": 0.5705464934107456, "grad_norm": 0.32929855585098267, "learning_rate": 1.6273039345041875e-05, "loss": 0.5348, "step": 18573 }, { "epoch": 0.5705772125456947, "grad_norm": 0.4238332211971283, "learning_rate": 1.627266297363427e-05, "loss": 0.5455, "step": 18574 }, { "epoch": 0.5706079316806438, "grad_norm": 0.405169814825058, "learning_rate": 1.6272286587576423e-05, "loss": 0.6036, "step": 18575 }, { "epoch": 0.5706386508155931, "grad_norm": 0.3687654733657837, "learning_rate": 1.627191018686921e-05, "loss": 0.5145, "step": 18576 }, { "epoch": 0.5706693699505422, "grad_norm": 0.36385637521743774, "learning_rate": 1.6271533771513514e-05, "loss": 0.5674, "step": 18577 }, { "epoch": 0.5707000890854913, "grad_norm": 0.4080851674079895, "learning_rate": 1.6271157341510213e-05, "loss": 0.5795, "step": 18578 }, { "epoch": 0.5707308082204405, "grad_norm": 0.3734418451786041, "learning_rate": 1.6270780896860184e-05, "loss": 0.5936, "step": 18579 }, { "epoch": 0.5707615273553897, "grad_norm": 0.3693806827068329, "learning_rate": 1.627040443756431e-05, "loss": 0.5994, "step": 18580 }, { "epoch": 0.5707922464903389, "grad_norm": 0.3374244272708893, "learning_rate": 1.627002796362347e-05, "loss": 0.5909, "step": 18581 }, { "epoch": 0.570822965625288, "grad_norm": 0.33277493715286255, "learning_rate": 1.626965147503854e-05, "loss": 0.5, "step": 18582 }, { "epoch": 0.5708536847602371, "grad_norm": 0.3404451906681061, "learning_rate": 1.62692749718104e-05, "loss": 0.5915, "step": 18583 }, { "epoch": 0.5708844038951864, "grad_norm": 0.36378929018974304, "learning_rate": 1.6268898453939933e-05, "loss": 0.5899, "step": 18584 }, { "epoch": 0.5709151230301355, "grad_norm": 0.32406672835350037, "learning_rate": 1.6268521921428013e-05, "loss": 0.5075, "step": 18585 }, { "epoch": 0.5709458421650846, "grad_norm": 0.33781862258911133, "learning_rate": 1.6268145374275525e-05, "loss": 0.5817, "step": 18586 }, { "epoch": 0.5709765613000338, "grad_norm": 0.438236266374588, "learning_rate": 1.6267768812483344e-05, "loss": 0.5734, "step": 18587 }, { "epoch": 0.571007280434983, "grad_norm": 0.3334810435771942, "learning_rate": 1.6267392236052352e-05, "loss": 0.5705, "step": 18588 }, { "epoch": 0.5710379995699321, "grad_norm": 0.34775686264038086, "learning_rate": 1.626701564498343e-05, "loss": 0.548, "step": 18589 }, { "epoch": 0.5710687187048813, "grad_norm": 0.34610697627067566, "learning_rate": 1.626663903927745e-05, "loss": 0.5318, "step": 18590 }, { "epoch": 0.5710994378398304, "grad_norm": 0.3462832272052765, "learning_rate": 1.6266262418935303e-05, "loss": 0.5357, "step": 18591 }, { "epoch": 0.5711301569747795, "grad_norm": 0.34309685230255127, "learning_rate": 1.6265885783957858e-05, "loss": 0.5331, "step": 18592 }, { "epoch": 0.5711608761097288, "grad_norm": 0.5741856098175049, "learning_rate": 1.6265509134346002e-05, "loss": 0.5819, "step": 18593 }, { "epoch": 0.5711915952446779, "grad_norm": 0.3486904203891754, "learning_rate": 1.6265132470100608e-05, "loss": 0.5514, "step": 18594 }, { "epoch": 0.5712223143796271, "grad_norm": 0.37018436193466187, "learning_rate": 1.626475579122256e-05, "loss": 0.5362, "step": 18595 }, { "epoch": 0.5712530335145762, "grad_norm": 0.38518476486206055, "learning_rate": 1.626437909771274e-05, "loss": 0.6076, "step": 18596 }, { "epoch": 0.5712837526495254, "grad_norm": 0.3626132011413574, "learning_rate": 1.6264002389572026e-05, "loss": 0.551, "step": 18597 }, { "epoch": 0.5713144717844746, "grad_norm": 0.31683069467544556, "learning_rate": 1.6263625666801293e-05, "loss": 0.5582, "step": 18598 }, { "epoch": 0.5713451909194237, "grad_norm": 0.35297679901123047, "learning_rate": 1.6263248929401428e-05, "loss": 0.5661, "step": 18599 }, { "epoch": 0.5713759100543728, "grad_norm": 0.339578241109848, "learning_rate": 1.6262872177373307e-05, "loss": 0.5783, "step": 18600 }, { "epoch": 0.5714066291893221, "grad_norm": 0.34174975752830505, "learning_rate": 1.6262495410717808e-05, "loss": 0.5011, "step": 18601 }, { "epoch": 0.5714373483242712, "grad_norm": 0.35658085346221924, "learning_rate": 1.6262118629435818e-05, "loss": 0.5368, "step": 18602 }, { "epoch": 0.5714680674592203, "grad_norm": 0.34784653782844543, "learning_rate": 1.626174183352821e-05, "loss": 0.5443, "step": 18603 }, { "epoch": 0.5714987865941695, "grad_norm": 0.49554935097694397, "learning_rate": 1.6261365022995868e-05, "loss": 0.5505, "step": 18604 }, { "epoch": 0.5715295057291186, "grad_norm": 0.34560489654541016, "learning_rate": 1.626098819783967e-05, "loss": 0.5717, "step": 18605 }, { "epoch": 0.5715602248640679, "grad_norm": 0.37658578157424927, "learning_rate": 1.6260611358060496e-05, "loss": 0.5363, "step": 18606 }, { "epoch": 0.571590943999017, "grad_norm": 0.32990190386772156, "learning_rate": 1.6260234503659228e-05, "loss": 0.5183, "step": 18607 }, { "epoch": 0.5716216631339661, "grad_norm": 0.3565095067024231, "learning_rate": 1.6259857634636745e-05, "loss": 0.5549, "step": 18608 }, { "epoch": 0.5716523822689153, "grad_norm": 0.35965749621391296, "learning_rate": 1.6259480750993927e-05, "loss": 0.5902, "step": 18609 }, { "epoch": 0.5716831014038645, "grad_norm": 0.3262888491153717, "learning_rate": 1.6259103852731656e-05, "loss": 0.5062, "step": 18610 }, { "epoch": 0.5717138205388136, "grad_norm": 0.3550766706466675, "learning_rate": 1.625872693985081e-05, "loss": 0.5531, "step": 18611 }, { "epoch": 0.5717445396737628, "grad_norm": 0.5054039359092712, "learning_rate": 1.6258350012352268e-05, "loss": 0.6118, "step": 18612 }, { "epoch": 0.5717752588087119, "grad_norm": 0.33931323885917664, "learning_rate": 1.6257973070236916e-05, "loss": 0.5545, "step": 18613 }, { "epoch": 0.5718059779436611, "grad_norm": 0.3925960659980774, "learning_rate": 1.6257596113505628e-05, "loss": 0.5666, "step": 18614 }, { "epoch": 0.5718366970786103, "grad_norm": 0.34758448600769043, "learning_rate": 1.6257219142159287e-05, "loss": 0.6286, "step": 18615 }, { "epoch": 0.5718674162135594, "grad_norm": 0.40007108449935913, "learning_rate": 1.6256842156198772e-05, "loss": 0.5378, "step": 18616 }, { "epoch": 0.5718981353485086, "grad_norm": 0.32914209365844727, "learning_rate": 1.625646515562497e-05, "loss": 0.5811, "step": 18617 }, { "epoch": 0.5719288544834578, "grad_norm": 0.3489319086074829, "learning_rate": 1.6256088140438753e-05, "loss": 0.5315, "step": 18618 }, { "epoch": 0.5719595736184069, "grad_norm": 0.3583638370037079, "learning_rate": 1.6255711110641007e-05, "loss": 0.5183, "step": 18619 }, { "epoch": 0.5719902927533561, "grad_norm": 0.4039992690086365, "learning_rate": 1.6255334066232613e-05, "loss": 0.5255, "step": 18620 }, { "epoch": 0.5720210118883052, "grad_norm": 0.3218268156051636, "learning_rate": 1.6254957007214446e-05, "loss": 0.549, "step": 18621 }, { "epoch": 0.5720517310232544, "grad_norm": 0.3251972794532776, "learning_rate": 1.625457993358739e-05, "loss": 0.5423, "step": 18622 }, { "epoch": 0.5720824501582036, "grad_norm": 0.3921094834804535, "learning_rate": 1.625420284535233e-05, "loss": 0.5474, "step": 18623 }, { "epoch": 0.5721131692931527, "grad_norm": 0.39150720834732056, "learning_rate": 1.6253825742510137e-05, "loss": 0.6117, "step": 18624 }, { "epoch": 0.5721438884281018, "grad_norm": 0.3475719094276428, "learning_rate": 1.62534486250617e-05, "loss": 0.5811, "step": 18625 }, { "epoch": 0.572174607563051, "grad_norm": 0.39377453923225403, "learning_rate": 1.6253071493007896e-05, "loss": 0.5463, "step": 18626 }, { "epoch": 0.5722053266980002, "grad_norm": 0.3276136517524719, "learning_rate": 1.625269434634961e-05, "loss": 0.5135, "step": 18627 }, { "epoch": 0.5722360458329493, "grad_norm": 0.38513705134391785, "learning_rate": 1.6252317185087717e-05, "loss": 0.5334, "step": 18628 }, { "epoch": 0.5722667649678985, "grad_norm": 0.3966918885707855, "learning_rate": 1.62519400092231e-05, "loss": 0.6013, "step": 18629 }, { "epoch": 0.5722974841028476, "grad_norm": 0.3635323643684387, "learning_rate": 1.6251562818756638e-05, "loss": 0.5516, "step": 18630 }, { "epoch": 0.5723282032377969, "grad_norm": 0.35124123096466064, "learning_rate": 1.625118561368922e-05, "loss": 0.5962, "step": 18631 }, { "epoch": 0.572358922372746, "grad_norm": 0.34515300393104553, "learning_rate": 1.625080839402172e-05, "loss": 0.5786, "step": 18632 }, { "epoch": 0.5723896415076951, "grad_norm": 0.354108989238739, "learning_rate": 1.625043115975502e-05, "loss": 0.5957, "step": 18633 }, { "epoch": 0.5724203606426443, "grad_norm": 0.40924009680747986, "learning_rate": 1.625005391089e-05, "loss": 0.6134, "step": 18634 }, { "epoch": 0.5724510797775935, "grad_norm": 0.3657691478729248, "learning_rate": 1.6249676647427542e-05, "loss": 0.6134, "step": 18635 }, { "epoch": 0.5724817989125426, "grad_norm": 0.39996910095214844, "learning_rate": 1.6249299369368528e-05, "loss": 0.6298, "step": 18636 }, { "epoch": 0.5725125180474918, "grad_norm": 0.4261462390422821, "learning_rate": 1.6248922076713843e-05, "loss": 0.6365, "step": 18637 }, { "epoch": 0.5725432371824409, "grad_norm": 0.3339628577232361, "learning_rate": 1.624854476946436e-05, "loss": 0.576, "step": 18638 }, { "epoch": 0.57257395631739, "grad_norm": 0.4541527330875397, "learning_rate": 1.6248167447620966e-05, "loss": 0.5776, "step": 18639 }, { "epoch": 0.5726046754523393, "grad_norm": 0.3371785581111908, "learning_rate": 1.624779011118454e-05, "loss": 0.5545, "step": 18640 }, { "epoch": 0.5726353945872884, "grad_norm": 0.32801690697669983, "learning_rate": 1.6247412760155962e-05, "loss": 0.5219, "step": 18641 }, { "epoch": 0.5726661137222376, "grad_norm": 0.39489197731018066, "learning_rate": 1.624703539453612e-05, "loss": 0.6733, "step": 18642 }, { "epoch": 0.5726968328571868, "grad_norm": 0.3575400710105896, "learning_rate": 1.6246658014325885e-05, "loss": 0.5752, "step": 18643 }, { "epoch": 0.5727275519921359, "grad_norm": 0.33559542894363403, "learning_rate": 1.6246280619526148e-05, "loss": 0.6283, "step": 18644 }, { "epoch": 0.5727582711270851, "grad_norm": 0.3501774072647095, "learning_rate": 1.6245903210137783e-05, "loss": 0.5664, "step": 18645 }, { "epoch": 0.5727889902620342, "grad_norm": 0.37980398535728455, "learning_rate": 1.624552578616168e-05, "loss": 0.5386, "step": 18646 }, { "epoch": 0.5728197093969833, "grad_norm": 0.3772927224636078, "learning_rate": 1.6245148347598712e-05, "loss": 0.4677, "step": 18647 }, { "epoch": 0.5728504285319326, "grad_norm": 0.35028988122940063, "learning_rate": 1.6244770894449762e-05, "loss": 0.524, "step": 18648 }, { "epoch": 0.5728811476668817, "grad_norm": 0.3794916272163391, "learning_rate": 1.6244393426715714e-05, "loss": 0.543, "step": 18649 }, { "epoch": 0.5729118668018308, "grad_norm": 0.4925001263618469, "learning_rate": 1.6244015944397453e-05, "loss": 0.6312, "step": 18650 }, { "epoch": 0.57294258593678, "grad_norm": 0.38604047894477844, "learning_rate": 1.6243638447495853e-05, "loss": 0.6122, "step": 18651 }, { "epoch": 0.5729733050717292, "grad_norm": 0.34329164028167725, "learning_rate": 1.62432609360118e-05, "loss": 0.5679, "step": 18652 }, { "epoch": 0.5730040242066783, "grad_norm": 0.3516058921813965, "learning_rate": 1.6242883409946176e-05, "loss": 0.5437, "step": 18653 }, { "epoch": 0.5730347433416275, "grad_norm": 0.32062873244285583, "learning_rate": 1.624250586929986e-05, "loss": 0.4946, "step": 18654 }, { "epoch": 0.5730654624765766, "grad_norm": 0.40793290734291077, "learning_rate": 1.6242128314073737e-05, "loss": 0.5873, "step": 18655 }, { "epoch": 0.5730961816115259, "grad_norm": 0.3274938762187958, "learning_rate": 1.6241750744268688e-05, "loss": 0.6076, "step": 18656 }, { "epoch": 0.573126900746475, "grad_norm": 0.3930993974208832, "learning_rate": 1.6241373159885593e-05, "loss": 0.5592, "step": 18657 }, { "epoch": 0.5731576198814241, "grad_norm": 0.3768773674964905, "learning_rate": 1.6240995560925335e-05, "loss": 0.5478, "step": 18658 }, { "epoch": 0.5731883390163733, "grad_norm": 0.38371172547340393, "learning_rate": 1.62406179473888e-05, "loss": 0.5955, "step": 18659 }, { "epoch": 0.5732190581513225, "grad_norm": 0.3587949872016907, "learning_rate": 1.6240240319276862e-05, "loss": 0.553, "step": 18660 }, { "epoch": 0.5732497772862716, "grad_norm": 0.34745723009109497, "learning_rate": 1.623986267659041e-05, "loss": 0.5764, "step": 18661 }, { "epoch": 0.5732804964212208, "grad_norm": 0.390171080827713, "learning_rate": 1.6239485019330317e-05, "loss": 0.5419, "step": 18662 }, { "epoch": 0.5733112155561699, "grad_norm": 0.3745688199996948, "learning_rate": 1.6239107347497473e-05, "loss": 0.5788, "step": 18663 }, { "epoch": 0.573341934691119, "grad_norm": 0.35593512654304504, "learning_rate": 1.6238729661092763e-05, "loss": 0.5301, "step": 18664 }, { "epoch": 0.5733726538260683, "grad_norm": 0.8453759551048279, "learning_rate": 1.623835196011706e-05, "loss": 0.6181, "step": 18665 }, { "epoch": 0.5734033729610174, "grad_norm": 0.36497762799263, "learning_rate": 1.623797424457125e-05, "loss": 0.58, "step": 18666 }, { "epoch": 0.5734340920959666, "grad_norm": 0.34396815299987793, "learning_rate": 1.623759651445622e-05, "loss": 0.5972, "step": 18667 }, { "epoch": 0.5734648112309157, "grad_norm": 0.43795397877693176, "learning_rate": 1.623721876977284e-05, "loss": 0.5768, "step": 18668 }, { "epoch": 0.5734955303658649, "grad_norm": 0.35937249660491943, "learning_rate": 1.6236841010522005e-05, "loss": 0.576, "step": 18669 }, { "epoch": 0.5735262495008141, "grad_norm": 0.3851487338542938, "learning_rate": 1.6236463236704592e-05, "loss": 0.5846, "step": 18670 }, { "epoch": 0.5735569686357632, "grad_norm": 0.3805106282234192, "learning_rate": 1.6236085448321482e-05, "loss": 0.5846, "step": 18671 }, { "epoch": 0.5735876877707123, "grad_norm": 0.3520281910896301, "learning_rate": 1.623570764537356e-05, "loss": 0.6039, "step": 18672 }, { "epoch": 0.5736184069056616, "grad_norm": 0.453770250082016, "learning_rate": 1.6235329827861708e-05, "loss": 0.5083, "step": 18673 }, { "epoch": 0.5736491260406107, "grad_norm": 0.3853272795677185, "learning_rate": 1.6234951995786806e-05, "loss": 0.4535, "step": 18674 }, { "epoch": 0.5736798451755598, "grad_norm": 0.33838319778442383, "learning_rate": 1.6234574149149736e-05, "loss": 0.6002, "step": 18675 }, { "epoch": 0.573710564310509, "grad_norm": 0.32554852962493896, "learning_rate": 1.623419628795139e-05, "loss": 0.4688, "step": 18676 }, { "epoch": 0.5737412834454582, "grad_norm": 0.36140841245651245, "learning_rate": 1.6233818412192635e-05, "loss": 0.5517, "step": 18677 }, { "epoch": 0.5737720025804073, "grad_norm": 0.37955453991889954, "learning_rate": 1.6233440521874365e-05, "loss": 0.5802, "step": 18678 }, { "epoch": 0.5738027217153565, "grad_norm": 0.34169620275497437, "learning_rate": 1.6233062616997462e-05, "loss": 0.5281, "step": 18679 }, { "epoch": 0.5738334408503056, "grad_norm": 0.3528349995613098, "learning_rate": 1.6232684697562807e-05, "loss": 0.6387, "step": 18680 }, { "epoch": 0.5738641599852549, "grad_norm": 0.3304068446159363, "learning_rate": 1.6232306763571278e-05, "loss": 0.579, "step": 18681 }, { "epoch": 0.573894879120204, "grad_norm": 0.40643003582954407, "learning_rate": 1.623192881502376e-05, "loss": 0.5901, "step": 18682 }, { "epoch": 0.5739255982551531, "grad_norm": 0.5732483863830566, "learning_rate": 1.623155085192114e-05, "loss": 0.5694, "step": 18683 }, { "epoch": 0.5739563173901023, "grad_norm": 0.3250564634799957, "learning_rate": 1.6231172874264296e-05, "loss": 0.5273, "step": 18684 }, { "epoch": 0.5739870365250515, "grad_norm": 0.3352609872817993, "learning_rate": 1.623079488205411e-05, "loss": 0.5767, "step": 18685 }, { "epoch": 0.5740177556600006, "grad_norm": 0.3382619321346283, "learning_rate": 1.6230416875291474e-05, "loss": 0.5032, "step": 18686 }, { "epoch": 0.5740484747949498, "grad_norm": 0.39252081513404846, "learning_rate": 1.623003885397726e-05, "loss": 0.6051, "step": 18687 }, { "epoch": 0.5740791939298989, "grad_norm": 0.4138123691082001, "learning_rate": 1.6229660818112354e-05, "loss": 0.5577, "step": 18688 }, { "epoch": 0.574109913064848, "grad_norm": 0.33530667424201965, "learning_rate": 1.6229282767697644e-05, "loss": 0.5466, "step": 18689 }, { "epoch": 0.5741406321997973, "grad_norm": 0.3418010175228119, "learning_rate": 1.6228904702734007e-05, "loss": 0.5607, "step": 18690 }, { "epoch": 0.5741713513347464, "grad_norm": 0.36101147532463074, "learning_rate": 1.6228526623222326e-05, "loss": 0.5445, "step": 18691 }, { "epoch": 0.5742020704696956, "grad_norm": 0.3940165638923645, "learning_rate": 1.622814852916349e-05, "loss": 0.5857, "step": 18692 }, { "epoch": 0.5742327896046447, "grad_norm": 0.39226025342941284, "learning_rate": 1.622777042055838e-05, "loss": 0.6096, "step": 18693 }, { "epoch": 0.5742635087395939, "grad_norm": 0.3640338182449341, "learning_rate": 1.6227392297407872e-05, "loss": 0.5227, "step": 18694 }, { "epoch": 0.5742942278745431, "grad_norm": 0.3476230502128601, "learning_rate": 1.6227014159712854e-05, "loss": 0.5257, "step": 18695 }, { "epoch": 0.5743249470094922, "grad_norm": 0.34315627813339233, "learning_rate": 1.6226636007474212e-05, "loss": 0.6103, "step": 18696 }, { "epoch": 0.5743556661444413, "grad_norm": 0.33580124378204346, "learning_rate": 1.6226257840692828e-05, "loss": 0.6139, "step": 18697 }, { "epoch": 0.5743863852793906, "grad_norm": 0.3489450216293335, "learning_rate": 1.622587965936958e-05, "loss": 0.5593, "step": 18698 }, { "epoch": 0.5744171044143397, "grad_norm": 0.33579131960868835, "learning_rate": 1.622550146350536e-05, "loss": 0.5641, "step": 18699 }, { "epoch": 0.5744478235492888, "grad_norm": 0.3397985100746155, "learning_rate": 1.6225123253101042e-05, "loss": 0.5931, "step": 18700 }, { "epoch": 0.574478542684238, "grad_norm": 0.36184966564178467, "learning_rate": 1.6224745028157517e-05, "loss": 0.5246, "step": 18701 }, { "epoch": 0.5745092618191872, "grad_norm": 0.3642197549343109, "learning_rate": 1.6224366788675664e-05, "loss": 0.6029, "step": 18702 }, { "epoch": 0.5745399809541364, "grad_norm": 0.32161563634872437, "learning_rate": 1.6223988534656368e-05, "loss": 0.5118, "step": 18703 }, { "epoch": 0.5745707000890855, "grad_norm": 0.37086978554725647, "learning_rate": 1.6223610266100512e-05, "loss": 0.5694, "step": 18704 }, { "epoch": 0.5746014192240346, "grad_norm": 0.3472151458263397, "learning_rate": 1.6223231983008978e-05, "loss": 0.6241, "step": 18705 }, { "epoch": 0.5746321383589839, "grad_norm": 0.3481108546257019, "learning_rate": 1.622285368538265e-05, "loss": 0.4542, "step": 18706 }, { "epoch": 0.574662857493933, "grad_norm": 0.3721689283847809, "learning_rate": 1.6222475373222416e-05, "loss": 0.6105, "step": 18707 }, { "epoch": 0.5746935766288821, "grad_norm": 0.3569241762161255, "learning_rate": 1.6222097046529156e-05, "loss": 0.4812, "step": 18708 }, { "epoch": 0.5747242957638313, "grad_norm": 0.3740188479423523, "learning_rate": 1.6221718705303752e-05, "loss": 0.5658, "step": 18709 }, { "epoch": 0.5747550148987804, "grad_norm": 0.43307948112487793, "learning_rate": 1.622134034954709e-05, "loss": 0.6029, "step": 18710 }, { "epoch": 0.5747857340337296, "grad_norm": 0.36401525139808655, "learning_rate": 1.6220961979260055e-05, "loss": 0.5472, "step": 18711 }, { "epoch": 0.5748164531686788, "grad_norm": 0.5649749040603638, "learning_rate": 1.622058359444353e-05, "loss": 0.484, "step": 18712 }, { "epoch": 0.5748471723036279, "grad_norm": 0.36324408650398254, "learning_rate": 1.6220205195098394e-05, "loss": 0.5597, "step": 18713 }, { "epoch": 0.574877891438577, "grad_norm": 0.34888529777526855, "learning_rate": 1.6219826781225536e-05, "loss": 0.5431, "step": 18714 }, { "epoch": 0.5749086105735263, "grad_norm": 0.33821412920951843, "learning_rate": 1.621944835282584e-05, "loss": 0.5242, "step": 18715 }, { "epoch": 0.5749393297084754, "grad_norm": 0.7103124260902405, "learning_rate": 1.6219069909900184e-05, "loss": 0.5343, "step": 18716 }, { "epoch": 0.5749700488434246, "grad_norm": 0.36167284846305847, "learning_rate": 1.621869145244946e-05, "loss": 0.5251, "step": 18717 }, { "epoch": 0.5750007679783737, "grad_norm": 0.3212277591228485, "learning_rate": 1.621831298047454e-05, "loss": 0.5418, "step": 18718 }, { "epoch": 0.5750314871133229, "grad_norm": 0.3764103949069977, "learning_rate": 1.6217934493976326e-05, "loss": 0.6094, "step": 18719 }, { "epoch": 0.5750622062482721, "grad_norm": 0.35084661841392517, "learning_rate": 1.6217555992955686e-05, "loss": 0.5404, "step": 18720 }, { "epoch": 0.5750929253832212, "grad_norm": 0.37007930874824524, "learning_rate": 1.621717747741351e-05, "loss": 0.6568, "step": 18721 }, { "epoch": 0.5751236445181703, "grad_norm": 0.3603229522705078, "learning_rate": 1.6216798947350683e-05, "loss": 0.5516, "step": 18722 }, { "epoch": 0.5751543636531196, "grad_norm": 0.4276076555252075, "learning_rate": 1.6216420402768087e-05, "loss": 0.6244, "step": 18723 }, { "epoch": 0.5751850827880687, "grad_norm": 0.35440686345100403, "learning_rate": 1.6216041843666605e-05, "loss": 0.5863, "step": 18724 }, { "epoch": 0.5752158019230178, "grad_norm": 0.3826444745063782, "learning_rate": 1.6215663270047124e-05, "loss": 0.5444, "step": 18725 }, { "epoch": 0.575246521057967, "grad_norm": 0.31441643834114075, "learning_rate": 1.6215284681910532e-05, "loss": 0.5238, "step": 18726 }, { "epoch": 0.5752772401929162, "grad_norm": 0.45617833733558655, "learning_rate": 1.6214906079257706e-05, "loss": 0.5156, "step": 18727 }, { "epoch": 0.5753079593278654, "grad_norm": 0.3434596061706543, "learning_rate": 1.621452746208953e-05, "loss": 0.5248, "step": 18728 }, { "epoch": 0.5753386784628145, "grad_norm": 0.3699931204319, "learning_rate": 1.6214148830406892e-05, "loss": 0.5651, "step": 18729 }, { "epoch": 0.5753693975977636, "grad_norm": 0.3360949158668518, "learning_rate": 1.6213770184210677e-05, "loss": 0.5683, "step": 18730 }, { "epoch": 0.5754001167327129, "grad_norm": 0.37315335869789124, "learning_rate": 1.621339152350177e-05, "loss": 0.6176, "step": 18731 }, { "epoch": 0.575430835867662, "grad_norm": 0.3956177830696106, "learning_rate": 1.6213012848281052e-05, "loss": 0.564, "step": 18732 }, { "epoch": 0.5754615550026111, "grad_norm": 0.49973708391189575, "learning_rate": 1.6212634158549407e-05, "loss": 0.6615, "step": 18733 }, { "epoch": 0.5754922741375603, "grad_norm": 0.31259554624557495, "learning_rate": 1.6212255454307718e-05, "loss": 0.532, "step": 18734 }, { "epoch": 0.5755229932725094, "grad_norm": 0.3788837492465973, "learning_rate": 1.6211876735556876e-05, "loss": 0.5292, "step": 18735 }, { "epoch": 0.5755537124074586, "grad_norm": 0.3642123341560364, "learning_rate": 1.6211498002297763e-05, "loss": 0.5683, "step": 18736 }, { "epoch": 0.5755844315424078, "grad_norm": 0.39181193709373474, "learning_rate": 1.6211119254531263e-05, "loss": 0.5117, "step": 18737 }, { "epoch": 0.5756151506773569, "grad_norm": 0.3465396761894226, "learning_rate": 1.6210740492258256e-05, "loss": 0.552, "step": 18738 }, { "epoch": 0.575645869812306, "grad_norm": 0.39285534620285034, "learning_rate": 1.6210361715479633e-05, "loss": 0.5591, "step": 18739 }, { "epoch": 0.5756765889472553, "grad_norm": 0.34312763810157776, "learning_rate": 1.620998292419628e-05, "loss": 0.491, "step": 18740 }, { "epoch": 0.5757073080822044, "grad_norm": 0.37707674503326416, "learning_rate": 1.6209604118409073e-05, "loss": 0.5798, "step": 18741 }, { "epoch": 0.5757380272171536, "grad_norm": 0.38113152980804443, "learning_rate": 1.6209225298118906e-05, "loss": 0.5516, "step": 18742 }, { "epoch": 0.5757687463521027, "grad_norm": 0.35153815150260925, "learning_rate": 1.6208846463326655e-05, "loss": 0.5533, "step": 18743 }, { "epoch": 0.5757994654870519, "grad_norm": 0.35808083415031433, "learning_rate": 1.6208467614033213e-05, "loss": 0.6152, "step": 18744 }, { "epoch": 0.5758301846220011, "grad_norm": 0.36403751373291016, "learning_rate": 1.620808875023946e-05, "loss": 0.6289, "step": 18745 }, { "epoch": 0.5758609037569502, "grad_norm": 0.34637418389320374, "learning_rate": 1.6207709871946282e-05, "loss": 0.5449, "step": 18746 }, { "epoch": 0.5758916228918993, "grad_norm": 0.34893879294395447, "learning_rate": 1.6207330979154565e-05, "loss": 0.5594, "step": 18747 }, { "epoch": 0.5759223420268486, "grad_norm": 0.4037153720855713, "learning_rate": 1.620695207186519e-05, "loss": 0.5995, "step": 18748 }, { "epoch": 0.5759530611617977, "grad_norm": 0.39426353573799133, "learning_rate": 1.620657315007905e-05, "loss": 0.5881, "step": 18749 }, { "epoch": 0.5759837802967468, "grad_norm": 0.3705660104751587, "learning_rate": 1.6206194213797022e-05, "loss": 0.5981, "step": 18750 }, { "epoch": 0.576014499431696, "grad_norm": 0.37692415714263916, "learning_rate": 1.6205815263019994e-05, "loss": 0.5755, "step": 18751 }, { "epoch": 0.5760452185666451, "grad_norm": 0.34310707449913025, "learning_rate": 1.6205436297748853e-05, "loss": 0.5729, "step": 18752 }, { "epoch": 0.5760759377015944, "grad_norm": 0.36306455731391907, "learning_rate": 1.620505731798448e-05, "loss": 0.5911, "step": 18753 }, { "epoch": 0.5761066568365435, "grad_norm": 0.41170477867126465, "learning_rate": 1.6204678323727763e-05, "loss": 0.5847, "step": 18754 }, { "epoch": 0.5761373759714926, "grad_norm": 0.3478248715400696, "learning_rate": 1.6204299314979587e-05, "loss": 0.5105, "step": 18755 }, { "epoch": 0.5761680951064418, "grad_norm": 0.33027487993240356, "learning_rate": 1.6203920291740835e-05, "loss": 0.5792, "step": 18756 }, { "epoch": 0.576198814241391, "grad_norm": 0.3462214767932892, "learning_rate": 1.6203541254012395e-05, "loss": 0.5829, "step": 18757 }, { "epoch": 0.5762295333763401, "grad_norm": 0.3485485315322876, "learning_rate": 1.620316220179515e-05, "loss": 0.6103, "step": 18758 }, { "epoch": 0.5762602525112893, "grad_norm": 0.37667056918144226, "learning_rate": 1.620278313508999e-05, "loss": 0.4725, "step": 18759 }, { "epoch": 0.5762909716462384, "grad_norm": 0.3620540201663971, "learning_rate": 1.6202404053897794e-05, "loss": 0.5426, "step": 18760 }, { "epoch": 0.5763216907811876, "grad_norm": 0.3446927070617676, "learning_rate": 1.620202495821945e-05, "loss": 0.5768, "step": 18761 }, { "epoch": 0.5763524099161368, "grad_norm": 0.385452538728714, "learning_rate": 1.6201645848055845e-05, "loss": 0.5874, "step": 18762 }, { "epoch": 0.5763831290510859, "grad_norm": 0.35888656973838806, "learning_rate": 1.620126672340786e-05, "loss": 0.5078, "step": 18763 }, { "epoch": 0.576413848186035, "grad_norm": 0.37871697545051575, "learning_rate": 1.6200887584276385e-05, "loss": 0.5425, "step": 18764 }, { "epoch": 0.5764445673209843, "grad_norm": 0.32593321800231934, "learning_rate": 1.6200508430662305e-05, "loss": 0.6552, "step": 18765 }, { "epoch": 0.5764752864559334, "grad_norm": 0.696624755859375, "learning_rate": 1.6200129262566506e-05, "loss": 0.5027, "step": 18766 }, { "epoch": 0.5765060055908826, "grad_norm": 0.3338969051837921, "learning_rate": 1.6199750079989868e-05, "loss": 0.5333, "step": 18767 }, { "epoch": 0.5765367247258317, "grad_norm": 0.3524090349674225, "learning_rate": 1.6199370882933284e-05, "loss": 0.5563, "step": 18768 }, { "epoch": 0.5765674438607808, "grad_norm": 0.3561571538448334, "learning_rate": 1.6198991671397634e-05, "loss": 0.5719, "step": 18769 }, { "epoch": 0.5765981629957301, "grad_norm": 0.31751763820648193, "learning_rate": 1.619861244538381e-05, "loss": 0.5776, "step": 18770 }, { "epoch": 0.5766288821306792, "grad_norm": 0.3544142544269562, "learning_rate": 1.619823320489269e-05, "loss": 0.5745, "step": 18771 }, { "epoch": 0.5766596012656283, "grad_norm": 0.3811413645744324, "learning_rate": 1.6197853949925164e-05, "loss": 0.543, "step": 18772 }, { "epoch": 0.5766903204005775, "grad_norm": 0.32830533385276794, "learning_rate": 1.6197474680482117e-05, "loss": 0.4842, "step": 18773 }, { "epoch": 0.5767210395355267, "grad_norm": 0.41295990347862244, "learning_rate": 1.6197095396564435e-05, "loss": 0.5897, "step": 18774 }, { "epoch": 0.5767517586704758, "grad_norm": 0.3483271896839142, "learning_rate": 1.6196716098173007e-05, "loss": 0.6099, "step": 18775 }, { "epoch": 0.576782477805425, "grad_norm": 0.4491732120513916, "learning_rate": 1.619633678530871e-05, "loss": 0.5792, "step": 18776 }, { "epoch": 0.5768131969403741, "grad_norm": 0.379768431186676, "learning_rate": 1.6195957457972444e-05, "loss": 0.6456, "step": 18777 }, { "epoch": 0.5768439160753234, "grad_norm": 0.35413846373558044, "learning_rate": 1.6195578116165078e-05, "loss": 0.531, "step": 18778 }, { "epoch": 0.5768746352102725, "grad_norm": 0.3800845742225647, "learning_rate": 1.6195198759887512e-05, "loss": 0.5754, "step": 18779 }, { "epoch": 0.5769053543452216, "grad_norm": 0.3535778820514679, "learning_rate": 1.6194819389140622e-05, "loss": 0.5248, "step": 18780 }, { "epoch": 0.5769360734801708, "grad_norm": 0.3578692376613617, "learning_rate": 1.61944400039253e-05, "loss": 0.54, "step": 18781 }, { "epoch": 0.57696679261512, "grad_norm": 0.3912733793258667, "learning_rate": 1.6194060604242437e-05, "loss": 0.5377, "step": 18782 }, { "epoch": 0.5769975117500691, "grad_norm": 0.313044935464859, "learning_rate": 1.6193681190092908e-05, "loss": 0.5145, "step": 18783 }, { "epoch": 0.5770282308850183, "grad_norm": 0.3722119927406311, "learning_rate": 1.61933017614776e-05, "loss": 0.5599, "step": 18784 }, { "epoch": 0.5770589500199674, "grad_norm": 0.33461326360702515, "learning_rate": 1.619292231839741e-05, "loss": 0.5119, "step": 18785 }, { "epoch": 0.5770896691549166, "grad_norm": 0.3662077784538269, "learning_rate": 1.6192542860853214e-05, "loss": 0.6783, "step": 18786 }, { "epoch": 0.5771203882898658, "grad_norm": 0.38602524995803833, "learning_rate": 1.6192163388845907e-05, "loss": 0.5986, "step": 18787 }, { "epoch": 0.5771511074248149, "grad_norm": 0.34127989411354065, "learning_rate": 1.6191783902376362e-05, "loss": 0.5785, "step": 18788 }, { "epoch": 0.577181826559764, "grad_norm": 0.32867735624313354, "learning_rate": 1.6191404401445478e-05, "loss": 0.6077, "step": 18789 }, { "epoch": 0.5772125456947133, "grad_norm": 0.40325626730918884, "learning_rate": 1.6191024886054133e-05, "loss": 0.5655, "step": 18790 }, { "epoch": 0.5772432648296624, "grad_norm": 0.3586740493774414, "learning_rate": 1.619064535620322e-05, "loss": 0.5332, "step": 18791 }, { "epoch": 0.5772739839646116, "grad_norm": 0.37204572558403015, "learning_rate": 1.6190265811893624e-05, "loss": 0.5588, "step": 18792 }, { "epoch": 0.5773047030995607, "grad_norm": 0.3496866822242737, "learning_rate": 1.6189886253126225e-05, "loss": 0.651, "step": 18793 }, { "epoch": 0.5773354222345098, "grad_norm": 0.33767223358154297, "learning_rate": 1.618950667990192e-05, "loss": 0.5814, "step": 18794 }, { "epoch": 0.5773661413694591, "grad_norm": 0.3795228898525238, "learning_rate": 1.6189127092221587e-05, "loss": 0.6123, "step": 18795 }, { "epoch": 0.5773968605044082, "grad_norm": 0.47575458884239197, "learning_rate": 1.6188747490086113e-05, "loss": 0.6661, "step": 18796 }, { "epoch": 0.5774275796393573, "grad_norm": 0.33075186610221863, "learning_rate": 1.618836787349639e-05, "loss": 0.5891, "step": 18797 }, { "epoch": 0.5774582987743065, "grad_norm": 0.4080420732498169, "learning_rate": 1.61879882424533e-05, "loss": 0.5587, "step": 18798 }, { "epoch": 0.5774890179092557, "grad_norm": 0.36786890029907227, "learning_rate": 1.6187608596957734e-05, "loss": 0.5069, "step": 18799 }, { "epoch": 0.5775197370442048, "grad_norm": 0.3230457007884979, "learning_rate": 1.6187228937010574e-05, "loss": 0.5427, "step": 18800 }, { "epoch": 0.577550456179154, "grad_norm": 0.39028722047805786, "learning_rate": 1.618684926261271e-05, "loss": 0.5863, "step": 18801 }, { "epoch": 0.5775811753141031, "grad_norm": 0.36895135045051575, "learning_rate": 1.6186469573765024e-05, "loss": 0.5286, "step": 18802 }, { "epoch": 0.5776118944490524, "grad_norm": 0.32970213890075684, "learning_rate": 1.6186089870468408e-05, "loss": 0.5107, "step": 18803 }, { "epoch": 0.5776426135840015, "grad_norm": 0.3333958685398102, "learning_rate": 1.6185710152723748e-05, "loss": 0.5754, "step": 18804 }, { "epoch": 0.5776733327189506, "grad_norm": 0.44436973333358765, "learning_rate": 1.6185330420531934e-05, "loss": 0.5812, "step": 18805 }, { "epoch": 0.5777040518538998, "grad_norm": 0.3294956088066101, "learning_rate": 1.618495067389384e-05, "loss": 0.6528, "step": 18806 }, { "epoch": 0.577734770988849, "grad_norm": 0.3259119689464569, "learning_rate": 1.618457091281037e-05, "loss": 0.5942, "step": 18807 }, { "epoch": 0.5777654901237981, "grad_norm": 0.3551010489463806, "learning_rate": 1.6184191137282396e-05, "loss": 0.4964, "step": 18808 }, { "epoch": 0.5777962092587473, "grad_norm": 0.3533056378364563, "learning_rate": 1.6183811347310816e-05, "loss": 0.5695, "step": 18809 }, { "epoch": 0.5778269283936964, "grad_norm": 0.36802375316619873, "learning_rate": 1.6183431542896514e-05, "loss": 0.6048, "step": 18810 }, { "epoch": 0.5778576475286455, "grad_norm": 0.3264409303665161, "learning_rate": 1.6183051724040372e-05, "loss": 0.563, "step": 18811 }, { "epoch": 0.5778883666635948, "grad_norm": 0.34600958228111267, "learning_rate": 1.6182671890743284e-05, "loss": 0.6413, "step": 18812 }, { "epoch": 0.5779190857985439, "grad_norm": 0.3268650770187378, "learning_rate": 1.618229204300613e-05, "loss": 0.5851, "step": 18813 }, { "epoch": 0.5779498049334931, "grad_norm": 0.34858980774879456, "learning_rate": 1.6181912180829802e-05, "loss": 0.5669, "step": 18814 }, { "epoch": 0.5779805240684422, "grad_norm": 0.3676437437534332, "learning_rate": 1.6181532304215188e-05, "loss": 0.5313, "step": 18815 }, { "epoch": 0.5780112432033914, "grad_norm": 0.3700445592403412, "learning_rate": 1.6181152413163172e-05, "loss": 0.602, "step": 18816 }, { "epoch": 0.5780419623383406, "grad_norm": 0.3302961587905884, "learning_rate": 1.618077250767464e-05, "loss": 0.5279, "step": 18817 }, { "epoch": 0.5780726814732897, "grad_norm": 0.5040359497070312, "learning_rate": 1.618039258775049e-05, "loss": 0.6057, "step": 18818 }, { "epoch": 0.5781034006082388, "grad_norm": 0.33811497688293457, "learning_rate": 1.61800126533916e-05, "loss": 0.5176, "step": 18819 }, { "epoch": 0.5781341197431881, "grad_norm": 0.33863353729248047, "learning_rate": 1.6179632704598856e-05, "loss": 0.612, "step": 18820 }, { "epoch": 0.5781648388781372, "grad_norm": 0.35844627022743225, "learning_rate": 1.6179252741373147e-05, "loss": 0.5473, "step": 18821 }, { "epoch": 0.5781955580130863, "grad_norm": 0.3673538565635681, "learning_rate": 1.6178872763715367e-05, "loss": 0.5145, "step": 18822 }, { "epoch": 0.5782262771480355, "grad_norm": 0.34238138794898987, "learning_rate": 1.6178492771626394e-05, "loss": 0.5803, "step": 18823 }, { "epoch": 0.5782569962829847, "grad_norm": 0.356260746717453, "learning_rate": 1.617811276510712e-05, "loss": 0.5408, "step": 18824 }, { "epoch": 0.5782877154179338, "grad_norm": 0.3031003475189209, "learning_rate": 1.617773274415843e-05, "loss": 0.5657, "step": 18825 }, { "epoch": 0.578318434552883, "grad_norm": 0.35450607538223267, "learning_rate": 1.6177352708781216e-05, "loss": 0.6304, "step": 18826 }, { "epoch": 0.5783491536878321, "grad_norm": 0.3741878271102905, "learning_rate": 1.6176972658976364e-05, "loss": 0.6123, "step": 18827 }, { "epoch": 0.5783798728227814, "grad_norm": 0.3748418688774109, "learning_rate": 1.617659259474476e-05, "loss": 0.6367, "step": 18828 }, { "epoch": 0.5784105919577305, "grad_norm": 0.4654434621334076, "learning_rate": 1.6176212516087294e-05, "loss": 0.5922, "step": 18829 }, { "epoch": 0.5784413110926796, "grad_norm": 0.33575382828712463, "learning_rate": 1.617583242300485e-05, "loss": 0.5619, "step": 18830 }, { "epoch": 0.5784720302276288, "grad_norm": 0.3497731685638428, "learning_rate": 1.6175452315498323e-05, "loss": 0.5261, "step": 18831 }, { "epoch": 0.578502749362578, "grad_norm": 0.3579937219619751, "learning_rate": 1.617507219356859e-05, "loss": 0.5941, "step": 18832 }, { "epoch": 0.5785334684975271, "grad_norm": 0.5210243463516235, "learning_rate": 1.6174692057216548e-05, "loss": 0.6277, "step": 18833 }, { "epoch": 0.5785641876324763, "grad_norm": 0.3668278157711029, "learning_rate": 1.6174311906443078e-05, "loss": 0.6036, "step": 18834 }, { "epoch": 0.5785949067674254, "grad_norm": 0.3548945188522339, "learning_rate": 1.6173931741249075e-05, "loss": 0.5723, "step": 18835 }, { "epoch": 0.5786256259023745, "grad_norm": 0.3330089747905731, "learning_rate": 1.617355156163542e-05, "loss": 0.5329, "step": 18836 }, { "epoch": 0.5786563450373238, "grad_norm": 0.369961142539978, "learning_rate": 1.6173171367603006e-05, "loss": 0.4428, "step": 18837 }, { "epoch": 0.5786870641722729, "grad_norm": 0.36173194646835327, "learning_rate": 1.617279115915272e-05, "loss": 0.5695, "step": 18838 }, { "epoch": 0.5787177833072221, "grad_norm": 0.4311367869377136, "learning_rate": 1.617241093628545e-05, "loss": 0.5593, "step": 18839 }, { "epoch": 0.5787485024421712, "grad_norm": 0.3472992777824402, "learning_rate": 1.617203069900208e-05, "loss": 0.5065, "step": 18840 }, { "epoch": 0.5787792215771204, "grad_norm": 0.34410610795021057, "learning_rate": 1.6171650447303503e-05, "loss": 0.5415, "step": 18841 }, { "epoch": 0.5788099407120696, "grad_norm": 0.40099573135375977, "learning_rate": 1.6171270181190607e-05, "loss": 0.6, "step": 18842 }, { "epoch": 0.5788406598470187, "grad_norm": 0.3751898407936096, "learning_rate": 1.6170889900664276e-05, "loss": 0.5445, "step": 18843 }, { "epoch": 0.5788713789819678, "grad_norm": 0.38884446024894714, "learning_rate": 1.61705096057254e-05, "loss": 0.6046, "step": 18844 }, { "epoch": 0.5789020981169171, "grad_norm": 0.31922009587287903, "learning_rate": 1.617012929637487e-05, "loss": 0.4967, "step": 18845 }, { "epoch": 0.5789328172518662, "grad_norm": 0.32105231285095215, "learning_rate": 1.616974897261357e-05, "loss": 0.5358, "step": 18846 }, { "epoch": 0.5789635363868153, "grad_norm": 0.40902188420295715, "learning_rate": 1.6169368634442393e-05, "loss": 0.6126, "step": 18847 }, { "epoch": 0.5789942555217645, "grad_norm": 0.3469145596027374, "learning_rate": 1.6168988281862222e-05, "loss": 0.5473, "step": 18848 }, { "epoch": 0.5790249746567137, "grad_norm": 0.37456271052360535, "learning_rate": 1.616860791487395e-05, "loss": 0.5737, "step": 18849 }, { "epoch": 0.5790556937916628, "grad_norm": 0.39213326573371887, "learning_rate": 1.6168227533478463e-05, "loss": 0.5595, "step": 18850 }, { "epoch": 0.579086412926612, "grad_norm": 0.42411842942237854, "learning_rate": 1.616784713767665e-05, "loss": 0.5145, "step": 18851 }, { "epoch": 0.5791171320615611, "grad_norm": 0.3456067144870758, "learning_rate": 1.61674667274694e-05, "loss": 0.536, "step": 18852 }, { "epoch": 0.5791478511965104, "grad_norm": 0.3453506827354431, "learning_rate": 1.6167086302857597e-05, "loss": 0.5398, "step": 18853 }, { "epoch": 0.5791785703314595, "grad_norm": 0.3491151034832001, "learning_rate": 1.6166705863842136e-05, "loss": 0.5264, "step": 18854 }, { "epoch": 0.5792092894664086, "grad_norm": 0.35424548387527466, "learning_rate": 1.6166325410423902e-05, "loss": 0.5147, "step": 18855 }, { "epoch": 0.5792400086013578, "grad_norm": 0.40120360255241394, "learning_rate": 1.6165944942603787e-05, "loss": 0.6242, "step": 18856 }, { "epoch": 0.579270727736307, "grad_norm": 0.3103204667568207, "learning_rate": 1.6165564460382676e-05, "loss": 0.5351, "step": 18857 }, { "epoch": 0.5793014468712561, "grad_norm": 0.35477447509765625, "learning_rate": 1.6165183963761456e-05, "loss": 0.5373, "step": 18858 }, { "epoch": 0.5793321660062053, "grad_norm": 0.4424416422843933, "learning_rate": 1.616480345274102e-05, "loss": 0.5326, "step": 18859 }, { "epoch": 0.5793628851411544, "grad_norm": 0.3362339437007904, "learning_rate": 1.6164422927322256e-05, "loss": 0.5682, "step": 18860 }, { "epoch": 0.5793936042761035, "grad_norm": 0.3626258373260498, "learning_rate": 1.6164042387506048e-05, "loss": 0.5776, "step": 18861 }, { "epoch": 0.5794243234110528, "grad_norm": 0.33086422085762024, "learning_rate": 1.6163661833293293e-05, "loss": 0.5348, "step": 18862 }, { "epoch": 0.5794550425460019, "grad_norm": 0.35275375843048096, "learning_rate": 1.616328126468487e-05, "loss": 0.5971, "step": 18863 }, { "epoch": 0.5794857616809511, "grad_norm": 0.3760519027709961, "learning_rate": 1.6162900681681678e-05, "loss": 0.6057, "step": 18864 }, { "epoch": 0.5795164808159002, "grad_norm": 0.385963499546051, "learning_rate": 1.61625200842846e-05, "loss": 0.5098, "step": 18865 }, { "epoch": 0.5795471999508494, "grad_norm": 0.3653593063354492, "learning_rate": 1.6162139472494523e-05, "loss": 0.5234, "step": 18866 }, { "epoch": 0.5795779190857986, "grad_norm": 0.37149664759635925, "learning_rate": 1.6161758846312343e-05, "loss": 0.5467, "step": 18867 }, { "epoch": 0.5796086382207477, "grad_norm": 0.5922569036483765, "learning_rate": 1.6161378205738943e-05, "loss": 0.6017, "step": 18868 }, { "epoch": 0.5796393573556968, "grad_norm": 0.3427928686141968, "learning_rate": 1.6160997550775217e-05, "loss": 0.5897, "step": 18869 }, { "epoch": 0.5796700764906461, "grad_norm": 0.40089136362075806, "learning_rate": 1.6160616881422047e-05, "loss": 0.6159, "step": 18870 }, { "epoch": 0.5797007956255952, "grad_norm": 0.41690593957901, "learning_rate": 1.616023619768033e-05, "loss": 0.5747, "step": 18871 }, { "epoch": 0.5797315147605443, "grad_norm": 0.3503185212612152, "learning_rate": 1.6159855499550946e-05, "loss": 0.6595, "step": 18872 }, { "epoch": 0.5797622338954935, "grad_norm": 0.3392617106437683, "learning_rate": 1.615947478703479e-05, "loss": 0.5658, "step": 18873 }, { "epoch": 0.5797929530304426, "grad_norm": 0.3437831997871399, "learning_rate": 1.6159094060132756e-05, "loss": 0.5416, "step": 18874 }, { "epoch": 0.5798236721653918, "grad_norm": 0.3289218246936798, "learning_rate": 1.615871331884572e-05, "loss": 0.5669, "step": 18875 }, { "epoch": 0.579854391300341, "grad_norm": 1.2300713062286377, "learning_rate": 1.6158332563174583e-05, "loss": 0.6635, "step": 18876 }, { "epoch": 0.5798851104352901, "grad_norm": 0.5557264089584351, "learning_rate": 1.615795179312023e-05, "loss": 0.474, "step": 18877 }, { "epoch": 0.5799158295702393, "grad_norm": 0.34251248836517334, "learning_rate": 1.615757100868355e-05, "loss": 0.6341, "step": 18878 }, { "epoch": 0.5799465487051885, "grad_norm": 0.3457905352115631, "learning_rate": 1.615719020986543e-05, "loss": 0.5604, "step": 18879 }, { "epoch": 0.5799772678401376, "grad_norm": 0.38809001445770264, "learning_rate": 1.6156809396666768e-05, "loss": 0.5922, "step": 18880 }, { "epoch": 0.5800079869750868, "grad_norm": 0.4572273790836334, "learning_rate": 1.6156428569088447e-05, "loss": 0.5492, "step": 18881 }, { "epoch": 0.5800387061100359, "grad_norm": 0.3456781208515167, "learning_rate": 1.6156047727131355e-05, "loss": 0.5896, "step": 18882 }, { "epoch": 0.5800694252449851, "grad_norm": 0.36042070388793945, "learning_rate": 1.6155666870796384e-05, "loss": 0.5628, "step": 18883 }, { "epoch": 0.5801001443799343, "grad_norm": 0.3679330348968506, "learning_rate": 1.615528600008442e-05, "loss": 0.5854, "step": 18884 }, { "epoch": 0.5801308635148834, "grad_norm": 0.3331673741340637, "learning_rate": 1.615490511499636e-05, "loss": 0.5282, "step": 18885 }, { "epoch": 0.5801615826498325, "grad_norm": 0.3532065451145172, "learning_rate": 1.6154524215533088e-05, "loss": 0.5361, "step": 18886 }, { "epoch": 0.5801923017847818, "grad_norm": 0.37271955609321594, "learning_rate": 1.6154143301695497e-05, "loss": 0.5825, "step": 18887 }, { "epoch": 0.5802230209197309, "grad_norm": 0.34911587834358215, "learning_rate": 1.615376237348447e-05, "loss": 0.5225, "step": 18888 }, { "epoch": 0.5802537400546801, "grad_norm": 0.38360849022865295, "learning_rate": 1.6153381430900905e-05, "loss": 0.5249, "step": 18889 }, { "epoch": 0.5802844591896292, "grad_norm": 0.334821879863739, "learning_rate": 1.6153000473945687e-05, "loss": 0.5208, "step": 18890 }, { "epoch": 0.5803151783245784, "grad_norm": 0.3743862509727478, "learning_rate": 1.6152619502619705e-05, "loss": 0.5757, "step": 18891 }, { "epoch": 0.5803458974595276, "grad_norm": 0.3756842017173767, "learning_rate": 1.615223851692385e-05, "loss": 0.5225, "step": 18892 }, { "epoch": 0.5803766165944767, "grad_norm": 0.4077881872653961, "learning_rate": 1.6151857516859016e-05, "loss": 0.5227, "step": 18893 }, { "epoch": 0.5804073357294258, "grad_norm": 0.34889301657676697, "learning_rate": 1.6151476502426087e-05, "loss": 0.5081, "step": 18894 }, { "epoch": 0.580438054864375, "grad_norm": 0.3917873799800873, "learning_rate": 1.615109547362595e-05, "loss": 0.6005, "step": 18895 }, { "epoch": 0.5804687739993242, "grad_norm": 0.38822609186172485, "learning_rate": 1.6150714430459507e-05, "loss": 0.6095, "step": 18896 }, { "epoch": 0.5804994931342733, "grad_norm": 0.33713874220848083, "learning_rate": 1.615033337292764e-05, "loss": 0.5759, "step": 18897 }, { "epoch": 0.5805302122692225, "grad_norm": 0.3814067244529724, "learning_rate": 1.614995230103124e-05, "loss": 0.5229, "step": 18898 }, { "epoch": 0.5805609314041716, "grad_norm": 0.6469272375106812, "learning_rate": 1.6149571214771197e-05, "loss": 0.5529, "step": 18899 }, { "epoch": 0.5805916505391209, "grad_norm": 0.3742387592792511, "learning_rate": 1.61491901141484e-05, "loss": 0.6958, "step": 18900 }, { "epoch": 0.58062236967407, "grad_norm": 0.4063176214694977, "learning_rate": 1.614880899916374e-05, "loss": 0.6893, "step": 18901 }, { "epoch": 0.5806530888090191, "grad_norm": 0.3602615296840668, "learning_rate": 1.6148427869818103e-05, "loss": 0.5353, "step": 18902 }, { "epoch": 0.5806838079439683, "grad_norm": 0.36213287711143494, "learning_rate": 1.6148046726112388e-05, "loss": 0.5942, "step": 18903 }, { "epoch": 0.5807145270789175, "grad_norm": 0.3511231243610382, "learning_rate": 1.6147665568047483e-05, "loss": 0.5319, "step": 18904 }, { "epoch": 0.5807452462138666, "grad_norm": 0.3646296560764313, "learning_rate": 1.6147284395624268e-05, "loss": 0.6071, "step": 18905 }, { "epoch": 0.5807759653488158, "grad_norm": 0.3375126123428345, "learning_rate": 1.6146903208843645e-05, "loss": 0.5318, "step": 18906 }, { "epoch": 0.5808066844837649, "grad_norm": 0.4156622290611267, "learning_rate": 1.61465220077065e-05, "loss": 0.5428, "step": 18907 }, { "epoch": 0.580837403618714, "grad_norm": 0.38175061345100403, "learning_rate": 1.6146140792213724e-05, "loss": 0.6388, "step": 18908 }, { "epoch": 0.5808681227536633, "grad_norm": 0.3644830584526062, "learning_rate": 1.614575956236621e-05, "loss": 0.5643, "step": 18909 }, { "epoch": 0.5808988418886124, "grad_norm": 0.3547399938106537, "learning_rate": 1.6145378318164843e-05, "loss": 0.5593, "step": 18910 }, { "epoch": 0.5809295610235615, "grad_norm": 0.3944936990737915, "learning_rate": 1.6144997059610513e-05, "loss": 0.5601, "step": 18911 }, { "epoch": 0.5809602801585108, "grad_norm": 0.3314049541950226, "learning_rate": 1.6144615786704115e-05, "loss": 0.5055, "step": 18912 }, { "epoch": 0.5809909992934599, "grad_norm": 0.37169530987739563, "learning_rate": 1.6144234499446537e-05, "loss": 0.562, "step": 18913 }, { "epoch": 0.5810217184284091, "grad_norm": 0.3556937575340271, "learning_rate": 1.614385319783867e-05, "loss": 0.6693, "step": 18914 }, { "epoch": 0.5810524375633582, "grad_norm": 0.3831900358200073, "learning_rate": 1.6143471881881407e-05, "loss": 0.5688, "step": 18915 }, { "epoch": 0.5810831566983073, "grad_norm": 0.329112708568573, "learning_rate": 1.6143090551575635e-05, "loss": 0.5015, "step": 18916 }, { "epoch": 0.5811138758332566, "grad_norm": 0.3324778378009796, "learning_rate": 1.6142709206922246e-05, "loss": 0.5809, "step": 18917 }, { "epoch": 0.5811445949682057, "grad_norm": 0.43133509159088135, "learning_rate": 1.6142327847922132e-05, "loss": 0.5364, "step": 18918 }, { "epoch": 0.5811753141031548, "grad_norm": 0.39036184549331665, "learning_rate": 1.6141946474576183e-05, "loss": 0.5772, "step": 18919 }, { "epoch": 0.581206033238104, "grad_norm": 0.314384788274765, "learning_rate": 1.6141565086885288e-05, "loss": 0.5415, "step": 18920 }, { "epoch": 0.5812367523730532, "grad_norm": 0.3623393476009369, "learning_rate": 1.6141183684850337e-05, "loss": 0.5912, "step": 18921 }, { "epoch": 0.5812674715080023, "grad_norm": 0.3768017292022705, "learning_rate": 1.6140802268472224e-05, "loss": 0.5676, "step": 18922 }, { "epoch": 0.5812981906429515, "grad_norm": 0.3708040714263916, "learning_rate": 1.6140420837751838e-05, "loss": 0.5178, "step": 18923 }, { "epoch": 0.5813289097779006, "grad_norm": 0.45041000843048096, "learning_rate": 1.614003939269007e-05, "loss": 0.5158, "step": 18924 }, { "epoch": 0.5813596289128499, "grad_norm": 0.37570178508758545, "learning_rate": 1.613965793328781e-05, "loss": 0.6378, "step": 18925 }, { "epoch": 0.581390348047799, "grad_norm": 0.34976744651794434, "learning_rate": 1.6139276459545953e-05, "loss": 0.5611, "step": 18926 }, { "epoch": 0.5814210671827481, "grad_norm": 0.4059438407421112, "learning_rate": 1.6138894971465386e-05, "loss": 0.6385, "step": 18927 }, { "epoch": 0.5814517863176973, "grad_norm": 0.36650049686431885, "learning_rate": 1.6138513469047002e-05, "loss": 0.5607, "step": 18928 }, { "epoch": 0.5814825054526465, "grad_norm": 0.3687934875488281, "learning_rate": 1.613813195229169e-05, "loss": 0.6127, "step": 18929 }, { "epoch": 0.5815132245875956, "grad_norm": 0.3556056320667267, "learning_rate": 1.613775042120034e-05, "loss": 0.5258, "step": 18930 }, { "epoch": 0.5815439437225448, "grad_norm": 0.3405482769012451, "learning_rate": 1.6137368875773847e-05, "loss": 0.5908, "step": 18931 }, { "epoch": 0.5815746628574939, "grad_norm": 0.32699477672576904, "learning_rate": 1.61369873160131e-05, "loss": 0.4733, "step": 18932 }, { "epoch": 0.581605381992443, "grad_norm": 0.3423696458339691, "learning_rate": 1.6136605741918992e-05, "loss": 0.5243, "step": 18933 }, { "epoch": 0.5816361011273923, "grad_norm": 0.3569898307323456, "learning_rate": 1.613622415349241e-05, "loss": 0.6324, "step": 18934 }, { "epoch": 0.5816668202623414, "grad_norm": 0.32030242681503296, "learning_rate": 1.6135842550734253e-05, "loss": 0.5623, "step": 18935 }, { "epoch": 0.5816975393972905, "grad_norm": 0.3403686583042145, "learning_rate": 1.6135460933645406e-05, "loss": 0.5473, "step": 18936 }, { "epoch": 0.5817282585322398, "grad_norm": 0.3277445137500763, "learning_rate": 1.6135079302226756e-05, "loss": 0.5803, "step": 18937 }, { "epoch": 0.5817589776671889, "grad_norm": 0.36106860637664795, "learning_rate": 1.6134697656479206e-05, "loss": 0.5937, "step": 18938 }, { "epoch": 0.5817896968021381, "grad_norm": 0.4025826156139374, "learning_rate": 1.6134315996403636e-05, "loss": 0.6442, "step": 18939 }, { "epoch": 0.5818204159370872, "grad_norm": 0.3456144630908966, "learning_rate": 1.6133934322000948e-05, "loss": 0.5313, "step": 18940 }, { "epoch": 0.5818511350720363, "grad_norm": 0.3459492623806, "learning_rate": 1.6133552633272026e-05, "loss": 0.5328, "step": 18941 }, { "epoch": 0.5818818542069856, "grad_norm": 0.35957154631614685, "learning_rate": 1.613317093021776e-05, "loss": 0.5298, "step": 18942 }, { "epoch": 0.5819125733419347, "grad_norm": 0.33445683121681213, "learning_rate": 1.613278921283905e-05, "loss": 0.607, "step": 18943 }, { "epoch": 0.5819432924768838, "grad_norm": 0.34493276476860046, "learning_rate": 1.6132407481136778e-05, "loss": 0.5441, "step": 18944 }, { "epoch": 0.581974011611833, "grad_norm": 0.34767770767211914, "learning_rate": 1.6132025735111844e-05, "loss": 0.5969, "step": 18945 }, { "epoch": 0.5820047307467822, "grad_norm": 0.32150211930274963, "learning_rate": 1.6131643974765134e-05, "loss": 0.5107, "step": 18946 }, { "epoch": 0.5820354498817313, "grad_norm": 0.39940693974494934, "learning_rate": 1.613126220009754e-05, "loss": 0.504, "step": 18947 }, { "epoch": 0.5820661690166805, "grad_norm": 0.3278278708457947, "learning_rate": 1.6130880411109955e-05, "loss": 0.5658, "step": 18948 }, { "epoch": 0.5820968881516296, "grad_norm": 0.38490453362464905, "learning_rate": 1.6130498607803272e-05, "loss": 0.5223, "step": 18949 }, { "epoch": 0.5821276072865789, "grad_norm": 0.3711799085140228, "learning_rate": 1.6130116790178382e-05, "loss": 0.5988, "step": 18950 }, { "epoch": 0.582158326421528, "grad_norm": 0.32307231426239014, "learning_rate": 1.6129734958236175e-05, "loss": 0.5903, "step": 18951 }, { "epoch": 0.5821890455564771, "grad_norm": 0.35098716616630554, "learning_rate": 1.6129353111977542e-05, "loss": 0.5689, "step": 18952 }, { "epoch": 0.5822197646914263, "grad_norm": 0.3820827603340149, "learning_rate": 1.6128971251403382e-05, "loss": 0.5667, "step": 18953 }, { "epoch": 0.5822504838263755, "grad_norm": 0.3602457344532013, "learning_rate": 1.6128589376514575e-05, "loss": 0.551, "step": 18954 }, { "epoch": 0.5822812029613246, "grad_norm": 0.39277252554893494, "learning_rate": 1.6128207487312026e-05, "loss": 0.5138, "step": 18955 }, { "epoch": 0.5823119220962738, "grad_norm": 0.40706920623779297, "learning_rate": 1.6127825583796615e-05, "loss": 0.6099, "step": 18956 }, { "epoch": 0.5823426412312229, "grad_norm": 0.38212189078330994, "learning_rate": 1.612744366596924e-05, "loss": 0.5387, "step": 18957 }, { "epoch": 0.582373360366172, "grad_norm": 0.3487694561481476, "learning_rate": 1.6127061733830795e-05, "loss": 0.5331, "step": 18958 }, { "epoch": 0.5824040795011213, "grad_norm": 0.3821548521518707, "learning_rate": 1.6126679787382168e-05, "loss": 0.6034, "step": 18959 }, { "epoch": 0.5824347986360704, "grad_norm": 0.33604681491851807, "learning_rate": 1.6126297826624254e-05, "loss": 0.5656, "step": 18960 }, { "epoch": 0.5824655177710195, "grad_norm": 0.3678250014781952, "learning_rate": 1.612591585155794e-05, "loss": 0.5449, "step": 18961 }, { "epoch": 0.5824962369059687, "grad_norm": 0.3517409861087799, "learning_rate": 1.6125533862184127e-05, "loss": 0.5042, "step": 18962 }, { "epoch": 0.5825269560409179, "grad_norm": 0.4236067831516266, "learning_rate": 1.6125151858503696e-05, "loss": 0.5829, "step": 18963 }, { "epoch": 0.5825576751758671, "grad_norm": 0.32931479811668396, "learning_rate": 1.612476984051755e-05, "loss": 0.5642, "step": 18964 }, { "epoch": 0.5825883943108162, "grad_norm": 0.31634753942489624, "learning_rate": 1.6124387808226575e-05, "loss": 0.474, "step": 18965 }, { "epoch": 0.5826191134457653, "grad_norm": 0.35424908995628357, "learning_rate": 1.6124005761631664e-05, "loss": 0.5646, "step": 18966 }, { "epoch": 0.5826498325807146, "grad_norm": 0.3520033657550812, "learning_rate": 1.6123623700733707e-05, "loss": 0.5553, "step": 18967 }, { "epoch": 0.5826805517156637, "grad_norm": 0.3246608078479767, "learning_rate": 1.6123241625533602e-05, "loss": 0.5702, "step": 18968 }, { "epoch": 0.5827112708506128, "grad_norm": 0.35558274388313293, "learning_rate": 1.612285953603224e-05, "loss": 0.5601, "step": 18969 }, { "epoch": 0.582741989985562, "grad_norm": 0.3763192892074585, "learning_rate": 1.6122477432230512e-05, "loss": 0.5546, "step": 18970 }, { "epoch": 0.5827727091205112, "grad_norm": 0.32860663533210754, "learning_rate": 1.6122095314129306e-05, "loss": 0.5405, "step": 18971 }, { "epoch": 0.5828034282554603, "grad_norm": 0.3427245616912842, "learning_rate": 1.6121713181729524e-05, "loss": 0.5015, "step": 18972 }, { "epoch": 0.5828341473904095, "grad_norm": 0.3535906970500946, "learning_rate": 1.6121331035032052e-05, "loss": 0.5506, "step": 18973 }, { "epoch": 0.5828648665253586, "grad_norm": 0.31702300906181335, "learning_rate": 1.6120948874037782e-05, "loss": 0.452, "step": 18974 }, { "epoch": 0.5828955856603079, "grad_norm": 0.4061774015426636, "learning_rate": 1.612056669874761e-05, "loss": 0.4999, "step": 18975 }, { "epoch": 0.582926304795257, "grad_norm": 0.39382776618003845, "learning_rate": 1.6120184509162426e-05, "loss": 0.5279, "step": 18976 }, { "epoch": 0.5829570239302061, "grad_norm": 0.3564707636833191, "learning_rate": 1.6119802305283122e-05, "loss": 0.5413, "step": 18977 }, { "epoch": 0.5829877430651553, "grad_norm": 0.35049471259117126, "learning_rate": 1.6119420087110594e-05, "loss": 0.6168, "step": 18978 }, { "epoch": 0.5830184622001044, "grad_norm": 0.3694213330745697, "learning_rate": 1.6119037854645734e-05, "loss": 0.5237, "step": 18979 }, { "epoch": 0.5830491813350536, "grad_norm": 0.3432502746582031, "learning_rate": 1.6118655607889433e-05, "loss": 0.5853, "step": 18980 }, { "epoch": 0.5830799004700028, "grad_norm": 0.46064865589141846, "learning_rate": 1.6118273346842584e-05, "loss": 0.5245, "step": 18981 }, { "epoch": 0.5831106196049519, "grad_norm": 0.330211341381073, "learning_rate": 1.611789107150608e-05, "loss": 0.5057, "step": 18982 }, { "epoch": 0.583141338739901, "grad_norm": 0.3600941300392151, "learning_rate": 1.6117508781880814e-05, "loss": 0.5862, "step": 18983 }, { "epoch": 0.5831720578748503, "grad_norm": 0.35416343808174133, "learning_rate": 1.611712647796768e-05, "loss": 0.509, "step": 18984 }, { "epoch": 0.5832027770097994, "grad_norm": 0.3909826874732971, "learning_rate": 1.6116744159767573e-05, "loss": 0.5536, "step": 18985 }, { "epoch": 0.5832334961447485, "grad_norm": 0.42188331484794617, "learning_rate": 1.611636182728138e-05, "loss": 0.5876, "step": 18986 }, { "epoch": 0.5832642152796977, "grad_norm": 0.3779236674308777, "learning_rate": 1.6115979480509992e-05, "loss": 0.5519, "step": 18987 }, { "epoch": 0.5832949344146469, "grad_norm": 0.3578627407550812, "learning_rate": 1.6115597119454313e-05, "loss": 0.5584, "step": 18988 }, { "epoch": 0.5833256535495961, "grad_norm": 0.3765947222709656, "learning_rate": 1.6115214744115228e-05, "loss": 0.5615, "step": 18989 }, { "epoch": 0.5833563726845452, "grad_norm": 0.377760112285614, "learning_rate": 1.6114832354493634e-05, "loss": 0.5837, "step": 18990 }, { "epoch": 0.5833870918194943, "grad_norm": 0.4344131648540497, "learning_rate": 1.611444995059042e-05, "loss": 0.5486, "step": 18991 }, { "epoch": 0.5834178109544436, "grad_norm": 0.3417031764984131, "learning_rate": 1.6114067532406478e-05, "loss": 0.5871, "step": 18992 }, { "epoch": 0.5834485300893927, "grad_norm": 0.35007360577583313, "learning_rate": 1.6113685099942712e-05, "loss": 0.5642, "step": 18993 }, { "epoch": 0.5834792492243418, "grad_norm": 0.40038278698921204, "learning_rate": 1.61133026532e-05, "loss": 0.5236, "step": 18994 }, { "epoch": 0.583509968359291, "grad_norm": 0.34329017996788025, "learning_rate": 1.6112920192179244e-05, "loss": 0.5919, "step": 18995 }, { "epoch": 0.5835406874942402, "grad_norm": 0.35686659812927246, "learning_rate": 1.611253771688134e-05, "loss": 0.595, "step": 18996 }, { "epoch": 0.5835714066291893, "grad_norm": 0.35144317150115967, "learning_rate": 1.6112155227307175e-05, "loss": 0.5142, "step": 18997 }, { "epoch": 0.5836021257641385, "grad_norm": 0.35510706901550293, "learning_rate": 1.6111772723457644e-05, "loss": 0.5517, "step": 18998 }, { "epoch": 0.5836328448990876, "grad_norm": 0.3650219440460205, "learning_rate": 1.611139020533364e-05, "loss": 0.5531, "step": 18999 }, { "epoch": 0.5836635640340369, "grad_norm": 0.3402097225189209, "learning_rate": 1.6111007672936056e-05, "loss": 0.493, "step": 19000 }, { "epoch": 0.583694283168986, "grad_norm": 0.3984505832195282, "learning_rate": 1.611062512626579e-05, "loss": 0.5156, "step": 19001 }, { "epoch": 0.5837250023039351, "grad_norm": 0.34309014678001404, "learning_rate": 1.6110242565323733e-05, "loss": 0.6032, "step": 19002 }, { "epoch": 0.5837557214388843, "grad_norm": 1.4810490608215332, "learning_rate": 1.6109859990110775e-05, "loss": 0.5682, "step": 19003 }, { "epoch": 0.5837864405738334, "grad_norm": 0.46906769275665283, "learning_rate": 1.6109477400627816e-05, "loss": 0.5337, "step": 19004 }, { "epoch": 0.5838171597087826, "grad_norm": 0.33647677302360535, "learning_rate": 1.610909479687574e-05, "loss": 0.5536, "step": 19005 }, { "epoch": 0.5838478788437318, "grad_norm": 0.32075753808021545, "learning_rate": 1.6108712178855452e-05, "loss": 0.5307, "step": 19006 }, { "epoch": 0.5838785979786809, "grad_norm": 0.32426828145980835, "learning_rate": 1.610832954656784e-05, "loss": 0.4726, "step": 19007 }, { "epoch": 0.58390931711363, "grad_norm": 0.36429303884506226, "learning_rate": 1.6107946900013795e-05, "loss": 0.3638, "step": 19008 }, { "epoch": 0.5839400362485793, "grad_norm": 0.3632923662662506, "learning_rate": 1.6107564239194213e-05, "loss": 0.5926, "step": 19009 }, { "epoch": 0.5839707553835284, "grad_norm": 0.34860867261886597, "learning_rate": 1.610718156410999e-05, "loss": 0.5226, "step": 19010 }, { "epoch": 0.5840014745184776, "grad_norm": 0.3156141936779022, "learning_rate": 1.610679887476202e-05, "loss": 0.5794, "step": 19011 }, { "epoch": 0.5840321936534267, "grad_norm": 0.32957735657691956, "learning_rate": 1.610641617115119e-05, "loss": 0.5328, "step": 19012 }, { "epoch": 0.5840629127883759, "grad_norm": 0.6255026459693909, "learning_rate": 1.61060334532784e-05, "loss": 0.5764, "step": 19013 }, { "epoch": 0.5840936319233251, "grad_norm": 0.4045165777206421, "learning_rate": 1.6105650721144543e-05, "loss": 0.5837, "step": 19014 }, { "epoch": 0.5841243510582742, "grad_norm": 0.3273485600948334, "learning_rate": 1.6105267974750516e-05, "loss": 0.5284, "step": 19015 }, { "epoch": 0.5841550701932233, "grad_norm": 0.32644984126091003, "learning_rate": 1.6104885214097204e-05, "loss": 0.5259, "step": 19016 }, { "epoch": 0.5841857893281726, "grad_norm": 0.38079768419265747, "learning_rate": 1.6104502439185506e-05, "loss": 0.4965, "step": 19017 }, { "epoch": 0.5842165084631217, "grad_norm": 0.3429318964481354, "learning_rate": 1.6104119650016315e-05, "loss": 0.5425, "step": 19018 }, { "epoch": 0.5842472275980708, "grad_norm": 0.30511152744293213, "learning_rate": 1.6103736846590533e-05, "loss": 0.515, "step": 19019 }, { "epoch": 0.58427794673302, "grad_norm": 0.3672880530357361, "learning_rate": 1.6103354028909045e-05, "loss": 0.6116, "step": 19020 }, { "epoch": 0.5843086658679691, "grad_norm": 0.3771500885486603, "learning_rate": 1.6102971196972747e-05, "loss": 0.5102, "step": 19021 }, { "epoch": 0.5843393850029183, "grad_norm": 0.3312995135784149, "learning_rate": 1.610258835078253e-05, "loss": 0.5425, "step": 19022 }, { "epoch": 0.5843701041378675, "grad_norm": 0.36316317319869995, "learning_rate": 1.6102205490339296e-05, "loss": 0.566, "step": 19023 }, { "epoch": 0.5844008232728166, "grad_norm": 0.571011483669281, "learning_rate": 1.6101822615643935e-05, "loss": 0.686, "step": 19024 }, { "epoch": 0.5844315424077658, "grad_norm": 0.3829183876514435, "learning_rate": 1.6101439726697336e-05, "loss": 0.5374, "step": 19025 }, { "epoch": 0.584462261542715, "grad_norm": 0.34561651945114136, "learning_rate": 1.6101056823500404e-05, "loss": 0.5436, "step": 19026 }, { "epoch": 0.5844929806776641, "grad_norm": 0.36981499195098877, "learning_rate": 1.6100673906054023e-05, "loss": 0.5887, "step": 19027 }, { "epoch": 0.5845236998126133, "grad_norm": 0.3518851697444916, "learning_rate": 1.6100290974359097e-05, "loss": 0.5665, "step": 19028 }, { "epoch": 0.5845544189475624, "grad_norm": 0.3686752915382385, "learning_rate": 1.6099908028416512e-05, "loss": 0.5925, "step": 19029 }, { "epoch": 0.5845851380825116, "grad_norm": 0.4019148051738739, "learning_rate": 1.6099525068227168e-05, "loss": 0.5956, "step": 19030 }, { "epoch": 0.5846158572174608, "grad_norm": 0.38044822216033936, "learning_rate": 1.6099142093791956e-05, "loss": 0.5818, "step": 19031 }, { "epoch": 0.5846465763524099, "grad_norm": 0.43839797377586365, "learning_rate": 1.609875910511177e-05, "loss": 0.6613, "step": 19032 }, { "epoch": 0.584677295487359, "grad_norm": 0.36122357845306396, "learning_rate": 1.609837610218751e-05, "loss": 0.5762, "step": 19033 }, { "epoch": 0.5847080146223083, "grad_norm": 0.6398884057998657, "learning_rate": 1.6097993085020063e-05, "loss": 0.5458, "step": 19034 }, { "epoch": 0.5847387337572574, "grad_norm": 0.5717496871948242, "learning_rate": 1.609761005361033e-05, "loss": 0.6293, "step": 19035 }, { "epoch": 0.5847694528922066, "grad_norm": 0.35525357723236084, "learning_rate": 1.60972270079592e-05, "loss": 0.5586, "step": 19036 }, { "epoch": 0.5848001720271557, "grad_norm": 0.324018269777298, "learning_rate": 1.609684394806757e-05, "loss": 0.6402, "step": 19037 }, { "epoch": 0.5848308911621048, "grad_norm": 0.36993589997291565, "learning_rate": 1.6096460873936338e-05, "loss": 0.5766, "step": 19038 }, { "epoch": 0.5848616102970541, "grad_norm": 0.3744373619556427, "learning_rate": 1.6096077785566396e-05, "loss": 0.5769, "step": 19039 }, { "epoch": 0.5848923294320032, "grad_norm": 0.33012309670448303, "learning_rate": 1.6095694682958636e-05, "loss": 0.5565, "step": 19040 }, { "epoch": 0.5849230485669523, "grad_norm": 0.32912009954452515, "learning_rate": 1.6095311566113957e-05, "loss": 0.5447, "step": 19041 }, { "epoch": 0.5849537677019016, "grad_norm": 0.37907981872558594, "learning_rate": 1.6094928435033256e-05, "loss": 0.5152, "step": 19042 }, { "epoch": 0.5849844868368507, "grad_norm": 0.369662880897522, "learning_rate": 1.6094545289717418e-05, "loss": 0.564, "step": 19043 }, { "epoch": 0.5850152059717998, "grad_norm": 0.34219950437545776, "learning_rate": 1.6094162130167346e-05, "loss": 0.6216, "step": 19044 }, { "epoch": 0.585045925106749, "grad_norm": 0.3508072793483734, "learning_rate": 1.6093778956383933e-05, "loss": 0.541, "step": 19045 }, { "epoch": 0.5850766442416981, "grad_norm": 0.4366646409034729, "learning_rate": 1.6093395768368073e-05, "loss": 0.665, "step": 19046 }, { "epoch": 0.5851073633766473, "grad_norm": 0.33086130023002625, "learning_rate": 1.6093012566120663e-05, "loss": 0.5197, "step": 19047 }, { "epoch": 0.5851380825115965, "grad_norm": 0.31960979104042053, "learning_rate": 1.6092629349642595e-05, "loss": 0.5761, "step": 19048 }, { "epoch": 0.5851688016465456, "grad_norm": 0.33051595091819763, "learning_rate": 1.6092246118934767e-05, "loss": 0.4663, "step": 19049 }, { "epoch": 0.5851995207814948, "grad_norm": 0.36196425557136536, "learning_rate": 1.6091862873998072e-05, "loss": 0.5794, "step": 19050 }, { "epoch": 0.585230239916444, "grad_norm": 0.6424837112426758, "learning_rate": 1.6091479614833406e-05, "loss": 0.6294, "step": 19051 }, { "epoch": 0.5852609590513931, "grad_norm": 0.38559800386428833, "learning_rate": 1.6091096341441663e-05, "loss": 0.56, "step": 19052 }, { "epoch": 0.5852916781863423, "grad_norm": 0.32812055945396423, "learning_rate": 1.609071305382374e-05, "loss": 0.566, "step": 19053 }, { "epoch": 0.5853223973212914, "grad_norm": 0.3585057854652405, "learning_rate": 1.6090329751980533e-05, "loss": 0.5641, "step": 19054 }, { "epoch": 0.5853531164562406, "grad_norm": 0.33682316541671753, "learning_rate": 1.6089946435912932e-05, "loss": 0.5765, "step": 19055 }, { "epoch": 0.5853838355911898, "grad_norm": 0.4531397223472595, "learning_rate": 1.608956310562184e-05, "loss": 0.5441, "step": 19056 }, { "epoch": 0.5854145547261389, "grad_norm": 0.38367101550102234, "learning_rate": 1.6089179761108144e-05, "loss": 0.5858, "step": 19057 }, { "epoch": 0.585445273861088, "grad_norm": 0.3378032147884369, "learning_rate": 1.6088796402372742e-05, "loss": 0.4711, "step": 19058 }, { "epoch": 0.5854759929960373, "grad_norm": 0.34020471572875977, "learning_rate": 1.6088413029416535e-05, "loss": 0.4874, "step": 19059 }, { "epoch": 0.5855067121309864, "grad_norm": 0.34142088890075684, "learning_rate": 1.6088029642240413e-05, "loss": 0.5398, "step": 19060 }, { "epoch": 0.5855374312659356, "grad_norm": 0.3250628709793091, "learning_rate": 1.608764624084527e-05, "loss": 0.5025, "step": 19061 }, { "epoch": 0.5855681504008847, "grad_norm": 0.4266628623008728, "learning_rate": 1.6087262825232007e-05, "loss": 0.5492, "step": 19062 }, { "epoch": 0.5855988695358338, "grad_norm": 0.35039564967155457, "learning_rate": 1.6086879395401518e-05, "loss": 0.5554, "step": 19063 }, { "epoch": 0.5856295886707831, "grad_norm": 0.3411007225513458, "learning_rate": 1.6086495951354696e-05, "loss": 0.5645, "step": 19064 }, { "epoch": 0.5856603078057322, "grad_norm": 0.3412201404571533, "learning_rate": 1.6086112493092435e-05, "loss": 0.5654, "step": 19065 }, { "epoch": 0.5856910269406813, "grad_norm": 0.48150551319122314, "learning_rate": 1.6085729020615634e-05, "loss": 0.6063, "step": 19066 }, { "epoch": 0.5857217460756305, "grad_norm": 0.35922932624816895, "learning_rate": 1.6085345533925192e-05, "loss": 0.5384, "step": 19067 }, { "epoch": 0.5857524652105797, "grad_norm": 0.48187899589538574, "learning_rate": 1.6084962033021998e-05, "loss": 0.6292, "step": 19068 }, { "epoch": 0.5857831843455288, "grad_norm": 0.37217429280281067, "learning_rate": 1.6084578517906947e-05, "loss": 0.601, "step": 19069 }, { "epoch": 0.585813903480478, "grad_norm": 0.43105393648147583, "learning_rate": 1.6084194988580943e-05, "loss": 0.5614, "step": 19070 }, { "epoch": 0.5858446226154271, "grad_norm": 0.36954379081726074, "learning_rate": 1.6083811445044873e-05, "loss": 0.5761, "step": 19071 }, { "epoch": 0.5858753417503763, "grad_norm": 0.3685623109340668, "learning_rate": 1.6083427887299636e-05, "loss": 0.5503, "step": 19072 }, { "epoch": 0.5859060608853255, "grad_norm": 0.35402241349220276, "learning_rate": 1.608304431534613e-05, "loss": 0.5381, "step": 19073 }, { "epoch": 0.5859367800202746, "grad_norm": 0.5184417963027954, "learning_rate": 1.6082660729185248e-05, "loss": 0.5912, "step": 19074 }, { "epoch": 0.5859674991552238, "grad_norm": 0.37338802218437195, "learning_rate": 1.608227712881789e-05, "loss": 0.5821, "step": 19075 }, { "epoch": 0.585998218290173, "grad_norm": 0.39683887362480164, "learning_rate": 1.6081893514244948e-05, "loss": 0.5734, "step": 19076 }, { "epoch": 0.5860289374251221, "grad_norm": 0.3625904321670532, "learning_rate": 1.6081509885467318e-05, "loss": 0.5201, "step": 19077 }, { "epoch": 0.5860596565600713, "grad_norm": 0.37982505559921265, "learning_rate": 1.6081126242485897e-05, "loss": 0.5012, "step": 19078 }, { "epoch": 0.5860903756950204, "grad_norm": 0.4268046021461487, "learning_rate": 1.608074258530158e-05, "loss": 0.6026, "step": 19079 }, { "epoch": 0.5861210948299695, "grad_norm": 0.3464762270450592, "learning_rate": 1.6080358913915266e-05, "loss": 0.6172, "step": 19080 }, { "epoch": 0.5861518139649188, "grad_norm": 0.39469924569129944, "learning_rate": 1.6079975228327848e-05, "loss": 0.6251, "step": 19081 }, { "epoch": 0.5861825330998679, "grad_norm": 0.3567555546760559, "learning_rate": 1.607959152854022e-05, "loss": 0.5507, "step": 19082 }, { "epoch": 0.586213252234817, "grad_norm": 0.3669044077396393, "learning_rate": 1.607920781455329e-05, "loss": 0.5761, "step": 19083 }, { "epoch": 0.5862439713697662, "grad_norm": 0.4852367639541626, "learning_rate": 1.607882408636794e-05, "loss": 0.6, "step": 19084 }, { "epoch": 0.5862746905047154, "grad_norm": 0.4034809470176697, "learning_rate": 1.607844034398507e-05, "loss": 0.5873, "step": 19085 }, { "epoch": 0.5863054096396646, "grad_norm": 0.34965547919273376, "learning_rate": 1.6078056587405583e-05, "loss": 0.6418, "step": 19086 }, { "epoch": 0.5863361287746137, "grad_norm": 0.38951101899147034, "learning_rate": 1.6077672816630367e-05, "loss": 0.6104, "step": 19087 }, { "epoch": 0.5863668479095628, "grad_norm": 0.3380931317806244, "learning_rate": 1.6077289031660322e-05, "loss": 0.5491, "step": 19088 }, { "epoch": 0.5863975670445121, "grad_norm": 0.36132800579071045, "learning_rate": 1.6076905232496343e-05, "loss": 0.5849, "step": 19089 }, { "epoch": 0.5864282861794612, "grad_norm": 0.34591689705848694, "learning_rate": 1.6076521419139328e-05, "loss": 0.6311, "step": 19090 }, { "epoch": 0.5864590053144103, "grad_norm": 0.34233900904655457, "learning_rate": 1.6076137591590173e-05, "loss": 0.5039, "step": 19091 }, { "epoch": 0.5864897244493595, "grad_norm": 0.3701699376106262, "learning_rate": 1.6075753749849775e-05, "loss": 0.5426, "step": 19092 }, { "epoch": 0.5865204435843087, "grad_norm": 0.3397296369075775, "learning_rate": 1.6075369893919027e-05, "loss": 0.5301, "step": 19093 }, { "epoch": 0.5865511627192578, "grad_norm": 0.4406123757362366, "learning_rate": 1.6074986023798833e-05, "loss": 0.608, "step": 19094 }, { "epoch": 0.586581881854207, "grad_norm": 0.3586703836917877, "learning_rate": 1.6074602139490083e-05, "loss": 0.5941, "step": 19095 }, { "epoch": 0.5866126009891561, "grad_norm": 0.36415624618530273, "learning_rate": 1.6074218240993676e-05, "loss": 0.6751, "step": 19096 }, { "epoch": 0.5866433201241052, "grad_norm": 0.5277320742607117, "learning_rate": 1.6073834328310507e-05, "loss": 0.5993, "step": 19097 }, { "epoch": 0.5866740392590545, "grad_norm": 0.3294694125652313, "learning_rate": 1.607345040144147e-05, "loss": 0.6167, "step": 19098 }, { "epoch": 0.5867047583940036, "grad_norm": 0.365997850894928, "learning_rate": 1.6073066460387473e-05, "loss": 0.5665, "step": 19099 }, { "epoch": 0.5867354775289528, "grad_norm": 0.3466934263706207, "learning_rate": 1.6072682505149403e-05, "loss": 0.6779, "step": 19100 }, { "epoch": 0.586766196663902, "grad_norm": 0.3510375916957855, "learning_rate": 1.6072298535728156e-05, "loss": 0.5749, "step": 19101 }, { "epoch": 0.5867969157988511, "grad_norm": 0.3861817717552185, "learning_rate": 1.6071914552124633e-05, "loss": 0.6305, "step": 19102 }, { "epoch": 0.5868276349338003, "grad_norm": 0.9334580302238464, "learning_rate": 1.607153055433973e-05, "loss": 0.6308, "step": 19103 }, { "epoch": 0.5868583540687494, "grad_norm": 0.3455761969089508, "learning_rate": 1.6071146542374344e-05, "loss": 0.5362, "step": 19104 }, { "epoch": 0.5868890732036985, "grad_norm": 0.44324833154678345, "learning_rate": 1.607076251622937e-05, "loss": 0.4975, "step": 19105 }, { "epoch": 0.5869197923386478, "grad_norm": 0.3366246521472931, "learning_rate": 1.6070378475905706e-05, "loss": 0.4941, "step": 19106 }, { "epoch": 0.5869505114735969, "grad_norm": 0.34425127506256104, "learning_rate": 1.606999442140425e-05, "loss": 0.5938, "step": 19107 }, { "epoch": 0.586981230608546, "grad_norm": 0.34955912828445435, "learning_rate": 1.60696103527259e-05, "loss": 0.5533, "step": 19108 }, { "epoch": 0.5870119497434952, "grad_norm": 0.38867515325546265, "learning_rate": 1.6069226269871547e-05, "loss": 0.6486, "step": 19109 }, { "epoch": 0.5870426688784444, "grad_norm": 0.34522056579589844, "learning_rate": 1.6068842172842093e-05, "loss": 0.6087, "step": 19110 }, { "epoch": 0.5870733880133936, "grad_norm": 0.3990427553653717, "learning_rate": 1.6068458061638437e-05, "loss": 0.6691, "step": 19111 }, { "epoch": 0.5871041071483427, "grad_norm": 0.3730733096599579, "learning_rate": 1.606807393626147e-05, "loss": 0.6424, "step": 19112 }, { "epoch": 0.5871348262832918, "grad_norm": 0.444256454706192, "learning_rate": 1.6067689796712094e-05, "loss": 0.6047, "step": 19113 }, { "epoch": 0.5871655454182411, "grad_norm": 0.33453741669654846, "learning_rate": 1.6067305642991206e-05, "loss": 0.518, "step": 19114 }, { "epoch": 0.5871962645531902, "grad_norm": 0.34972748160362244, "learning_rate": 1.60669214750997e-05, "loss": 0.5132, "step": 19115 }, { "epoch": 0.5872269836881393, "grad_norm": 0.3265587091445923, "learning_rate": 1.6066537293038476e-05, "loss": 0.653, "step": 19116 }, { "epoch": 0.5872577028230885, "grad_norm": 0.347818523645401, "learning_rate": 1.606615309680843e-05, "loss": 0.5971, "step": 19117 }, { "epoch": 0.5872884219580377, "grad_norm": 0.36993634700775146, "learning_rate": 1.6065768886410462e-05, "loss": 0.6109, "step": 19118 }, { "epoch": 0.5873191410929868, "grad_norm": 0.31703031063079834, "learning_rate": 1.6065384661845463e-05, "loss": 0.4957, "step": 19119 }, { "epoch": 0.587349860227936, "grad_norm": 0.3522335886955261, "learning_rate": 1.606500042311434e-05, "loss": 0.5455, "step": 19120 }, { "epoch": 0.5873805793628851, "grad_norm": 0.34472930431365967, "learning_rate": 1.6064616170217982e-05, "loss": 0.5877, "step": 19121 }, { "epoch": 0.5874112984978344, "grad_norm": 0.4011450707912445, "learning_rate": 1.606423190315729e-05, "loss": 0.5875, "step": 19122 }, { "epoch": 0.5874420176327835, "grad_norm": 0.3291957378387451, "learning_rate": 1.606384762193316e-05, "loss": 0.5181, "step": 19123 }, { "epoch": 0.5874727367677326, "grad_norm": 0.43271785974502563, "learning_rate": 1.606346332654649e-05, "loss": 0.5577, "step": 19124 }, { "epoch": 0.5875034559026818, "grad_norm": 0.3600936532020569, "learning_rate": 1.6063079016998177e-05, "loss": 0.5442, "step": 19125 }, { "epoch": 0.587534175037631, "grad_norm": 0.34845757484436035, "learning_rate": 1.606269469328912e-05, "loss": 0.5265, "step": 19126 }, { "epoch": 0.5875648941725801, "grad_norm": 0.3550962507724762, "learning_rate": 1.6062310355420216e-05, "loss": 0.5707, "step": 19127 }, { "epoch": 0.5875956133075293, "grad_norm": 0.36195915937423706, "learning_rate": 1.6061926003392364e-05, "loss": 0.6134, "step": 19128 }, { "epoch": 0.5876263324424784, "grad_norm": 0.3260451853275299, "learning_rate": 1.606154163720646e-05, "loss": 0.5395, "step": 19129 }, { "epoch": 0.5876570515774275, "grad_norm": 0.3350355327129364, "learning_rate": 1.6061157256863406e-05, "loss": 0.5073, "step": 19130 }, { "epoch": 0.5876877707123768, "grad_norm": 0.3599681258201599, "learning_rate": 1.6060772862364088e-05, "loss": 0.5746, "step": 19131 }, { "epoch": 0.5877184898473259, "grad_norm": 0.4266717731952667, "learning_rate": 1.6060388453709417e-05, "loss": 0.6299, "step": 19132 }, { "epoch": 0.587749208982275, "grad_norm": 0.3448694944381714, "learning_rate": 1.6060004030900283e-05, "loss": 0.5343, "step": 19133 }, { "epoch": 0.5877799281172242, "grad_norm": 0.44789358973503113, "learning_rate": 1.605961959393759e-05, "loss": 0.5668, "step": 19134 }, { "epoch": 0.5878106472521734, "grad_norm": 0.3688737750053406, "learning_rate": 1.6059235142822233e-05, "loss": 0.5276, "step": 19135 }, { "epoch": 0.5878413663871226, "grad_norm": 0.40513965487480164, "learning_rate": 1.6058850677555105e-05, "loss": 0.6592, "step": 19136 }, { "epoch": 0.5878720855220717, "grad_norm": 0.4523657262325287, "learning_rate": 1.605846619813711e-05, "loss": 0.6034, "step": 19137 }, { "epoch": 0.5879028046570208, "grad_norm": 0.36143743991851807, "learning_rate": 1.6058081704569144e-05, "loss": 0.5846, "step": 19138 }, { "epoch": 0.5879335237919701, "grad_norm": 0.3621264398097992, "learning_rate": 1.6057697196852106e-05, "loss": 0.6215, "step": 19139 }, { "epoch": 0.5879642429269192, "grad_norm": 0.33505383133888245, "learning_rate": 1.605731267498689e-05, "loss": 0.5923, "step": 19140 }, { "epoch": 0.5879949620618683, "grad_norm": 0.5563961267471313, "learning_rate": 1.60569281389744e-05, "loss": 0.4953, "step": 19141 }, { "epoch": 0.5880256811968175, "grad_norm": 0.3295603394508362, "learning_rate": 1.6056543588815528e-05, "loss": 0.521, "step": 19142 }, { "epoch": 0.5880564003317666, "grad_norm": 0.33285078406333923, "learning_rate": 1.6056159024511177e-05, "loss": 0.5866, "step": 19143 }, { "epoch": 0.5880871194667158, "grad_norm": 0.36311811208724976, "learning_rate": 1.6055774446062246e-05, "loss": 0.5551, "step": 19144 }, { "epoch": 0.588117838601665, "grad_norm": 0.37847599387168884, "learning_rate": 1.605538985346963e-05, "loss": 0.558, "step": 19145 }, { "epoch": 0.5881485577366141, "grad_norm": 0.42490532994270325, "learning_rate": 1.6055005246734228e-05, "loss": 0.5767, "step": 19146 }, { "epoch": 0.5881792768715634, "grad_norm": 0.32640963792800903, "learning_rate": 1.6054620625856936e-05, "loss": 0.5352, "step": 19147 }, { "epoch": 0.5882099960065125, "grad_norm": 0.3491404056549072, "learning_rate": 1.605423599083866e-05, "loss": 0.6439, "step": 19148 }, { "epoch": 0.5882407151414616, "grad_norm": 0.4169277250766754, "learning_rate": 1.6053851341680285e-05, "loss": 0.5455, "step": 19149 }, { "epoch": 0.5882714342764108, "grad_norm": 0.344644159078598, "learning_rate": 1.6053466678382723e-05, "loss": 0.5895, "step": 19150 }, { "epoch": 0.5883021534113599, "grad_norm": 0.3487234115600586, "learning_rate": 1.6053082000946866e-05, "loss": 0.5616, "step": 19151 }, { "epoch": 0.5883328725463091, "grad_norm": 0.3601098656654358, "learning_rate": 1.6052697309373612e-05, "loss": 0.5963, "step": 19152 }, { "epoch": 0.5883635916812583, "grad_norm": 0.38001981377601624, "learning_rate": 1.6052312603663863e-05, "loss": 0.5324, "step": 19153 }, { "epoch": 0.5883943108162074, "grad_norm": 0.4760858416557312, "learning_rate": 1.6051927883818513e-05, "loss": 0.6038, "step": 19154 }, { "epoch": 0.5884250299511565, "grad_norm": 0.3445979058742523, "learning_rate": 1.6051543149838463e-05, "loss": 0.575, "step": 19155 }, { "epoch": 0.5884557490861058, "grad_norm": 4.230015754699707, "learning_rate": 1.605115840172461e-05, "loss": 0.5299, "step": 19156 }, { "epoch": 0.5884864682210549, "grad_norm": 0.38857021927833557, "learning_rate": 1.605077363947786e-05, "loss": 0.6449, "step": 19157 }, { "epoch": 0.588517187356004, "grad_norm": 0.4077652096748352, "learning_rate": 1.60503888630991e-05, "loss": 0.6077, "step": 19158 }, { "epoch": 0.5885479064909532, "grad_norm": 0.3129008710384369, "learning_rate": 1.6050004072589234e-05, "loss": 0.6503, "step": 19159 }, { "epoch": 0.5885786256259024, "grad_norm": 0.3571508228778839, "learning_rate": 1.6049619267949164e-05, "loss": 0.5803, "step": 19160 }, { "epoch": 0.5886093447608516, "grad_norm": 0.395611435174942, "learning_rate": 1.6049234449179785e-05, "loss": 0.5839, "step": 19161 }, { "epoch": 0.5886400638958007, "grad_norm": 0.34334996342658997, "learning_rate": 1.6048849616281998e-05, "loss": 0.6517, "step": 19162 }, { "epoch": 0.5886707830307498, "grad_norm": 0.3403235673904419, "learning_rate": 1.6048464769256697e-05, "loss": 0.5242, "step": 19163 }, { "epoch": 0.588701502165699, "grad_norm": 0.30748116970062256, "learning_rate": 1.6048079908104784e-05, "loss": 0.4414, "step": 19164 }, { "epoch": 0.5887322213006482, "grad_norm": 0.42470091581344604, "learning_rate": 1.6047695032827157e-05, "loss": 0.5092, "step": 19165 }, { "epoch": 0.5887629404355973, "grad_norm": 0.4006432592868805, "learning_rate": 1.604731014342472e-05, "loss": 0.5312, "step": 19166 }, { "epoch": 0.5887936595705465, "grad_norm": 0.38505518436431885, "learning_rate": 1.6046925239898364e-05, "loss": 0.6367, "step": 19167 }, { "epoch": 0.5888243787054956, "grad_norm": 0.3136414885520935, "learning_rate": 1.6046540322248995e-05, "loss": 0.5282, "step": 19168 }, { "epoch": 0.5888550978404448, "grad_norm": 0.3501688241958618, "learning_rate": 1.604615539047751e-05, "loss": 0.5206, "step": 19169 }, { "epoch": 0.588885816975394, "grad_norm": 0.36391979455947876, "learning_rate": 1.60457704445848e-05, "loss": 0.594, "step": 19170 }, { "epoch": 0.5889165361103431, "grad_norm": 0.3559848964214325, "learning_rate": 1.6045385484571777e-05, "loss": 0.5658, "step": 19171 }, { "epoch": 0.5889472552452923, "grad_norm": 0.3706744313240051, "learning_rate": 1.6045000510439333e-05, "loss": 0.6279, "step": 19172 }, { "epoch": 0.5889779743802415, "grad_norm": 0.4004368185997009, "learning_rate": 1.6044615522188368e-05, "loss": 0.5581, "step": 19173 }, { "epoch": 0.5890086935151906, "grad_norm": 0.3403806686401367, "learning_rate": 1.604423051981978e-05, "loss": 0.5935, "step": 19174 }, { "epoch": 0.5890394126501398, "grad_norm": 0.3670117259025574, "learning_rate": 1.604384550333447e-05, "loss": 0.538, "step": 19175 }, { "epoch": 0.5890701317850889, "grad_norm": 0.33607217669487, "learning_rate": 1.604346047273334e-05, "loss": 0.5849, "step": 19176 }, { "epoch": 0.589100850920038, "grad_norm": 0.33237749338150024, "learning_rate": 1.6043075428017282e-05, "loss": 0.5591, "step": 19177 }, { "epoch": 0.5891315700549873, "grad_norm": 0.3990505635738373, "learning_rate": 1.60426903691872e-05, "loss": 0.6165, "step": 19178 }, { "epoch": 0.5891622891899364, "grad_norm": 0.4320217967033386, "learning_rate": 1.604230529624399e-05, "loss": 0.5332, "step": 19179 }, { "epoch": 0.5891930083248855, "grad_norm": 0.3693620562553406, "learning_rate": 1.604192020918856e-05, "loss": 0.6275, "step": 19180 }, { "epoch": 0.5892237274598348, "grad_norm": 0.3757578134536743, "learning_rate": 1.6041535108021798e-05, "loss": 0.5562, "step": 19181 }, { "epoch": 0.5892544465947839, "grad_norm": 0.3804378807544708, "learning_rate": 1.6041149992744612e-05, "loss": 0.5933, "step": 19182 }, { "epoch": 0.589285165729733, "grad_norm": 0.4815181791782379, "learning_rate": 1.6040764863357897e-05, "loss": 0.5288, "step": 19183 }, { "epoch": 0.5893158848646822, "grad_norm": 0.340792179107666, "learning_rate": 1.604037971986255e-05, "loss": 0.6009, "step": 19184 }, { "epoch": 0.5893466039996313, "grad_norm": 0.3687705099582672, "learning_rate": 1.6039994562259484e-05, "loss": 0.5459, "step": 19185 }, { "epoch": 0.5893773231345806, "grad_norm": 0.38099876046180725, "learning_rate": 1.603960939054958e-05, "loss": 0.5873, "step": 19186 }, { "epoch": 0.5894080422695297, "grad_norm": 0.3369413912296295, "learning_rate": 1.603922420473375e-05, "loss": 0.6065, "step": 19187 }, { "epoch": 0.5894387614044788, "grad_norm": 0.37255752086639404, "learning_rate": 1.603883900481289e-05, "loss": 0.622, "step": 19188 }, { "epoch": 0.589469480539428, "grad_norm": 0.3669999837875366, "learning_rate": 1.60384537907879e-05, "loss": 0.6137, "step": 19189 }, { "epoch": 0.5895001996743772, "grad_norm": 0.3657234311103821, "learning_rate": 1.6038068562659678e-05, "loss": 0.4754, "step": 19190 }, { "epoch": 0.5895309188093263, "grad_norm": 0.3680228590965271, "learning_rate": 1.6037683320429126e-05, "loss": 0.5434, "step": 19191 }, { "epoch": 0.5895616379442755, "grad_norm": 0.3365907371044159, "learning_rate": 1.6037298064097146e-05, "loss": 0.5598, "step": 19192 }, { "epoch": 0.5895923570792246, "grad_norm": 0.3276981711387634, "learning_rate": 1.603691279366463e-05, "loss": 0.6287, "step": 19193 }, { "epoch": 0.5896230762141738, "grad_norm": 0.4141208827495575, "learning_rate": 1.6036527509132486e-05, "loss": 0.5106, "step": 19194 }, { "epoch": 0.589653795349123, "grad_norm": 0.380124568939209, "learning_rate": 1.6036142210501608e-05, "loss": 0.5283, "step": 19195 }, { "epoch": 0.5896845144840721, "grad_norm": 0.3881632387638092, "learning_rate": 1.6035756897772897e-05, "loss": 0.5712, "step": 19196 }, { "epoch": 0.5897152336190213, "grad_norm": 0.4712063670158386, "learning_rate": 1.6035371570947257e-05, "loss": 0.5036, "step": 19197 }, { "epoch": 0.5897459527539705, "grad_norm": 0.34177762269973755, "learning_rate": 1.6034986230025586e-05, "loss": 0.5716, "step": 19198 }, { "epoch": 0.5897766718889196, "grad_norm": 0.41399797797203064, "learning_rate": 1.6034600875008783e-05, "loss": 0.6233, "step": 19199 }, { "epoch": 0.5898073910238688, "grad_norm": 0.35270261764526367, "learning_rate": 1.6034215505897744e-05, "loss": 0.486, "step": 19200 }, { "epoch": 0.5898381101588179, "grad_norm": 0.34697434306144714, "learning_rate": 1.6033830122693377e-05, "loss": 0.5736, "step": 19201 }, { "epoch": 0.589868829293767, "grad_norm": 0.32443496584892273, "learning_rate": 1.6033444725396575e-05, "loss": 0.5116, "step": 19202 }, { "epoch": 0.5898995484287163, "grad_norm": 0.4106752574443817, "learning_rate": 1.6033059314008244e-05, "loss": 0.59, "step": 19203 }, { "epoch": 0.5899302675636654, "grad_norm": 0.339203804731369, "learning_rate": 1.6032673888529284e-05, "loss": 0.526, "step": 19204 }, { "epoch": 0.5899609866986145, "grad_norm": 0.4364751875400543, "learning_rate": 1.603228844896059e-05, "loss": 0.5797, "step": 19205 }, { "epoch": 0.5899917058335638, "grad_norm": 0.3689621686935425, "learning_rate": 1.6031902995303066e-05, "loss": 0.5641, "step": 19206 }, { "epoch": 0.5900224249685129, "grad_norm": 0.3282196819782257, "learning_rate": 1.603151752755761e-05, "loss": 0.59, "step": 19207 }, { "epoch": 0.5900531441034621, "grad_norm": 0.3863656222820282, "learning_rate": 1.6031132045725123e-05, "loss": 0.5697, "step": 19208 }, { "epoch": 0.5900838632384112, "grad_norm": 0.39933210611343384, "learning_rate": 1.603074654980651e-05, "loss": 0.6219, "step": 19209 }, { "epoch": 0.5901145823733603, "grad_norm": 0.7663844227790833, "learning_rate": 1.6030361039802666e-05, "loss": 0.5485, "step": 19210 }, { "epoch": 0.5901453015083096, "grad_norm": 0.46287813782691956, "learning_rate": 1.6029975515714488e-05, "loss": 0.5974, "step": 19211 }, { "epoch": 0.5901760206432587, "grad_norm": 0.35972902178764343, "learning_rate": 1.6029589977542886e-05, "loss": 0.5378, "step": 19212 }, { "epoch": 0.5902067397782078, "grad_norm": 0.3376941680908203, "learning_rate": 1.6029204425288754e-05, "loss": 0.5305, "step": 19213 }, { "epoch": 0.590237458913157, "grad_norm": 0.3320756256580353, "learning_rate": 1.6028818858952992e-05, "loss": 0.5044, "step": 19214 }, { "epoch": 0.5902681780481062, "grad_norm": 0.3839505910873413, "learning_rate": 1.6028433278536506e-05, "loss": 0.5888, "step": 19215 }, { "epoch": 0.5902988971830553, "grad_norm": 0.4184506833553314, "learning_rate": 1.602804768404019e-05, "loss": 0.512, "step": 19216 }, { "epoch": 0.5903296163180045, "grad_norm": 0.3642100691795349, "learning_rate": 1.6027662075464952e-05, "loss": 0.5315, "step": 19217 }, { "epoch": 0.5903603354529536, "grad_norm": 0.34609290957450867, "learning_rate": 1.6027276452811683e-05, "loss": 0.5544, "step": 19218 }, { "epoch": 0.5903910545879028, "grad_norm": 0.3579016327857971, "learning_rate": 1.602689081608129e-05, "loss": 0.5581, "step": 19219 }, { "epoch": 0.590421773722852, "grad_norm": 0.3420396149158478, "learning_rate": 1.602650516527467e-05, "loss": 0.6092, "step": 19220 }, { "epoch": 0.5904524928578011, "grad_norm": 0.3440295457839966, "learning_rate": 1.602611950039273e-05, "loss": 0.561, "step": 19221 }, { "epoch": 0.5904832119927503, "grad_norm": 0.3693169951438904, "learning_rate": 1.6025733821436372e-05, "loss": 0.5444, "step": 19222 }, { "epoch": 0.5905139311276995, "grad_norm": 0.44674068689346313, "learning_rate": 1.6025348128406487e-05, "loss": 0.5549, "step": 19223 }, { "epoch": 0.5905446502626486, "grad_norm": 0.30887359380722046, "learning_rate": 1.602496242130398e-05, "loss": 0.4905, "step": 19224 }, { "epoch": 0.5905753693975978, "grad_norm": 0.394621878862381, "learning_rate": 1.6024576700129753e-05, "loss": 0.6176, "step": 19225 }, { "epoch": 0.5906060885325469, "grad_norm": 0.3359978199005127, "learning_rate": 1.6024190964884704e-05, "loss": 0.5469, "step": 19226 }, { "epoch": 0.590636807667496, "grad_norm": 0.39687079191207886, "learning_rate": 1.602380521556974e-05, "loss": 0.6574, "step": 19227 }, { "epoch": 0.5906675268024453, "grad_norm": 0.35068628191947937, "learning_rate": 1.6023419452185756e-05, "loss": 0.5152, "step": 19228 }, { "epoch": 0.5906982459373944, "grad_norm": 0.31275051832199097, "learning_rate": 1.6023033674733658e-05, "loss": 0.4605, "step": 19229 }, { "epoch": 0.5907289650723435, "grad_norm": 0.36968111991882324, "learning_rate": 1.6022647883214342e-05, "loss": 0.5561, "step": 19230 }, { "epoch": 0.5907596842072927, "grad_norm": 0.3330898582935333, "learning_rate": 1.602226207762871e-05, "loss": 0.5858, "step": 19231 }, { "epoch": 0.5907904033422419, "grad_norm": 0.3563777208328247, "learning_rate": 1.6021876257977668e-05, "loss": 0.6225, "step": 19232 }, { "epoch": 0.5908211224771911, "grad_norm": 0.37471804022789, "learning_rate": 1.6021490424262108e-05, "loss": 0.559, "step": 19233 }, { "epoch": 0.5908518416121402, "grad_norm": 0.4101230502128601, "learning_rate": 1.6021104576482942e-05, "loss": 0.6605, "step": 19234 }, { "epoch": 0.5908825607470893, "grad_norm": 0.40303608775138855, "learning_rate": 1.6020718714641064e-05, "loss": 0.5649, "step": 19235 }, { "epoch": 0.5909132798820386, "grad_norm": 0.352078378200531, "learning_rate": 1.6020332838737375e-05, "loss": 0.6291, "step": 19236 }, { "epoch": 0.5909439990169877, "grad_norm": 0.31379687786102295, "learning_rate": 1.601994694877278e-05, "loss": 0.5222, "step": 19237 }, { "epoch": 0.5909747181519368, "grad_norm": 0.3683655560016632, "learning_rate": 1.6019561044748176e-05, "loss": 0.5231, "step": 19238 }, { "epoch": 0.591005437286886, "grad_norm": 0.3601285219192505, "learning_rate": 1.601917512666447e-05, "loss": 0.5328, "step": 19239 }, { "epoch": 0.5910361564218352, "grad_norm": 0.3455623388290405, "learning_rate": 1.601878919452256e-05, "loss": 0.6189, "step": 19240 }, { "epoch": 0.5910668755567843, "grad_norm": 0.3334978222846985, "learning_rate": 1.6018403248323345e-05, "loss": 0.5036, "step": 19241 }, { "epoch": 0.5910975946917335, "grad_norm": 0.3630285859107971, "learning_rate": 1.6018017288067734e-05, "loss": 0.5839, "step": 19242 }, { "epoch": 0.5911283138266826, "grad_norm": 0.35371050238609314, "learning_rate": 1.6017631313756618e-05, "loss": 0.6134, "step": 19243 }, { "epoch": 0.5911590329616317, "grad_norm": 0.32165825366973877, "learning_rate": 1.6017245325390908e-05, "loss": 0.6425, "step": 19244 }, { "epoch": 0.591189752096581, "grad_norm": 0.3615274727344513, "learning_rate": 1.6016859322971498e-05, "loss": 0.5917, "step": 19245 }, { "epoch": 0.5912204712315301, "grad_norm": 0.3757474422454834, "learning_rate": 1.6016473306499293e-05, "loss": 0.5378, "step": 19246 }, { "epoch": 0.5912511903664793, "grad_norm": 0.4002845883369446, "learning_rate": 1.6016087275975194e-05, "loss": 0.5872, "step": 19247 }, { "epoch": 0.5912819095014284, "grad_norm": 0.6013129353523254, "learning_rate": 1.6015701231400103e-05, "loss": 0.5111, "step": 19248 }, { "epoch": 0.5913126286363776, "grad_norm": 0.36723750829696655, "learning_rate": 1.6015315172774924e-05, "loss": 0.5274, "step": 19249 }, { "epoch": 0.5913433477713268, "grad_norm": 0.3332446217536926, "learning_rate": 1.6014929100100555e-05, "loss": 0.5405, "step": 19250 }, { "epoch": 0.5913740669062759, "grad_norm": 0.3154708445072174, "learning_rate": 1.60145430133779e-05, "loss": 0.5182, "step": 19251 }, { "epoch": 0.591404786041225, "grad_norm": 0.40841516852378845, "learning_rate": 1.601415691260786e-05, "loss": 0.586, "step": 19252 }, { "epoch": 0.5914355051761743, "grad_norm": 0.3574203848838806, "learning_rate": 1.6013770797791334e-05, "loss": 0.5557, "step": 19253 }, { "epoch": 0.5914662243111234, "grad_norm": 0.38576656579971313, "learning_rate": 1.6013384668929228e-05, "loss": 0.5219, "step": 19254 }, { "epoch": 0.5914969434460725, "grad_norm": 0.3841092586517334, "learning_rate": 1.601299852602244e-05, "loss": 0.6341, "step": 19255 }, { "epoch": 0.5915276625810217, "grad_norm": 0.38109660148620605, "learning_rate": 1.601261236907188e-05, "loss": 0.5856, "step": 19256 }, { "epoch": 0.5915583817159709, "grad_norm": 0.3726730942726135, "learning_rate": 1.6012226198078438e-05, "loss": 0.5626, "step": 19257 }, { "epoch": 0.5915891008509201, "grad_norm": 0.3247823119163513, "learning_rate": 1.6011840013043024e-05, "loss": 0.5085, "step": 19258 }, { "epoch": 0.5916198199858692, "grad_norm": 0.3163151741027832, "learning_rate": 1.6011453813966538e-05, "loss": 0.5599, "step": 19259 }, { "epoch": 0.5916505391208183, "grad_norm": 0.3472658395767212, "learning_rate": 1.6011067600849878e-05, "loss": 0.6513, "step": 19260 }, { "epoch": 0.5916812582557676, "grad_norm": 0.35683494806289673, "learning_rate": 1.6010681373693955e-05, "loss": 0.5643, "step": 19261 }, { "epoch": 0.5917119773907167, "grad_norm": 0.3440124988555908, "learning_rate": 1.601029513249966e-05, "loss": 0.5916, "step": 19262 }, { "epoch": 0.5917426965256658, "grad_norm": 0.48546740412712097, "learning_rate": 1.600990887726791e-05, "loss": 0.4889, "step": 19263 }, { "epoch": 0.591773415660615, "grad_norm": 0.5301685333251953, "learning_rate": 1.600952260799959e-05, "loss": 0.602, "step": 19264 }, { "epoch": 0.5918041347955642, "grad_norm": 0.35059770941734314, "learning_rate": 1.6009136324695614e-05, "loss": 0.5502, "step": 19265 }, { "epoch": 0.5918348539305133, "grad_norm": 0.39798352122306824, "learning_rate": 1.6008750027356882e-05, "loss": 0.5413, "step": 19266 }, { "epoch": 0.5918655730654625, "grad_norm": 0.3356948792934418, "learning_rate": 1.600836371598429e-05, "loss": 0.5554, "step": 19267 }, { "epoch": 0.5918962922004116, "grad_norm": 0.3579341471195221, "learning_rate": 1.6007977390578747e-05, "loss": 0.505, "step": 19268 }, { "epoch": 0.5919270113353607, "grad_norm": 0.39246848225593567, "learning_rate": 1.6007591051141153e-05, "loss": 0.5817, "step": 19269 }, { "epoch": 0.59195773047031, "grad_norm": 0.3265687823295593, "learning_rate": 1.600720469767241e-05, "loss": 0.643, "step": 19270 }, { "epoch": 0.5919884496052591, "grad_norm": 0.3336569666862488, "learning_rate": 1.6006818330173422e-05, "loss": 0.5828, "step": 19271 }, { "epoch": 0.5920191687402083, "grad_norm": 0.3668178915977478, "learning_rate": 1.600643194864509e-05, "loss": 0.5763, "step": 19272 }, { "epoch": 0.5920498878751574, "grad_norm": 0.33128488063812256, "learning_rate": 1.6006045553088315e-05, "loss": 0.5825, "step": 19273 }, { "epoch": 0.5920806070101066, "grad_norm": 0.34235134720802307, "learning_rate": 1.6005659143504003e-05, "loss": 0.5572, "step": 19274 }, { "epoch": 0.5921113261450558, "grad_norm": 0.37000060081481934, "learning_rate": 1.6005272719893055e-05, "loss": 0.5526, "step": 19275 }, { "epoch": 0.5921420452800049, "grad_norm": 0.3954235911369324, "learning_rate": 1.6004886282256374e-05, "loss": 0.5919, "step": 19276 }, { "epoch": 0.592172764414954, "grad_norm": 0.4259476661682129, "learning_rate": 1.6004499830594863e-05, "loss": 0.5845, "step": 19277 }, { "epoch": 0.5922034835499033, "grad_norm": 0.4111291170120239, "learning_rate": 1.6004113364909417e-05, "loss": 0.5538, "step": 19278 }, { "epoch": 0.5922342026848524, "grad_norm": 0.37739330530166626, "learning_rate": 1.6003726885200948e-05, "loss": 0.5394, "step": 19279 }, { "epoch": 0.5922649218198015, "grad_norm": 0.32633981108665466, "learning_rate": 1.6003340391470357e-05, "loss": 0.5232, "step": 19280 }, { "epoch": 0.5922956409547507, "grad_norm": 0.3619232475757599, "learning_rate": 1.6002953883718544e-05, "loss": 0.5786, "step": 19281 }, { "epoch": 0.5923263600896999, "grad_norm": 0.4002396762371063, "learning_rate": 1.6002567361946416e-05, "loss": 0.582, "step": 19282 }, { "epoch": 0.5923570792246491, "grad_norm": 0.3951985836029053, "learning_rate": 1.600218082615487e-05, "loss": 0.5596, "step": 19283 }, { "epoch": 0.5923877983595982, "grad_norm": 0.35461243987083435, "learning_rate": 1.600179427634481e-05, "loss": 0.5219, "step": 19284 }, { "epoch": 0.5924185174945473, "grad_norm": 0.40092140436172485, "learning_rate": 1.6001407712517144e-05, "loss": 0.5664, "step": 19285 }, { "epoch": 0.5924492366294966, "grad_norm": 0.3842078149318695, "learning_rate": 1.6001021134672766e-05, "loss": 0.5372, "step": 19286 }, { "epoch": 0.5924799557644457, "grad_norm": 0.45095011591911316, "learning_rate": 1.600063454281259e-05, "loss": 0.6565, "step": 19287 }, { "epoch": 0.5925106748993948, "grad_norm": 0.3898492455482483, "learning_rate": 1.600024793693751e-05, "loss": 0.6408, "step": 19288 }, { "epoch": 0.592541394034344, "grad_norm": 0.36950138211250305, "learning_rate": 1.599986131704843e-05, "loss": 0.6899, "step": 19289 }, { "epoch": 0.5925721131692931, "grad_norm": 0.38936054706573486, "learning_rate": 1.599947468314626e-05, "loss": 0.6176, "step": 19290 }, { "epoch": 0.5926028323042423, "grad_norm": 0.356499046087265, "learning_rate": 1.5999088035231893e-05, "loss": 0.5484, "step": 19291 }, { "epoch": 0.5926335514391915, "grad_norm": 0.35601773858070374, "learning_rate": 1.5998701373306237e-05, "loss": 0.6212, "step": 19292 }, { "epoch": 0.5926642705741406, "grad_norm": 0.4337078630924225, "learning_rate": 1.5998314697370195e-05, "loss": 0.5913, "step": 19293 }, { "epoch": 0.5926949897090897, "grad_norm": 0.33639830350875854, "learning_rate": 1.5997928007424675e-05, "loss": 0.5098, "step": 19294 }, { "epoch": 0.592725708844039, "grad_norm": 0.3699624240398407, "learning_rate": 1.5997541303470573e-05, "loss": 0.5107, "step": 19295 }, { "epoch": 0.5927564279789881, "grad_norm": 0.3939288854598999, "learning_rate": 1.599715458550879e-05, "loss": 0.5202, "step": 19296 }, { "epoch": 0.5927871471139373, "grad_norm": 0.34713053703308105, "learning_rate": 1.599676785354024e-05, "loss": 0.5877, "step": 19297 }, { "epoch": 0.5928178662488864, "grad_norm": 0.3266112804412842, "learning_rate": 1.5996381107565815e-05, "loss": 0.519, "step": 19298 }, { "epoch": 0.5928485853838356, "grad_norm": 0.30991309881210327, "learning_rate": 1.5995994347586425e-05, "loss": 0.5408, "step": 19299 }, { "epoch": 0.5928793045187848, "grad_norm": 0.3322933614253998, "learning_rate": 1.5995607573602972e-05, "loss": 0.5491, "step": 19300 }, { "epoch": 0.5929100236537339, "grad_norm": 0.33362510800361633, "learning_rate": 1.5995220785616357e-05, "loss": 0.5323, "step": 19301 }, { "epoch": 0.592940742788683, "grad_norm": 0.3545125424861908, "learning_rate": 1.5994833983627488e-05, "loss": 0.616, "step": 19302 }, { "epoch": 0.5929714619236323, "grad_norm": 0.3495185971260071, "learning_rate": 1.5994447167637262e-05, "loss": 0.6062, "step": 19303 }, { "epoch": 0.5930021810585814, "grad_norm": 0.35838645696640015, "learning_rate": 1.5994060337646588e-05, "loss": 0.5381, "step": 19304 }, { "epoch": 0.5930329001935305, "grad_norm": 0.37456998229026794, "learning_rate": 1.599367349365637e-05, "loss": 0.5701, "step": 19305 }, { "epoch": 0.5930636193284797, "grad_norm": 0.3691597282886505, "learning_rate": 1.5993286635667503e-05, "loss": 0.6091, "step": 19306 }, { "epoch": 0.5930943384634288, "grad_norm": 0.3367885947227478, "learning_rate": 1.5992899763680902e-05, "loss": 0.5459, "step": 19307 }, { "epoch": 0.5931250575983781, "grad_norm": 0.3539595305919647, "learning_rate": 1.5992512877697464e-05, "loss": 0.5135, "step": 19308 }, { "epoch": 0.5931557767333272, "grad_norm": 0.34208106994628906, "learning_rate": 1.599212597771809e-05, "loss": 0.5843, "step": 19309 }, { "epoch": 0.5931864958682763, "grad_norm": 0.38329675793647766, "learning_rate": 1.599173906374369e-05, "loss": 0.5373, "step": 19310 }, { "epoch": 0.5932172150032256, "grad_norm": 0.36502987146377563, "learning_rate": 1.5991352135775166e-05, "loss": 0.6107, "step": 19311 }, { "epoch": 0.5932479341381747, "grad_norm": 0.33045077323913574, "learning_rate": 1.5990965193813418e-05, "loss": 0.6691, "step": 19312 }, { "epoch": 0.5932786532731238, "grad_norm": 0.3736932873725891, "learning_rate": 1.5990578237859356e-05, "loss": 0.6008, "step": 19313 }, { "epoch": 0.593309372408073, "grad_norm": 0.3784506916999817, "learning_rate": 1.599019126791388e-05, "loss": 0.6381, "step": 19314 }, { "epoch": 0.5933400915430221, "grad_norm": 0.37355944514274597, "learning_rate": 1.598980428397789e-05, "loss": 0.5454, "step": 19315 }, { "epoch": 0.5933708106779713, "grad_norm": 0.3459323048591614, "learning_rate": 1.5989417286052296e-05, "loss": 0.4831, "step": 19316 }, { "epoch": 0.5934015298129205, "grad_norm": 0.3421623110771179, "learning_rate": 1.5989030274137996e-05, "loss": 0.5138, "step": 19317 }, { "epoch": 0.5934322489478696, "grad_norm": 0.3537704348564148, "learning_rate": 1.5988643248235903e-05, "loss": 0.564, "step": 19318 }, { "epoch": 0.5934629680828188, "grad_norm": 0.39815521240234375, "learning_rate": 1.5988256208346914e-05, "loss": 0.6109, "step": 19319 }, { "epoch": 0.593493687217768, "grad_norm": 0.328481525182724, "learning_rate": 1.5987869154471933e-05, "loss": 0.509, "step": 19320 }, { "epoch": 0.5935244063527171, "grad_norm": 0.396294504404068, "learning_rate": 1.5987482086611866e-05, "loss": 0.5892, "step": 19321 }, { "epoch": 0.5935551254876663, "grad_norm": 0.33556777238845825, "learning_rate": 1.5987095004767618e-05, "loss": 0.5625, "step": 19322 }, { "epoch": 0.5935858446226154, "grad_norm": 0.3741730749607086, "learning_rate": 1.5986707908940088e-05, "loss": 0.5097, "step": 19323 }, { "epoch": 0.5936165637575646, "grad_norm": 0.33968105912208557, "learning_rate": 1.598632079913019e-05, "loss": 0.5381, "step": 19324 }, { "epoch": 0.5936472828925138, "grad_norm": 0.46215569972991943, "learning_rate": 1.5985933675338812e-05, "loss": 0.7077, "step": 19325 }, { "epoch": 0.5936780020274629, "grad_norm": 0.3681497275829315, "learning_rate": 1.5985546537566875e-05, "loss": 0.5873, "step": 19326 }, { "epoch": 0.593708721162412, "grad_norm": 0.3190513551235199, "learning_rate": 1.5985159385815276e-05, "loss": 0.5637, "step": 19327 }, { "epoch": 0.5937394402973613, "grad_norm": 0.4325663447380066, "learning_rate": 1.5984772220084916e-05, "loss": 0.5809, "step": 19328 }, { "epoch": 0.5937701594323104, "grad_norm": 0.3291528820991516, "learning_rate": 1.5984385040376703e-05, "loss": 0.5873, "step": 19329 }, { "epoch": 0.5938008785672595, "grad_norm": 0.3456628620624542, "learning_rate": 1.5983997846691542e-05, "loss": 0.5304, "step": 19330 }, { "epoch": 0.5938315977022087, "grad_norm": 0.35486331582069397, "learning_rate": 1.5983610639030337e-05, "loss": 0.5042, "step": 19331 }, { "epoch": 0.5938623168371578, "grad_norm": 0.4078337848186493, "learning_rate": 1.598322341739399e-05, "loss": 0.5841, "step": 19332 }, { "epoch": 0.5938930359721071, "grad_norm": 0.3735998570919037, "learning_rate": 1.5982836181783405e-05, "loss": 0.6353, "step": 19333 }, { "epoch": 0.5939237551070562, "grad_norm": 0.41082003712654114, "learning_rate": 1.5982448932199494e-05, "loss": 0.5753, "step": 19334 }, { "epoch": 0.5939544742420053, "grad_norm": 0.3484957218170166, "learning_rate": 1.598206166864315e-05, "loss": 0.5398, "step": 19335 }, { "epoch": 0.5939851933769545, "grad_norm": 0.36360836029052734, "learning_rate": 1.5981674391115287e-05, "loss": 0.5504, "step": 19336 }, { "epoch": 0.5940159125119037, "grad_norm": 0.37756314873695374, "learning_rate": 1.5981287099616804e-05, "loss": 0.5703, "step": 19337 }, { "epoch": 0.5940466316468528, "grad_norm": 0.36489471793174744, "learning_rate": 1.5980899794148606e-05, "loss": 0.5756, "step": 19338 }, { "epoch": 0.594077350781802, "grad_norm": 0.358417272567749, "learning_rate": 1.59805124747116e-05, "loss": 0.5622, "step": 19339 }, { "epoch": 0.5941080699167511, "grad_norm": 0.3928965628147125, "learning_rate": 1.598012514130669e-05, "loss": 0.6718, "step": 19340 }, { "epoch": 0.5941387890517003, "grad_norm": 0.36069533228874207, "learning_rate": 1.597973779393478e-05, "loss": 0.5794, "step": 19341 }, { "epoch": 0.5941695081866495, "grad_norm": 0.3934551775455475, "learning_rate": 1.5979350432596773e-05, "loss": 0.55, "step": 19342 }, { "epoch": 0.5942002273215986, "grad_norm": 0.3488169014453888, "learning_rate": 1.597896305729358e-05, "loss": 0.5905, "step": 19343 }, { "epoch": 0.5942309464565478, "grad_norm": 0.36372920870780945, "learning_rate": 1.5978575668026098e-05, "loss": 0.5027, "step": 19344 }, { "epoch": 0.594261665591497, "grad_norm": 0.36539432406425476, "learning_rate": 1.5978188264795235e-05, "loss": 0.5464, "step": 19345 }, { "epoch": 0.5942923847264461, "grad_norm": 0.3769056797027588, "learning_rate": 1.5977800847601894e-05, "loss": 0.6023, "step": 19346 }, { "epoch": 0.5943231038613953, "grad_norm": 0.4286012649536133, "learning_rate": 1.5977413416446984e-05, "loss": 0.5543, "step": 19347 }, { "epoch": 0.5943538229963444, "grad_norm": 0.36897438764572144, "learning_rate": 1.597702597133141e-05, "loss": 0.5369, "step": 19348 }, { "epoch": 0.5943845421312935, "grad_norm": 0.3078899085521698, "learning_rate": 1.597663851225607e-05, "loss": 0.5301, "step": 19349 }, { "epoch": 0.5944152612662428, "grad_norm": 0.32975298166275024, "learning_rate": 1.597625103922187e-05, "loss": 0.534, "step": 19350 }, { "epoch": 0.5944459804011919, "grad_norm": 0.33969250321388245, "learning_rate": 1.5975863552229726e-05, "loss": 0.5707, "step": 19351 }, { "epoch": 0.594476699536141, "grad_norm": 0.35825541615486145, "learning_rate": 1.597547605128053e-05, "loss": 0.568, "step": 19352 }, { "epoch": 0.5945074186710902, "grad_norm": 0.3468765616416931, "learning_rate": 1.5975088536375197e-05, "loss": 0.5732, "step": 19353 }, { "epoch": 0.5945381378060394, "grad_norm": 0.5485103130340576, "learning_rate": 1.5974701007514626e-05, "loss": 0.5629, "step": 19354 }, { "epoch": 0.5945688569409885, "grad_norm": 0.40568608045578003, "learning_rate": 1.5974313464699723e-05, "loss": 0.5472, "step": 19355 }, { "epoch": 0.5945995760759377, "grad_norm": 0.3163728713989258, "learning_rate": 1.5973925907931392e-05, "loss": 0.5451, "step": 19356 }, { "epoch": 0.5946302952108868, "grad_norm": 0.3666117191314697, "learning_rate": 1.597353833721054e-05, "loss": 0.6404, "step": 19357 }, { "epoch": 0.5946610143458361, "grad_norm": 0.34055402874946594, "learning_rate": 1.5973150752538075e-05, "loss": 0.5612, "step": 19358 }, { "epoch": 0.5946917334807852, "grad_norm": 0.37029626965522766, "learning_rate": 1.59727631539149e-05, "loss": 0.5165, "step": 19359 }, { "epoch": 0.5947224526157343, "grad_norm": 0.3291531205177307, "learning_rate": 1.5972375541341916e-05, "loss": 0.5522, "step": 19360 }, { "epoch": 0.5947531717506835, "grad_norm": 0.3694935739040375, "learning_rate": 1.5971987914820032e-05, "loss": 0.5795, "step": 19361 }, { "epoch": 0.5947838908856327, "grad_norm": 0.37608832120895386, "learning_rate": 1.5971600274350155e-05, "loss": 0.5602, "step": 19362 }, { "epoch": 0.5948146100205818, "grad_norm": 0.3728635907173157, "learning_rate": 1.597121261993319e-05, "loss": 0.549, "step": 19363 }, { "epoch": 0.594845329155531, "grad_norm": 0.543639063835144, "learning_rate": 1.5970824951570035e-05, "loss": 0.5512, "step": 19364 }, { "epoch": 0.5948760482904801, "grad_norm": 0.3835147023200989, "learning_rate": 1.5970437269261606e-05, "loss": 0.5459, "step": 19365 }, { "epoch": 0.5949067674254293, "grad_norm": 0.34439709782600403, "learning_rate": 1.5970049573008802e-05, "loss": 0.5376, "step": 19366 }, { "epoch": 0.5949374865603785, "grad_norm": 0.42755138874053955, "learning_rate": 1.5969661862812528e-05, "loss": 0.5314, "step": 19367 }, { "epoch": 0.5949682056953276, "grad_norm": 0.3454504609107971, "learning_rate": 1.5969274138673695e-05, "loss": 0.6028, "step": 19368 }, { "epoch": 0.5949989248302768, "grad_norm": 0.3711286783218384, "learning_rate": 1.5968886400593208e-05, "loss": 0.5205, "step": 19369 }, { "epoch": 0.595029643965226, "grad_norm": 0.3256004750728607, "learning_rate": 1.5968498648571965e-05, "loss": 0.5525, "step": 19370 }, { "epoch": 0.5950603631001751, "grad_norm": 0.35665425658226013, "learning_rate": 1.596811088261088e-05, "loss": 0.5796, "step": 19371 }, { "epoch": 0.5950910822351243, "grad_norm": 0.38774681091308594, "learning_rate": 1.5967723102710855e-05, "loss": 0.6361, "step": 19372 }, { "epoch": 0.5951218013700734, "grad_norm": 0.3702642321586609, "learning_rate": 1.5967335308872795e-05, "loss": 0.5879, "step": 19373 }, { "epoch": 0.5951525205050225, "grad_norm": 0.3696678578853607, "learning_rate": 1.5966947501097603e-05, "loss": 0.5661, "step": 19374 }, { "epoch": 0.5951832396399718, "grad_norm": 0.4024767577648163, "learning_rate": 1.5966559679386193e-05, "loss": 0.5499, "step": 19375 }, { "epoch": 0.5952139587749209, "grad_norm": 0.3787553906440735, "learning_rate": 1.5966171843739467e-05, "loss": 0.644, "step": 19376 }, { "epoch": 0.59524467790987, "grad_norm": 0.39395782351493835, "learning_rate": 1.5965783994158327e-05, "loss": 0.5573, "step": 19377 }, { "epoch": 0.5952753970448192, "grad_norm": 0.43377450108528137, "learning_rate": 1.5965396130643682e-05, "loss": 0.535, "step": 19378 }, { "epoch": 0.5953061161797684, "grad_norm": 0.31680646538734436, "learning_rate": 1.5965008253196436e-05, "loss": 0.5527, "step": 19379 }, { "epoch": 0.5953368353147175, "grad_norm": 0.5360217690467834, "learning_rate": 1.59646203618175e-05, "loss": 0.5209, "step": 19380 }, { "epoch": 0.5953675544496667, "grad_norm": 0.3608631193637848, "learning_rate": 1.5964232456507776e-05, "loss": 0.5827, "step": 19381 }, { "epoch": 0.5953982735846158, "grad_norm": 0.3350711166858673, "learning_rate": 1.596384453726817e-05, "loss": 0.5856, "step": 19382 }, { "epoch": 0.5954289927195651, "grad_norm": 0.3783625066280365, "learning_rate": 1.596345660409959e-05, "loss": 0.5098, "step": 19383 }, { "epoch": 0.5954597118545142, "grad_norm": 0.3237423598766327, "learning_rate": 1.5963068657002937e-05, "loss": 0.5727, "step": 19384 }, { "epoch": 0.5954904309894633, "grad_norm": 0.3689960539340973, "learning_rate": 1.5962680695979123e-05, "loss": 0.4661, "step": 19385 }, { "epoch": 0.5955211501244125, "grad_norm": 0.42060428857803345, "learning_rate": 1.5962292721029054e-05, "loss": 0.6383, "step": 19386 }, { "epoch": 0.5955518692593617, "grad_norm": 0.5782510042190552, "learning_rate": 1.596190473215363e-05, "loss": 0.5626, "step": 19387 }, { "epoch": 0.5955825883943108, "grad_norm": 0.3611844480037689, "learning_rate": 1.5961516729353762e-05, "loss": 0.5326, "step": 19388 }, { "epoch": 0.59561330752926, "grad_norm": 0.36223089694976807, "learning_rate": 1.596112871263036e-05, "loss": 0.5486, "step": 19389 }, { "epoch": 0.5956440266642091, "grad_norm": 0.3270007371902466, "learning_rate": 1.5960740681984323e-05, "loss": 0.5099, "step": 19390 }, { "epoch": 0.5956747457991582, "grad_norm": 0.39997607469558716, "learning_rate": 1.5960352637416554e-05, "loss": 0.5971, "step": 19391 }, { "epoch": 0.5957054649341075, "grad_norm": 0.3451806604862213, "learning_rate": 1.5959964578927973e-05, "loss": 0.4902, "step": 19392 }, { "epoch": 0.5957361840690566, "grad_norm": 0.38417574763298035, "learning_rate": 1.5959576506519475e-05, "loss": 0.573, "step": 19393 }, { "epoch": 0.5957669032040058, "grad_norm": 0.44434601068496704, "learning_rate": 1.595918842019197e-05, "loss": 0.5383, "step": 19394 }, { "epoch": 0.595797622338955, "grad_norm": 0.38195866346359253, "learning_rate": 1.5958800319946364e-05, "loss": 0.6022, "step": 19395 }, { "epoch": 0.5958283414739041, "grad_norm": 0.3542909622192383, "learning_rate": 1.5958412205783564e-05, "loss": 0.4747, "step": 19396 }, { "epoch": 0.5958590606088533, "grad_norm": 0.35853907465934753, "learning_rate": 1.5958024077704476e-05, "loss": 0.5702, "step": 19397 }, { "epoch": 0.5958897797438024, "grad_norm": 0.310473769903183, "learning_rate": 1.5957635935710006e-05, "loss": 0.5284, "step": 19398 }, { "epoch": 0.5959204988787515, "grad_norm": 0.3660898208618164, "learning_rate": 1.5957247779801065e-05, "loss": 0.5399, "step": 19399 }, { "epoch": 0.5959512180137008, "grad_norm": 0.33408379554748535, "learning_rate": 1.595685960997855e-05, "loss": 0.5643, "step": 19400 }, { "epoch": 0.5959819371486499, "grad_norm": 0.3565502166748047, "learning_rate": 1.5956471426243377e-05, "loss": 0.5078, "step": 19401 }, { "epoch": 0.596012656283599, "grad_norm": 0.32802027463912964, "learning_rate": 1.5956083228596447e-05, "loss": 0.5199, "step": 19402 }, { "epoch": 0.5960433754185482, "grad_norm": 0.3303773105144501, "learning_rate": 1.595569501703867e-05, "loss": 0.5182, "step": 19403 }, { "epoch": 0.5960740945534974, "grad_norm": 0.3500034809112549, "learning_rate": 1.5955306791570954e-05, "loss": 0.5887, "step": 19404 }, { "epoch": 0.5961048136884466, "grad_norm": 0.36049652099609375, "learning_rate": 1.59549185521942e-05, "loss": 0.5461, "step": 19405 }, { "epoch": 0.5961355328233957, "grad_norm": 0.3547614514827728, "learning_rate": 1.5954530298909318e-05, "loss": 0.5341, "step": 19406 }, { "epoch": 0.5961662519583448, "grad_norm": 0.35513198375701904, "learning_rate": 1.595414203171721e-05, "loss": 0.5113, "step": 19407 }, { "epoch": 0.5961969710932941, "grad_norm": 0.3563268482685089, "learning_rate": 1.5953753750618796e-05, "loss": 0.5341, "step": 19408 }, { "epoch": 0.5962276902282432, "grad_norm": 0.33120062947273254, "learning_rate": 1.595336545561497e-05, "loss": 0.6422, "step": 19409 }, { "epoch": 0.5962584093631923, "grad_norm": 0.346703439950943, "learning_rate": 1.5952977146706647e-05, "loss": 0.5077, "step": 19410 }, { "epoch": 0.5962891284981415, "grad_norm": 0.4519638419151306, "learning_rate": 1.5952588823894722e-05, "loss": 0.6006, "step": 19411 }, { "epoch": 0.5963198476330906, "grad_norm": 0.3553495407104492, "learning_rate": 1.5952200487180117e-05, "loss": 0.5868, "step": 19412 }, { "epoch": 0.5963505667680398, "grad_norm": 0.3689003884792328, "learning_rate": 1.595181213656373e-05, "loss": 0.582, "step": 19413 }, { "epoch": 0.596381285902989, "grad_norm": 0.39063119888305664, "learning_rate": 1.595142377204647e-05, "loss": 0.6596, "step": 19414 }, { "epoch": 0.5964120050379381, "grad_norm": 0.3508763909339905, "learning_rate": 1.5951035393629244e-05, "loss": 0.6047, "step": 19415 }, { "epoch": 0.5964427241728872, "grad_norm": 0.3537028729915619, "learning_rate": 1.595064700131296e-05, "loss": 0.5211, "step": 19416 }, { "epoch": 0.5964734433078365, "grad_norm": 0.35449978709220886, "learning_rate": 1.5950258595098525e-05, "loss": 0.4647, "step": 19417 }, { "epoch": 0.5965041624427856, "grad_norm": 0.325618714094162, "learning_rate": 1.5949870174986845e-05, "loss": 0.5705, "step": 19418 }, { "epoch": 0.5965348815777348, "grad_norm": 0.40347597002983093, "learning_rate": 1.5949481740978823e-05, "loss": 0.5418, "step": 19419 }, { "epoch": 0.5965656007126839, "grad_norm": 0.5935251116752625, "learning_rate": 1.5949093293075375e-05, "loss": 0.5518, "step": 19420 }, { "epoch": 0.5965963198476331, "grad_norm": 0.4928579032421112, "learning_rate": 1.5948704831277406e-05, "loss": 0.5124, "step": 19421 }, { "epoch": 0.5966270389825823, "grad_norm": 0.33877480030059814, "learning_rate": 1.594831635558582e-05, "loss": 0.5261, "step": 19422 }, { "epoch": 0.5966577581175314, "grad_norm": 0.4171619415283203, "learning_rate": 1.5947927866001522e-05, "loss": 0.5519, "step": 19423 }, { "epoch": 0.5966884772524805, "grad_norm": 0.373649001121521, "learning_rate": 1.594753936252543e-05, "loss": 0.5172, "step": 19424 }, { "epoch": 0.5967191963874298, "grad_norm": 0.41560202836990356, "learning_rate": 1.5947150845158438e-05, "loss": 0.5843, "step": 19425 }, { "epoch": 0.5967499155223789, "grad_norm": 0.34785234928131104, "learning_rate": 1.5946762313901463e-05, "loss": 0.5806, "step": 19426 }, { "epoch": 0.596780634657328, "grad_norm": 0.3622877299785614, "learning_rate": 1.5946373768755407e-05, "loss": 0.5061, "step": 19427 }, { "epoch": 0.5968113537922772, "grad_norm": 0.3564468026161194, "learning_rate": 1.5945985209721185e-05, "loss": 0.5058, "step": 19428 }, { "epoch": 0.5968420729272264, "grad_norm": 0.34875187277793884, "learning_rate": 1.5945596636799693e-05, "loss": 0.5142, "step": 19429 }, { "epoch": 0.5968727920621756, "grad_norm": 0.35888105630874634, "learning_rate": 1.594520804999185e-05, "loss": 0.5794, "step": 19430 }, { "epoch": 0.5969035111971247, "grad_norm": 0.3730042576789856, "learning_rate": 1.5944819449298553e-05, "loss": 0.5403, "step": 19431 }, { "epoch": 0.5969342303320738, "grad_norm": 0.4047829210758209, "learning_rate": 1.5944430834720722e-05, "loss": 0.565, "step": 19432 }, { "epoch": 0.596964949467023, "grad_norm": 0.3604030907154083, "learning_rate": 1.594404220625925e-05, "loss": 0.55, "step": 19433 }, { "epoch": 0.5969956686019722, "grad_norm": 0.44642189145088196, "learning_rate": 1.5943653563915057e-05, "loss": 0.5519, "step": 19434 }, { "epoch": 0.5970263877369213, "grad_norm": 0.35048708319664, "learning_rate": 1.5943264907689046e-05, "loss": 0.615, "step": 19435 }, { "epoch": 0.5970571068718705, "grad_norm": 0.3370307385921478, "learning_rate": 1.5942876237582124e-05, "loss": 0.5101, "step": 19436 }, { "epoch": 0.5970878260068196, "grad_norm": 0.37070542573928833, "learning_rate": 1.5942487553595198e-05, "loss": 0.5828, "step": 19437 }, { "epoch": 0.5971185451417688, "grad_norm": 0.3838955760002136, "learning_rate": 1.594209885572918e-05, "loss": 0.5202, "step": 19438 }, { "epoch": 0.597149264276718, "grad_norm": 0.38030537962913513, "learning_rate": 1.5941710143984973e-05, "loss": 0.5977, "step": 19439 }, { "epoch": 0.5971799834116671, "grad_norm": 0.37394019961357117, "learning_rate": 1.5941321418363488e-05, "loss": 0.6158, "step": 19440 }, { "epoch": 0.5972107025466162, "grad_norm": 0.3157587945461273, "learning_rate": 1.5940932678865627e-05, "loss": 0.494, "step": 19441 }, { "epoch": 0.5972414216815655, "grad_norm": 0.3622080981731415, "learning_rate": 1.594054392549231e-05, "loss": 0.4821, "step": 19442 }, { "epoch": 0.5972721408165146, "grad_norm": 0.41837725043296814, "learning_rate": 1.5940155158244434e-05, "loss": 0.6397, "step": 19443 }, { "epoch": 0.5973028599514638, "grad_norm": 0.34028303623199463, "learning_rate": 1.5939766377122914e-05, "loss": 0.5973, "step": 19444 }, { "epoch": 0.5973335790864129, "grad_norm": 0.34651872515678406, "learning_rate": 1.5939377582128654e-05, "loss": 0.4918, "step": 19445 }, { "epoch": 0.5973642982213621, "grad_norm": 0.3477226793766022, "learning_rate": 1.593898877326256e-05, "loss": 0.5568, "step": 19446 }, { "epoch": 0.5973950173563113, "grad_norm": 0.3637239634990692, "learning_rate": 1.5938599950525544e-05, "loss": 0.586, "step": 19447 }, { "epoch": 0.5974257364912604, "grad_norm": 0.4042789936065674, "learning_rate": 1.5938211113918514e-05, "loss": 0.5529, "step": 19448 }, { "epoch": 0.5974564556262095, "grad_norm": 0.3374525010585785, "learning_rate": 1.5937822263442377e-05, "loss": 0.5102, "step": 19449 }, { "epoch": 0.5974871747611588, "grad_norm": 0.32946062088012695, "learning_rate": 1.593743339909804e-05, "loss": 0.5733, "step": 19450 }, { "epoch": 0.5975178938961079, "grad_norm": 0.3583781123161316, "learning_rate": 1.5937044520886415e-05, "loss": 0.603, "step": 19451 }, { "epoch": 0.597548613031057, "grad_norm": 0.3819290101528168, "learning_rate": 1.5936655628808406e-05, "loss": 0.5937, "step": 19452 }, { "epoch": 0.5975793321660062, "grad_norm": 0.3218579888343811, "learning_rate": 1.5936266722864923e-05, "loss": 0.5729, "step": 19453 }, { "epoch": 0.5976100513009553, "grad_norm": 0.3782792389392853, "learning_rate": 1.5935877803056875e-05, "loss": 0.6092, "step": 19454 }, { "epoch": 0.5976407704359046, "grad_norm": 2.5513217449188232, "learning_rate": 1.593548886938517e-05, "loss": 0.5821, "step": 19455 }, { "epoch": 0.5976714895708537, "grad_norm": 0.3869505822658539, "learning_rate": 1.5935099921850717e-05, "loss": 0.5201, "step": 19456 }, { "epoch": 0.5977022087058028, "grad_norm": 0.35041344165802, "learning_rate": 1.5934710960454426e-05, "loss": 0.5748, "step": 19457 }, { "epoch": 0.597732927840752, "grad_norm": 0.33245256543159485, "learning_rate": 1.59343219851972e-05, "loss": 0.5163, "step": 19458 }, { "epoch": 0.5977636469757012, "grad_norm": 0.3445160388946533, "learning_rate": 1.593393299607995e-05, "loss": 0.5483, "step": 19459 }, { "epoch": 0.5977943661106503, "grad_norm": 0.3344051241874695, "learning_rate": 1.5933543993103583e-05, "loss": 0.4976, "step": 19460 }, { "epoch": 0.5978250852455995, "grad_norm": 0.37686511874198914, "learning_rate": 1.5933154976269015e-05, "loss": 0.5833, "step": 19461 }, { "epoch": 0.5978558043805486, "grad_norm": 0.34298238158226013, "learning_rate": 1.5932765945577145e-05, "loss": 0.5575, "step": 19462 }, { "epoch": 0.5978865235154978, "grad_norm": 0.32611843943595886, "learning_rate": 1.5932376901028886e-05, "loss": 0.5231, "step": 19463 }, { "epoch": 0.597917242650447, "grad_norm": 0.40259721875190735, "learning_rate": 1.593198784262515e-05, "loss": 0.5281, "step": 19464 }, { "epoch": 0.5979479617853961, "grad_norm": 0.3927069306373596, "learning_rate": 1.593159877036684e-05, "loss": 0.5943, "step": 19465 }, { "epoch": 0.5979786809203452, "grad_norm": 0.4234201908111572, "learning_rate": 1.5931209684254867e-05, "loss": 0.5324, "step": 19466 }, { "epoch": 0.5980094000552945, "grad_norm": 0.3608892858028412, "learning_rate": 1.593082058429014e-05, "loss": 0.4547, "step": 19467 }, { "epoch": 0.5980401191902436, "grad_norm": 0.3717694878578186, "learning_rate": 1.5930431470473565e-05, "loss": 0.5587, "step": 19468 }, { "epoch": 0.5980708383251928, "grad_norm": 0.31921806931495667, "learning_rate": 1.5930042342806055e-05, "loss": 0.5303, "step": 19469 }, { "epoch": 0.5981015574601419, "grad_norm": 0.37309640645980835, "learning_rate": 1.592965320128852e-05, "loss": 0.5934, "step": 19470 }, { "epoch": 0.598132276595091, "grad_norm": 0.38161543011665344, "learning_rate": 1.592926404592186e-05, "loss": 0.5019, "step": 19471 }, { "epoch": 0.5981629957300403, "grad_norm": 0.3574732840061188, "learning_rate": 1.5928874876706993e-05, "loss": 0.5527, "step": 19472 }, { "epoch": 0.5981937148649894, "grad_norm": 0.3310537338256836, "learning_rate": 1.5928485693644826e-05, "loss": 0.4965, "step": 19473 }, { "epoch": 0.5982244339999385, "grad_norm": 0.3321588635444641, "learning_rate": 1.592809649673626e-05, "loss": 0.4906, "step": 19474 }, { "epoch": 0.5982551531348878, "grad_norm": 0.34403035044670105, "learning_rate": 1.5927707285982215e-05, "loss": 0.4533, "step": 19475 }, { "epoch": 0.5982858722698369, "grad_norm": 0.37050917744636536, "learning_rate": 1.5927318061383595e-05, "loss": 0.5959, "step": 19476 }, { "epoch": 0.598316591404786, "grad_norm": 0.36326029896736145, "learning_rate": 1.5926928822941312e-05, "loss": 0.5337, "step": 19477 }, { "epoch": 0.5983473105397352, "grad_norm": 0.38365069031715393, "learning_rate": 1.5926539570656272e-05, "loss": 0.6059, "step": 19478 }, { "epoch": 0.5983780296746843, "grad_norm": 0.3418891727924347, "learning_rate": 1.5926150304529385e-05, "loss": 0.569, "step": 19479 }, { "epoch": 0.5984087488096336, "grad_norm": 0.333897203207016, "learning_rate": 1.592576102456156e-05, "loss": 0.5407, "step": 19480 }, { "epoch": 0.5984394679445827, "grad_norm": 0.3380824029445648, "learning_rate": 1.5925371730753703e-05, "loss": 0.4528, "step": 19481 }, { "epoch": 0.5984701870795318, "grad_norm": 0.427156537771225, "learning_rate": 1.5924982423106728e-05, "loss": 0.6738, "step": 19482 }, { "epoch": 0.598500906214481, "grad_norm": 0.3595770299434662, "learning_rate": 1.5924593101621545e-05, "loss": 0.5176, "step": 19483 }, { "epoch": 0.5985316253494302, "grad_norm": 0.40811100602149963, "learning_rate": 1.592420376629906e-05, "loss": 0.604, "step": 19484 }, { "epoch": 0.5985623444843793, "grad_norm": 0.3422069847583771, "learning_rate": 1.5923814417140186e-05, "loss": 0.5382, "step": 19485 }, { "epoch": 0.5985930636193285, "grad_norm": 0.41692590713500977, "learning_rate": 1.592342505414582e-05, "loss": 0.5839, "step": 19486 }, { "epoch": 0.5986237827542776, "grad_norm": 0.3491307497024536, "learning_rate": 1.592303567731689e-05, "loss": 0.546, "step": 19487 }, { "epoch": 0.5986545018892268, "grad_norm": 0.9688826203346252, "learning_rate": 1.5922646286654298e-05, "loss": 0.6247, "step": 19488 }, { "epoch": 0.598685221024176, "grad_norm": 0.3704121708869934, "learning_rate": 1.5922256882158948e-05, "loss": 0.5347, "step": 19489 }, { "epoch": 0.5987159401591251, "grad_norm": 0.32912978529930115, "learning_rate": 1.5921867463831753e-05, "loss": 0.5429, "step": 19490 }, { "epoch": 0.5987466592940742, "grad_norm": 0.32064101099967957, "learning_rate": 1.592147803167362e-05, "loss": 0.6633, "step": 19491 }, { "epoch": 0.5987773784290235, "grad_norm": 0.3497718572616577, "learning_rate": 1.5921088585685468e-05, "loss": 0.5762, "step": 19492 }, { "epoch": 0.5988080975639726, "grad_norm": 0.33952605724334717, "learning_rate": 1.5920699125868197e-05, "loss": 0.5167, "step": 19493 }, { "epoch": 0.5988388166989218, "grad_norm": 0.3658501207828522, "learning_rate": 1.5920309652222724e-05, "loss": 0.6027, "step": 19494 }, { "epoch": 0.5988695358338709, "grad_norm": 0.4256591200828552, "learning_rate": 1.5919920164749946e-05, "loss": 0.5599, "step": 19495 }, { "epoch": 0.59890025496882, "grad_norm": 0.4202582538127899, "learning_rate": 1.5919530663450785e-05, "loss": 0.5792, "step": 19496 }, { "epoch": 0.5989309741037693, "grad_norm": 0.3854195177555084, "learning_rate": 1.5919141148326152e-05, "loss": 0.5196, "step": 19497 }, { "epoch": 0.5989616932387184, "grad_norm": 0.3967524766921997, "learning_rate": 1.5918751619376946e-05, "loss": 0.5563, "step": 19498 }, { "epoch": 0.5989924123736675, "grad_norm": 0.41796550154685974, "learning_rate": 1.591836207660408e-05, "loss": 0.5763, "step": 19499 }, { "epoch": 0.5990231315086167, "grad_norm": 0.3295767605304718, "learning_rate": 1.591797252000847e-05, "loss": 0.5165, "step": 19500 }, { "epoch": 0.5990538506435659, "grad_norm": 0.3308233320713043, "learning_rate": 1.591758294959102e-05, "loss": 0.5383, "step": 19501 }, { "epoch": 0.599084569778515, "grad_norm": 0.33869725465774536, "learning_rate": 1.5917193365352647e-05, "loss": 0.6054, "step": 19502 }, { "epoch": 0.5991152889134642, "grad_norm": 0.44373536109924316, "learning_rate": 1.591680376729425e-05, "loss": 0.54, "step": 19503 }, { "epoch": 0.5991460080484133, "grad_norm": 0.36836227774620056, "learning_rate": 1.5916414155416743e-05, "loss": 0.5924, "step": 19504 }, { "epoch": 0.5991767271833626, "grad_norm": 0.3327288031578064, "learning_rate": 1.5916024529721043e-05, "loss": 0.5444, "step": 19505 }, { "epoch": 0.5992074463183117, "grad_norm": 0.32884112000465393, "learning_rate": 1.5915634890208053e-05, "loss": 0.5171, "step": 19506 }, { "epoch": 0.5992381654532608, "grad_norm": 0.5821641683578491, "learning_rate": 1.591524523687868e-05, "loss": 0.5357, "step": 19507 }, { "epoch": 0.59926888458821, "grad_norm": 0.37584060430526733, "learning_rate": 1.5914855569733845e-05, "loss": 0.6236, "step": 19508 }, { "epoch": 0.5992996037231592, "grad_norm": 0.4199647009372711, "learning_rate": 1.5914465888774445e-05, "loss": 0.5443, "step": 19509 }, { "epoch": 0.5993303228581083, "grad_norm": 0.41374093294143677, "learning_rate": 1.59140761940014e-05, "loss": 0.5446, "step": 19510 }, { "epoch": 0.5993610419930575, "grad_norm": 0.376899391412735, "learning_rate": 1.591368648541562e-05, "loss": 0.5147, "step": 19511 }, { "epoch": 0.5993917611280066, "grad_norm": 0.36680424213409424, "learning_rate": 1.591329676301801e-05, "loss": 0.475, "step": 19512 }, { "epoch": 0.5994224802629557, "grad_norm": 0.48222753405570984, "learning_rate": 1.591290702680948e-05, "loss": 0.5485, "step": 19513 }, { "epoch": 0.599453199397905, "grad_norm": 0.3725198209285736, "learning_rate": 1.5912517276790947e-05, "loss": 0.5536, "step": 19514 }, { "epoch": 0.5994839185328541, "grad_norm": 0.3388107419013977, "learning_rate": 1.5912127512963316e-05, "loss": 0.5519, "step": 19515 }, { "epoch": 0.5995146376678033, "grad_norm": 0.35696518421173096, "learning_rate": 1.5911737735327496e-05, "loss": 0.6336, "step": 19516 }, { "epoch": 0.5995453568027524, "grad_norm": 0.351314902305603, "learning_rate": 1.5911347943884402e-05, "loss": 0.5607, "step": 19517 }, { "epoch": 0.5995760759377016, "grad_norm": 0.37799492478370667, "learning_rate": 1.5910958138634942e-05, "loss": 0.527, "step": 19518 }, { "epoch": 0.5996067950726508, "grad_norm": 0.34950292110443115, "learning_rate": 1.5910568319580024e-05, "loss": 0.5218, "step": 19519 }, { "epoch": 0.5996375142075999, "grad_norm": 0.3978697955608368, "learning_rate": 1.5910178486720563e-05, "loss": 0.6401, "step": 19520 }, { "epoch": 0.599668233342549, "grad_norm": 0.3663484454154968, "learning_rate": 1.5909788640057467e-05, "loss": 0.5437, "step": 19521 }, { "epoch": 0.5996989524774983, "grad_norm": 0.36812081933021545, "learning_rate": 1.5909398779591647e-05, "loss": 0.5682, "step": 19522 }, { "epoch": 0.5997296716124474, "grad_norm": 0.3493898808956146, "learning_rate": 1.590900890532401e-05, "loss": 0.5566, "step": 19523 }, { "epoch": 0.5997603907473965, "grad_norm": 0.36044541001319885, "learning_rate": 1.5908619017255475e-05, "loss": 0.6347, "step": 19524 }, { "epoch": 0.5997911098823457, "grad_norm": 0.34203091263771057, "learning_rate": 1.5908229115386945e-05, "loss": 0.4968, "step": 19525 }, { "epoch": 0.5998218290172949, "grad_norm": 0.3907161056995392, "learning_rate": 1.5907839199719334e-05, "loss": 0.5701, "step": 19526 }, { "epoch": 0.599852548152244, "grad_norm": 0.3466881215572357, "learning_rate": 1.5907449270253553e-05, "loss": 0.5321, "step": 19527 }, { "epoch": 0.5998832672871932, "grad_norm": 0.33413711190223694, "learning_rate": 1.590705932699051e-05, "loss": 0.4807, "step": 19528 }, { "epoch": 0.5999139864221423, "grad_norm": 0.3419716954231262, "learning_rate": 1.590666936993112e-05, "loss": 0.6148, "step": 19529 }, { "epoch": 0.5999447055570916, "grad_norm": 0.3156024217605591, "learning_rate": 1.5906279399076286e-05, "loss": 0.5531, "step": 19530 }, { "epoch": 0.5999754246920407, "grad_norm": 0.3905488848686218, "learning_rate": 1.590588941442693e-05, "loss": 0.6032, "step": 19531 }, { "epoch": 0.6000061438269898, "grad_norm": 0.37016335129737854, "learning_rate": 1.590549941598395e-05, "loss": 0.5946, "step": 19532 }, { "epoch": 0.600036862961939, "grad_norm": 0.3995557725429535, "learning_rate": 1.5905109403748272e-05, "loss": 0.6376, "step": 19533 }, { "epoch": 0.6000675820968882, "grad_norm": 0.34126079082489014, "learning_rate": 1.5904719377720793e-05, "loss": 0.5066, "step": 19534 }, { "epoch": 0.6000983012318373, "grad_norm": 0.3695078194141388, "learning_rate": 1.5904329337902428e-05, "loss": 0.5835, "step": 19535 }, { "epoch": 0.6001290203667865, "grad_norm": 0.35126134753227234, "learning_rate": 1.590393928429409e-05, "loss": 0.5955, "step": 19536 }, { "epoch": 0.6001597395017356, "grad_norm": 0.4138120114803314, "learning_rate": 1.5903549216896696e-05, "loss": 0.5145, "step": 19537 }, { "epoch": 0.6001904586366847, "grad_norm": 0.37094202637672424, "learning_rate": 1.5903159135711146e-05, "loss": 0.531, "step": 19538 }, { "epoch": 0.600221177771634, "grad_norm": 0.32772138714790344, "learning_rate": 1.5902769040738356e-05, "loss": 0.5379, "step": 19539 }, { "epoch": 0.6002518969065831, "grad_norm": 0.36894676089286804, "learning_rate": 1.5902378931979237e-05, "loss": 0.5902, "step": 19540 }, { "epoch": 0.6002826160415323, "grad_norm": 0.3961254060268402, "learning_rate": 1.5901988809434698e-05, "loss": 0.6143, "step": 19541 }, { "epoch": 0.6003133351764814, "grad_norm": 0.36286818981170654, "learning_rate": 1.5901598673105653e-05, "loss": 0.5715, "step": 19542 }, { "epoch": 0.6003440543114306, "grad_norm": 0.40156176686286926, "learning_rate": 1.5901208522993014e-05, "loss": 0.5668, "step": 19543 }, { "epoch": 0.6003747734463798, "grad_norm": 0.3763079047203064, "learning_rate": 1.5900818359097688e-05, "loss": 0.5753, "step": 19544 }, { "epoch": 0.6004054925813289, "grad_norm": 0.41827788949012756, "learning_rate": 1.5900428181420587e-05, "loss": 0.5596, "step": 19545 }, { "epoch": 0.600436211716278, "grad_norm": 0.3603114187717438, "learning_rate": 1.590003798996263e-05, "loss": 0.6693, "step": 19546 }, { "epoch": 0.6004669308512273, "grad_norm": 0.36478447914123535, "learning_rate": 1.5899647784724717e-05, "loss": 0.5656, "step": 19547 }, { "epoch": 0.6004976499861764, "grad_norm": 0.36018648743629456, "learning_rate": 1.5899257565707763e-05, "loss": 0.5831, "step": 19548 }, { "epoch": 0.6005283691211255, "grad_norm": 0.34253913164138794, "learning_rate": 1.5898867332912687e-05, "loss": 0.5968, "step": 19549 }, { "epoch": 0.6005590882560747, "grad_norm": 0.37522199749946594, "learning_rate": 1.589847708634039e-05, "loss": 0.5932, "step": 19550 }, { "epoch": 0.6005898073910239, "grad_norm": 0.36543136835098267, "learning_rate": 1.5898086825991786e-05, "loss": 0.6357, "step": 19551 }, { "epoch": 0.600620526525973, "grad_norm": 0.35073959827423096, "learning_rate": 1.5897696551867794e-05, "loss": 0.5716, "step": 19552 }, { "epoch": 0.6006512456609222, "grad_norm": 0.42632871866226196, "learning_rate": 1.5897306263969318e-05, "loss": 0.5469, "step": 19553 }, { "epoch": 0.6006819647958713, "grad_norm": 0.363543838262558, "learning_rate": 1.589691596229727e-05, "loss": 0.5557, "step": 19554 }, { "epoch": 0.6007126839308206, "grad_norm": 0.3618170917034149, "learning_rate": 1.5896525646852564e-05, "loss": 0.5822, "step": 19555 }, { "epoch": 0.6007434030657697, "grad_norm": 0.348664790391922, "learning_rate": 1.589613531763611e-05, "loss": 0.5973, "step": 19556 }, { "epoch": 0.6007741222007188, "grad_norm": 0.34390124678611755, "learning_rate": 1.589574497464882e-05, "loss": 0.4447, "step": 19557 }, { "epoch": 0.600804841335668, "grad_norm": 0.39013803005218506, "learning_rate": 1.5895354617891607e-05, "loss": 0.5581, "step": 19558 }, { "epoch": 0.6008355604706171, "grad_norm": 0.3361425995826721, "learning_rate": 1.5894964247365382e-05, "loss": 0.5006, "step": 19559 }, { "epoch": 0.6008662796055663, "grad_norm": 0.3458382487297058, "learning_rate": 1.5894573863071053e-05, "loss": 0.4997, "step": 19560 }, { "epoch": 0.6008969987405155, "grad_norm": 0.3913351893424988, "learning_rate": 1.5894183465009536e-05, "loss": 0.6045, "step": 19561 }, { "epoch": 0.6009277178754646, "grad_norm": 0.3635474145412445, "learning_rate": 1.5893793053181744e-05, "loss": 0.6039, "step": 19562 }, { "epoch": 0.6009584370104137, "grad_norm": 0.374327152967453, "learning_rate": 1.5893402627588586e-05, "loss": 0.5744, "step": 19563 }, { "epoch": 0.600989156145363, "grad_norm": 0.33798420429229736, "learning_rate": 1.5893012188230973e-05, "loss": 0.5633, "step": 19564 }, { "epoch": 0.6010198752803121, "grad_norm": 0.3624313771724701, "learning_rate": 1.589262173510982e-05, "loss": 0.5634, "step": 19565 }, { "epoch": 0.6010505944152613, "grad_norm": 0.6231707334518433, "learning_rate": 1.589223126822604e-05, "loss": 0.5174, "step": 19566 }, { "epoch": 0.6010813135502104, "grad_norm": 0.3626222610473633, "learning_rate": 1.5891840787580533e-05, "loss": 0.5515, "step": 19567 }, { "epoch": 0.6011120326851596, "grad_norm": 0.3414170742034912, "learning_rate": 1.589145029317423e-05, "loss": 0.5605, "step": 19568 }, { "epoch": 0.6011427518201088, "grad_norm": 0.3681817054748535, "learning_rate": 1.589105978500803e-05, "loss": 0.5338, "step": 19569 }, { "epoch": 0.6011734709550579, "grad_norm": 0.3049899935722351, "learning_rate": 1.589066926308285e-05, "loss": 0.5539, "step": 19570 }, { "epoch": 0.601204190090007, "grad_norm": 0.43711522221565247, "learning_rate": 1.58902787273996e-05, "loss": 0.6416, "step": 19571 }, { "epoch": 0.6012349092249563, "grad_norm": 0.33012062311172485, "learning_rate": 1.588988817795919e-05, "loss": 0.5598, "step": 19572 }, { "epoch": 0.6012656283599054, "grad_norm": 0.47111645340919495, "learning_rate": 1.5889497614762536e-05, "loss": 0.5347, "step": 19573 }, { "epoch": 0.6012963474948545, "grad_norm": 0.34404662251472473, "learning_rate": 1.588910703781055e-05, "loss": 0.573, "step": 19574 }, { "epoch": 0.6013270666298037, "grad_norm": 0.3304150104522705, "learning_rate": 1.5888716447104142e-05, "loss": 0.5799, "step": 19575 }, { "epoch": 0.6013577857647529, "grad_norm": 0.3609777092933655, "learning_rate": 1.588832584264423e-05, "loss": 0.5968, "step": 19576 }, { "epoch": 0.601388504899702, "grad_norm": 0.3994951546192169, "learning_rate": 1.5887935224431713e-05, "loss": 0.6007, "step": 19577 }, { "epoch": 0.6014192240346512, "grad_norm": 0.30830270051956177, "learning_rate": 1.588754459246752e-05, "loss": 0.5572, "step": 19578 }, { "epoch": 0.6014499431696003, "grad_norm": 0.4771658778190613, "learning_rate": 1.588715394675255e-05, "loss": 0.5028, "step": 19579 }, { "epoch": 0.6014806623045496, "grad_norm": 0.3539998531341553, "learning_rate": 1.5886763287287725e-05, "loss": 0.5999, "step": 19580 }, { "epoch": 0.6015113814394987, "grad_norm": 0.3429117202758789, "learning_rate": 1.588637261407395e-05, "loss": 0.493, "step": 19581 }, { "epoch": 0.6015421005744478, "grad_norm": 0.3613777458667755, "learning_rate": 1.5885981927112145e-05, "loss": 0.6046, "step": 19582 }, { "epoch": 0.601572819709397, "grad_norm": 0.38133755326271057, "learning_rate": 1.588559122640321e-05, "loss": 0.6053, "step": 19583 }, { "epoch": 0.6016035388443461, "grad_norm": 0.3817594349384308, "learning_rate": 1.5885200511948077e-05, "loss": 0.4458, "step": 19584 }, { "epoch": 0.6016342579792953, "grad_norm": 0.3266666531562805, "learning_rate": 1.5884809783747638e-05, "loss": 0.5418, "step": 19585 }, { "epoch": 0.6016649771142445, "grad_norm": 0.32851642370224, "learning_rate": 1.588441904180282e-05, "loss": 0.508, "step": 19586 }, { "epoch": 0.6016956962491936, "grad_norm": 0.3258872926235199, "learning_rate": 1.588402828611453e-05, "loss": 0.5443, "step": 19587 }, { "epoch": 0.6017264153841427, "grad_norm": 0.35090503096580505, "learning_rate": 1.5883637516683677e-05, "loss": 0.4993, "step": 19588 }, { "epoch": 0.601757134519092, "grad_norm": 0.3413984477519989, "learning_rate": 1.588324673351118e-05, "loss": 0.5158, "step": 19589 }, { "epoch": 0.6017878536540411, "grad_norm": 0.3647646903991699, "learning_rate": 1.5882855936597947e-05, "loss": 0.607, "step": 19590 }, { "epoch": 0.6018185727889903, "grad_norm": 0.3940635621547699, "learning_rate": 1.5882465125944895e-05, "loss": 0.6178, "step": 19591 }, { "epoch": 0.6018492919239394, "grad_norm": 0.3580094873905182, "learning_rate": 1.588207430155294e-05, "loss": 0.5693, "step": 19592 }, { "epoch": 0.6018800110588886, "grad_norm": 0.3632410168647766, "learning_rate": 1.5881683463422982e-05, "loss": 0.5473, "step": 19593 }, { "epoch": 0.6019107301938378, "grad_norm": 0.33120229840278625, "learning_rate": 1.5881292611555946e-05, "loss": 0.584, "step": 19594 }, { "epoch": 0.6019414493287869, "grad_norm": 0.3559219241142273, "learning_rate": 1.5880901745952736e-05, "loss": 0.6186, "step": 19595 }, { "epoch": 0.601972168463736, "grad_norm": 0.4880407750606537, "learning_rate": 1.5880510866614278e-05, "loss": 0.4769, "step": 19596 }, { "epoch": 0.6020028875986853, "grad_norm": 0.3619921803474426, "learning_rate": 1.5880119973541468e-05, "loss": 0.5059, "step": 19597 }, { "epoch": 0.6020336067336344, "grad_norm": 0.3507855236530304, "learning_rate": 1.5879729066735228e-05, "loss": 0.5427, "step": 19598 }, { "epoch": 0.6020643258685835, "grad_norm": 0.3435400128364563, "learning_rate": 1.5879338146196473e-05, "loss": 0.5941, "step": 19599 }, { "epoch": 0.6020950450035327, "grad_norm": 0.40415143966674805, "learning_rate": 1.587894721192611e-05, "loss": 0.6236, "step": 19600 }, { "epoch": 0.6021257641384818, "grad_norm": 0.3205101191997528, "learning_rate": 1.587855626392506e-05, "loss": 0.5234, "step": 19601 }, { "epoch": 0.602156483273431, "grad_norm": 0.3605336844921112, "learning_rate": 1.5878165302194224e-05, "loss": 0.5874, "step": 19602 }, { "epoch": 0.6021872024083802, "grad_norm": 0.36998528242111206, "learning_rate": 1.5877774326734528e-05, "loss": 0.5658, "step": 19603 }, { "epoch": 0.6022179215433293, "grad_norm": 0.4074169099330902, "learning_rate": 1.587738333754688e-05, "loss": 0.5593, "step": 19604 }, { "epoch": 0.6022486406782785, "grad_norm": 0.35767021775245667, "learning_rate": 1.587699233463219e-05, "loss": 0.525, "step": 19605 }, { "epoch": 0.6022793598132277, "grad_norm": 0.3320155739784241, "learning_rate": 1.5876601317991373e-05, "loss": 0.5022, "step": 19606 }, { "epoch": 0.6023100789481768, "grad_norm": 0.33550047874450684, "learning_rate": 1.5876210287625348e-05, "loss": 0.5386, "step": 19607 }, { "epoch": 0.602340798083126, "grad_norm": 0.34095141291618347, "learning_rate": 1.5875819243535017e-05, "loss": 0.4935, "step": 19608 }, { "epoch": 0.6023715172180751, "grad_norm": 0.3117285668849945, "learning_rate": 1.5875428185721305e-05, "loss": 0.503, "step": 19609 }, { "epoch": 0.6024022363530243, "grad_norm": 0.3561420440673828, "learning_rate": 1.5875037114185118e-05, "loss": 0.5481, "step": 19610 }, { "epoch": 0.6024329554879735, "grad_norm": 0.3460310697555542, "learning_rate": 1.5874646028927372e-05, "loss": 0.5116, "step": 19611 }, { "epoch": 0.6024636746229226, "grad_norm": 0.34081321954727173, "learning_rate": 1.587425492994898e-05, "loss": 0.5275, "step": 19612 }, { "epoch": 0.6024943937578717, "grad_norm": 0.3533998727798462, "learning_rate": 1.5873863817250855e-05, "loss": 0.5028, "step": 19613 }, { "epoch": 0.602525112892821, "grad_norm": 0.3379310369491577, "learning_rate": 1.5873472690833912e-05, "loss": 0.5448, "step": 19614 }, { "epoch": 0.6025558320277701, "grad_norm": 0.3325033485889435, "learning_rate": 1.5873081550699064e-05, "loss": 0.5603, "step": 19615 }, { "epoch": 0.6025865511627193, "grad_norm": 0.4163253903388977, "learning_rate": 1.5872690396847222e-05, "loss": 0.5615, "step": 19616 }, { "epoch": 0.6026172702976684, "grad_norm": 0.37373730540275574, "learning_rate": 1.58722992292793e-05, "loss": 0.5699, "step": 19617 }, { "epoch": 0.6026479894326175, "grad_norm": 0.3341946005821228, "learning_rate": 1.5871908047996215e-05, "loss": 0.5342, "step": 19618 }, { "epoch": 0.6026787085675668, "grad_norm": 0.352104514837265, "learning_rate": 1.587151685299888e-05, "loss": 0.5699, "step": 19619 }, { "epoch": 0.6027094277025159, "grad_norm": 0.3575037717819214, "learning_rate": 1.5871125644288204e-05, "loss": 0.5101, "step": 19620 }, { "epoch": 0.602740146837465, "grad_norm": 0.34641531109809875, "learning_rate": 1.5870734421865105e-05, "loss": 0.5001, "step": 19621 }, { "epoch": 0.6027708659724142, "grad_norm": 0.35123884677886963, "learning_rate": 1.5870343185730497e-05, "loss": 0.5285, "step": 19622 }, { "epoch": 0.6028015851073634, "grad_norm": 0.32900580763816833, "learning_rate": 1.5869951935885292e-05, "loss": 0.5685, "step": 19623 }, { "epoch": 0.6028323042423125, "grad_norm": 0.4103802740573883, "learning_rate": 1.5869560672330405e-05, "loss": 0.6335, "step": 19624 }, { "epoch": 0.6028630233772617, "grad_norm": 0.3774985373020172, "learning_rate": 1.5869169395066747e-05, "loss": 0.6651, "step": 19625 }, { "epoch": 0.6028937425122108, "grad_norm": 0.35895925760269165, "learning_rate": 1.5868778104095236e-05, "loss": 0.6081, "step": 19626 }, { "epoch": 0.6029244616471601, "grad_norm": 0.412117063999176, "learning_rate": 1.5868386799416784e-05, "loss": 0.5418, "step": 19627 }, { "epoch": 0.6029551807821092, "grad_norm": 0.37072139978408813, "learning_rate": 1.5867995481032305e-05, "loss": 0.5829, "step": 19628 }, { "epoch": 0.6029858999170583, "grad_norm": 0.35745006799697876, "learning_rate": 1.5867604148942712e-05, "loss": 0.5327, "step": 19629 }, { "epoch": 0.6030166190520075, "grad_norm": 0.38440951704978943, "learning_rate": 1.5867212803148918e-05, "loss": 0.5424, "step": 19630 }, { "epoch": 0.6030473381869567, "grad_norm": 0.3669082224369049, "learning_rate": 1.5866821443651842e-05, "loss": 0.5644, "step": 19631 }, { "epoch": 0.6030780573219058, "grad_norm": 0.3516797423362732, "learning_rate": 1.5866430070452392e-05, "loss": 0.5373, "step": 19632 }, { "epoch": 0.603108776456855, "grad_norm": 0.33965370059013367, "learning_rate": 1.5866038683551486e-05, "loss": 0.672, "step": 19633 }, { "epoch": 0.6031394955918041, "grad_norm": 0.36765801906585693, "learning_rate": 1.5865647282950036e-05, "loss": 0.4798, "step": 19634 }, { "epoch": 0.6031702147267533, "grad_norm": 0.350129097700119, "learning_rate": 1.586525586864896e-05, "loss": 0.57, "step": 19635 }, { "epoch": 0.6032009338617025, "grad_norm": 0.40640467405319214, "learning_rate": 1.5864864440649168e-05, "loss": 0.5708, "step": 19636 }, { "epoch": 0.6032316529966516, "grad_norm": 0.7209264039993286, "learning_rate": 1.5864472998951573e-05, "loss": 0.6466, "step": 19637 }, { "epoch": 0.6032623721316007, "grad_norm": 0.4318600594997406, "learning_rate": 1.5864081543557093e-05, "loss": 0.6151, "step": 19638 }, { "epoch": 0.60329309126655, "grad_norm": 0.35024645924568176, "learning_rate": 1.5863690074466642e-05, "loss": 0.5403, "step": 19639 }, { "epoch": 0.6033238104014991, "grad_norm": 0.34964507818222046, "learning_rate": 1.5863298591681132e-05, "loss": 0.6392, "step": 19640 }, { "epoch": 0.6033545295364483, "grad_norm": 0.34097084403038025, "learning_rate": 1.586290709520148e-05, "loss": 0.5382, "step": 19641 }, { "epoch": 0.6033852486713974, "grad_norm": 0.33768364787101746, "learning_rate": 1.5862515585028598e-05, "loss": 0.5138, "step": 19642 }, { "epoch": 0.6034159678063465, "grad_norm": 0.3855282962322235, "learning_rate": 1.5862124061163403e-05, "loss": 0.6025, "step": 19643 }, { "epoch": 0.6034466869412958, "grad_norm": 0.3261837661266327, "learning_rate": 1.586173252360681e-05, "loss": 0.4902, "step": 19644 }, { "epoch": 0.6034774060762449, "grad_norm": 0.3382112681865692, "learning_rate": 1.5861340972359728e-05, "loss": 0.5376, "step": 19645 }, { "epoch": 0.603508125211194, "grad_norm": 0.33056193590164185, "learning_rate": 1.5860949407423075e-05, "loss": 0.5376, "step": 19646 }, { "epoch": 0.6035388443461432, "grad_norm": 0.34258076548576355, "learning_rate": 1.5860557828797762e-05, "loss": 0.5832, "step": 19647 }, { "epoch": 0.6035695634810924, "grad_norm": 0.4458651840686798, "learning_rate": 1.5860166236484712e-05, "loss": 0.5757, "step": 19648 }, { "epoch": 0.6036002826160415, "grad_norm": 0.35673975944519043, "learning_rate": 1.5859774630484833e-05, "loss": 0.5359, "step": 19649 }, { "epoch": 0.6036310017509907, "grad_norm": 0.35447192192077637, "learning_rate": 1.585938301079904e-05, "loss": 0.6055, "step": 19650 }, { "epoch": 0.6036617208859398, "grad_norm": 0.42689234018325806, "learning_rate": 1.585899137742825e-05, "loss": 0.6134, "step": 19651 }, { "epoch": 0.6036924400208891, "grad_norm": 0.319844514131546, "learning_rate": 1.5858599730373378e-05, "loss": 0.5495, "step": 19652 }, { "epoch": 0.6037231591558382, "grad_norm": 0.37136343121528625, "learning_rate": 1.5858208069635336e-05, "loss": 0.5257, "step": 19653 }, { "epoch": 0.6037538782907873, "grad_norm": 0.3340265452861786, "learning_rate": 1.585781639521504e-05, "loss": 0.5064, "step": 19654 }, { "epoch": 0.6037845974257365, "grad_norm": 0.37029388546943665, "learning_rate": 1.58574247071134e-05, "loss": 0.6018, "step": 19655 }, { "epoch": 0.6038153165606857, "grad_norm": 0.3776484429836273, "learning_rate": 1.5857033005331338e-05, "loss": 0.5457, "step": 19656 }, { "epoch": 0.6038460356956348, "grad_norm": 0.32486724853515625, "learning_rate": 1.5856641289869767e-05, "loss": 0.5688, "step": 19657 }, { "epoch": 0.603876754830584, "grad_norm": 0.3557395935058594, "learning_rate": 1.58562495607296e-05, "loss": 0.5164, "step": 19658 }, { "epoch": 0.6039074739655331, "grad_norm": 0.34721335768699646, "learning_rate": 1.585585781791176e-05, "loss": 0.49, "step": 19659 }, { "epoch": 0.6039381931004822, "grad_norm": 0.33351588249206543, "learning_rate": 1.5855466061417147e-05, "loss": 0.5781, "step": 19660 }, { "epoch": 0.6039689122354315, "grad_norm": 0.3405664563179016, "learning_rate": 1.585507429124669e-05, "loss": 0.5524, "step": 19661 }, { "epoch": 0.6039996313703806, "grad_norm": 0.3538987934589386, "learning_rate": 1.5854682507401294e-05, "loss": 0.635, "step": 19662 }, { "epoch": 0.6040303505053297, "grad_norm": 0.37324070930480957, "learning_rate": 1.585429070988188e-05, "loss": 0.5697, "step": 19663 }, { "epoch": 0.604061069640279, "grad_norm": 0.36579573154449463, "learning_rate": 1.5853898898689362e-05, "loss": 0.6163, "step": 19664 }, { "epoch": 0.6040917887752281, "grad_norm": 0.38179725408554077, "learning_rate": 1.5853507073824653e-05, "loss": 0.5609, "step": 19665 }, { "epoch": 0.6041225079101773, "grad_norm": 0.3891889452934265, "learning_rate": 1.585311523528867e-05, "loss": 0.6334, "step": 19666 }, { "epoch": 0.6041532270451264, "grad_norm": 0.4114670157432556, "learning_rate": 1.5852723383082324e-05, "loss": 0.6413, "step": 19667 }, { "epoch": 0.6041839461800755, "grad_norm": 0.3764037787914276, "learning_rate": 1.585233151720654e-05, "loss": 0.5872, "step": 19668 }, { "epoch": 0.6042146653150248, "grad_norm": 0.40786388516426086, "learning_rate": 1.5851939637662225e-05, "loss": 0.5961, "step": 19669 }, { "epoch": 0.6042453844499739, "grad_norm": 0.36487334966659546, "learning_rate": 1.5851547744450292e-05, "loss": 0.5098, "step": 19670 }, { "epoch": 0.604276103584923, "grad_norm": 0.3727163076400757, "learning_rate": 1.5851155837571662e-05, "loss": 0.695, "step": 19671 }, { "epoch": 0.6043068227198722, "grad_norm": 0.3500899374485016, "learning_rate": 1.5850763917027254e-05, "loss": 0.5534, "step": 19672 }, { "epoch": 0.6043375418548214, "grad_norm": 0.3572900891304016, "learning_rate": 1.5850371982817974e-05, "loss": 0.5395, "step": 19673 }, { "epoch": 0.6043682609897705, "grad_norm": 0.3445829451084137, "learning_rate": 1.584998003494474e-05, "loss": 0.5593, "step": 19674 }, { "epoch": 0.6043989801247197, "grad_norm": 0.3858060836791992, "learning_rate": 1.584958807340847e-05, "loss": 0.5569, "step": 19675 }, { "epoch": 0.6044296992596688, "grad_norm": 0.303276389837265, "learning_rate": 1.5849196098210082e-05, "loss": 0.4732, "step": 19676 }, { "epoch": 0.6044604183946181, "grad_norm": 0.36313676834106445, "learning_rate": 1.5848804109350487e-05, "loss": 0.5803, "step": 19677 }, { "epoch": 0.6044911375295672, "grad_norm": 0.34246817231178284, "learning_rate": 1.58484121068306e-05, "loss": 0.589, "step": 19678 }, { "epoch": 0.6045218566645163, "grad_norm": 0.3809898793697357, "learning_rate": 1.584802009065134e-05, "loss": 0.6228, "step": 19679 }, { "epoch": 0.6045525757994655, "grad_norm": 0.3211471438407898, "learning_rate": 1.584762806081362e-05, "loss": 0.5541, "step": 19680 }, { "epoch": 0.6045832949344147, "grad_norm": 0.3397151231765747, "learning_rate": 1.5847236017318356e-05, "loss": 0.5935, "step": 19681 }, { "epoch": 0.6046140140693638, "grad_norm": 0.3576836585998535, "learning_rate": 1.584684396016646e-05, "loss": 0.566, "step": 19682 }, { "epoch": 0.604644733204313, "grad_norm": 0.3393116891384125, "learning_rate": 1.584645188935886e-05, "loss": 0.5906, "step": 19683 }, { "epoch": 0.6046754523392621, "grad_norm": 0.3868539035320282, "learning_rate": 1.5846059804896455e-05, "loss": 0.5442, "step": 19684 }, { "epoch": 0.6047061714742112, "grad_norm": 0.38078856468200684, "learning_rate": 1.584566770678017e-05, "loss": 0.6168, "step": 19685 }, { "epoch": 0.6047368906091605, "grad_norm": 0.4008997976779938, "learning_rate": 1.5845275595010925e-05, "loss": 0.5832, "step": 19686 }, { "epoch": 0.6047676097441096, "grad_norm": 0.36542171239852905, "learning_rate": 1.584488346958963e-05, "loss": 0.5246, "step": 19687 }, { "epoch": 0.6047983288790587, "grad_norm": 0.34593698382377625, "learning_rate": 1.58444913305172e-05, "loss": 0.5538, "step": 19688 }, { "epoch": 0.6048290480140079, "grad_norm": 0.48420464992523193, "learning_rate": 1.5844099177794547e-05, "loss": 0.612, "step": 19689 }, { "epoch": 0.6048597671489571, "grad_norm": 0.378679096698761, "learning_rate": 1.58437070114226e-05, "loss": 0.5027, "step": 19690 }, { "epoch": 0.6048904862839063, "grad_norm": 0.3393810987472534, "learning_rate": 1.5843314831402265e-05, "loss": 0.5311, "step": 19691 }, { "epoch": 0.6049212054188554, "grad_norm": 0.34903600811958313, "learning_rate": 1.584292263773446e-05, "loss": 0.4976, "step": 19692 }, { "epoch": 0.6049519245538045, "grad_norm": 0.3171023428440094, "learning_rate": 1.5842530430420095e-05, "loss": 0.5475, "step": 19693 }, { "epoch": 0.6049826436887538, "grad_norm": 0.3400307297706604, "learning_rate": 1.5842138209460097e-05, "loss": 0.4604, "step": 19694 }, { "epoch": 0.6050133628237029, "grad_norm": 0.3605404794216156, "learning_rate": 1.584174597485538e-05, "loss": 0.6156, "step": 19695 }, { "epoch": 0.605044081958652, "grad_norm": 0.3385610282421112, "learning_rate": 1.5841353726606853e-05, "loss": 0.5179, "step": 19696 }, { "epoch": 0.6050748010936012, "grad_norm": 0.35225188732147217, "learning_rate": 1.584096146471544e-05, "loss": 0.5564, "step": 19697 }, { "epoch": 0.6051055202285504, "grad_norm": 0.3628232181072235, "learning_rate": 1.584056918918205e-05, "loss": 0.5658, "step": 19698 }, { "epoch": 0.6051362393634995, "grad_norm": 0.4675293564796448, "learning_rate": 1.5840176900007606e-05, "loss": 0.5701, "step": 19699 }, { "epoch": 0.6051669584984487, "grad_norm": 0.3717822730541229, "learning_rate": 1.583978459719302e-05, "loss": 0.5623, "step": 19700 }, { "epoch": 0.6051976776333978, "grad_norm": 0.35834774374961853, "learning_rate": 1.583939228073921e-05, "loss": 0.5364, "step": 19701 }, { "epoch": 0.605228396768347, "grad_norm": 0.32229188084602356, "learning_rate": 1.583899995064709e-05, "loss": 0.5072, "step": 19702 }, { "epoch": 0.6052591159032962, "grad_norm": 0.37704983353614807, "learning_rate": 1.583860760691758e-05, "loss": 0.5591, "step": 19703 }, { "epoch": 0.6052898350382453, "grad_norm": 0.417023628950119, "learning_rate": 1.5838215249551588e-05, "loss": 0.5824, "step": 19704 }, { "epoch": 0.6053205541731945, "grad_norm": 0.3586321771144867, "learning_rate": 1.5837822878550043e-05, "loss": 0.5689, "step": 19705 }, { "epoch": 0.6053512733081436, "grad_norm": 0.31763002276420593, "learning_rate": 1.5837430493913853e-05, "loss": 0.5219, "step": 19706 }, { "epoch": 0.6053819924430928, "grad_norm": 0.41796398162841797, "learning_rate": 1.5837038095643936e-05, "loss": 0.5492, "step": 19707 }, { "epoch": 0.605412711578042, "grad_norm": 0.355555921792984, "learning_rate": 1.583664568374121e-05, "loss": 0.5076, "step": 19708 }, { "epoch": 0.6054434307129911, "grad_norm": 0.4371371269226074, "learning_rate": 1.583625325820659e-05, "loss": 0.5382, "step": 19709 }, { "epoch": 0.6054741498479402, "grad_norm": 0.3483518064022064, "learning_rate": 1.583586081904099e-05, "loss": 0.5949, "step": 19710 }, { "epoch": 0.6055048689828895, "grad_norm": 0.33098793029785156, "learning_rate": 1.5835468366245334e-05, "loss": 0.5457, "step": 19711 }, { "epoch": 0.6055355881178386, "grad_norm": 0.38949963450431824, "learning_rate": 1.583507589982053e-05, "loss": 0.4652, "step": 19712 }, { "epoch": 0.6055663072527878, "grad_norm": 0.3467639982700348, "learning_rate": 1.5834683419767502e-05, "loss": 0.595, "step": 19713 }, { "epoch": 0.6055970263877369, "grad_norm": 0.35216203331947327, "learning_rate": 1.583429092608716e-05, "loss": 0.6236, "step": 19714 }, { "epoch": 0.6056277455226861, "grad_norm": 0.3715200126171112, "learning_rate": 1.583389841878043e-05, "loss": 0.5936, "step": 19715 }, { "epoch": 0.6056584646576353, "grad_norm": 0.3459460437297821, "learning_rate": 1.583350589784822e-05, "loss": 0.5539, "step": 19716 }, { "epoch": 0.6056891837925844, "grad_norm": 0.34611955285072327, "learning_rate": 1.5833113363291443e-05, "loss": 0.5107, "step": 19717 }, { "epoch": 0.6057199029275335, "grad_norm": 0.32056716084480286, "learning_rate": 1.583272081511103e-05, "loss": 0.5678, "step": 19718 }, { "epoch": 0.6057506220624828, "grad_norm": 0.38228464126586914, "learning_rate": 1.583232825330789e-05, "loss": 0.5646, "step": 19719 }, { "epoch": 0.6057813411974319, "grad_norm": 0.417367160320282, "learning_rate": 1.5831935677882933e-05, "loss": 0.6463, "step": 19720 }, { "epoch": 0.605812060332381, "grad_norm": 0.3455538749694824, "learning_rate": 1.583154308883709e-05, "loss": 0.5744, "step": 19721 }, { "epoch": 0.6058427794673302, "grad_norm": 0.3226395547389984, "learning_rate": 1.5831150486171272e-05, "loss": 0.6349, "step": 19722 }, { "epoch": 0.6058734986022793, "grad_norm": 0.3261832892894745, "learning_rate": 1.5830757869886387e-05, "loss": 0.5536, "step": 19723 }, { "epoch": 0.6059042177372285, "grad_norm": 0.34588393568992615, "learning_rate": 1.5830365239983368e-05, "loss": 0.5531, "step": 19724 }, { "epoch": 0.6059349368721777, "grad_norm": 0.35835081338882446, "learning_rate": 1.5829972596463118e-05, "loss": 0.4721, "step": 19725 }, { "epoch": 0.6059656560071268, "grad_norm": 0.34687235951423645, "learning_rate": 1.5829579939326566e-05, "loss": 0.5383, "step": 19726 }, { "epoch": 0.605996375142076, "grad_norm": 0.3995806872844696, "learning_rate": 1.5829187268574617e-05, "loss": 0.5043, "step": 19727 }, { "epoch": 0.6060270942770252, "grad_norm": 0.34178483486175537, "learning_rate": 1.58287945842082e-05, "loss": 0.5144, "step": 19728 }, { "epoch": 0.6060578134119743, "grad_norm": 0.38269978761672974, "learning_rate": 1.5828401886228216e-05, "loss": 0.5412, "step": 19729 }, { "epoch": 0.6060885325469235, "grad_norm": 0.33733657002449036, "learning_rate": 1.58280091746356e-05, "loss": 0.6582, "step": 19730 }, { "epoch": 0.6061192516818726, "grad_norm": 0.3263873755931854, "learning_rate": 1.582761644943126e-05, "loss": 0.6196, "step": 19731 }, { "epoch": 0.6061499708168218, "grad_norm": 0.36087653040885925, "learning_rate": 1.5827223710616115e-05, "loss": 0.5211, "step": 19732 }, { "epoch": 0.606180689951771, "grad_norm": 0.35976606607437134, "learning_rate": 1.582683095819108e-05, "loss": 0.5664, "step": 19733 }, { "epoch": 0.6062114090867201, "grad_norm": 0.38893425464630127, "learning_rate": 1.582643819215708e-05, "loss": 0.6081, "step": 19734 }, { "epoch": 0.6062421282216692, "grad_norm": 0.34804660081863403, "learning_rate": 1.5826045412515024e-05, "loss": 0.5427, "step": 19735 }, { "epoch": 0.6062728473566185, "grad_norm": 0.32780927419662476, "learning_rate": 1.582565261926583e-05, "loss": 0.5011, "step": 19736 }, { "epoch": 0.6063035664915676, "grad_norm": 0.38502931594848633, "learning_rate": 1.5825259812410418e-05, "loss": 0.6648, "step": 19737 }, { "epoch": 0.6063342856265168, "grad_norm": 0.33615604043006897, "learning_rate": 1.5824866991949702e-05, "loss": 0.5624, "step": 19738 }, { "epoch": 0.6063650047614659, "grad_norm": 0.3423157036304474, "learning_rate": 1.582447415788461e-05, "loss": 0.6433, "step": 19739 }, { "epoch": 0.606395723896415, "grad_norm": 0.40935060381889343, "learning_rate": 1.5824081310216047e-05, "loss": 0.5507, "step": 19740 }, { "epoch": 0.6064264430313643, "grad_norm": 0.3377135694026947, "learning_rate": 1.5823688448944934e-05, "loss": 0.5957, "step": 19741 }, { "epoch": 0.6064571621663134, "grad_norm": 0.40993282198905945, "learning_rate": 1.5823295574072193e-05, "loss": 0.5616, "step": 19742 }, { "epoch": 0.6064878813012625, "grad_norm": 0.4279015362262726, "learning_rate": 1.5822902685598736e-05, "loss": 0.5451, "step": 19743 }, { "epoch": 0.6065186004362118, "grad_norm": 0.5883047580718994, "learning_rate": 1.5822509783525488e-05, "loss": 0.576, "step": 19744 }, { "epoch": 0.6065493195711609, "grad_norm": 0.36939364671707153, "learning_rate": 1.5822116867853355e-05, "loss": 0.5796, "step": 19745 }, { "epoch": 0.60658003870611, "grad_norm": 0.3599061071872711, "learning_rate": 1.5821723938583268e-05, "loss": 0.5957, "step": 19746 }, { "epoch": 0.6066107578410592, "grad_norm": 0.349224716424942, "learning_rate": 1.582133099571613e-05, "loss": 0.4662, "step": 19747 }, { "epoch": 0.6066414769760083, "grad_norm": 0.3611404597759247, "learning_rate": 1.5820938039252872e-05, "loss": 0.5619, "step": 19748 }, { "epoch": 0.6066721961109575, "grad_norm": 0.6689884662628174, "learning_rate": 1.5820545069194408e-05, "loss": 0.5362, "step": 19749 }, { "epoch": 0.6067029152459067, "grad_norm": 0.36642390489578247, "learning_rate": 1.5820152085541654e-05, "loss": 0.4928, "step": 19750 }, { "epoch": 0.6067336343808558, "grad_norm": 0.3209627866744995, "learning_rate": 1.5819759088295526e-05, "loss": 0.5359, "step": 19751 }, { "epoch": 0.606764353515805, "grad_norm": 0.35106751322746277, "learning_rate": 1.5819366077456945e-05, "loss": 0.6167, "step": 19752 }, { "epoch": 0.6067950726507542, "grad_norm": 0.33478984236717224, "learning_rate": 1.581897305302683e-05, "loss": 0.5321, "step": 19753 }, { "epoch": 0.6068257917857033, "grad_norm": 0.3575763702392578, "learning_rate": 1.5818580015006097e-05, "loss": 0.5615, "step": 19754 }, { "epoch": 0.6068565109206525, "grad_norm": 0.3838667869567871, "learning_rate": 1.581818696339566e-05, "loss": 0.544, "step": 19755 }, { "epoch": 0.6068872300556016, "grad_norm": 0.4412974417209625, "learning_rate": 1.581779389819644e-05, "loss": 0.5198, "step": 19756 }, { "epoch": 0.6069179491905508, "grad_norm": 0.32985755801200867, "learning_rate": 1.5817400819409362e-05, "loss": 0.4984, "step": 19757 }, { "epoch": 0.6069486683255, "grad_norm": 0.33730363845825195, "learning_rate": 1.581700772703534e-05, "loss": 0.5301, "step": 19758 }, { "epoch": 0.6069793874604491, "grad_norm": 0.3722679316997528, "learning_rate": 1.5816614621075282e-05, "loss": 0.539, "step": 19759 }, { "epoch": 0.6070101065953982, "grad_norm": 0.37636712193489075, "learning_rate": 1.581622150153012e-05, "loss": 0.5549, "step": 19760 }, { "epoch": 0.6070408257303475, "grad_norm": 0.34745413064956665, "learning_rate": 1.5815828368400767e-05, "loss": 0.4613, "step": 19761 }, { "epoch": 0.6070715448652966, "grad_norm": 0.3982921242713928, "learning_rate": 1.5815435221688136e-05, "loss": 0.6005, "step": 19762 }, { "epoch": 0.6071022640002458, "grad_norm": 0.3420640826225281, "learning_rate": 1.5815042061393153e-05, "loss": 0.5858, "step": 19763 }, { "epoch": 0.6071329831351949, "grad_norm": 0.3569534718990326, "learning_rate": 1.5814648887516732e-05, "loss": 0.544, "step": 19764 }, { "epoch": 0.607163702270144, "grad_norm": 0.3505745530128479, "learning_rate": 1.5814255700059794e-05, "loss": 0.5601, "step": 19765 }, { "epoch": 0.6071944214050933, "grad_norm": 0.48391300439834595, "learning_rate": 1.581386249902325e-05, "loss": 0.5423, "step": 19766 }, { "epoch": 0.6072251405400424, "grad_norm": 0.3519800007343292, "learning_rate": 1.5813469284408028e-05, "loss": 0.5898, "step": 19767 }, { "epoch": 0.6072558596749915, "grad_norm": 0.34945523738861084, "learning_rate": 1.5813076056215043e-05, "loss": 0.5162, "step": 19768 }, { "epoch": 0.6072865788099407, "grad_norm": 0.33413073420524597, "learning_rate": 1.5812682814445217e-05, "loss": 0.5215, "step": 19769 }, { "epoch": 0.6073172979448899, "grad_norm": 0.3683869242668152, "learning_rate": 1.581228955909946e-05, "loss": 0.643, "step": 19770 }, { "epoch": 0.607348017079839, "grad_norm": 0.4410287141799927, "learning_rate": 1.5811896290178694e-05, "loss": 0.5491, "step": 19771 }, { "epoch": 0.6073787362147882, "grad_norm": 0.35006460547447205, "learning_rate": 1.581150300768384e-05, "loss": 0.5371, "step": 19772 }, { "epoch": 0.6074094553497373, "grad_norm": 0.7770568132400513, "learning_rate": 1.5811109711615814e-05, "loss": 0.5649, "step": 19773 }, { "epoch": 0.6074401744846865, "grad_norm": 0.3541722297668457, "learning_rate": 1.5810716401975536e-05, "loss": 0.4831, "step": 19774 }, { "epoch": 0.6074708936196357, "grad_norm": 0.36694714426994324, "learning_rate": 1.5810323078763926e-05, "loss": 0.5889, "step": 19775 }, { "epoch": 0.6075016127545848, "grad_norm": 0.34842827916145325, "learning_rate": 1.5809929741981897e-05, "loss": 0.6275, "step": 19776 }, { "epoch": 0.607532331889534, "grad_norm": 0.3350789248943329, "learning_rate": 1.5809536391630372e-05, "loss": 0.6149, "step": 19777 }, { "epoch": 0.6075630510244832, "grad_norm": 0.3613494634628296, "learning_rate": 1.580914302771027e-05, "loss": 0.6079, "step": 19778 }, { "epoch": 0.6075937701594323, "grad_norm": 0.3529644012451172, "learning_rate": 1.5808749650222508e-05, "loss": 0.5147, "step": 19779 }, { "epoch": 0.6076244892943815, "grad_norm": 0.364911824464798, "learning_rate": 1.5808356259168008e-05, "loss": 0.5877, "step": 19780 }, { "epoch": 0.6076552084293306, "grad_norm": 0.46895480155944824, "learning_rate": 1.5807962854547682e-05, "loss": 0.5505, "step": 19781 }, { "epoch": 0.6076859275642797, "grad_norm": 0.30724653601646423, "learning_rate": 1.5807569436362456e-05, "loss": 0.5419, "step": 19782 }, { "epoch": 0.607716646699229, "grad_norm": 0.36756470799446106, "learning_rate": 1.5807176004613247e-05, "loss": 0.5357, "step": 19783 }, { "epoch": 0.6077473658341781, "grad_norm": 0.565044105052948, "learning_rate": 1.580678255930097e-05, "loss": 0.476, "step": 19784 }, { "epoch": 0.6077780849691272, "grad_norm": 0.31421878933906555, "learning_rate": 1.580638910042655e-05, "loss": 0.5016, "step": 19785 }, { "epoch": 0.6078088041040764, "grad_norm": 0.37179315090179443, "learning_rate": 1.58059956279909e-05, "loss": 0.6395, "step": 19786 }, { "epoch": 0.6078395232390256, "grad_norm": 0.3237500488758087, "learning_rate": 1.5805602141994942e-05, "loss": 0.6021, "step": 19787 }, { "epoch": 0.6078702423739748, "grad_norm": 0.35266607999801636, "learning_rate": 1.5805208642439597e-05, "loss": 0.5587, "step": 19788 }, { "epoch": 0.6079009615089239, "grad_norm": 0.3380683660507202, "learning_rate": 1.580481512932578e-05, "loss": 0.5044, "step": 19789 }, { "epoch": 0.607931680643873, "grad_norm": 0.4284604787826538, "learning_rate": 1.5804421602654413e-05, "loss": 0.6295, "step": 19790 }, { "epoch": 0.6079623997788223, "grad_norm": 0.3612566292285919, "learning_rate": 1.580402806242641e-05, "loss": 0.5322, "step": 19791 }, { "epoch": 0.6079931189137714, "grad_norm": 0.3554583191871643, "learning_rate": 1.5803634508642697e-05, "loss": 0.5159, "step": 19792 }, { "epoch": 0.6080238380487205, "grad_norm": 0.38387542963027954, "learning_rate": 1.580324094130419e-05, "loss": 0.4771, "step": 19793 }, { "epoch": 0.6080545571836697, "grad_norm": 0.3524211645126343, "learning_rate": 1.5802847360411813e-05, "loss": 0.553, "step": 19794 }, { "epoch": 0.6080852763186189, "grad_norm": 0.3502418100833893, "learning_rate": 1.5802453765966475e-05, "loss": 0.4896, "step": 19795 }, { "epoch": 0.608115995453568, "grad_norm": 0.3509482443332672, "learning_rate": 1.58020601579691e-05, "loss": 0.5681, "step": 19796 }, { "epoch": 0.6081467145885172, "grad_norm": 0.3629661798477173, "learning_rate": 1.580166653642061e-05, "loss": 0.5896, "step": 19797 }, { "epoch": 0.6081774337234663, "grad_norm": 0.35665059089660645, "learning_rate": 1.5801272901321926e-05, "loss": 0.5407, "step": 19798 }, { "epoch": 0.6082081528584155, "grad_norm": 0.34110361337661743, "learning_rate": 1.5800879252673962e-05, "loss": 0.5509, "step": 19799 }, { "epoch": 0.6082388719933647, "grad_norm": 0.38904085755348206, "learning_rate": 1.580048559047764e-05, "loss": 0.5334, "step": 19800 }, { "epoch": 0.6082695911283138, "grad_norm": 0.38293707370758057, "learning_rate": 1.580009191473388e-05, "loss": 0.6344, "step": 19801 }, { "epoch": 0.608300310263263, "grad_norm": 0.3594493269920349, "learning_rate": 1.5799698225443594e-05, "loss": 0.5369, "step": 19802 }, { "epoch": 0.6083310293982122, "grad_norm": 0.3906310200691223, "learning_rate": 1.579930452260771e-05, "loss": 0.5431, "step": 19803 }, { "epoch": 0.6083617485331613, "grad_norm": 0.3557775318622589, "learning_rate": 1.5798910806227148e-05, "loss": 0.545, "step": 19804 }, { "epoch": 0.6083924676681105, "grad_norm": 0.36759063601493835, "learning_rate": 1.5798517076302823e-05, "loss": 0.5397, "step": 19805 }, { "epoch": 0.6084231868030596, "grad_norm": 0.3393675684928894, "learning_rate": 1.5798123332835657e-05, "loss": 0.5649, "step": 19806 }, { "epoch": 0.6084539059380087, "grad_norm": 0.41923341155052185, "learning_rate": 1.579772957582657e-05, "loss": 0.5796, "step": 19807 }, { "epoch": 0.608484625072958, "grad_norm": 0.3500669598579407, "learning_rate": 1.579733580527648e-05, "loss": 0.5829, "step": 19808 }, { "epoch": 0.6085153442079071, "grad_norm": 0.3766281306743622, "learning_rate": 1.579694202118631e-05, "loss": 0.5653, "step": 19809 }, { "epoch": 0.6085460633428562, "grad_norm": 0.3366462290287018, "learning_rate": 1.579654822355697e-05, "loss": 0.4908, "step": 19810 }, { "epoch": 0.6085767824778054, "grad_norm": 0.36889857053756714, "learning_rate": 1.5796154412389392e-05, "loss": 0.5332, "step": 19811 }, { "epoch": 0.6086075016127546, "grad_norm": 0.3871638774871826, "learning_rate": 1.5795760587684492e-05, "loss": 0.5968, "step": 19812 }, { "epoch": 0.6086382207477038, "grad_norm": 0.3424086570739746, "learning_rate": 1.5795366749443183e-05, "loss": 0.5617, "step": 19813 }, { "epoch": 0.6086689398826529, "grad_norm": 0.3721122741699219, "learning_rate": 1.5794972897666395e-05, "loss": 0.595, "step": 19814 }, { "epoch": 0.608699659017602, "grad_norm": 0.34665486216545105, "learning_rate": 1.579457903235504e-05, "loss": 0.6471, "step": 19815 }, { "epoch": 0.6087303781525513, "grad_norm": 0.35875269770622253, "learning_rate": 1.5794185153510045e-05, "loss": 0.5603, "step": 19816 }, { "epoch": 0.6087610972875004, "grad_norm": 0.32894447445869446, "learning_rate": 1.5793791261132322e-05, "loss": 0.5575, "step": 19817 }, { "epoch": 0.6087918164224495, "grad_norm": 0.3417474627494812, "learning_rate": 1.5793397355222798e-05, "loss": 0.5387, "step": 19818 }, { "epoch": 0.6088225355573987, "grad_norm": 0.32956233620643616, "learning_rate": 1.579300343578239e-05, "loss": 0.5942, "step": 19819 }, { "epoch": 0.6088532546923479, "grad_norm": 0.3468533754348755, "learning_rate": 1.579260950281201e-05, "loss": 0.5102, "step": 19820 }, { "epoch": 0.608883973827297, "grad_norm": 0.3534393012523651, "learning_rate": 1.5792215556312594e-05, "loss": 0.5683, "step": 19821 }, { "epoch": 0.6089146929622462, "grad_norm": 0.3607156276702881, "learning_rate": 1.579182159628505e-05, "loss": 0.6761, "step": 19822 }, { "epoch": 0.6089454120971953, "grad_norm": 0.38264086842536926, "learning_rate": 1.5791427622730308e-05, "loss": 0.5794, "step": 19823 }, { "epoch": 0.6089761312321446, "grad_norm": 0.34277036786079407, "learning_rate": 1.5791033635649274e-05, "loss": 0.5512, "step": 19824 }, { "epoch": 0.6090068503670937, "grad_norm": 0.36320945620536804, "learning_rate": 1.5790639635042882e-05, "loss": 0.4809, "step": 19825 }, { "epoch": 0.6090375695020428, "grad_norm": 0.675413191318512, "learning_rate": 1.5790245620912047e-05, "loss": 0.6218, "step": 19826 }, { "epoch": 0.609068288636992, "grad_norm": 0.3330702483654022, "learning_rate": 1.5789851593257687e-05, "loss": 0.6029, "step": 19827 }, { "epoch": 0.6090990077719411, "grad_norm": 0.3688007891178131, "learning_rate": 1.578945755208072e-05, "loss": 0.5675, "step": 19828 }, { "epoch": 0.6091297269068903, "grad_norm": 1.106933832168579, "learning_rate": 1.578906349738208e-05, "loss": 0.542, "step": 19829 }, { "epoch": 0.6091604460418395, "grad_norm": 0.40283748507499695, "learning_rate": 1.5788669429162672e-05, "loss": 0.502, "step": 19830 }, { "epoch": 0.6091911651767886, "grad_norm": 0.3739638328552246, "learning_rate": 1.5788275347423426e-05, "loss": 0.5436, "step": 19831 }, { "epoch": 0.6092218843117377, "grad_norm": 0.3684109151363373, "learning_rate": 1.5787881252165256e-05, "loss": 0.5922, "step": 19832 }, { "epoch": 0.609252603446687, "grad_norm": 0.39276060461997986, "learning_rate": 1.5787487143389083e-05, "loss": 0.6168, "step": 19833 }, { "epoch": 0.6092833225816361, "grad_norm": 0.37226665019989014, "learning_rate": 1.578709302109583e-05, "loss": 0.6223, "step": 19834 }, { "epoch": 0.6093140417165852, "grad_norm": 0.3413839638233185, "learning_rate": 1.5786698885286423e-05, "loss": 0.5495, "step": 19835 }, { "epoch": 0.6093447608515344, "grad_norm": 0.3321729004383087, "learning_rate": 1.578630473596177e-05, "loss": 0.6389, "step": 19836 }, { "epoch": 0.6093754799864836, "grad_norm": 0.4061351716518402, "learning_rate": 1.5785910573122802e-05, "loss": 0.5728, "step": 19837 }, { "epoch": 0.6094061991214328, "grad_norm": 0.3650919795036316, "learning_rate": 1.5785516396770436e-05, "loss": 0.6158, "step": 19838 }, { "epoch": 0.6094369182563819, "grad_norm": 0.38190916180610657, "learning_rate": 1.5785122206905588e-05, "loss": 0.6092, "step": 19839 }, { "epoch": 0.609467637391331, "grad_norm": 0.3649922013282776, "learning_rate": 1.5784728003529188e-05, "loss": 0.5472, "step": 19840 }, { "epoch": 0.6094983565262803, "grad_norm": 0.35533496737480164, "learning_rate": 1.578433378664215e-05, "loss": 0.6253, "step": 19841 }, { "epoch": 0.6095290756612294, "grad_norm": 0.382029265165329, "learning_rate": 1.5783939556245394e-05, "loss": 0.5635, "step": 19842 }, { "epoch": 0.6095597947961785, "grad_norm": 0.3371339440345764, "learning_rate": 1.5783545312339844e-05, "loss": 0.5406, "step": 19843 }, { "epoch": 0.6095905139311277, "grad_norm": 0.3823716938495636, "learning_rate": 1.578315105492642e-05, "loss": 0.5633, "step": 19844 }, { "epoch": 0.6096212330660769, "grad_norm": 0.3692004382610321, "learning_rate": 1.578275678400605e-05, "loss": 0.5513, "step": 19845 }, { "epoch": 0.609651952201026, "grad_norm": 0.33973750472068787, "learning_rate": 1.5782362499579642e-05, "loss": 0.5601, "step": 19846 }, { "epoch": 0.6096826713359752, "grad_norm": 0.39466920495033264, "learning_rate": 1.578196820164812e-05, "loss": 0.5738, "step": 19847 }, { "epoch": 0.6097133904709243, "grad_norm": 0.34401455521583557, "learning_rate": 1.578157389021241e-05, "loss": 0.4981, "step": 19848 }, { "epoch": 0.6097441096058736, "grad_norm": 0.3082047402858734, "learning_rate": 1.578117956527343e-05, "loss": 0.5179, "step": 19849 }, { "epoch": 0.6097748287408227, "grad_norm": 0.3996541202068329, "learning_rate": 1.57807852268321e-05, "loss": 0.6074, "step": 19850 }, { "epoch": 0.6098055478757718, "grad_norm": 0.3881242871284485, "learning_rate": 1.578039087488935e-05, "loss": 0.5689, "step": 19851 }, { "epoch": 0.609836267010721, "grad_norm": 0.35472625494003296, "learning_rate": 1.5779996509446086e-05, "loss": 0.5011, "step": 19852 }, { "epoch": 0.6098669861456701, "grad_norm": 0.34079527854919434, "learning_rate": 1.5779602130503236e-05, "loss": 0.5118, "step": 19853 }, { "epoch": 0.6098977052806193, "grad_norm": 0.34223851561546326, "learning_rate": 1.5779207738061724e-05, "loss": 0.6276, "step": 19854 }, { "epoch": 0.6099284244155685, "grad_norm": 0.3362572491168976, "learning_rate": 1.577881333212247e-05, "loss": 0.5835, "step": 19855 }, { "epoch": 0.6099591435505176, "grad_norm": 0.34296995401382446, "learning_rate": 1.577841891268639e-05, "loss": 0.6094, "step": 19856 }, { "epoch": 0.6099898626854667, "grad_norm": 0.34161829948425293, "learning_rate": 1.577802447975441e-05, "loss": 0.6221, "step": 19857 }, { "epoch": 0.610020581820416, "grad_norm": 0.3522222936153412, "learning_rate": 1.5777630033327455e-05, "loss": 0.6403, "step": 19858 }, { "epoch": 0.6100513009553651, "grad_norm": 0.35925522446632385, "learning_rate": 1.5777235573406436e-05, "loss": 0.5845, "step": 19859 }, { "epoch": 0.6100820200903142, "grad_norm": 0.6133700013160706, "learning_rate": 1.577684109999228e-05, "loss": 0.5949, "step": 19860 }, { "epoch": 0.6101127392252634, "grad_norm": 0.3830329477787018, "learning_rate": 1.577644661308591e-05, "loss": 0.5553, "step": 19861 }, { "epoch": 0.6101434583602126, "grad_norm": 0.3425469994544983, "learning_rate": 1.5776052112688245e-05, "loss": 0.4779, "step": 19862 }, { "epoch": 0.6101741774951618, "grad_norm": 0.38654065132141113, "learning_rate": 1.5775657598800212e-05, "loss": 0.4757, "step": 19863 }, { "epoch": 0.6102048966301109, "grad_norm": 0.40215864777565, "learning_rate": 1.577526307142272e-05, "loss": 0.6452, "step": 19864 }, { "epoch": 0.61023561576506, "grad_norm": 0.336574912071228, "learning_rate": 1.57748685305567e-05, "loss": 0.4808, "step": 19865 }, { "epoch": 0.6102663349000093, "grad_norm": 0.3824908435344696, "learning_rate": 1.5774473976203072e-05, "loss": 0.5365, "step": 19866 }, { "epoch": 0.6102970540349584, "grad_norm": 0.3559548854827881, "learning_rate": 1.5774079408362757e-05, "loss": 0.6324, "step": 19867 }, { "epoch": 0.6103277731699075, "grad_norm": 1.0108847618103027, "learning_rate": 1.577368482703668e-05, "loss": 0.5943, "step": 19868 }, { "epoch": 0.6103584923048567, "grad_norm": 0.43300172686576843, "learning_rate": 1.577329023222575e-05, "loss": 0.5444, "step": 19869 }, { "epoch": 0.6103892114398058, "grad_norm": 0.3527476489543915, "learning_rate": 1.5772895623930905e-05, "loss": 0.5266, "step": 19870 }, { "epoch": 0.610419930574755, "grad_norm": 0.36126962304115295, "learning_rate": 1.5772501002153054e-05, "loss": 0.5455, "step": 19871 }, { "epoch": 0.6104506497097042, "grad_norm": 0.357204794883728, "learning_rate": 1.5772106366893125e-05, "loss": 0.5535, "step": 19872 }, { "epoch": 0.6104813688446533, "grad_norm": 0.3657701909542084, "learning_rate": 1.577171171815204e-05, "loss": 0.549, "step": 19873 }, { "epoch": 0.6105120879796025, "grad_norm": 0.37166136503219604, "learning_rate": 1.577131705593072e-05, "loss": 0.5666, "step": 19874 }, { "epoch": 0.6105428071145517, "grad_norm": 0.364071249961853, "learning_rate": 1.5770922380230083e-05, "loss": 0.6377, "step": 19875 }, { "epoch": 0.6105735262495008, "grad_norm": 0.3653603792190552, "learning_rate": 1.5770527691051056e-05, "loss": 0.543, "step": 19876 }, { "epoch": 0.61060424538445, "grad_norm": 0.46002912521362305, "learning_rate": 1.5770132988394555e-05, "loss": 0.5482, "step": 19877 }, { "epoch": 0.6106349645193991, "grad_norm": 0.33316266536712646, "learning_rate": 1.576973827226151e-05, "loss": 0.5217, "step": 19878 }, { "epoch": 0.6106656836543483, "grad_norm": 0.41018831729888916, "learning_rate": 1.5769343542652836e-05, "loss": 0.5601, "step": 19879 }, { "epoch": 0.6106964027892975, "grad_norm": 0.3519958257675171, "learning_rate": 1.5768948799569458e-05, "loss": 0.5064, "step": 19880 }, { "epoch": 0.6107271219242466, "grad_norm": 0.3893750309944153, "learning_rate": 1.57685540430123e-05, "loss": 0.5267, "step": 19881 }, { "epoch": 0.6107578410591957, "grad_norm": 0.3630312979221344, "learning_rate": 1.5768159272982277e-05, "loss": 0.5201, "step": 19882 }, { "epoch": 0.610788560194145, "grad_norm": 0.36598706245422363, "learning_rate": 1.5767764489480316e-05, "loss": 0.5862, "step": 19883 }, { "epoch": 0.6108192793290941, "grad_norm": 0.36506620049476624, "learning_rate": 1.576736969250734e-05, "loss": 0.5199, "step": 19884 }, { "epoch": 0.6108499984640432, "grad_norm": 0.3354564607143402, "learning_rate": 1.5766974882064267e-05, "loss": 0.5803, "step": 19885 }, { "epoch": 0.6108807175989924, "grad_norm": 0.3472549319267273, "learning_rate": 1.576658005815202e-05, "loss": 0.5085, "step": 19886 }, { "epoch": 0.6109114367339415, "grad_norm": 0.39523881673812866, "learning_rate": 1.5766185220771525e-05, "loss": 0.6209, "step": 19887 }, { "epoch": 0.6109421558688908, "grad_norm": 0.3443721830844879, "learning_rate": 1.5765790369923703e-05, "loss": 0.6493, "step": 19888 }, { "epoch": 0.6109728750038399, "grad_norm": 0.40198472142219543, "learning_rate": 1.5765395505609473e-05, "loss": 0.6031, "step": 19889 }, { "epoch": 0.611003594138789, "grad_norm": 0.32337382435798645, "learning_rate": 1.576500062782976e-05, "loss": 0.4997, "step": 19890 }, { "epoch": 0.6110343132737382, "grad_norm": 0.4124335050582886, "learning_rate": 1.5764605736585484e-05, "loss": 0.6136, "step": 19891 }, { "epoch": 0.6110650324086874, "grad_norm": 0.356300950050354, "learning_rate": 1.576421083187757e-05, "loss": 0.57, "step": 19892 }, { "epoch": 0.6110957515436365, "grad_norm": 0.33871734142303467, "learning_rate": 1.576381591370694e-05, "loss": 0.5786, "step": 19893 }, { "epoch": 0.6111264706785857, "grad_norm": 0.34623876214027405, "learning_rate": 1.5763420982074512e-05, "loss": 0.6579, "step": 19894 }, { "epoch": 0.6111571898135348, "grad_norm": 0.3643794357776642, "learning_rate": 1.5763026036981214e-05, "loss": 0.65, "step": 19895 }, { "epoch": 0.611187908948484, "grad_norm": 0.3172047734260559, "learning_rate": 1.5762631078427965e-05, "loss": 0.529, "step": 19896 }, { "epoch": 0.6112186280834332, "grad_norm": 0.3470968008041382, "learning_rate": 1.576223610641569e-05, "loss": 0.5496, "step": 19897 }, { "epoch": 0.6112493472183823, "grad_norm": 0.33341002464294434, "learning_rate": 1.576184112094531e-05, "loss": 0.5849, "step": 19898 }, { "epoch": 0.6112800663533315, "grad_norm": 0.4321824014186859, "learning_rate": 1.576144612201775e-05, "loss": 0.5552, "step": 19899 }, { "epoch": 0.6113107854882807, "grad_norm": 0.3325037658214569, "learning_rate": 1.5761051109633927e-05, "loss": 0.5957, "step": 19900 }, { "epoch": 0.6113415046232298, "grad_norm": 0.33826586604118347, "learning_rate": 1.5760656083794767e-05, "loss": 0.561, "step": 19901 }, { "epoch": 0.611372223758179, "grad_norm": 0.4060676693916321, "learning_rate": 1.5760261044501196e-05, "loss": 0.5316, "step": 19902 }, { "epoch": 0.6114029428931281, "grad_norm": 0.3196074962615967, "learning_rate": 1.575986599175413e-05, "loss": 0.5367, "step": 19903 }, { "epoch": 0.6114336620280773, "grad_norm": 0.3730468153953552, "learning_rate": 1.5759470925554492e-05, "loss": 0.6211, "step": 19904 }, { "epoch": 0.6114643811630265, "grad_norm": 0.357700914144516, "learning_rate": 1.575907584590321e-05, "loss": 0.5842, "step": 19905 }, { "epoch": 0.6114951002979756, "grad_norm": 0.339065819978714, "learning_rate": 1.5758680752801207e-05, "loss": 0.531, "step": 19906 }, { "epoch": 0.6115258194329247, "grad_norm": 0.34992748498916626, "learning_rate": 1.5758285646249398e-05, "loss": 0.5637, "step": 19907 }, { "epoch": 0.611556538567874, "grad_norm": 0.40914684534072876, "learning_rate": 1.5757890526248712e-05, "loss": 0.5787, "step": 19908 }, { "epoch": 0.6115872577028231, "grad_norm": 0.36202892661094666, "learning_rate": 1.5757495392800073e-05, "loss": 0.5974, "step": 19909 }, { "epoch": 0.6116179768377723, "grad_norm": 0.36433157324790955, "learning_rate": 1.57571002459044e-05, "loss": 0.555, "step": 19910 }, { "epoch": 0.6116486959727214, "grad_norm": 0.317096084356308, "learning_rate": 1.5756705085562618e-05, "loss": 0.5562, "step": 19911 }, { "epoch": 0.6116794151076705, "grad_norm": 0.33264651894569397, "learning_rate": 1.5756309911775644e-05, "loss": 0.5804, "step": 19912 }, { "epoch": 0.6117101342426198, "grad_norm": 0.37515613436698914, "learning_rate": 1.575591472454441e-05, "loss": 0.526, "step": 19913 }, { "epoch": 0.6117408533775689, "grad_norm": 0.3323467969894409, "learning_rate": 1.575551952386984e-05, "loss": 0.514, "step": 19914 }, { "epoch": 0.611771572512518, "grad_norm": 0.4672061800956726, "learning_rate": 1.5755124309752847e-05, "loss": 0.5755, "step": 19915 }, { "epoch": 0.6118022916474672, "grad_norm": 0.34963905811309814, "learning_rate": 1.575472908219436e-05, "loss": 0.5649, "step": 19916 }, { "epoch": 0.6118330107824164, "grad_norm": 0.3521571755409241, "learning_rate": 1.57543338411953e-05, "loss": 0.5748, "step": 19917 }, { "epoch": 0.6118637299173655, "grad_norm": 0.8136649131774902, "learning_rate": 1.5753938586756598e-05, "loss": 0.4901, "step": 19918 }, { "epoch": 0.6118944490523147, "grad_norm": 0.3494507372379303, "learning_rate": 1.575354331887916e-05, "loss": 0.6635, "step": 19919 }, { "epoch": 0.6119251681872638, "grad_norm": 0.5848416090011597, "learning_rate": 1.5753148037563927e-05, "loss": 0.4647, "step": 19920 }, { "epoch": 0.611955887322213, "grad_norm": 0.34671348333358765, "learning_rate": 1.5752752742811816e-05, "loss": 0.562, "step": 19921 }, { "epoch": 0.6119866064571622, "grad_norm": 0.48257046937942505, "learning_rate": 1.5752357434623743e-05, "loss": 0.6218, "step": 19922 }, { "epoch": 0.6120173255921113, "grad_norm": 0.3560166358947754, "learning_rate": 1.5751962113000643e-05, "loss": 0.5497, "step": 19923 }, { "epoch": 0.6120480447270605, "grad_norm": 0.3286709785461426, "learning_rate": 1.575156677794343e-05, "loss": 0.5683, "step": 19924 }, { "epoch": 0.6120787638620097, "grad_norm": 0.3781934380531311, "learning_rate": 1.5751171429453033e-05, "loss": 0.594, "step": 19925 }, { "epoch": 0.6121094829969588, "grad_norm": 0.3823683559894562, "learning_rate": 1.5750776067530375e-05, "loss": 0.5114, "step": 19926 }, { "epoch": 0.612140202131908, "grad_norm": 0.38837921619415283, "learning_rate": 1.5750380692176378e-05, "loss": 0.5676, "step": 19927 }, { "epoch": 0.6121709212668571, "grad_norm": 0.33566924929618835, "learning_rate": 1.574998530339196e-05, "loss": 0.5284, "step": 19928 }, { "epoch": 0.6122016404018062, "grad_norm": 0.3519568145275116, "learning_rate": 1.5749589901178054e-05, "loss": 0.5389, "step": 19929 }, { "epoch": 0.6122323595367555, "grad_norm": 0.3531639575958252, "learning_rate": 1.5749194485535577e-05, "loss": 0.6047, "step": 19930 }, { "epoch": 0.6122630786717046, "grad_norm": 0.4201236367225647, "learning_rate": 1.5748799056465457e-05, "loss": 0.5381, "step": 19931 }, { "epoch": 0.6122937978066537, "grad_norm": 0.3444674015045166, "learning_rate": 1.5748403613968615e-05, "loss": 0.6067, "step": 19932 }, { "epoch": 0.612324516941603, "grad_norm": 0.4024088382720947, "learning_rate": 1.5748008158045974e-05, "loss": 0.6003, "step": 19933 }, { "epoch": 0.6123552360765521, "grad_norm": 0.37967559695243835, "learning_rate": 1.574761268869846e-05, "loss": 0.6122, "step": 19934 }, { "epoch": 0.6123859552115013, "grad_norm": 0.35605645179748535, "learning_rate": 1.574721720592699e-05, "loss": 0.5816, "step": 19935 }, { "epoch": 0.6124166743464504, "grad_norm": 0.35675400495529175, "learning_rate": 1.5746821709732502e-05, "loss": 0.604, "step": 19936 }, { "epoch": 0.6124473934813995, "grad_norm": 0.3573191165924072, "learning_rate": 1.5746426200115904e-05, "loss": 0.5383, "step": 19937 }, { "epoch": 0.6124781126163488, "grad_norm": 0.806117594242096, "learning_rate": 1.5746030677078124e-05, "loss": 0.5321, "step": 19938 }, { "epoch": 0.6125088317512979, "grad_norm": 0.6000556945800781, "learning_rate": 1.5745635140620096e-05, "loss": 0.5234, "step": 19939 }, { "epoch": 0.612539550886247, "grad_norm": 0.4902452230453491, "learning_rate": 1.5745239590742732e-05, "loss": 0.62, "step": 19940 }, { "epoch": 0.6125702700211962, "grad_norm": 0.3083290457725525, "learning_rate": 1.574484402744696e-05, "loss": 0.4604, "step": 19941 }, { "epoch": 0.6126009891561454, "grad_norm": 0.3228245973587036, "learning_rate": 1.57444484507337e-05, "loss": 0.5655, "step": 19942 }, { "epoch": 0.6126317082910945, "grad_norm": 0.3698170483112335, "learning_rate": 1.5744052860603885e-05, "loss": 0.6088, "step": 19943 }, { "epoch": 0.6126624274260437, "grad_norm": 0.34574347734451294, "learning_rate": 1.574365725705843e-05, "loss": 0.5729, "step": 19944 }, { "epoch": 0.6126931465609928, "grad_norm": 0.5571406483650208, "learning_rate": 1.5743261640098264e-05, "loss": 0.601, "step": 19945 }, { "epoch": 0.612723865695942, "grad_norm": 0.43689531087875366, "learning_rate": 1.5742866009724306e-05, "loss": 0.5274, "step": 19946 }, { "epoch": 0.6127545848308912, "grad_norm": 0.3343958258628845, "learning_rate": 1.5742470365937486e-05, "loss": 0.531, "step": 19947 }, { "epoch": 0.6127853039658403, "grad_norm": 0.3979825973510742, "learning_rate": 1.5742074708738723e-05, "loss": 0.5469, "step": 19948 }, { "epoch": 0.6128160231007895, "grad_norm": 0.34698253870010376, "learning_rate": 1.574167903812895e-05, "loss": 0.5612, "step": 19949 }, { "epoch": 0.6128467422357387, "grad_norm": 0.31067365407943726, "learning_rate": 1.5741283354109077e-05, "loss": 0.5876, "step": 19950 }, { "epoch": 0.6128774613706878, "grad_norm": 0.3723515570163727, "learning_rate": 1.574088765668004e-05, "loss": 0.5097, "step": 19951 }, { "epoch": 0.612908180505637, "grad_norm": 0.370000958442688, "learning_rate": 1.574049194584276e-05, "loss": 0.5183, "step": 19952 }, { "epoch": 0.6129388996405861, "grad_norm": 0.35642749071121216, "learning_rate": 1.5740096221598155e-05, "loss": 0.5713, "step": 19953 }, { "epoch": 0.6129696187755352, "grad_norm": 0.33782655000686646, "learning_rate": 1.573970048394716e-05, "loss": 0.5323, "step": 19954 }, { "epoch": 0.6130003379104845, "grad_norm": 0.3753032982349396, "learning_rate": 1.573930473289069e-05, "loss": 0.5382, "step": 19955 }, { "epoch": 0.6130310570454336, "grad_norm": 0.41736266016960144, "learning_rate": 1.5738908968429677e-05, "loss": 0.5728, "step": 19956 }, { "epoch": 0.6130617761803827, "grad_norm": 0.3491252064704895, "learning_rate": 1.5738513190565036e-05, "loss": 0.6229, "step": 19957 }, { "epoch": 0.6130924953153319, "grad_norm": 0.39773958921432495, "learning_rate": 1.57381173992977e-05, "loss": 0.604, "step": 19958 }, { "epoch": 0.6131232144502811, "grad_norm": 0.38413313031196594, "learning_rate": 1.573772159462859e-05, "loss": 0.561, "step": 19959 }, { "epoch": 0.6131539335852303, "grad_norm": 0.3457111418247223, "learning_rate": 1.5737325776558628e-05, "loss": 0.6013, "step": 19960 }, { "epoch": 0.6131846527201794, "grad_norm": 0.3455888628959656, "learning_rate": 1.5736929945088745e-05, "loss": 0.5794, "step": 19961 }, { "epoch": 0.6132153718551285, "grad_norm": 0.3261467218399048, "learning_rate": 1.5736534100219855e-05, "loss": 0.4006, "step": 19962 }, { "epoch": 0.6132460909900778, "grad_norm": 0.37541282176971436, "learning_rate": 1.5736138241952895e-05, "loss": 0.5394, "step": 19963 }, { "epoch": 0.6132768101250269, "grad_norm": 0.3303651809692383, "learning_rate": 1.573574237028878e-05, "loss": 0.5378, "step": 19964 }, { "epoch": 0.613307529259976, "grad_norm": 0.3823045790195465, "learning_rate": 1.5735346485228442e-05, "loss": 0.6751, "step": 19965 }, { "epoch": 0.6133382483949252, "grad_norm": 0.39475372433662415, "learning_rate": 1.57349505867728e-05, "loss": 0.5493, "step": 19966 }, { "epoch": 0.6133689675298744, "grad_norm": 0.3741797208786011, "learning_rate": 1.573455467492278e-05, "loss": 0.597, "step": 19967 }, { "epoch": 0.6133996866648235, "grad_norm": 0.3579210340976715, "learning_rate": 1.5734158749679305e-05, "loss": 0.4737, "step": 19968 }, { "epoch": 0.6134304057997727, "grad_norm": 0.33073604106903076, "learning_rate": 1.57337628110433e-05, "loss": 0.535, "step": 19969 }, { "epoch": 0.6134611249347218, "grad_norm": 0.3171059787273407, "learning_rate": 1.5733366859015697e-05, "loss": 0.6, "step": 19970 }, { "epoch": 0.6134918440696709, "grad_norm": 0.3644452393054962, "learning_rate": 1.5732970893597416e-05, "loss": 0.4964, "step": 19971 }, { "epoch": 0.6135225632046202, "grad_norm": 0.34469467401504517, "learning_rate": 1.5732574914789377e-05, "loss": 0.4496, "step": 19972 }, { "epoch": 0.6135532823395693, "grad_norm": 0.3425200283527374, "learning_rate": 1.5732178922592513e-05, "loss": 0.5259, "step": 19973 }, { "epoch": 0.6135840014745185, "grad_norm": 0.3572368323802948, "learning_rate": 1.573178291700774e-05, "loss": 0.5738, "step": 19974 }, { "epoch": 0.6136147206094676, "grad_norm": 0.3848574757575989, "learning_rate": 1.573138689803599e-05, "loss": 0.5502, "step": 19975 }, { "epoch": 0.6136454397444168, "grad_norm": 0.35092154145240784, "learning_rate": 1.5730990865678187e-05, "loss": 0.4939, "step": 19976 }, { "epoch": 0.613676158879366, "grad_norm": 0.3639199137687683, "learning_rate": 1.5730594819935254e-05, "loss": 0.504, "step": 19977 }, { "epoch": 0.6137068780143151, "grad_norm": 0.6488158106803894, "learning_rate": 1.5730198760808116e-05, "loss": 0.5043, "step": 19978 }, { "epoch": 0.6137375971492642, "grad_norm": 0.34508103132247925, "learning_rate": 1.5729802688297698e-05, "loss": 0.4959, "step": 19979 }, { "epoch": 0.6137683162842135, "grad_norm": 0.3514493405818939, "learning_rate": 1.5729406602404925e-05, "loss": 0.5173, "step": 19980 }, { "epoch": 0.6137990354191626, "grad_norm": 0.3800249397754669, "learning_rate": 1.5729010503130725e-05, "loss": 0.5628, "step": 19981 }, { "epoch": 0.6138297545541117, "grad_norm": 0.3495163917541504, "learning_rate": 1.572861439047602e-05, "loss": 0.5311, "step": 19982 }, { "epoch": 0.6138604736890609, "grad_norm": 0.36389702558517456, "learning_rate": 1.5728218264441733e-05, "loss": 0.5586, "step": 19983 }, { "epoch": 0.6138911928240101, "grad_norm": 0.3634556233882904, "learning_rate": 1.5727822125028794e-05, "loss": 0.5621, "step": 19984 }, { "epoch": 0.6139219119589593, "grad_norm": 0.4390878975391388, "learning_rate": 1.5727425972238128e-05, "loss": 0.4798, "step": 19985 }, { "epoch": 0.6139526310939084, "grad_norm": 0.3315463960170746, "learning_rate": 1.572702980607066e-05, "loss": 0.4953, "step": 19986 }, { "epoch": 0.6139833502288575, "grad_norm": 0.35774537920951843, "learning_rate": 1.572663362652731e-05, "loss": 0.5277, "step": 19987 }, { "epoch": 0.6140140693638068, "grad_norm": 0.5335140228271484, "learning_rate": 1.572623743360901e-05, "loss": 0.5237, "step": 19988 }, { "epoch": 0.6140447884987559, "grad_norm": 0.32507818937301636, "learning_rate": 1.572584122731668e-05, "loss": 0.5666, "step": 19989 }, { "epoch": 0.614075507633705, "grad_norm": 0.37318718433380127, "learning_rate": 1.572544500765125e-05, "loss": 0.5034, "step": 19990 }, { "epoch": 0.6141062267686542, "grad_norm": 0.388094037771225, "learning_rate": 1.5725048774613644e-05, "loss": 0.5632, "step": 19991 }, { "epoch": 0.6141369459036033, "grad_norm": 0.35396715998649597, "learning_rate": 1.5724652528204783e-05, "loss": 0.5362, "step": 19992 }, { "epoch": 0.6141676650385525, "grad_norm": 0.34719449281692505, "learning_rate": 1.5724256268425602e-05, "loss": 0.5286, "step": 19993 }, { "epoch": 0.6141983841735017, "grad_norm": 0.37881559133529663, "learning_rate": 1.5723859995277016e-05, "loss": 0.619, "step": 19994 }, { "epoch": 0.6142291033084508, "grad_norm": 0.3377190828323364, "learning_rate": 1.5723463708759957e-05, "loss": 0.5534, "step": 19995 }, { "epoch": 0.6142598224433999, "grad_norm": 0.3332573473453522, "learning_rate": 1.5723067408875348e-05, "loss": 0.5538, "step": 19996 }, { "epoch": 0.6142905415783492, "grad_norm": 0.38024193048477173, "learning_rate": 1.572267109562412e-05, "loss": 0.5582, "step": 19997 }, { "epoch": 0.6143212607132983, "grad_norm": 0.6023377180099487, "learning_rate": 1.5722274769007185e-05, "loss": 0.5158, "step": 19998 }, { "epoch": 0.6143519798482475, "grad_norm": 0.4823945164680481, "learning_rate": 1.5721878429025484e-05, "loss": 0.5783, "step": 19999 }, { "epoch": 0.6143826989831966, "grad_norm": 0.3267231583595276, "learning_rate": 1.5721482075679933e-05, "loss": 0.4951, "step": 20000 }, { "epoch": 0.6144134181181458, "grad_norm": 0.33363470435142517, "learning_rate": 1.5721085708971462e-05, "loss": 0.5473, "step": 20001 }, { "epoch": 0.614444137253095, "grad_norm": 0.36676302552223206, "learning_rate": 1.5720689328900996e-05, "loss": 0.555, "step": 20002 }, { "epoch": 0.6144748563880441, "grad_norm": 0.35385662317276, "learning_rate": 1.5720292935469462e-05, "loss": 0.5679, "step": 20003 }, { "epoch": 0.6145055755229932, "grad_norm": 0.3523530960083008, "learning_rate": 1.5719896528677787e-05, "loss": 0.5981, "step": 20004 }, { "epoch": 0.6145362946579425, "grad_norm": 0.3575359582901001, "learning_rate": 1.5719500108526887e-05, "loss": 0.5462, "step": 20005 }, { "epoch": 0.6145670137928916, "grad_norm": 0.3873613476753235, "learning_rate": 1.57191036750177e-05, "loss": 0.5478, "step": 20006 }, { "epoch": 0.6145977329278407, "grad_norm": 0.5025413036346436, "learning_rate": 1.5718707228151145e-05, "loss": 0.5482, "step": 20007 }, { "epoch": 0.6146284520627899, "grad_norm": 0.5378583073616028, "learning_rate": 1.5718310767928153e-05, "loss": 0.5503, "step": 20008 }, { "epoch": 0.614659171197739, "grad_norm": 0.5225047469139099, "learning_rate": 1.5717914294349644e-05, "loss": 0.5811, "step": 20009 }, { "epoch": 0.6146898903326883, "grad_norm": 0.3624952435493469, "learning_rate": 1.571751780741655e-05, "loss": 0.6126, "step": 20010 }, { "epoch": 0.6147206094676374, "grad_norm": 0.35226601362228394, "learning_rate": 1.571712130712979e-05, "loss": 0.5659, "step": 20011 }, { "epoch": 0.6147513286025865, "grad_norm": 0.354672372341156, "learning_rate": 1.571672479349029e-05, "loss": 0.562, "step": 20012 }, { "epoch": 0.6147820477375358, "grad_norm": 0.33172741532325745, "learning_rate": 1.571632826649899e-05, "loss": 0.5389, "step": 20013 }, { "epoch": 0.6148127668724849, "grad_norm": 0.32504263520240784, "learning_rate": 1.57159317261568e-05, "loss": 0.5824, "step": 20014 }, { "epoch": 0.614843486007434, "grad_norm": 0.325534462928772, "learning_rate": 1.5715535172464654e-05, "loss": 0.5676, "step": 20015 }, { "epoch": 0.6148742051423832, "grad_norm": 0.39362186193466187, "learning_rate": 1.5715138605423476e-05, "loss": 0.5926, "step": 20016 }, { "epoch": 0.6149049242773323, "grad_norm": 0.35501164197921753, "learning_rate": 1.571474202503419e-05, "loss": 0.5959, "step": 20017 }, { "epoch": 0.6149356434122815, "grad_norm": 0.3957788050174713, "learning_rate": 1.5714345431297728e-05, "loss": 0.5461, "step": 20018 }, { "epoch": 0.6149663625472307, "grad_norm": 0.3934948444366455, "learning_rate": 1.571394882421501e-05, "loss": 0.5535, "step": 20019 }, { "epoch": 0.6149970816821798, "grad_norm": 0.36712780594825745, "learning_rate": 1.571355220378697e-05, "loss": 0.5452, "step": 20020 }, { "epoch": 0.615027800817129, "grad_norm": 0.3699999451637268, "learning_rate": 1.571315557001453e-05, "loss": 0.5911, "step": 20021 }, { "epoch": 0.6150585199520782, "grad_norm": 0.3840087652206421, "learning_rate": 1.5712758922898613e-05, "loss": 0.4885, "step": 20022 }, { "epoch": 0.6150892390870273, "grad_norm": 0.40485963225364685, "learning_rate": 1.571236226244015e-05, "loss": 0.5149, "step": 20023 }, { "epoch": 0.6151199582219765, "grad_norm": 0.3650969862937927, "learning_rate": 1.5711965588640064e-05, "loss": 0.5385, "step": 20024 }, { "epoch": 0.6151506773569256, "grad_norm": 0.3590003550052643, "learning_rate": 1.5711568901499283e-05, "loss": 0.5079, "step": 20025 }, { "epoch": 0.6151813964918748, "grad_norm": 0.3837849795818329, "learning_rate": 1.571117220101874e-05, "loss": 0.5272, "step": 20026 }, { "epoch": 0.615212115626824, "grad_norm": 0.37979063391685486, "learning_rate": 1.571077548719935e-05, "loss": 0.5975, "step": 20027 }, { "epoch": 0.6152428347617731, "grad_norm": 0.35939693450927734, "learning_rate": 1.5710378760042045e-05, "loss": 0.5255, "step": 20028 }, { "epoch": 0.6152735538967222, "grad_norm": 0.8815918564796448, "learning_rate": 1.5709982019547753e-05, "loss": 0.5488, "step": 20029 }, { "epoch": 0.6153042730316715, "grad_norm": 0.36259591579437256, "learning_rate": 1.57095852657174e-05, "loss": 0.5557, "step": 20030 }, { "epoch": 0.6153349921666206, "grad_norm": 0.4119247496128082, "learning_rate": 1.570918849855191e-05, "loss": 0.5654, "step": 20031 }, { "epoch": 0.6153657113015697, "grad_norm": 0.5384522080421448, "learning_rate": 1.5708791718052215e-05, "loss": 0.448, "step": 20032 }, { "epoch": 0.6153964304365189, "grad_norm": 0.34670814871788025, "learning_rate": 1.5708394924219235e-05, "loss": 0.5591, "step": 20033 }, { "epoch": 0.615427149571468, "grad_norm": 0.3570784330368042, "learning_rate": 1.57079981170539e-05, "loss": 0.5255, "step": 20034 }, { "epoch": 0.6154578687064173, "grad_norm": 0.40354785323143005, "learning_rate": 1.570760129655714e-05, "loss": 0.5965, "step": 20035 }, { "epoch": 0.6154885878413664, "grad_norm": 0.3293945789337158, "learning_rate": 1.5707204462729873e-05, "loss": 0.5957, "step": 20036 }, { "epoch": 0.6155193069763155, "grad_norm": 0.36629441380500793, "learning_rate": 1.5706807615573035e-05, "loss": 0.516, "step": 20037 }, { "epoch": 0.6155500261112647, "grad_norm": 0.4048144817352295, "learning_rate": 1.5706410755087548e-05, "loss": 0.6246, "step": 20038 }, { "epoch": 0.6155807452462139, "grad_norm": 0.36063429713249207, "learning_rate": 1.5706013881274343e-05, "loss": 0.5827, "step": 20039 }, { "epoch": 0.615611464381163, "grad_norm": 0.3235717713832855, "learning_rate": 1.570561699413434e-05, "loss": 0.4724, "step": 20040 }, { "epoch": 0.6156421835161122, "grad_norm": 0.3705417215824127, "learning_rate": 1.5705220093668472e-05, "loss": 0.5425, "step": 20041 }, { "epoch": 0.6156729026510613, "grad_norm": 0.3352258503437042, "learning_rate": 1.5704823179877664e-05, "loss": 0.5893, "step": 20042 }, { "epoch": 0.6157036217860105, "grad_norm": 0.34131917357444763, "learning_rate": 1.5704426252762844e-05, "loss": 0.6585, "step": 20043 }, { "epoch": 0.6157343409209597, "grad_norm": 0.4153641164302826, "learning_rate": 1.5704029312324934e-05, "loss": 0.6488, "step": 20044 }, { "epoch": 0.6157650600559088, "grad_norm": 0.35076287388801575, "learning_rate": 1.570363235856487e-05, "loss": 0.5646, "step": 20045 }, { "epoch": 0.615795779190858, "grad_norm": 0.36791232228279114, "learning_rate": 1.5703235391483573e-05, "loss": 0.5416, "step": 20046 }, { "epoch": 0.6158264983258072, "grad_norm": 0.3328336477279663, "learning_rate": 1.570283841108197e-05, "loss": 0.5232, "step": 20047 }, { "epoch": 0.6158572174607563, "grad_norm": 0.34191474318504333, "learning_rate": 1.5702441417360993e-05, "loss": 0.5364, "step": 20048 }, { "epoch": 0.6158879365957055, "grad_norm": 0.34130430221557617, "learning_rate": 1.570204441032156e-05, "loss": 0.4932, "step": 20049 }, { "epoch": 0.6159186557306546, "grad_norm": 0.3539690673351288, "learning_rate": 1.5701647389964612e-05, "loss": 0.5926, "step": 20050 }, { "epoch": 0.6159493748656037, "grad_norm": 0.37565550208091736, "learning_rate": 1.570125035629106e-05, "loss": 0.4949, "step": 20051 }, { "epoch": 0.615980094000553, "grad_norm": 0.3752390444278717, "learning_rate": 1.5700853309301845e-05, "loss": 0.5439, "step": 20052 }, { "epoch": 0.6160108131355021, "grad_norm": 0.3942031264305115, "learning_rate": 1.5700456248997887e-05, "loss": 0.546, "step": 20053 }, { "epoch": 0.6160415322704512, "grad_norm": 0.3656398057937622, "learning_rate": 1.5700059175380117e-05, "loss": 0.5503, "step": 20054 }, { "epoch": 0.6160722514054005, "grad_norm": 0.3633913993835449, "learning_rate": 1.5699662088449458e-05, "loss": 0.5114, "step": 20055 }, { "epoch": 0.6161029705403496, "grad_norm": 0.34658706188201904, "learning_rate": 1.5699264988206843e-05, "loss": 0.4909, "step": 20056 }, { "epoch": 0.6161336896752987, "grad_norm": 0.38993069529533386, "learning_rate": 1.5698867874653197e-05, "loss": 0.6057, "step": 20057 }, { "epoch": 0.6161644088102479, "grad_norm": 0.41222715377807617, "learning_rate": 1.569847074778944e-05, "loss": 0.5977, "step": 20058 }, { "epoch": 0.616195127945197, "grad_norm": 0.4034956395626068, "learning_rate": 1.5698073607616514e-05, "loss": 0.5837, "step": 20059 }, { "epoch": 0.6162258470801463, "grad_norm": 0.4304681122303009, "learning_rate": 1.5697676454135337e-05, "loss": 0.6142, "step": 20060 }, { "epoch": 0.6162565662150954, "grad_norm": 0.34312090277671814, "learning_rate": 1.569727928734684e-05, "loss": 0.5667, "step": 20061 }, { "epoch": 0.6162872853500445, "grad_norm": 0.375232458114624, "learning_rate": 1.5696882107251948e-05, "loss": 0.4916, "step": 20062 }, { "epoch": 0.6163180044849937, "grad_norm": 0.39199256896972656, "learning_rate": 1.569648491385159e-05, "loss": 0.6329, "step": 20063 }, { "epoch": 0.6163487236199429, "grad_norm": 0.4091860353946686, "learning_rate": 1.569608770714669e-05, "loss": 0.4965, "step": 20064 }, { "epoch": 0.616379442754892, "grad_norm": 0.40206378698349, "learning_rate": 1.5695690487138184e-05, "loss": 0.5083, "step": 20065 }, { "epoch": 0.6164101618898412, "grad_norm": 0.4098055958747864, "learning_rate": 1.5695293253827e-05, "loss": 0.5579, "step": 20066 }, { "epoch": 0.6164408810247903, "grad_norm": 0.43076208233833313, "learning_rate": 1.569489600721405e-05, "loss": 0.5797, "step": 20067 }, { "epoch": 0.6164716001597395, "grad_norm": 0.31374025344848633, "learning_rate": 1.569449874730028e-05, "loss": 0.621, "step": 20068 }, { "epoch": 0.6165023192946887, "grad_norm": 0.3453384339809418, "learning_rate": 1.5694101474086606e-05, "loss": 0.5119, "step": 20069 }, { "epoch": 0.6165330384296378, "grad_norm": 0.3184613883495331, "learning_rate": 1.5693704187573965e-05, "loss": 0.5192, "step": 20070 }, { "epoch": 0.616563757564587, "grad_norm": 0.3263596296310425, "learning_rate": 1.5693306887763276e-05, "loss": 0.5996, "step": 20071 }, { "epoch": 0.6165944766995362, "grad_norm": 0.33698633313179016, "learning_rate": 1.5692909574655474e-05, "loss": 0.5505, "step": 20072 }, { "epoch": 0.6166251958344853, "grad_norm": 0.45911043882369995, "learning_rate": 1.569251224825148e-05, "loss": 0.6072, "step": 20073 }, { "epoch": 0.6166559149694345, "grad_norm": 0.48256418108940125, "learning_rate": 1.569211490855223e-05, "loss": 0.5244, "step": 20074 }, { "epoch": 0.6166866341043836, "grad_norm": 0.3918696939945221, "learning_rate": 1.5691717555558648e-05, "loss": 0.4473, "step": 20075 }, { "epoch": 0.6167173532393327, "grad_norm": 0.40557125210762024, "learning_rate": 1.5691320189271658e-05, "loss": 0.5768, "step": 20076 }, { "epoch": 0.616748072374282, "grad_norm": 0.3452286422252655, "learning_rate": 1.5690922809692197e-05, "loss": 0.556, "step": 20077 }, { "epoch": 0.6167787915092311, "grad_norm": 0.3621424734592438, "learning_rate": 1.5690525416821186e-05, "loss": 0.5931, "step": 20078 }, { "epoch": 0.6168095106441802, "grad_norm": 0.34673982858657837, "learning_rate": 1.5690128010659558e-05, "loss": 0.5616, "step": 20079 }, { "epoch": 0.6168402297791294, "grad_norm": 0.3507561981678009, "learning_rate": 1.5689730591208237e-05, "loss": 0.5735, "step": 20080 }, { "epoch": 0.6168709489140786, "grad_norm": 0.3650291860103607, "learning_rate": 1.568933315846815e-05, "loss": 0.5143, "step": 20081 }, { "epoch": 0.6169016680490277, "grad_norm": 0.377253919839859, "learning_rate": 1.5688935712440236e-05, "loss": 0.5821, "step": 20082 }, { "epoch": 0.6169323871839769, "grad_norm": 0.370964378118515, "learning_rate": 1.568853825312541e-05, "loss": 0.5806, "step": 20083 }, { "epoch": 0.616963106318926, "grad_norm": 0.380140095949173, "learning_rate": 1.5688140780524604e-05, "loss": 0.5195, "step": 20084 }, { "epoch": 0.6169938254538753, "grad_norm": 0.37320399284362793, "learning_rate": 1.5687743294638747e-05, "loss": 0.5185, "step": 20085 }, { "epoch": 0.6170245445888244, "grad_norm": 0.3763851523399353, "learning_rate": 1.5687345795468774e-05, "loss": 0.5326, "step": 20086 }, { "epoch": 0.6170552637237735, "grad_norm": 0.36061838269233704, "learning_rate": 1.5686948283015604e-05, "loss": 0.5645, "step": 20087 }, { "epoch": 0.6170859828587227, "grad_norm": 0.3443211317062378, "learning_rate": 1.568655075728017e-05, "loss": 0.5614, "step": 20088 }, { "epoch": 0.6171167019936719, "grad_norm": 0.3644794821739197, "learning_rate": 1.56861532182634e-05, "loss": 0.5322, "step": 20089 }, { "epoch": 0.617147421128621, "grad_norm": 0.3591267168521881, "learning_rate": 1.568575566596622e-05, "loss": 0.4729, "step": 20090 }, { "epoch": 0.6171781402635702, "grad_norm": 0.3833564817905426, "learning_rate": 1.568535810038956e-05, "loss": 0.5952, "step": 20091 }, { "epoch": 0.6172088593985193, "grad_norm": 0.370572566986084, "learning_rate": 1.5684960521534354e-05, "loss": 0.5313, "step": 20092 }, { "epoch": 0.6172395785334684, "grad_norm": 0.3982078731060028, "learning_rate": 1.5684562929401523e-05, "loss": 0.5766, "step": 20093 }, { "epoch": 0.6172702976684177, "grad_norm": 0.8025742769241333, "learning_rate": 1.5684165323991995e-05, "loss": 0.616, "step": 20094 }, { "epoch": 0.6173010168033668, "grad_norm": 0.3218934237957001, "learning_rate": 1.5683767705306707e-05, "loss": 0.584, "step": 20095 }, { "epoch": 0.617331735938316, "grad_norm": 0.3523132801055908, "learning_rate": 1.568337007334658e-05, "loss": 0.5709, "step": 20096 }, { "epoch": 0.6173624550732651, "grad_norm": 0.394745409488678, "learning_rate": 1.5682972428112548e-05, "loss": 0.6346, "step": 20097 }, { "epoch": 0.6173931742082143, "grad_norm": 0.40425875782966614, "learning_rate": 1.5682574769605534e-05, "loss": 0.5423, "step": 20098 }, { "epoch": 0.6174238933431635, "grad_norm": 0.40671876072883606, "learning_rate": 1.568217709782647e-05, "loss": 0.6354, "step": 20099 }, { "epoch": 0.6174546124781126, "grad_norm": 0.5964885354042053, "learning_rate": 1.5681779412776284e-05, "loss": 0.5168, "step": 20100 }, { "epoch": 0.6174853316130617, "grad_norm": 0.3435389995574951, "learning_rate": 1.5681381714455906e-05, "loss": 0.5486, "step": 20101 }, { "epoch": 0.617516050748011, "grad_norm": 0.3771516680717468, "learning_rate": 1.5680984002866265e-05, "loss": 0.6155, "step": 20102 }, { "epoch": 0.6175467698829601, "grad_norm": 0.35459277033805847, "learning_rate": 1.568058627800829e-05, "loss": 0.5525, "step": 20103 }, { "epoch": 0.6175774890179092, "grad_norm": 0.3624761700630188, "learning_rate": 1.5680188539882907e-05, "loss": 0.5567, "step": 20104 }, { "epoch": 0.6176082081528584, "grad_norm": 0.354975163936615, "learning_rate": 1.5679790788491046e-05, "loss": 0.4782, "step": 20105 }, { "epoch": 0.6176389272878076, "grad_norm": 0.3419898450374603, "learning_rate": 1.567939302383364e-05, "loss": 0.6445, "step": 20106 }, { "epoch": 0.6176696464227568, "grad_norm": 1.0901061296463013, "learning_rate": 1.5678995245911615e-05, "loss": 0.462, "step": 20107 }, { "epoch": 0.6177003655577059, "grad_norm": 0.431669682264328, "learning_rate": 1.56785974547259e-05, "loss": 0.5823, "step": 20108 }, { "epoch": 0.617731084692655, "grad_norm": 0.3622507154941559, "learning_rate": 1.5678199650277418e-05, "loss": 0.5962, "step": 20109 }, { "epoch": 0.6177618038276043, "grad_norm": 0.34497302770614624, "learning_rate": 1.5677801832567112e-05, "loss": 0.5079, "step": 20110 }, { "epoch": 0.6177925229625534, "grad_norm": 0.35605427622795105, "learning_rate": 1.56774040015959e-05, "loss": 0.5183, "step": 20111 }, { "epoch": 0.6178232420975025, "grad_norm": 0.3840530812740326, "learning_rate": 1.567700615736471e-05, "loss": 0.5885, "step": 20112 }, { "epoch": 0.6178539612324517, "grad_norm": 0.3348402678966522, "learning_rate": 1.567660829987448e-05, "loss": 0.5793, "step": 20113 }, { "epoch": 0.6178846803674009, "grad_norm": 0.38123592734336853, "learning_rate": 1.5676210429126137e-05, "loss": 0.457, "step": 20114 }, { "epoch": 0.61791539950235, "grad_norm": 0.3448537290096283, "learning_rate": 1.5675812545120606e-05, "loss": 0.5992, "step": 20115 }, { "epoch": 0.6179461186372992, "grad_norm": 0.345031201839447, "learning_rate": 1.567541464785882e-05, "loss": 0.5651, "step": 20116 }, { "epoch": 0.6179768377722483, "grad_norm": 0.37656369805336, "learning_rate": 1.5675016737341705e-05, "loss": 0.5795, "step": 20117 }, { "epoch": 0.6180075569071974, "grad_norm": 0.37120091915130615, "learning_rate": 1.5674618813570192e-05, "loss": 0.6912, "step": 20118 }, { "epoch": 0.6180382760421467, "grad_norm": 0.39015480875968933, "learning_rate": 1.5674220876545213e-05, "loss": 0.5651, "step": 20119 }, { "epoch": 0.6180689951770958, "grad_norm": 0.36776962876319885, "learning_rate": 1.567382292626769e-05, "loss": 0.6012, "step": 20120 }, { "epoch": 0.618099714312045, "grad_norm": 0.49045488238334656, "learning_rate": 1.5673424962738563e-05, "loss": 0.5896, "step": 20121 }, { "epoch": 0.6181304334469941, "grad_norm": 0.3635050654411316, "learning_rate": 1.5673026985958753e-05, "loss": 0.6022, "step": 20122 }, { "epoch": 0.6181611525819433, "grad_norm": 0.3433187007904053, "learning_rate": 1.567262899592919e-05, "loss": 0.5518, "step": 20123 }, { "epoch": 0.6181918717168925, "grad_norm": 0.5578868389129639, "learning_rate": 1.567223099265081e-05, "loss": 0.6872, "step": 20124 }, { "epoch": 0.6182225908518416, "grad_norm": 0.5959756970405579, "learning_rate": 1.5671832976124535e-05, "loss": 0.552, "step": 20125 }, { "epoch": 0.6182533099867907, "grad_norm": 0.3548046946525574, "learning_rate": 1.5671434946351298e-05, "loss": 0.5806, "step": 20126 }, { "epoch": 0.61828402912174, "grad_norm": 0.396415114402771, "learning_rate": 1.567103690333203e-05, "loss": 0.644, "step": 20127 }, { "epoch": 0.6183147482566891, "grad_norm": 0.4397309720516205, "learning_rate": 1.5670638847067658e-05, "loss": 0.6087, "step": 20128 }, { "epoch": 0.6183454673916382, "grad_norm": 0.38220444321632385, "learning_rate": 1.5670240777559116e-05, "loss": 0.5407, "step": 20129 }, { "epoch": 0.6183761865265874, "grad_norm": 0.45391401648521423, "learning_rate": 1.566984269480733e-05, "loss": 0.5807, "step": 20130 }, { "epoch": 0.6184069056615366, "grad_norm": 0.34300467371940613, "learning_rate": 1.5669444598813225e-05, "loss": 0.5637, "step": 20131 }, { "epoch": 0.6184376247964858, "grad_norm": 0.3176201581954956, "learning_rate": 1.5669046489577743e-05, "loss": 0.5296, "step": 20132 }, { "epoch": 0.6184683439314349, "grad_norm": 0.3482523262500763, "learning_rate": 1.56686483671018e-05, "loss": 0.5529, "step": 20133 }, { "epoch": 0.618499063066384, "grad_norm": 0.349398136138916, "learning_rate": 1.566825023138634e-05, "loss": 0.5601, "step": 20134 }, { "epoch": 0.6185297822013333, "grad_norm": 0.3518083989620209, "learning_rate": 1.5667852082432286e-05, "loss": 0.56, "step": 20135 }, { "epoch": 0.6185605013362824, "grad_norm": 0.34431517124176025, "learning_rate": 1.566745392024056e-05, "loss": 0.6052, "step": 20136 }, { "epoch": 0.6185912204712315, "grad_norm": 0.3580375015735626, "learning_rate": 1.5667055744812105e-05, "loss": 0.5873, "step": 20137 }, { "epoch": 0.6186219396061807, "grad_norm": 0.39282166957855225, "learning_rate": 1.5666657556147846e-05, "loss": 0.6058, "step": 20138 }, { "epoch": 0.6186526587411298, "grad_norm": 0.37852203845977783, "learning_rate": 1.5666259354248712e-05, "loss": 0.5803, "step": 20139 }, { "epoch": 0.618683377876079, "grad_norm": 0.43533265590667725, "learning_rate": 1.566586113911563e-05, "loss": 0.5092, "step": 20140 }, { "epoch": 0.6187140970110282, "grad_norm": 0.4160943031311035, "learning_rate": 1.5665462910749537e-05, "loss": 0.6051, "step": 20141 }, { "epoch": 0.6187448161459773, "grad_norm": 0.3570893704891205, "learning_rate": 1.566506466915136e-05, "loss": 0.5907, "step": 20142 }, { "epoch": 0.6187755352809264, "grad_norm": 0.36415818333625793, "learning_rate": 1.566466641432203e-05, "loss": 0.562, "step": 20143 }, { "epoch": 0.6188062544158757, "grad_norm": 0.3558482527732849, "learning_rate": 1.566426814626247e-05, "loss": 0.6093, "step": 20144 }, { "epoch": 0.6188369735508248, "grad_norm": 0.5197294354438782, "learning_rate": 1.566386986497362e-05, "loss": 0.5488, "step": 20145 }, { "epoch": 0.618867692685774, "grad_norm": 0.5130001902580261, "learning_rate": 1.566347157045641e-05, "loss": 0.5588, "step": 20146 }, { "epoch": 0.6188984118207231, "grad_norm": 0.3367644250392914, "learning_rate": 1.566307326271176e-05, "loss": 0.5467, "step": 20147 }, { "epoch": 0.6189291309556723, "grad_norm": 0.3580525815486908, "learning_rate": 1.566267494174061e-05, "loss": 0.5723, "step": 20148 }, { "epoch": 0.6189598500906215, "grad_norm": 0.39101073145866394, "learning_rate": 1.5662276607543884e-05, "loss": 0.5939, "step": 20149 }, { "epoch": 0.6189905692255706, "grad_norm": 0.3471052348613739, "learning_rate": 1.5661878260122517e-05, "loss": 0.5278, "step": 20150 }, { "epoch": 0.6190212883605197, "grad_norm": 0.4418967366218567, "learning_rate": 1.5661479899477444e-05, "loss": 0.5853, "step": 20151 }, { "epoch": 0.619052007495469, "grad_norm": 0.35835859179496765, "learning_rate": 1.566108152560958e-05, "loss": 0.5465, "step": 20152 }, { "epoch": 0.6190827266304181, "grad_norm": 0.33837372064590454, "learning_rate": 1.5660683138519873e-05, "loss": 0.5337, "step": 20153 }, { "epoch": 0.6191134457653672, "grad_norm": 0.4967881739139557, "learning_rate": 1.566028473820924e-05, "loss": 0.6301, "step": 20154 }, { "epoch": 0.6191441649003164, "grad_norm": 0.3260073661804199, "learning_rate": 1.565988632467862e-05, "loss": 0.6086, "step": 20155 }, { "epoch": 0.6191748840352655, "grad_norm": 0.3598198890686035, "learning_rate": 1.5659487897928937e-05, "loss": 0.5364, "step": 20156 }, { "epoch": 0.6192056031702148, "grad_norm": 0.39631494879722595, "learning_rate": 1.5659089457961126e-05, "loss": 0.5162, "step": 20157 }, { "epoch": 0.6192363223051639, "grad_norm": 0.33622583746910095, "learning_rate": 1.565869100477612e-05, "loss": 0.6353, "step": 20158 }, { "epoch": 0.619267041440113, "grad_norm": 0.32688531279563904, "learning_rate": 1.565829253837484e-05, "loss": 0.5558, "step": 20159 }, { "epoch": 0.6192977605750623, "grad_norm": 0.345709890127182, "learning_rate": 1.5657894058758227e-05, "loss": 0.576, "step": 20160 }, { "epoch": 0.6193284797100114, "grad_norm": 0.3592391014099121, "learning_rate": 1.56574955659272e-05, "loss": 0.5741, "step": 20161 }, { "epoch": 0.6193591988449605, "grad_norm": 0.36095279455184937, "learning_rate": 1.5657097059882707e-05, "loss": 0.6151, "step": 20162 }, { "epoch": 0.6193899179799097, "grad_norm": 0.36710911989212036, "learning_rate": 1.5656698540625663e-05, "loss": 0.553, "step": 20163 }, { "epoch": 0.6194206371148588, "grad_norm": 0.3597314655780792, "learning_rate": 1.5656300008157006e-05, "loss": 0.5314, "step": 20164 }, { "epoch": 0.619451356249808, "grad_norm": 0.5139023661613464, "learning_rate": 1.5655901462477668e-05, "loss": 0.5154, "step": 20165 }, { "epoch": 0.6194820753847572, "grad_norm": 0.3389265835285187, "learning_rate": 1.565550290358857e-05, "loss": 0.5364, "step": 20166 }, { "epoch": 0.6195127945197063, "grad_norm": 0.3534304201602936, "learning_rate": 1.5655104331490653e-05, "loss": 0.5359, "step": 20167 }, { "epoch": 0.6195435136546554, "grad_norm": 0.33933448791503906, "learning_rate": 1.5654705746184847e-05, "loss": 0.552, "step": 20168 }, { "epoch": 0.6195742327896047, "grad_norm": 0.4696357548236847, "learning_rate": 1.565430714767208e-05, "loss": 0.5586, "step": 20169 }, { "epoch": 0.6196049519245538, "grad_norm": 0.4026802182197571, "learning_rate": 1.5653908535953285e-05, "loss": 0.5399, "step": 20170 }, { "epoch": 0.619635671059503, "grad_norm": 0.411404550075531, "learning_rate": 1.565350991102939e-05, "loss": 0.5476, "step": 20171 }, { "epoch": 0.6196663901944521, "grad_norm": 0.3969603478908539, "learning_rate": 1.5653111272901328e-05, "loss": 0.6429, "step": 20172 }, { "epoch": 0.6196971093294013, "grad_norm": 0.3882073163986206, "learning_rate": 1.565271262157003e-05, "loss": 0.6163, "step": 20173 }, { "epoch": 0.6197278284643505, "grad_norm": 0.3656962811946869, "learning_rate": 1.5652313957036426e-05, "loss": 0.5671, "step": 20174 }, { "epoch": 0.6197585475992996, "grad_norm": 0.3866586983203888, "learning_rate": 1.565191527930145e-05, "loss": 0.6067, "step": 20175 }, { "epoch": 0.6197892667342487, "grad_norm": 0.32495298981666565, "learning_rate": 1.565151658836603e-05, "loss": 0.5555, "step": 20176 }, { "epoch": 0.619819985869198, "grad_norm": 0.3638238310813904, "learning_rate": 1.5651117884231097e-05, "loss": 0.5678, "step": 20177 }, { "epoch": 0.6198507050041471, "grad_norm": 0.32121720910072327, "learning_rate": 1.5650719166897584e-05, "loss": 0.5281, "step": 20178 }, { "epoch": 0.6198814241390962, "grad_norm": 0.3461604714393616, "learning_rate": 1.5650320436366424e-05, "loss": 0.5656, "step": 20179 }, { "epoch": 0.6199121432740454, "grad_norm": 0.37883663177490234, "learning_rate": 1.5649921692638544e-05, "loss": 0.625, "step": 20180 }, { "epoch": 0.6199428624089945, "grad_norm": 0.3576296865940094, "learning_rate": 1.5649522935714878e-05, "loss": 0.5826, "step": 20181 }, { "epoch": 0.6199735815439438, "grad_norm": 0.3389306962490082, "learning_rate": 1.5649124165596355e-05, "loss": 0.5896, "step": 20182 }, { "epoch": 0.6200043006788929, "grad_norm": 0.34850698709487915, "learning_rate": 1.5648725382283912e-05, "loss": 0.6494, "step": 20183 }, { "epoch": 0.620035019813842, "grad_norm": 0.3684212565422058, "learning_rate": 1.5648326585778474e-05, "loss": 0.6418, "step": 20184 }, { "epoch": 0.6200657389487912, "grad_norm": 0.3488147258758545, "learning_rate": 1.5647927776080972e-05, "loss": 0.6171, "step": 20185 }, { "epoch": 0.6200964580837404, "grad_norm": 0.3521983325481415, "learning_rate": 1.5647528953192344e-05, "loss": 0.6471, "step": 20186 }, { "epoch": 0.6201271772186895, "grad_norm": 0.3362077474594116, "learning_rate": 1.5647130117113513e-05, "loss": 0.5526, "step": 20187 }, { "epoch": 0.6201578963536387, "grad_norm": 0.35070887207984924, "learning_rate": 1.564673126784542e-05, "loss": 0.5474, "step": 20188 }, { "epoch": 0.6201886154885878, "grad_norm": 0.3778250217437744, "learning_rate": 1.564633240538899e-05, "loss": 0.5252, "step": 20189 }, { "epoch": 0.620219334623537, "grad_norm": 0.3441431522369385, "learning_rate": 1.5645933529745158e-05, "loss": 0.5587, "step": 20190 }, { "epoch": 0.6202500537584862, "grad_norm": 0.42282599210739136, "learning_rate": 1.5645534640914853e-05, "loss": 0.6173, "step": 20191 }, { "epoch": 0.6202807728934353, "grad_norm": 0.35542169213294983, "learning_rate": 1.5645135738899006e-05, "loss": 0.5444, "step": 20192 }, { "epoch": 0.6203114920283844, "grad_norm": 0.3258785009384155, "learning_rate": 1.5644736823698553e-05, "loss": 0.5479, "step": 20193 }, { "epoch": 0.6203422111633337, "grad_norm": 0.33711665868759155, "learning_rate": 1.564433789531442e-05, "loss": 0.6132, "step": 20194 }, { "epoch": 0.6203729302982828, "grad_norm": 0.3751281499862671, "learning_rate": 1.5643938953747543e-05, "loss": 0.5575, "step": 20195 }, { "epoch": 0.620403649433232, "grad_norm": 0.3692573606967926, "learning_rate": 1.5643539998998854e-05, "loss": 0.5503, "step": 20196 }, { "epoch": 0.6204343685681811, "grad_norm": 0.5115947723388672, "learning_rate": 1.564314103106928e-05, "loss": 0.6041, "step": 20197 }, { "epoch": 0.6204650877031302, "grad_norm": 0.3514859974384308, "learning_rate": 1.5642742049959762e-05, "loss": 0.5288, "step": 20198 }, { "epoch": 0.6204958068380795, "grad_norm": 0.34153345227241516, "learning_rate": 1.564234305567122e-05, "loss": 0.5687, "step": 20199 }, { "epoch": 0.6205265259730286, "grad_norm": 0.33640673756599426, "learning_rate": 1.564194404820459e-05, "loss": 0.5644, "step": 20200 }, { "epoch": 0.6205572451079777, "grad_norm": 0.3776742219924927, "learning_rate": 1.564154502756081e-05, "loss": 0.5475, "step": 20201 }, { "epoch": 0.620587964242927, "grad_norm": 0.3747698664665222, "learning_rate": 1.5641145993740807e-05, "loss": 0.5111, "step": 20202 }, { "epoch": 0.6206186833778761, "grad_norm": 0.38643911480903625, "learning_rate": 1.5640746946745517e-05, "loss": 0.5966, "step": 20203 }, { "epoch": 0.6206494025128252, "grad_norm": 0.3718632459640503, "learning_rate": 1.564034788657586e-05, "loss": 0.5391, "step": 20204 }, { "epoch": 0.6206801216477744, "grad_norm": 0.3459409475326538, "learning_rate": 1.5639948813232782e-05, "loss": 0.6171, "step": 20205 }, { "epoch": 0.6207108407827235, "grad_norm": 0.34201887249946594, "learning_rate": 1.563954972671721e-05, "loss": 0.4609, "step": 20206 }, { "epoch": 0.6207415599176728, "grad_norm": 0.359014630317688, "learning_rate": 1.5639150627030072e-05, "loss": 0.6096, "step": 20207 }, { "epoch": 0.6207722790526219, "grad_norm": 0.3277183175086975, "learning_rate": 1.563875151417231e-05, "loss": 0.5548, "step": 20208 }, { "epoch": 0.620802998187571, "grad_norm": 0.4222385883331299, "learning_rate": 1.5638352388144843e-05, "loss": 0.5677, "step": 20209 }, { "epoch": 0.6208337173225202, "grad_norm": 0.377604216337204, "learning_rate": 1.5637953248948617e-05, "loss": 0.4852, "step": 20210 }, { "epoch": 0.6208644364574694, "grad_norm": 0.33904320001602173, "learning_rate": 1.5637554096584553e-05, "loss": 0.5241, "step": 20211 }, { "epoch": 0.6208951555924185, "grad_norm": 0.36126917600631714, "learning_rate": 1.563715493105359e-05, "loss": 0.493, "step": 20212 }, { "epoch": 0.6209258747273677, "grad_norm": 0.3615190088748932, "learning_rate": 1.5636755752356655e-05, "loss": 0.5712, "step": 20213 }, { "epoch": 0.6209565938623168, "grad_norm": 0.3198011517524719, "learning_rate": 1.5636356560494686e-05, "loss": 0.5367, "step": 20214 }, { "epoch": 0.620987312997266, "grad_norm": 0.3472548723220825, "learning_rate": 1.563595735546861e-05, "loss": 0.5113, "step": 20215 }, { "epoch": 0.6210180321322152, "grad_norm": 0.3611818850040436, "learning_rate": 1.5635558137279367e-05, "loss": 0.5673, "step": 20216 }, { "epoch": 0.6210487512671643, "grad_norm": 0.3708208203315735, "learning_rate": 1.5635158905927877e-05, "loss": 0.5742, "step": 20217 }, { "epoch": 0.6210794704021135, "grad_norm": 0.42246297001838684, "learning_rate": 1.5634759661415085e-05, "loss": 0.6142, "step": 20218 }, { "epoch": 0.6211101895370627, "grad_norm": 0.3573817312717438, "learning_rate": 1.5634360403741918e-05, "loss": 0.5202, "step": 20219 }, { "epoch": 0.6211409086720118, "grad_norm": 0.3912177085876465, "learning_rate": 1.5633961132909308e-05, "loss": 0.5782, "step": 20220 }, { "epoch": 0.621171627806961, "grad_norm": 0.3656556010246277, "learning_rate": 1.563356184891819e-05, "loss": 0.5967, "step": 20221 }, { "epoch": 0.6212023469419101, "grad_norm": 0.37438473105430603, "learning_rate": 1.563316255176949e-05, "loss": 0.5639, "step": 20222 }, { "epoch": 0.6212330660768592, "grad_norm": 0.40562841296195984, "learning_rate": 1.563276324146415e-05, "loss": 0.6079, "step": 20223 }, { "epoch": 0.6212637852118085, "grad_norm": 0.38282981514930725, "learning_rate": 1.56323639180031e-05, "loss": 0.5595, "step": 20224 }, { "epoch": 0.6212945043467576, "grad_norm": 0.33911892771720886, "learning_rate": 1.5631964581387264e-05, "loss": 0.5984, "step": 20225 }, { "epoch": 0.6213252234817067, "grad_norm": 0.43060222268104553, "learning_rate": 1.5631565231617584e-05, "loss": 0.4811, "step": 20226 }, { "epoch": 0.6213559426166559, "grad_norm": 0.4229173958301544, "learning_rate": 1.5631165868694993e-05, "loss": 0.5411, "step": 20227 }, { "epoch": 0.6213866617516051, "grad_norm": 0.39340540766716003, "learning_rate": 1.5630766492620416e-05, "loss": 0.5321, "step": 20228 }, { "epoch": 0.6214173808865542, "grad_norm": 0.3778016269207001, "learning_rate": 1.5630367103394796e-05, "loss": 0.5537, "step": 20229 }, { "epoch": 0.6214481000215034, "grad_norm": 0.35572168231010437, "learning_rate": 1.5629967701019054e-05, "loss": 0.488, "step": 20230 }, { "epoch": 0.6214788191564525, "grad_norm": 0.33956265449523926, "learning_rate": 1.5629568285494135e-05, "loss": 0.5827, "step": 20231 }, { "epoch": 0.6215095382914018, "grad_norm": 0.3832680881023407, "learning_rate": 1.5629168856820964e-05, "loss": 0.62, "step": 20232 }, { "epoch": 0.6215402574263509, "grad_norm": 0.42909902334213257, "learning_rate": 1.562876941500048e-05, "loss": 0.5778, "step": 20233 }, { "epoch": 0.6215709765613, "grad_norm": 0.37452277541160583, "learning_rate": 1.5628369960033606e-05, "loss": 0.6099, "step": 20234 }, { "epoch": 0.6216016956962492, "grad_norm": 0.33592885732650757, "learning_rate": 1.5627970491921284e-05, "loss": 0.5607, "step": 20235 }, { "epoch": 0.6216324148311984, "grad_norm": 0.37904301285743713, "learning_rate": 1.5627571010664443e-05, "loss": 0.5695, "step": 20236 }, { "epoch": 0.6216631339661475, "grad_norm": 0.3290107250213623, "learning_rate": 1.5627171516264016e-05, "loss": 0.5369, "step": 20237 }, { "epoch": 0.6216938531010967, "grad_norm": 0.4227602183818817, "learning_rate": 1.5626772008720937e-05, "loss": 0.5292, "step": 20238 }, { "epoch": 0.6217245722360458, "grad_norm": 0.33521002531051636, "learning_rate": 1.562637248803614e-05, "loss": 0.5744, "step": 20239 }, { "epoch": 0.621755291370995, "grad_norm": 0.34605687856674194, "learning_rate": 1.5625972954210556e-05, "loss": 0.5463, "step": 20240 }, { "epoch": 0.6217860105059442, "grad_norm": 0.3886035680770874, "learning_rate": 1.5625573407245118e-05, "loss": 0.577, "step": 20241 }, { "epoch": 0.6218167296408933, "grad_norm": 0.38744834065437317, "learning_rate": 1.562517384714076e-05, "loss": 0.5234, "step": 20242 }, { "epoch": 0.6218474487758425, "grad_norm": 0.32925546169281006, "learning_rate": 1.562477427389842e-05, "loss": 0.561, "step": 20243 }, { "epoch": 0.6218781679107916, "grad_norm": 0.3692215383052826, "learning_rate": 1.5624374687519023e-05, "loss": 0.5563, "step": 20244 }, { "epoch": 0.6219088870457408, "grad_norm": 0.38909897208213806, "learning_rate": 1.562397508800351e-05, "loss": 0.6325, "step": 20245 }, { "epoch": 0.62193960618069, "grad_norm": 0.401996374130249, "learning_rate": 1.562357547535281e-05, "loss": 0.5966, "step": 20246 }, { "epoch": 0.6219703253156391, "grad_norm": 0.3626191318035126, "learning_rate": 1.5623175849567854e-05, "loss": 0.4666, "step": 20247 }, { "epoch": 0.6220010444505882, "grad_norm": 0.4817671775817871, "learning_rate": 1.5622776210649576e-05, "loss": 0.6384, "step": 20248 }, { "epoch": 0.6220317635855375, "grad_norm": 0.3484368920326233, "learning_rate": 1.562237655859892e-05, "loss": 0.6332, "step": 20249 }, { "epoch": 0.6220624827204866, "grad_norm": 0.35054945945739746, "learning_rate": 1.5621976893416805e-05, "loss": 0.534, "step": 20250 }, { "epoch": 0.6220932018554357, "grad_norm": 0.3210464417934418, "learning_rate": 1.562157721510417e-05, "loss": 0.51, "step": 20251 }, { "epoch": 0.6221239209903849, "grad_norm": 0.4582434892654419, "learning_rate": 1.562117752366195e-05, "loss": 0.5289, "step": 20252 }, { "epoch": 0.6221546401253341, "grad_norm": 0.3635072410106659, "learning_rate": 1.5620777819091077e-05, "loss": 0.532, "step": 20253 }, { "epoch": 0.6221853592602832, "grad_norm": 0.3709373474121094, "learning_rate": 1.5620378101392483e-05, "loss": 0.6439, "step": 20254 }, { "epoch": 0.6222160783952324, "grad_norm": 0.36763226985931396, "learning_rate": 1.5619978370567105e-05, "loss": 0.5919, "step": 20255 }, { "epoch": 0.6222467975301815, "grad_norm": 0.36122146248817444, "learning_rate": 1.5619578626615877e-05, "loss": 0.5491, "step": 20256 }, { "epoch": 0.6222775166651308, "grad_norm": 0.36009296774864197, "learning_rate": 1.561917886953973e-05, "loss": 0.6079, "step": 20257 }, { "epoch": 0.6223082358000799, "grad_norm": 0.36087119579315186, "learning_rate": 1.5618779099339593e-05, "loss": 0.5413, "step": 20258 }, { "epoch": 0.622338954935029, "grad_norm": 0.38777151703834534, "learning_rate": 1.561837931601641e-05, "loss": 0.5624, "step": 20259 }, { "epoch": 0.6223696740699782, "grad_norm": 0.38642266392707825, "learning_rate": 1.561797951957111e-05, "loss": 0.5813, "step": 20260 }, { "epoch": 0.6224003932049273, "grad_norm": 0.40504777431488037, "learning_rate": 1.5617579710004626e-05, "loss": 0.5746, "step": 20261 }, { "epoch": 0.6224311123398765, "grad_norm": 0.37294527888298035, "learning_rate": 1.561717988731789e-05, "loss": 0.5642, "step": 20262 }, { "epoch": 0.6224618314748257, "grad_norm": 0.3473112881183624, "learning_rate": 1.5616780051511842e-05, "loss": 0.6123, "step": 20263 }, { "epoch": 0.6224925506097748, "grad_norm": 0.34021830558776855, "learning_rate": 1.5616380202587408e-05, "loss": 0.6057, "step": 20264 }, { "epoch": 0.6225232697447239, "grad_norm": 0.37240070104599, "learning_rate": 1.5615980340545527e-05, "loss": 0.5767, "step": 20265 }, { "epoch": 0.6225539888796732, "grad_norm": 0.330222487449646, "learning_rate": 1.5615580465387134e-05, "loss": 0.5613, "step": 20266 }, { "epoch": 0.6225847080146223, "grad_norm": 0.34180399775505066, "learning_rate": 1.5615180577113157e-05, "loss": 0.5532, "step": 20267 }, { "epoch": 0.6226154271495715, "grad_norm": 0.3683977425098419, "learning_rate": 1.5614780675724535e-05, "loss": 0.5788, "step": 20268 }, { "epoch": 0.6226461462845206, "grad_norm": 0.3937702775001526, "learning_rate": 1.56143807612222e-05, "loss": 0.5392, "step": 20269 }, { "epoch": 0.6226768654194698, "grad_norm": 0.3747239410877228, "learning_rate": 1.561398083360709e-05, "loss": 0.5085, "step": 20270 }, { "epoch": 0.622707584554419, "grad_norm": 0.37120485305786133, "learning_rate": 1.5613580892880135e-05, "loss": 0.5837, "step": 20271 }, { "epoch": 0.6227383036893681, "grad_norm": 0.36735910177230835, "learning_rate": 1.5613180939042267e-05, "loss": 0.6175, "step": 20272 }, { "epoch": 0.6227690228243172, "grad_norm": 0.3447132110595703, "learning_rate": 1.5612780972094424e-05, "loss": 0.5629, "step": 20273 }, { "epoch": 0.6227997419592665, "grad_norm": 0.36704298853874207, "learning_rate": 1.5612380992037536e-05, "loss": 0.5354, "step": 20274 }, { "epoch": 0.6228304610942156, "grad_norm": 0.3546651005744934, "learning_rate": 1.5611980998872545e-05, "loss": 0.5573, "step": 20275 }, { "epoch": 0.6228611802291647, "grad_norm": 0.3663739562034607, "learning_rate": 1.561158099260038e-05, "loss": 0.6658, "step": 20276 }, { "epoch": 0.6228918993641139, "grad_norm": 1.7424795627593994, "learning_rate": 1.5611180973221973e-05, "loss": 0.4956, "step": 20277 }, { "epoch": 0.622922618499063, "grad_norm": 0.41066089272499084, "learning_rate": 1.5610780940738262e-05, "loss": 0.5854, "step": 20278 }, { "epoch": 0.6229533376340122, "grad_norm": 0.3640250265598297, "learning_rate": 1.5610380895150183e-05, "loss": 0.631, "step": 20279 }, { "epoch": 0.6229840567689614, "grad_norm": 0.36628273129463196, "learning_rate": 1.5609980836458662e-05, "loss": 0.5503, "step": 20280 }, { "epoch": 0.6230147759039105, "grad_norm": 0.3494238257408142, "learning_rate": 1.5609580764664644e-05, "loss": 0.6163, "step": 20281 }, { "epoch": 0.6230454950388598, "grad_norm": 0.3752349615097046, "learning_rate": 1.5609180679769055e-05, "loss": 0.5398, "step": 20282 }, { "epoch": 0.6230762141738089, "grad_norm": 0.33498701453208923, "learning_rate": 1.5608780581772834e-05, "loss": 0.4901, "step": 20283 }, { "epoch": 0.623106933308758, "grad_norm": 0.4469001889228821, "learning_rate": 1.5608380470676917e-05, "loss": 0.5571, "step": 20284 }, { "epoch": 0.6231376524437072, "grad_norm": 0.38199135661125183, "learning_rate": 1.560798034648223e-05, "loss": 0.585, "step": 20285 }, { "epoch": 0.6231683715786563, "grad_norm": 0.3607659935951233, "learning_rate": 1.560758020918972e-05, "loss": 0.6686, "step": 20286 }, { "epoch": 0.6231990907136055, "grad_norm": 0.34624844789505005, "learning_rate": 1.560718005880031e-05, "loss": 0.5034, "step": 20287 }, { "epoch": 0.6232298098485547, "grad_norm": 0.4342850148677826, "learning_rate": 1.560677989531494e-05, "loss": 0.5739, "step": 20288 }, { "epoch": 0.6232605289835038, "grad_norm": 0.37431398034095764, "learning_rate": 1.5606379718734547e-05, "loss": 0.5642, "step": 20289 }, { "epoch": 0.6232912481184529, "grad_norm": 0.33282822370529175, "learning_rate": 1.5605979529060057e-05, "loss": 0.5553, "step": 20290 }, { "epoch": 0.6233219672534022, "grad_norm": 0.3502263128757477, "learning_rate": 1.5605579326292413e-05, "loss": 0.4899, "step": 20291 }, { "epoch": 0.6233526863883513, "grad_norm": 0.41162538528442383, "learning_rate": 1.560517911043255e-05, "loss": 0.5702, "step": 20292 }, { "epoch": 0.6233834055233005, "grad_norm": 0.35347771644592285, "learning_rate": 1.5604778881481398e-05, "loss": 0.4785, "step": 20293 }, { "epoch": 0.6234141246582496, "grad_norm": 0.36461132764816284, "learning_rate": 1.5604378639439892e-05, "loss": 0.5028, "step": 20294 }, { "epoch": 0.6234448437931988, "grad_norm": 0.41234779357910156, "learning_rate": 1.560397838430897e-05, "loss": 0.5289, "step": 20295 }, { "epoch": 0.623475562928148, "grad_norm": 0.36615824699401855, "learning_rate": 1.5603578116089563e-05, "loss": 0.5258, "step": 20296 }, { "epoch": 0.6235062820630971, "grad_norm": 0.34095320105552673, "learning_rate": 1.5603177834782608e-05, "loss": 0.5942, "step": 20297 }, { "epoch": 0.6235370011980462, "grad_norm": 0.3924514353275299, "learning_rate": 1.560277754038904e-05, "loss": 0.5511, "step": 20298 }, { "epoch": 0.6235677203329955, "grad_norm": 0.3572743535041809, "learning_rate": 1.5602377232909797e-05, "loss": 0.5148, "step": 20299 }, { "epoch": 0.6235984394679446, "grad_norm": 0.4173939526081085, "learning_rate": 1.560197691234581e-05, "loss": 0.4636, "step": 20300 }, { "epoch": 0.6236291586028937, "grad_norm": 0.39687275886535645, "learning_rate": 1.560157657869801e-05, "loss": 0.5942, "step": 20301 }, { "epoch": 0.6236598777378429, "grad_norm": 0.36986595392227173, "learning_rate": 1.560117623196734e-05, "loss": 0.5624, "step": 20302 }, { "epoch": 0.623690596872792, "grad_norm": 0.33975210785865784, "learning_rate": 1.560077587215473e-05, "loss": 0.5591, "step": 20303 }, { "epoch": 0.6237213160077412, "grad_norm": 0.33342888951301575, "learning_rate": 1.5600375499261123e-05, "loss": 0.5366, "step": 20304 }, { "epoch": 0.6237520351426904, "grad_norm": 0.3406507968902588, "learning_rate": 1.559997511328744e-05, "loss": 0.5705, "step": 20305 }, { "epoch": 0.6237827542776395, "grad_norm": 0.3446168005466461, "learning_rate": 1.5599574714234628e-05, "loss": 0.575, "step": 20306 }, { "epoch": 0.6238134734125887, "grad_norm": 0.39808762073516846, "learning_rate": 1.5599174302103618e-05, "loss": 0.5534, "step": 20307 }, { "epoch": 0.6238441925475379, "grad_norm": 0.37941062450408936, "learning_rate": 1.5598773876895344e-05, "loss": 0.6134, "step": 20308 }, { "epoch": 0.623874911682487, "grad_norm": 0.386218786239624, "learning_rate": 1.5598373438610742e-05, "loss": 0.6606, "step": 20309 }, { "epoch": 0.6239056308174362, "grad_norm": 0.3606838583946228, "learning_rate": 1.559797298725075e-05, "loss": 0.5695, "step": 20310 }, { "epoch": 0.6239363499523853, "grad_norm": 0.37791499495506287, "learning_rate": 1.5597572522816302e-05, "loss": 0.5424, "step": 20311 }, { "epoch": 0.6239670690873345, "grad_norm": 0.3469729721546173, "learning_rate": 1.559717204530833e-05, "loss": 0.5734, "step": 20312 }, { "epoch": 0.6239977882222837, "grad_norm": 0.3574044406414032, "learning_rate": 1.5596771554727773e-05, "loss": 0.5986, "step": 20313 }, { "epoch": 0.6240285073572328, "grad_norm": 0.346773236989975, "learning_rate": 1.5596371051075564e-05, "loss": 0.5852, "step": 20314 }, { "epoch": 0.6240592264921819, "grad_norm": 0.4272225797176361, "learning_rate": 1.559597053435264e-05, "loss": 0.5713, "step": 20315 }, { "epoch": 0.6240899456271312, "grad_norm": 0.34396475553512573, "learning_rate": 1.5595570004559936e-05, "loss": 0.6551, "step": 20316 }, { "epoch": 0.6241206647620803, "grad_norm": 0.3303702473640442, "learning_rate": 1.559516946169839e-05, "loss": 0.5962, "step": 20317 }, { "epoch": 0.6241513838970295, "grad_norm": 0.3556104600429535, "learning_rate": 1.559476890576893e-05, "loss": 0.6011, "step": 20318 }, { "epoch": 0.6241821030319786, "grad_norm": 0.3486095070838928, "learning_rate": 1.55943683367725e-05, "loss": 0.4926, "step": 20319 }, { "epoch": 0.6242128221669277, "grad_norm": 0.39561527967453003, "learning_rate": 1.5593967754710032e-05, "loss": 0.5444, "step": 20320 }, { "epoch": 0.624243541301877, "grad_norm": 0.3853559195995331, "learning_rate": 1.559356715958246e-05, "loss": 0.5784, "step": 20321 }, { "epoch": 0.6242742604368261, "grad_norm": 0.38690152764320374, "learning_rate": 1.5593166551390722e-05, "loss": 0.525, "step": 20322 }, { "epoch": 0.6243049795717752, "grad_norm": 0.5880854725837708, "learning_rate": 1.5592765930135755e-05, "loss": 0.5461, "step": 20323 }, { "epoch": 0.6243356987067245, "grad_norm": 0.3164910674095154, "learning_rate": 1.559236529581849e-05, "loss": 0.5702, "step": 20324 }, { "epoch": 0.6243664178416736, "grad_norm": 0.3293531537055969, "learning_rate": 1.5591964648439866e-05, "loss": 0.639, "step": 20325 }, { "epoch": 0.6243971369766227, "grad_norm": 0.3801756799221039, "learning_rate": 1.5591563988000816e-05, "loss": 0.4494, "step": 20326 }, { "epoch": 0.6244278561115719, "grad_norm": 0.34833499789237976, "learning_rate": 1.5591163314502283e-05, "loss": 0.5045, "step": 20327 }, { "epoch": 0.624458575246521, "grad_norm": 0.3804747760295868, "learning_rate": 1.559076262794519e-05, "loss": 0.5596, "step": 20328 }, { "epoch": 0.6244892943814703, "grad_norm": 0.3530394732952118, "learning_rate": 1.5590361928330487e-05, "loss": 0.5817, "step": 20329 }, { "epoch": 0.6245200135164194, "grad_norm": 0.3381734788417816, "learning_rate": 1.5589961215659103e-05, "loss": 0.4873, "step": 20330 }, { "epoch": 0.6245507326513685, "grad_norm": 0.35292479395866394, "learning_rate": 1.558956048993197e-05, "loss": 0.5822, "step": 20331 }, { "epoch": 0.6245814517863177, "grad_norm": 0.33589720726013184, "learning_rate": 1.5589159751150028e-05, "loss": 0.5516, "step": 20332 }, { "epoch": 0.6246121709212669, "grad_norm": 0.4186857044696808, "learning_rate": 1.558875899931422e-05, "loss": 0.5724, "step": 20333 }, { "epoch": 0.624642890056216, "grad_norm": 0.3641681969165802, "learning_rate": 1.558835823442547e-05, "loss": 0.5258, "step": 20334 }, { "epoch": 0.6246736091911652, "grad_norm": 0.36163365840911865, "learning_rate": 1.558795745648472e-05, "loss": 0.5774, "step": 20335 }, { "epoch": 0.6247043283261143, "grad_norm": 0.4218415319919586, "learning_rate": 1.55875566654929e-05, "loss": 0.5487, "step": 20336 }, { "epoch": 0.6247350474610635, "grad_norm": 0.3333381116390228, "learning_rate": 1.558715586145096e-05, "loss": 0.6728, "step": 20337 }, { "epoch": 0.6247657665960127, "grad_norm": 0.3613649010658264, "learning_rate": 1.558675504435982e-05, "loss": 0.4685, "step": 20338 }, { "epoch": 0.6247964857309618, "grad_norm": 0.31287825107574463, "learning_rate": 1.558635421422043e-05, "loss": 0.4974, "step": 20339 }, { "epoch": 0.6248272048659109, "grad_norm": 0.36588823795318604, "learning_rate": 1.5585953371033717e-05, "loss": 0.5454, "step": 20340 }, { "epoch": 0.6248579240008602, "grad_norm": 0.4029732942581177, "learning_rate": 1.5585552514800615e-05, "loss": 0.6342, "step": 20341 }, { "epoch": 0.6248886431358093, "grad_norm": 0.349438339471817, "learning_rate": 1.5585151645522068e-05, "loss": 0.5102, "step": 20342 }, { "epoch": 0.6249193622707585, "grad_norm": 0.36874547600746155, "learning_rate": 1.5584750763199013e-05, "loss": 0.6263, "step": 20343 }, { "epoch": 0.6249500814057076, "grad_norm": 0.39016738533973694, "learning_rate": 1.558434986783238e-05, "loss": 0.611, "step": 20344 }, { "epoch": 0.6249808005406567, "grad_norm": 0.4183611273765564, "learning_rate": 1.5583948959423106e-05, "loss": 0.5467, "step": 20345 }, { "epoch": 0.625011519675606, "grad_norm": 0.3780514597892761, "learning_rate": 1.558354803797213e-05, "loss": 0.5429, "step": 20346 }, { "epoch": 0.6250422388105551, "grad_norm": 0.34606581926345825, "learning_rate": 1.558314710348039e-05, "loss": 0.5278, "step": 20347 }, { "epoch": 0.6250729579455042, "grad_norm": 0.3839844763278961, "learning_rate": 1.5582746155948816e-05, "loss": 0.4788, "step": 20348 }, { "epoch": 0.6251036770804534, "grad_norm": 0.34468334913253784, "learning_rate": 1.5582345195378356e-05, "loss": 0.5722, "step": 20349 }, { "epoch": 0.6251343962154026, "grad_norm": 0.7304366230964661, "learning_rate": 1.558194422176993e-05, "loss": 0.5452, "step": 20350 }, { "epoch": 0.6251651153503517, "grad_norm": 0.67729651927948, "learning_rate": 1.558154323512449e-05, "loss": 0.5696, "step": 20351 }, { "epoch": 0.6251958344853009, "grad_norm": 0.326732873916626, "learning_rate": 1.5581142235442962e-05, "loss": 0.6293, "step": 20352 }, { "epoch": 0.62522655362025, "grad_norm": 0.3587125539779663, "learning_rate": 1.5580741222726287e-05, "loss": 0.5842, "step": 20353 }, { "epoch": 0.6252572727551993, "grad_norm": 0.38662803173065186, "learning_rate": 1.55803401969754e-05, "loss": 0.618, "step": 20354 }, { "epoch": 0.6252879918901484, "grad_norm": 0.3704102337360382, "learning_rate": 1.5579939158191243e-05, "loss": 0.5068, "step": 20355 }, { "epoch": 0.6253187110250975, "grad_norm": 0.36692318320274353, "learning_rate": 1.5579538106374744e-05, "loss": 0.6086, "step": 20356 }, { "epoch": 0.6253494301600467, "grad_norm": 0.3747425377368927, "learning_rate": 1.557913704152685e-05, "loss": 0.5509, "step": 20357 }, { "epoch": 0.6253801492949959, "grad_norm": 0.36989927291870117, "learning_rate": 1.5578735963648486e-05, "loss": 0.548, "step": 20358 }, { "epoch": 0.625410868429945, "grad_norm": 0.45083093643188477, "learning_rate": 1.5578334872740596e-05, "loss": 0.5441, "step": 20359 }, { "epoch": 0.6254415875648942, "grad_norm": 0.3696020841598511, "learning_rate": 1.5577933768804117e-05, "loss": 0.5545, "step": 20360 }, { "epoch": 0.6254723066998433, "grad_norm": 0.32543450593948364, "learning_rate": 1.557753265183998e-05, "loss": 0.578, "step": 20361 }, { "epoch": 0.6255030258347924, "grad_norm": 0.3693821132183075, "learning_rate": 1.557713152184913e-05, "loss": 0.5607, "step": 20362 }, { "epoch": 0.6255337449697417, "grad_norm": 0.388360857963562, "learning_rate": 1.55767303788325e-05, "loss": 0.5603, "step": 20363 }, { "epoch": 0.6255644641046908, "grad_norm": 0.3571040630340576, "learning_rate": 1.5576329222791025e-05, "loss": 0.5965, "step": 20364 }, { "epoch": 0.6255951832396399, "grad_norm": 0.3314792513847351, "learning_rate": 1.5575928053725644e-05, "loss": 0.5526, "step": 20365 }, { "epoch": 0.6256259023745891, "grad_norm": 0.3671272397041321, "learning_rate": 1.5575526871637294e-05, "loss": 0.5595, "step": 20366 }, { "epoch": 0.6256566215095383, "grad_norm": 0.3597525954246521, "learning_rate": 1.5575125676526914e-05, "loss": 0.5451, "step": 20367 }, { "epoch": 0.6256873406444875, "grad_norm": 0.42397505044937134, "learning_rate": 1.557472446839543e-05, "loss": 0.5394, "step": 20368 }, { "epoch": 0.6257180597794366, "grad_norm": 0.39243772625923157, "learning_rate": 1.5574323247243795e-05, "loss": 0.5, "step": 20369 }, { "epoch": 0.6257487789143857, "grad_norm": 0.322532057762146, "learning_rate": 1.5573922013072937e-05, "loss": 0.5168, "step": 20370 }, { "epoch": 0.625779498049335, "grad_norm": 0.40949758887290955, "learning_rate": 1.5573520765883797e-05, "loss": 0.5877, "step": 20371 }, { "epoch": 0.6258102171842841, "grad_norm": 0.3303810656070709, "learning_rate": 1.5573119505677308e-05, "loss": 0.6063, "step": 20372 }, { "epoch": 0.6258409363192332, "grad_norm": 0.3624688982963562, "learning_rate": 1.557271823245441e-05, "loss": 0.5611, "step": 20373 }, { "epoch": 0.6258716554541824, "grad_norm": 0.3398160934448242, "learning_rate": 1.5572316946216037e-05, "loss": 0.5886, "step": 20374 }, { "epoch": 0.6259023745891316, "grad_norm": 0.32726097106933594, "learning_rate": 1.5571915646963128e-05, "loss": 0.4545, "step": 20375 }, { "epoch": 0.6259330937240807, "grad_norm": 0.3664303421974182, "learning_rate": 1.5571514334696626e-05, "loss": 0.6124, "step": 20376 }, { "epoch": 0.6259638128590299, "grad_norm": 0.3315085470676422, "learning_rate": 1.557111300941746e-05, "loss": 0.497, "step": 20377 }, { "epoch": 0.625994531993979, "grad_norm": 0.3742835223674774, "learning_rate": 1.557071167112657e-05, "loss": 0.6066, "step": 20378 }, { "epoch": 0.6260252511289283, "grad_norm": 0.48858344554901123, "learning_rate": 1.5570310319824895e-05, "loss": 0.5968, "step": 20379 }, { "epoch": 0.6260559702638774, "grad_norm": 0.45883843302726746, "learning_rate": 1.556990895551337e-05, "loss": 0.5385, "step": 20380 }, { "epoch": 0.6260866893988265, "grad_norm": 0.3296858072280884, "learning_rate": 1.5569507578192936e-05, "loss": 0.5691, "step": 20381 }, { "epoch": 0.6261174085337757, "grad_norm": 0.3481998145580292, "learning_rate": 1.5569106187864528e-05, "loss": 0.5321, "step": 20382 }, { "epoch": 0.6261481276687249, "grad_norm": 0.37513676285743713, "learning_rate": 1.556870478452908e-05, "loss": 0.5585, "step": 20383 }, { "epoch": 0.626178846803674, "grad_norm": 0.3349640667438507, "learning_rate": 1.5568303368187536e-05, "loss": 0.5661, "step": 20384 }, { "epoch": 0.6262095659386232, "grad_norm": 0.36723998188972473, "learning_rate": 1.556790193884083e-05, "loss": 0.6002, "step": 20385 }, { "epoch": 0.6262402850735723, "grad_norm": 0.4949514865875244, "learning_rate": 1.55675004964899e-05, "loss": 0.5483, "step": 20386 }, { "epoch": 0.6262710042085214, "grad_norm": 0.37252646684646606, "learning_rate": 1.5567099041135683e-05, "loss": 0.5546, "step": 20387 }, { "epoch": 0.6263017233434707, "grad_norm": 0.35027769207954407, "learning_rate": 1.556669757277912e-05, "loss": 0.5778, "step": 20388 }, { "epoch": 0.6263324424784198, "grad_norm": 0.34808480739593506, "learning_rate": 1.5566296091421143e-05, "loss": 0.5997, "step": 20389 }, { "epoch": 0.6263631616133689, "grad_norm": 0.35844239592552185, "learning_rate": 1.5565894597062692e-05, "loss": 0.4572, "step": 20390 }, { "epoch": 0.6263938807483181, "grad_norm": 0.3405925929546356, "learning_rate": 1.556549308970471e-05, "loss": 0.5414, "step": 20391 }, { "epoch": 0.6264245998832673, "grad_norm": 0.37571245431900024, "learning_rate": 1.5565091569348127e-05, "loss": 0.6482, "step": 20392 }, { "epoch": 0.6264553190182165, "grad_norm": 0.36241674423217773, "learning_rate": 1.5564690035993882e-05, "loss": 0.4692, "step": 20393 }, { "epoch": 0.6264860381531656, "grad_norm": 0.3588320314884186, "learning_rate": 1.556428848964292e-05, "loss": 0.5839, "step": 20394 }, { "epoch": 0.6265167572881147, "grad_norm": 0.37929967045783997, "learning_rate": 1.556388693029617e-05, "loss": 0.5599, "step": 20395 }, { "epoch": 0.626547476423064, "grad_norm": 0.3708060383796692, "learning_rate": 1.5563485357954572e-05, "loss": 0.5297, "step": 20396 }, { "epoch": 0.6265781955580131, "grad_norm": 0.38552412390708923, "learning_rate": 1.556308377261907e-05, "loss": 0.6319, "step": 20397 }, { "epoch": 0.6266089146929622, "grad_norm": 0.3585259020328522, "learning_rate": 1.5562682174290595e-05, "loss": 0.5436, "step": 20398 }, { "epoch": 0.6266396338279114, "grad_norm": 0.3690339922904968, "learning_rate": 1.5562280562970088e-05, "loss": 0.6288, "step": 20399 }, { "epoch": 0.6266703529628606, "grad_norm": 0.46102777123451233, "learning_rate": 1.5561878938658487e-05, "loss": 0.6163, "step": 20400 }, { "epoch": 0.6267010720978097, "grad_norm": 0.323840469121933, "learning_rate": 1.5561477301356725e-05, "loss": 0.5162, "step": 20401 }, { "epoch": 0.6267317912327589, "grad_norm": 0.3887838125228882, "learning_rate": 1.556107565106575e-05, "loss": 0.5217, "step": 20402 }, { "epoch": 0.626762510367708, "grad_norm": 0.36207377910614014, "learning_rate": 1.5560673987786494e-05, "loss": 0.6146, "step": 20403 }, { "epoch": 0.6267932295026573, "grad_norm": 0.34619376063346863, "learning_rate": 1.556027231151989e-05, "loss": 0.5619, "step": 20404 }, { "epoch": 0.6268239486376064, "grad_norm": 0.439299076795578, "learning_rate": 1.5559870622266884e-05, "loss": 0.5694, "step": 20405 }, { "epoch": 0.6268546677725555, "grad_norm": 0.3564947843551636, "learning_rate": 1.5559468920028412e-05, "loss": 0.5128, "step": 20406 }, { "epoch": 0.6268853869075047, "grad_norm": 0.3430590033531189, "learning_rate": 1.5559067204805412e-05, "loss": 0.6248, "step": 20407 }, { "epoch": 0.6269161060424538, "grad_norm": 0.4377790689468384, "learning_rate": 1.5558665476598827e-05, "loss": 0.556, "step": 20408 }, { "epoch": 0.626946825177403, "grad_norm": 0.3764573931694031, "learning_rate": 1.5558263735409585e-05, "loss": 0.4679, "step": 20409 }, { "epoch": 0.6269775443123522, "grad_norm": 0.3445410430431366, "learning_rate": 1.5557861981238632e-05, "loss": 0.5216, "step": 20410 }, { "epoch": 0.6270082634473013, "grad_norm": 0.3749435842037201, "learning_rate": 1.5557460214086905e-05, "loss": 0.5705, "step": 20411 }, { "epoch": 0.6270389825822504, "grad_norm": 0.3365252614021301, "learning_rate": 1.555705843395534e-05, "loss": 0.5454, "step": 20412 }, { "epoch": 0.6270697017171997, "grad_norm": 0.33961358666419983, "learning_rate": 1.555665664084488e-05, "loss": 0.5425, "step": 20413 }, { "epoch": 0.6271004208521488, "grad_norm": 0.38736483454704285, "learning_rate": 1.5556254834756455e-05, "loss": 0.5602, "step": 20414 }, { "epoch": 0.627131139987098, "grad_norm": 0.3686889111995697, "learning_rate": 1.555585301569101e-05, "loss": 0.5088, "step": 20415 }, { "epoch": 0.6271618591220471, "grad_norm": 0.3708992004394531, "learning_rate": 1.5555451183649485e-05, "loss": 0.5923, "step": 20416 }, { "epoch": 0.6271925782569963, "grad_norm": 0.3533540368080139, "learning_rate": 1.5555049338632815e-05, "loss": 0.5337, "step": 20417 }, { "epoch": 0.6272232973919455, "grad_norm": 0.3432773947715759, "learning_rate": 1.555464748064194e-05, "loss": 0.5947, "step": 20418 }, { "epoch": 0.6272540165268946, "grad_norm": 0.32339704036712646, "learning_rate": 1.5554245609677796e-05, "loss": 0.5675, "step": 20419 }, { "epoch": 0.6272847356618437, "grad_norm": 0.35667115449905396, "learning_rate": 1.5553843725741323e-05, "loss": 0.5159, "step": 20420 }, { "epoch": 0.627315454796793, "grad_norm": 0.6666114926338196, "learning_rate": 1.5553441828833463e-05, "loss": 0.5435, "step": 20421 }, { "epoch": 0.6273461739317421, "grad_norm": 0.3649226725101471, "learning_rate": 1.555303991895515e-05, "loss": 0.6304, "step": 20422 }, { "epoch": 0.6273768930666912, "grad_norm": 0.3630847930908203, "learning_rate": 1.5552637996107323e-05, "loss": 0.5141, "step": 20423 }, { "epoch": 0.6274076122016404, "grad_norm": 0.31804391741752625, "learning_rate": 1.5552236060290923e-05, "loss": 0.5937, "step": 20424 }, { "epoch": 0.6274383313365895, "grad_norm": 0.32584747672080994, "learning_rate": 1.5551834111506892e-05, "loss": 0.5879, "step": 20425 }, { "epoch": 0.6274690504715387, "grad_norm": 0.33412402868270874, "learning_rate": 1.555143214975616e-05, "loss": 0.5278, "step": 20426 }, { "epoch": 0.6274997696064879, "grad_norm": 0.3993333876132965, "learning_rate": 1.5551030175039673e-05, "loss": 0.5245, "step": 20427 }, { "epoch": 0.627530488741437, "grad_norm": 0.33502545952796936, "learning_rate": 1.5550628187358365e-05, "loss": 0.5657, "step": 20428 }, { "epoch": 0.6275612078763863, "grad_norm": 0.3404931426048279, "learning_rate": 1.5550226186713177e-05, "loss": 0.6055, "step": 20429 }, { "epoch": 0.6275919270113354, "grad_norm": 0.37626227736473083, "learning_rate": 1.554982417310505e-05, "loss": 0.5641, "step": 20430 }, { "epoch": 0.6276226461462845, "grad_norm": 0.41799941658973694, "learning_rate": 1.5549422146534922e-05, "loss": 0.5933, "step": 20431 }, { "epoch": 0.6276533652812337, "grad_norm": 0.3484961986541748, "learning_rate": 1.554902010700373e-05, "loss": 0.6127, "step": 20432 }, { "epoch": 0.6276840844161828, "grad_norm": 0.3641349673271179, "learning_rate": 1.5548618054512413e-05, "loss": 0.5556, "step": 20433 }, { "epoch": 0.627714803551132, "grad_norm": 0.41515618562698364, "learning_rate": 1.554821598906191e-05, "loss": 0.566, "step": 20434 }, { "epoch": 0.6277455226860812, "grad_norm": 0.3398890793323517, "learning_rate": 1.5547813910653164e-05, "loss": 0.5135, "step": 20435 }, { "epoch": 0.6277762418210303, "grad_norm": 0.3732627034187317, "learning_rate": 1.554741181928711e-05, "loss": 0.6581, "step": 20436 }, { "epoch": 0.6278069609559794, "grad_norm": 0.3755606710910797, "learning_rate": 1.5547009714964688e-05, "loss": 0.6143, "step": 20437 }, { "epoch": 0.6278376800909287, "grad_norm": 0.339266300201416, "learning_rate": 1.5546607597686838e-05, "loss": 0.5982, "step": 20438 }, { "epoch": 0.6278683992258778, "grad_norm": 0.3711911737918854, "learning_rate": 1.55462054674545e-05, "loss": 0.6118, "step": 20439 }, { "epoch": 0.627899118360827, "grad_norm": 0.37741419672966003, "learning_rate": 1.5545803324268608e-05, "loss": 0.5943, "step": 20440 }, { "epoch": 0.6279298374957761, "grad_norm": 0.3734387457370758, "learning_rate": 1.5545401168130102e-05, "loss": 0.6435, "step": 20441 }, { "epoch": 0.6279605566307253, "grad_norm": 0.35493794083595276, "learning_rate": 1.554499899903993e-05, "loss": 0.5139, "step": 20442 }, { "epoch": 0.6279912757656745, "grad_norm": 0.3818681538105011, "learning_rate": 1.554459681699903e-05, "loss": 0.5976, "step": 20443 }, { "epoch": 0.6280219949006236, "grad_norm": 0.3543015420436859, "learning_rate": 1.5544194622008328e-05, "loss": 0.5883, "step": 20444 }, { "epoch": 0.6280527140355727, "grad_norm": 0.3577049970626831, "learning_rate": 1.5543792414068776e-05, "loss": 0.6056, "step": 20445 }, { "epoch": 0.628083433170522, "grad_norm": 0.37554576992988586, "learning_rate": 1.554339019318131e-05, "loss": 0.516, "step": 20446 }, { "epoch": 0.6281141523054711, "grad_norm": 0.3587116301059723, "learning_rate": 1.5542987959346868e-05, "loss": 0.6028, "step": 20447 }, { "epoch": 0.6281448714404202, "grad_norm": 0.35254475474357605, "learning_rate": 1.554258571256639e-05, "loss": 0.584, "step": 20448 }, { "epoch": 0.6281755905753694, "grad_norm": 0.3810597062110901, "learning_rate": 1.554218345284082e-05, "loss": 0.5963, "step": 20449 }, { "epoch": 0.6282063097103185, "grad_norm": 0.33081164956092834, "learning_rate": 1.554178118017109e-05, "loss": 0.5702, "step": 20450 }, { "epoch": 0.6282370288452677, "grad_norm": 0.4131650924682617, "learning_rate": 1.5541378894558143e-05, "loss": 0.5955, "step": 20451 }, { "epoch": 0.6282677479802169, "grad_norm": 0.3811388313770294, "learning_rate": 1.554097659600292e-05, "loss": 0.5149, "step": 20452 }, { "epoch": 0.628298467115166, "grad_norm": 0.35823890566825867, "learning_rate": 1.5540574284506357e-05, "loss": 0.4822, "step": 20453 }, { "epoch": 0.6283291862501152, "grad_norm": 0.3847796618938446, "learning_rate": 1.55401719600694e-05, "loss": 0.5763, "step": 20454 }, { "epoch": 0.6283599053850644, "grad_norm": 0.3444887399673462, "learning_rate": 1.5539769622692983e-05, "loss": 0.5415, "step": 20455 }, { "epoch": 0.6283906245200135, "grad_norm": 0.37655991315841675, "learning_rate": 1.5539367272378042e-05, "loss": 0.5341, "step": 20456 }, { "epoch": 0.6284213436549627, "grad_norm": 0.3359065055847168, "learning_rate": 1.5538964909125527e-05, "loss": 0.544, "step": 20457 }, { "epoch": 0.6284520627899118, "grad_norm": 0.4218202531337738, "learning_rate": 1.553856253293637e-05, "loss": 0.59, "step": 20458 }, { "epoch": 0.628482781924861, "grad_norm": 0.35771822929382324, "learning_rate": 1.5538160143811514e-05, "loss": 0.53, "step": 20459 }, { "epoch": 0.6285135010598102, "grad_norm": 0.3386017084121704, "learning_rate": 1.55377577417519e-05, "loss": 0.5285, "step": 20460 }, { "epoch": 0.6285442201947593, "grad_norm": 0.37595507502555847, "learning_rate": 1.5537355326758462e-05, "loss": 0.568, "step": 20461 }, { "epoch": 0.6285749393297084, "grad_norm": 0.3615695536136627, "learning_rate": 1.553695289883215e-05, "loss": 0.5724, "step": 20462 }, { "epoch": 0.6286056584646577, "grad_norm": 0.4047395884990692, "learning_rate": 1.5536550457973892e-05, "loss": 0.6522, "step": 20463 }, { "epoch": 0.6286363775996068, "grad_norm": 0.35092693567276, "learning_rate": 1.5536148004184637e-05, "loss": 0.579, "step": 20464 }, { "epoch": 0.628667096734556, "grad_norm": 0.3631301820278168, "learning_rate": 1.553574553746532e-05, "loss": 0.5929, "step": 20465 }, { "epoch": 0.6286978158695051, "grad_norm": 0.47979989647865295, "learning_rate": 1.5535343057816884e-05, "loss": 0.5533, "step": 20466 }, { "epoch": 0.6287285350044542, "grad_norm": 0.34637317061424255, "learning_rate": 1.553494056524027e-05, "loss": 0.5101, "step": 20467 }, { "epoch": 0.6287592541394035, "grad_norm": 0.3926982879638672, "learning_rate": 1.553453805973641e-05, "loss": 0.559, "step": 20468 }, { "epoch": 0.6287899732743526, "grad_norm": 0.35731396079063416, "learning_rate": 1.5534135541306254e-05, "loss": 0.6466, "step": 20469 }, { "epoch": 0.6288206924093017, "grad_norm": 0.3476036787033081, "learning_rate": 1.553373300995074e-05, "loss": 0.5451, "step": 20470 }, { "epoch": 0.628851411544251, "grad_norm": 0.3408403992652893, "learning_rate": 1.5533330465670804e-05, "loss": 0.5345, "step": 20471 }, { "epoch": 0.6288821306792001, "grad_norm": 0.35337403416633606, "learning_rate": 1.5532927908467385e-05, "loss": 0.5299, "step": 20472 }, { "epoch": 0.6289128498141492, "grad_norm": 0.3565610349178314, "learning_rate": 1.5532525338341428e-05, "loss": 0.5219, "step": 20473 }, { "epoch": 0.6289435689490984, "grad_norm": 0.35909876227378845, "learning_rate": 1.5532122755293875e-05, "loss": 0.5764, "step": 20474 }, { "epoch": 0.6289742880840475, "grad_norm": 0.3503480851650238, "learning_rate": 1.553172015932566e-05, "loss": 0.5685, "step": 20475 }, { "epoch": 0.6290050072189967, "grad_norm": 0.37346675992012024, "learning_rate": 1.5531317550437725e-05, "loss": 0.6063, "step": 20476 }, { "epoch": 0.6290357263539459, "grad_norm": 0.3576838970184326, "learning_rate": 1.5530914928631015e-05, "loss": 0.5226, "step": 20477 }, { "epoch": 0.629066445488895, "grad_norm": 0.3738254904747009, "learning_rate": 1.5530512293906463e-05, "loss": 0.5713, "step": 20478 }, { "epoch": 0.6290971646238442, "grad_norm": 0.3547265827655792, "learning_rate": 1.5530109646265014e-05, "loss": 0.5057, "step": 20479 }, { "epoch": 0.6291278837587934, "grad_norm": 0.3845902383327484, "learning_rate": 1.552970698570761e-05, "loss": 0.5571, "step": 20480 }, { "epoch": 0.6291586028937425, "grad_norm": 0.303670734167099, "learning_rate": 1.552930431223519e-05, "loss": 0.4697, "step": 20481 }, { "epoch": 0.6291893220286917, "grad_norm": 0.35287436842918396, "learning_rate": 1.552890162584869e-05, "loss": 0.554, "step": 20482 }, { "epoch": 0.6292200411636408, "grad_norm": 0.37689483165740967, "learning_rate": 1.552849892654906e-05, "loss": 0.5305, "step": 20483 }, { "epoch": 0.62925076029859, "grad_norm": 0.341313898563385, "learning_rate": 1.5528096214337225e-05, "loss": 0.5409, "step": 20484 }, { "epoch": 0.6292814794335392, "grad_norm": 0.3480125367641449, "learning_rate": 1.5527693489214143e-05, "loss": 0.5502, "step": 20485 }, { "epoch": 0.6293121985684883, "grad_norm": 0.3598208427429199, "learning_rate": 1.5527290751180746e-05, "loss": 0.5825, "step": 20486 }, { "epoch": 0.6293429177034374, "grad_norm": 0.3815525770187378, "learning_rate": 1.5526888000237975e-05, "loss": 0.5199, "step": 20487 }, { "epoch": 0.6293736368383867, "grad_norm": 0.3319852948188782, "learning_rate": 1.552648523638677e-05, "loss": 0.5464, "step": 20488 }, { "epoch": 0.6294043559733358, "grad_norm": 0.3767613470554352, "learning_rate": 1.5526082459628074e-05, "loss": 0.5666, "step": 20489 }, { "epoch": 0.629435075108285, "grad_norm": 0.35610392689704895, "learning_rate": 1.5525679669962826e-05, "loss": 0.5492, "step": 20490 }, { "epoch": 0.6294657942432341, "grad_norm": 0.39223989844322205, "learning_rate": 1.5525276867391963e-05, "loss": 0.5672, "step": 20491 }, { "epoch": 0.6294965133781832, "grad_norm": 0.3839449882507324, "learning_rate": 1.5524874051916435e-05, "loss": 0.5421, "step": 20492 }, { "epoch": 0.6295272325131325, "grad_norm": 0.40603959560394287, "learning_rate": 1.5524471223537176e-05, "loss": 0.5817, "step": 20493 }, { "epoch": 0.6295579516480816, "grad_norm": 0.3871927857398987, "learning_rate": 1.5524068382255132e-05, "loss": 0.5378, "step": 20494 }, { "epoch": 0.6295886707830307, "grad_norm": 0.3058789074420929, "learning_rate": 1.5523665528071235e-05, "loss": 0.4918, "step": 20495 }, { "epoch": 0.6296193899179799, "grad_norm": 0.35998424887657166, "learning_rate": 1.5523262660986433e-05, "loss": 0.4836, "step": 20496 }, { "epoch": 0.6296501090529291, "grad_norm": 0.33925777673721313, "learning_rate": 1.552285978100167e-05, "loss": 0.5455, "step": 20497 }, { "epoch": 0.6296808281878782, "grad_norm": 0.436638206243515, "learning_rate": 1.5522456888117873e-05, "loss": 0.5888, "step": 20498 }, { "epoch": 0.6297115473228274, "grad_norm": 0.3938359022140503, "learning_rate": 1.5522053982335998e-05, "loss": 0.5956, "step": 20499 }, { "epoch": 0.6297422664577765, "grad_norm": 0.3435802757740021, "learning_rate": 1.5521651063656982e-05, "loss": 0.4817, "step": 20500 }, { "epoch": 0.6297729855927257, "grad_norm": 0.4295613467693329, "learning_rate": 1.5521248132081762e-05, "loss": 0.5625, "step": 20501 }, { "epoch": 0.6298037047276749, "grad_norm": 0.3144241273403168, "learning_rate": 1.552084518761128e-05, "loss": 0.5148, "step": 20502 }, { "epoch": 0.629834423862624, "grad_norm": 0.3551277816295624, "learning_rate": 1.5520442230246482e-05, "loss": 0.5358, "step": 20503 }, { "epoch": 0.6298651429975732, "grad_norm": 0.37462595105171204, "learning_rate": 1.5520039259988303e-05, "loss": 0.5539, "step": 20504 }, { "epoch": 0.6298958621325224, "grad_norm": 0.358066588640213, "learning_rate": 1.5519636276837687e-05, "loss": 0.5693, "step": 20505 }, { "epoch": 0.6299265812674715, "grad_norm": 0.40625783801078796, "learning_rate": 1.5519233280795578e-05, "loss": 0.5331, "step": 20506 }, { "epoch": 0.6299573004024207, "grad_norm": 0.3592485189437866, "learning_rate": 1.5518830271862912e-05, "loss": 0.4918, "step": 20507 }, { "epoch": 0.6299880195373698, "grad_norm": 0.3648546636104584, "learning_rate": 1.5518427250040634e-05, "loss": 0.6228, "step": 20508 }, { "epoch": 0.630018738672319, "grad_norm": 0.3484272360801697, "learning_rate": 1.5518024215329682e-05, "loss": 0.4928, "step": 20509 }, { "epoch": 0.6300494578072682, "grad_norm": 0.3424433469772339, "learning_rate": 1.5517621167731e-05, "loss": 0.5676, "step": 20510 }, { "epoch": 0.6300801769422173, "grad_norm": 0.3437756299972534, "learning_rate": 1.5517218107245525e-05, "loss": 0.5268, "step": 20511 }, { "epoch": 0.6301108960771664, "grad_norm": 0.3439585268497467, "learning_rate": 1.5516815033874207e-05, "loss": 0.5521, "step": 20512 }, { "epoch": 0.6301416152121156, "grad_norm": 0.3851661682128906, "learning_rate": 1.5516411947617977e-05, "loss": 0.5861, "step": 20513 }, { "epoch": 0.6301723343470648, "grad_norm": 0.390421986579895, "learning_rate": 1.5516008848477783e-05, "loss": 0.6113, "step": 20514 }, { "epoch": 0.630203053482014, "grad_norm": 0.35814282298088074, "learning_rate": 1.5515605736454566e-05, "loss": 0.5465, "step": 20515 }, { "epoch": 0.6302337726169631, "grad_norm": 0.3719334900379181, "learning_rate": 1.5515202611549266e-05, "loss": 0.5643, "step": 20516 }, { "epoch": 0.6302644917519122, "grad_norm": 0.4079569876194, "learning_rate": 1.551479947376283e-05, "loss": 0.5831, "step": 20517 }, { "epoch": 0.6302952108868615, "grad_norm": 0.3554408550262451, "learning_rate": 1.551439632309619e-05, "loss": 0.539, "step": 20518 }, { "epoch": 0.6303259300218106, "grad_norm": 0.3844473958015442, "learning_rate": 1.5513993159550292e-05, "loss": 0.4966, "step": 20519 }, { "epoch": 0.6303566491567597, "grad_norm": 0.35590845346450806, "learning_rate": 1.551358998312608e-05, "loss": 0.5315, "step": 20520 }, { "epoch": 0.6303873682917089, "grad_norm": 0.40069684386253357, "learning_rate": 1.551318679382449e-05, "loss": 0.5953, "step": 20521 }, { "epoch": 0.6304180874266581, "grad_norm": 0.3644263446331024, "learning_rate": 1.551278359164647e-05, "loss": 0.5808, "step": 20522 }, { "epoch": 0.6304488065616072, "grad_norm": 0.3472433090209961, "learning_rate": 1.551238037659296e-05, "loss": 0.5225, "step": 20523 }, { "epoch": 0.6304795256965564, "grad_norm": 0.36197564005851746, "learning_rate": 1.5511977148664898e-05, "loss": 0.5737, "step": 20524 }, { "epoch": 0.6305102448315055, "grad_norm": 0.3834817111492157, "learning_rate": 1.551157390786323e-05, "loss": 0.5373, "step": 20525 }, { "epoch": 0.6305409639664548, "grad_norm": 0.3339015543460846, "learning_rate": 1.5511170654188895e-05, "loss": 0.5751, "step": 20526 }, { "epoch": 0.6305716831014039, "grad_norm": 0.3716455399990082, "learning_rate": 1.5510767387642836e-05, "loss": 0.5389, "step": 20527 }, { "epoch": 0.630602402236353, "grad_norm": 0.3447161912918091, "learning_rate": 1.5510364108226e-05, "loss": 0.5445, "step": 20528 }, { "epoch": 0.6306331213713022, "grad_norm": 0.34782522916793823, "learning_rate": 1.550996081593932e-05, "loss": 0.5392, "step": 20529 }, { "epoch": 0.6306638405062513, "grad_norm": 0.39493536949157715, "learning_rate": 1.550955751078374e-05, "loss": 0.623, "step": 20530 }, { "epoch": 0.6306945596412005, "grad_norm": 0.339934378862381, "learning_rate": 1.5509154192760204e-05, "loss": 0.5161, "step": 20531 }, { "epoch": 0.6307252787761497, "grad_norm": 0.3500380516052246, "learning_rate": 1.5508750861869654e-05, "loss": 0.5597, "step": 20532 }, { "epoch": 0.6307559979110988, "grad_norm": 0.3801518380641937, "learning_rate": 1.5508347518113032e-05, "loss": 0.5706, "step": 20533 }, { "epoch": 0.6307867170460479, "grad_norm": 0.3735102117061615, "learning_rate": 1.5507944161491278e-05, "loss": 0.5597, "step": 20534 }, { "epoch": 0.6308174361809972, "grad_norm": 0.3631405234336853, "learning_rate": 1.5507540792005336e-05, "loss": 0.571, "step": 20535 }, { "epoch": 0.6308481553159463, "grad_norm": 0.34059038758277893, "learning_rate": 1.5507137409656152e-05, "loss": 0.5472, "step": 20536 }, { "epoch": 0.6308788744508954, "grad_norm": 0.36077627539634705, "learning_rate": 1.550673401444466e-05, "loss": 0.5555, "step": 20537 }, { "epoch": 0.6309095935858446, "grad_norm": 0.33515822887420654, "learning_rate": 1.5506330606371808e-05, "loss": 0.5767, "step": 20538 }, { "epoch": 0.6309403127207938, "grad_norm": 0.37249404191970825, "learning_rate": 1.550592718543854e-05, "loss": 0.5508, "step": 20539 }, { "epoch": 0.630971031855743, "grad_norm": 0.3659844994544983, "learning_rate": 1.5505523751645788e-05, "loss": 0.4624, "step": 20540 }, { "epoch": 0.6310017509906921, "grad_norm": 0.34699442982673645, "learning_rate": 1.5505120304994504e-05, "loss": 0.5236, "step": 20541 }, { "epoch": 0.6310324701256412, "grad_norm": 0.34897685050964355, "learning_rate": 1.5504716845485626e-05, "loss": 0.5076, "step": 20542 }, { "epoch": 0.6310631892605905, "grad_norm": 0.38492971658706665, "learning_rate": 1.5504313373120096e-05, "loss": 0.5784, "step": 20543 }, { "epoch": 0.6310939083955396, "grad_norm": 0.37766900658607483, "learning_rate": 1.5503909887898862e-05, "loss": 0.5803, "step": 20544 }, { "epoch": 0.6311246275304887, "grad_norm": 0.34090855717658997, "learning_rate": 1.5503506389822856e-05, "loss": 0.6352, "step": 20545 }, { "epoch": 0.6311553466654379, "grad_norm": 0.34887561202049255, "learning_rate": 1.550310287889303e-05, "loss": 0.5511, "step": 20546 }, { "epoch": 0.631186065800387, "grad_norm": 0.34751734137535095, "learning_rate": 1.5502699355110324e-05, "loss": 0.5395, "step": 20547 }, { "epoch": 0.6312167849353362, "grad_norm": 0.37066540122032166, "learning_rate": 1.5502295818475677e-05, "loss": 0.5522, "step": 20548 }, { "epoch": 0.6312475040702854, "grad_norm": 0.40714436769485474, "learning_rate": 1.5501892268990037e-05, "loss": 0.6063, "step": 20549 }, { "epoch": 0.6312782232052345, "grad_norm": 0.4499944746494293, "learning_rate": 1.5501488706654337e-05, "loss": 0.5403, "step": 20550 }, { "epoch": 0.6313089423401838, "grad_norm": 0.3475160598754883, "learning_rate": 1.5501085131469534e-05, "loss": 0.6034, "step": 20551 }, { "epoch": 0.6313396614751329, "grad_norm": 0.35442328453063965, "learning_rate": 1.550068154343656e-05, "loss": 0.5222, "step": 20552 }, { "epoch": 0.631370380610082, "grad_norm": 0.4465176463127136, "learning_rate": 1.5500277942556354e-05, "loss": 0.5367, "step": 20553 }, { "epoch": 0.6314010997450312, "grad_norm": 0.3562955856323242, "learning_rate": 1.5499874328829872e-05, "loss": 0.5194, "step": 20554 }, { "epoch": 0.6314318188799803, "grad_norm": 0.3731214106082916, "learning_rate": 1.5499470702258043e-05, "loss": 0.6273, "step": 20555 }, { "epoch": 0.6314625380149295, "grad_norm": 0.3472862243652344, "learning_rate": 1.549906706284182e-05, "loss": 0.591, "step": 20556 }, { "epoch": 0.6314932571498787, "grad_norm": 0.3627469539642334, "learning_rate": 1.5498663410582145e-05, "loss": 0.4968, "step": 20557 }, { "epoch": 0.6315239762848278, "grad_norm": 0.449040025472641, "learning_rate": 1.5498259745479956e-05, "loss": 0.605, "step": 20558 }, { "epoch": 0.6315546954197769, "grad_norm": 0.3526633679866791, "learning_rate": 1.5497856067536196e-05, "loss": 0.5785, "step": 20559 }, { "epoch": 0.6315854145547262, "grad_norm": 0.40549594163894653, "learning_rate": 1.5497452376751806e-05, "loss": 0.6682, "step": 20560 }, { "epoch": 0.6316161336896753, "grad_norm": 0.857367992401123, "learning_rate": 1.5497048673127734e-05, "loss": 0.5702, "step": 20561 }, { "epoch": 0.6316468528246244, "grad_norm": 0.29835548996925354, "learning_rate": 1.5496644956664923e-05, "loss": 0.4883, "step": 20562 }, { "epoch": 0.6316775719595736, "grad_norm": 0.36649906635284424, "learning_rate": 1.5496241227364312e-05, "loss": 0.5498, "step": 20563 }, { "epoch": 0.6317082910945228, "grad_norm": 0.4365297853946686, "learning_rate": 1.5495837485226845e-05, "loss": 0.5465, "step": 20564 }, { "epoch": 0.631739010229472, "grad_norm": 0.36736178398132324, "learning_rate": 1.549543373025347e-05, "loss": 0.5937, "step": 20565 }, { "epoch": 0.6317697293644211, "grad_norm": 0.37375736236572266, "learning_rate": 1.5495029962445123e-05, "loss": 0.5368, "step": 20566 }, { "epoch": 0.6318004484993702, "grad_norm": 0.3342702388763428, "learning_rate": 1.5494626181802747e-05, "loss": 0.5033, "step": 20567 }, { "epoch": 0.6318311676343195, "grad_norm": 0.3606123626232147, "learning_rate": 1.5494222388327295e-05, "loss": 0.5195, "step": 20568 }, { "epoch": 0.6318618867692686, "grad_norm": 0.34914031624794006, "learning_rate": 1.5493818582019696e-05, "loss": 0.5355, "step": 20569 }, { "epoch": 0.6318926059042177, "grad_norm": 0.3298788368701935, "learning_rate": 1.54934147628809e-05, "loss": 0.5568, "step": 20570 }, { "epoch": 0.6319233250391669, "grad_norm": 0.31659412384033203, "learning_rate": 1.5493010930911855e-05, "loss": 0.5819, "step": 20571 }, { "epoch": 0.631954044174116, "grad_norm": 0.37260058522224426, "learning_rate": 1.5492607086113497e-05, "loss": 0.5815, "step": 20572 }, { "epoch": 0.6319847633090652, "grad_norm": 0.31667935848236084, "learning_rate": 1.5492203228486767e-05, "loss": 0.5884, "step": 20573 }, { "epoch": 0.6320154824440144, "grad_norm": 0.44548648595809937, "learning_rate": 1.549179935803262e-05, "loss": 0.6191, "step": 20574 }, { "epoch": 0.6320462015789635, "grad_norm": 0.3704669177532196, "learning_rate": 1.549139547475199e-05, "loss": 0.6566, "step": 20575 }, { "epoch": 0.6320769207139127, "grad_norm": 0.3571530878543854, "learning_rate": 1.549099157864582e-05, "loss": 0.5079, "step": 20576 }, { "epoch": 0.6321076398488619, "grad_norm": 0.3359098434448242, "learning_rate": 1.549058766971506e-05, "loss": 0.5531, "step": 20577 }, { "epoch": 0.632138358983811, "grad_norm": 0.3299316167831421, "learning_rate": 1.5490183747960646e-05, "loss": 0.5926, "step": 20578 }, { "epoch": 0.6321690781187602, "grad_norm": 0.3441930413246155, "learning_rate": 1.5489779813383525e-05, "loss": 0.5216, "step": 20579 }, { "epoch": 0.6321997972537093, "grad_norm": 0.36515337228775024, "learning_rate": 1.5489375865984638e-05, "loss": 0.5052, "step": 20580 }, { "epoch": 0.6322305163886585, "grad_norm": 0.34651175141334534, "learning_rate": 1.5488971905764932e-05, "loss": 0.489, "step": 20581 }, { "epoch": 0.6322612355236077, "grad_norm": 0.3564282953739166, "learning_rate": 1.548856793272535e-05, "loss": 0.5234, "step": 20582 }, { "epoch": 0.6322919546585568, "grad_norm": 0.3526737689971924, "learning_rate": 1.5488163946866833e-05, "loss": 0.5859, "step": 20583 }, { "epoch": 0.6323226737935059, "grad_norm": 0.36250051856040955, "learning_rate": 1.5487759948190323e-05, "loss": 0.4706, "step": 20584 }, { "epoch": 0.6323533929284552, "grad_norm": 0.3842659890651703, "learning_rate": 1.5487355936696772e-05, "loss": 0.553, "step": 20585 }, { "epoch": 0.6323841120634043, "grad_norm": 0.384400337934494, "learning_rate": 1.5486951912387115e-05, "loss": 0.6371, "step": 20586 }, { "epoch": 0.6324148311983534, "grad_norm": 0.4381459951400757, "learning_rate": 1.5486547875262296e-05, "loss": 0.601, "step": 20587 }, { "epoch": 0.6324455503333026, "grad_norm": 0.35756537318229675, "learning_rate": 1.5486143825323264e-05, "loss": 0.5774, "step": 20588 }, { "epoch": 0.6324762694682518, "grad_norm": 0.4041038155555725, "learning_rate": 1.548573976257096e-05, "loss": 0.5921, "step": 20589 }, { "epoch": 0.632506988603201, "grad_norm": 0.3591277301311493, "learning_rate": 1.5485335687006328e-05, "loss": 0.5234, "step": 20590 }, { "epoch": 0.6325377077381501, "grad_norm": 0.33940356969833374, "learning_rate": 1.548493159863031e-05, "loss": 0.532, "step": 20591 }, { "epoch": 0.6325684268730992, "grad_norm": 0.3424537479877472, "learning_rate": 1.5484527497443857e-05, "loss": 0.6378, "step": 20592 }, { "epoch": 0.6325991460080485, "grad_norm": 0.3633648455142975, "learning_rate": 1.54841233834479e-05, "loss": 0.5446, "step": 20593 }, { "epoch": 0.6326298651429976, "grad_norm": 0.39615964889526367, "learning_rate": 1.548371925664339e-05, "loss": 0.5463, "step": 20594 }, { "epoch": 0.6326605842779467, "grad_norm": 0.3161156475543976, "learning_rate": 1.5483315117031272e-05, "loss": 0.6039, "step": 20595 }, { "epoch": 0.6326913034128959, "grad_norm": 0.3578416705131531, "learning_rate": 1.5482910964612492e-05, "loss": 0.5133, "step": 20596 }, { "epoch": 0.632722022547845, "grad_norm": 0.36269715428352356, "learning_rate": 1.5482506799387987e-05, "loss": 0.4781, "step": 20597 }, { "epoch": 0.6327527416827942, "grad_norm": 0.32749316096305847, "learning_rate": 1.5482102621358705e-05, "loss": 0.598, "step": 20598 }, { "epoch": 0.6327834608177434, "grad_norm": 0.39082032442092896, "learning_rate": 1.5481698430525592e-05, "loss": 0.5036, "step": 20599 }, { "epoch": 0.6328141799526925, "grad_norm": 0.35259172320365906, "learning_rate": 1.5481294226889587e-05, "loss": 0.6591, "step": 20600 }, { "epoch": 0.6328448990876417, "grad_norm": 0.3608958423137665, "learning_rate": 1.5480890010451633e-05, "loss": 0.5696, "step": 20601 }, { "epoch": 0.6328756182225909, "grad_norm": 0.38871926069259644, "learning_rate": 1.5480485781212684e-05, "loss": 0.4914, "step": 20602 }, { "epoch": 0.63290633735754, "grad_norm": 0.3707088828086853, "learning_rate": 1.5480081539173675e-05, "loss": 0.541, "step": 20603 }, { "epoch": 0.6329370564924892, "grad_norm": 0.3155675232410431, "learning_rate": 1.5479677284335554e-05, "loss": 0.4528, "step": 20604 }, { "epoch": 0.6329677756274383, "grad_norm": 0.366276353597641, "learning_rate": 1.547927301669926e-05, "loss": 0.489, "step": 20605 }, { "epoch": 0.6329984947623875, "grad_norm": 0.37932366132736206, "learning_rate": 1.5478868736265745e-05, "loss": 0.6015, "step": 20606 }, { "epoch": 0.6330292138973367, "grad_norm": 0.6573251485824585, "learning_rate": 1.5478464443035947e-05, "loss": 0.5772, "step": 20607 }, { "epoch": 0.6330599330322858, "grad_norm": 0.3410279154777527, "learning_rate": 1.5478060137010816e-05, "loss": 0.4437, "step": 20608 }, { "epoch": 0.6330906521672349, "grad_norm": 0.3760042190551758, "learning_rate": 1.5477655818191292e-05, "loss": 0.6057, "step": 20609 }, { "epoch": 0.6331213713021842, "grad_norm": 0.4065687358379364, "learning_rate": 1.5477251486578318e-05, "loss": 0.5809, "step": 20610 }, { "epoch": 0.6331520904371333, "grad_norm": 0.4009949862957001, "learning_rate": 1.5476847142172843e-05, "loss": 0.583, "step": 20611 }, { "epoch": 0.6331828095720825, "grad_norm": 0.49443402886390686, "learning_rate": 1.5476442784975808e-05, "loss": 0.5558, "step": 20612 }, { "epoch": 0.6332135287070316, "grad_norm": 0.3432754576206207, "learning_rate": 1.5476038414988156e-05, "loss": 0.5381, "step": 20613 }, { "epoch": 0.6332442478419807, "grad_norm": 0.3635360896587372, "learning_rate": 1.547563403221084e-05, "loss": 0.5932, "step": 20614 }, { "epoch": 0.63327496697693, "grad_norm": 0.36128470301628113, "learning_rate": 1.5475229636644793e-05, "loss": 0.6386, "step": 20615 }, { "epoch": 0.6333056861118791, "grad_norm": 0.3623252213001251, "learning_rate": 1.5474825228290966e-05, "loss": 0.5906, "step": 20616 }, { "epoch": 0.6333364052468282, "grad_norm": 0.4556536078453064, "learning_rate": 1.5474420807150303e-05, "loss": 0.6126, "step": 20617 }, { "epoch": 0.6333671243817774, "grad_norm": 0.335367351770401, "learning_rate": 1.547401637322375e-05, "loss": 0.4873, "step": 20618 }, { "epoch": 0.6333978435167266, "grad_norm": 0.31407225131988525, "learning_rate": 1.5473611926512245e-05, "loss": 0.5533, "step": 20619 }, { "epoch": 0.6334285626516757, "grad_norm": 0.3477489650249481, "learning_rate": 1.547320746701674e-05, "loss": 0.5822, "step": 20620 }, { "epoch": 0.6334592817866249, "grad_norm": 0.3763788938522339, "learning_rate": 1.5472802994738177e-05, "loss": 0.5807, "step": 20621 }, { "epoch": 0.633490000921574, "grad_norm": 0.35665056109428406, "learning_rate": 1.54723985096775e-05, "loss": 0.5802, "step": 20622 }, { "epoch": 0.6335207200565232, "grad_norm": 0.3646020293235779, "learning_rate": 1.547199401183565e-05, "loss": 0.5779, "step": 20623 }, { "epoch": 0.6335514391914724, "grad_norm": 0.3318079113960266, "learning_rate": 1.547158950121358e-05, "loss": 0.5059, "step": 20624 }, { "epoch": 0.6335821583264215, "grad_norm": 0.3685024678707123, "learning_rate": 1.5471184977812227e-05, "loss": 0.5975, "step": 20625 }, { "epoch": 0.6336128774613707, "grad_norm": 0.38560864329338074, "learning_rate": 1.5470780441632546e-05, "loss": 0.628, "step": 20626 }, { "epoch": 0.6336435965963199, "grad_norm": 0.386865496635437, "learning_rate": 1.547037589267547e-05, "loss": 0.558, "step": 20627 }, { "epoch": 0.633674315731269, "grad_norm": 0.3806707561016083, "learning_rate": 1.5469971330941952e-05, "loss": 0.5495, "step": 20628 }, { "epoch": 0.6337050348662182, "grad_norm": 0.36128732562065125, "learning_rate": 1.5469566756432933e-05, "loss": 0.4458, "step": 20629 }, { "epoch": 0.6337357540011673, "grad_norm": 0.3996214270591736, "learning_rate": 1.5469162169149356e-05, "loss": 0.4925, "step": 20630 }, { "epoch": 0.6337664731361164, "grad_norm": 0.32999929785728455, "learning_rate": 1.5468757569092173e-05, "loss": 0.5267, "step": 20631 }, { "epoch": 0.6337971922710657, "grad_norm": 0.3447803854942322, "learning_rate": 1.5468352956262323e-05, "loss": 0.5373, "step": 20632 }, { "epoch": 0.6338279114060148, "grad_norm": 0.3750666677951813, "learning_rate": 1.546794833066075e-05, "loss": 0.5645, "step": 20633 }, { "epoch": 0.6338586305409639, "grad_norm": 0.3979819416999817, "learning_rate": 1.5467543692288405e-05, "loss": 0.5475, "step": 20634 }, { "epoch": 0.6338893496759131, "grad_norm": 0.35353389382362366, "learning_rate": 1.5467139041146228e-05, "loss": 0.5866, "step": 20635 }, { "epoch": 0.6339200688108623, "grad_norm": 0.3958336412906647, "learning_rate": 1.5466734377235168e-05, "loss": 0.5522, "step": 20636 }, { "epoch": 0.6339507879458115, "grad_norm": 0.44844740629196167, "learning_rate": 1.5466329700556167e-05, "loss": 0.5976, "step": 20637 }, { "epoch": 0.6339815070807606, "grad_norm": 0.3868013620376587, "learning_rate": 1.546592501111017e-05, "loss": 0.5218, "step": 20638 }, { "epoch": 0.6340122262157097, "grad_norm": 0.3436957895755768, "learning_rate": 1.5465520308898128e-05, "loss": 0.5532, "step": 20639 }, { "epoch": 0.634042945350659, "grad_norm": 0.3283960223197937, "learning_rate": 1.546511559392098e-05, "loss": 0.4995, "step": 20640 }, { "epoch": 0.6340736644856081, "grad_norm": 0.35345005989074707, "learning_rate": 1.5464710866179668e-05, "loss": 0.5297, "step": 20641 }, { "epoch": 0.6341043836205572, "grad_norm": 0.3709981441497803, "learning_rate": 1.5464306125675146e-05, "loss": 0.5042, "step": 20642 }, { "epoch": 0.6341351027555064, "grad_norm": 0.3927631378173828, "learning_rate": 1.5463901372408354e-05, "loss": 0.5698, "step": 20643 }, { "epoch": 0.6341658218904556, "grad_norm": 0.34704309701919556, "learning_rate": 1.546349660638024e-05, "loss": 0.5325, "step": 20644 }, { "epoch": 0.6341965410254047, "grad_norm": 0.35497593879699707, "learning_rate": 1.546309182759175e-05, "loss": 0.5556, "step": 20645 }, { "epoch": 0.6342272601603539, "grad_norm": 0.34287047386169434, "learning_rate": 1.5462687036043825e-05, "loss": 0.504, "step": 20646 }, { "epoch": 0.634257979295303, "grad_norm": 0.32989874482154846, "learning_rate": 1.546228223173741e-05, "loss": 0.5294, "step": 20647 }, { "epoch": 0.6342886984302522, "grad_norm": 0.3911438286304474, "learning_rate": 1.5461877414673455e-05, "loss": 0.5963, "step": 20648 }, { "epoch": 0.6343194175652014, "grad_norm": 0.35218244791030884, "learning_rate": 1.5461472584852907e-05, "loss": 0.6245, "step": 20649 }, { "epoch": 0.6343501367001505, "grad_norm": 0.3385533094406128, "learning_rate": 1.5461067742276706e-05, "loss": 0.5378, "step": 20650 }, { "epoch": 0.6343808558350997, "grad_norm": 0.33788207173347473, "learning_rate": 1.5460662886945802e-05, "loss": 0.5953, "step": 20651 }, { "epoch": 0.6344115749700489, "grad_norm": 0.3946107029914856, "learning_rate": 1.5460258018861137e-05, "loss": 0.6214, "step": 20652 }, { "epoch": 0.634442294104998, "grad_norm": 0.38215965032577515, "learning_rate": 1.545985313802366e-05, "loss": 0.5225, "step": 20653 }, { "epoch": 0.6344730132399472, "grad_norm": 0.3689245283603668, "learning_rate": 1.545944824443431e-05, "loss": 0.5024, "step": 20654 }, { "epoch": 0.6345037323748963, "grad_norm": 0.35270994901657104, "learning_rate": 1.545904333809404e-05, "loss": 0.5439, "step": 20655 }, { "epoch": 0.6345344515098454, "grad_norm": 0.3805410861968994, "learning_rate": 1.5458638419003796e-05, "loss": 0.6138, "step": 20656 }, { "epoch": 0.6345651706447947, "grad_norm": 0.3736203908920288, "learning_rate": 1.545823348716452e-05, "loss": 0.5606, "step": 20657 }, { "epoch": 0.6345958897797438, "grad_norm": 0.35092538595199585, "learning_rate": 1.5457828542577152e-05, "loss": 0.5277, "step": 20658 }, { "epoch": 0.6346266089146929, "grad_norm": 0.3583061099052429, "learning_rate": 1.5457423585242648e-05, "loss": 0.525, "step": 20659 }, { "epoch": 0.6346573280496421, "grad_norm": 0.4489299952983856, "learning_rate": 1.545701861516195e-05, "loss": 0.5438, "step": 20660 }, { "epoch": 0.6346880471845913, "grad_norm": 0.34606486558914185, "learning_rate": 1.5456613632336007e-05, "loss": 0.6309, "step": 20661 }, { "epoch": 0.6347187663195405, "grad_norm": 0.349439799785614, "learning_rate": 1.5456208636765758e-05, "loss": 0.5612, "step": 20662 }, { "epoch": 0.6347494854544896, "grad_norm": 0.33288678526878357, "learning_rate": 1.5455803628452155e-05, "loss": 0.5209, "step": 20663 }, { "epoch": 0.6347802045894387, "grad_norm": 0.36087116599082947, "learning_rate": 1.545539860739614e-05, "loss": 0.5118, "step": 20664 }, { "epoch": 0.634810923724388, "grad_norm": 0.3262404501438141, "learning_rate": 1.545499357359866e-05, "loss": 0.5718, "step": 20665 }, { "epoch": 0.6348416428593371, "grad_norm": 0.33213570713996887, "learning_rate": 1.5454588527060664e-05, "loss": 0.5872, "step": 20666 }, { "epoch": 0.6348723619942862, "grad_norm": 0.3270513117313385, "learning_rate": 1.5454183467783095e-05, "loss": 0.5468, "step": 20667 }, { "epoch": 0.6349030811292354, "grad_norm": 0.3904571235179901, "learning_rate": 1.54537783957669e-05, "loss": 0.5836, "step": 20668 }, { "epoch": 0.6349338002641846, "grad_norm": 0.4107714891433716, "learning_rate": 1.5453373311013022e-05, "loss": 0.4721, "step": 20669 }, { "epoch": 0.6349645193991337, "grad_norm": 0.36879709362983704, "learning_rate": 1.5452968213522412e-05, "loss": 0.6164, "step": 20670 }, { "epoch": 0.6349952385340829, "grad_norm": 0.5524283051490784, "learning_rate": 1.545256310329601e-05, "loss": 0.5657, "step": 20671 }, { "epoch": 0.635025957669032, "grad_norm": 0.345878541469574, "learning_rate": 1.545215798033477e-05, "loss": 0.5574, "step": 20672 }, { "epoch": 0.6350566768039811, "grad_norm": 1.4070717096328735, "learning_rate": 1.5451752844639637e-05, "loss": 0.5322, "step": 20673 }, { "epoch": 0.6350873959389304, "grad_norm": 0.36917844414711, "learning_rate": 1.545134769621155e-05, "loss": 0.6207, "step": 20674 }, { "epoch": 0.6351181150738795, "grad_norm": 0.3268089294433594, "learning_rate": 1.545094253505146e-05, "loss": 0.5618, "step": 20675 }, { "epoch": 0.6351488342088287, "grad_norm": 0.3683381974697113, "learning_rate": 1.5450537361160315e-05, "loss": 0.5882, "step": 20676 }, { "epoch": 0.6351795533437778, "grad_norm": 0.5048633813858032, "learning_rate": 1.5450132174539057e-05, "loss": 0.6077, "step": 20677 }, { "epoch": 0.635210272478727, "grad_norm": 0.3426235020160675, "learning_rate": 1.5449726975188636e-05, "loss": 0.5075, "step": 20678 }, { "epoch": 0.6352409916136762, "grad_norm": 0.48833996057510376, "learning_rate": 1.5449321763109996e-05, "loss": 0.5382, "step": 20679 }, { "epoch": 0.6352717107486253, "grad_norm": 0.3817489743232727, "learning_rate": 1.5448916538304086e-05, "loss": 0.5356, "step": 20680 }, { "epoch": 0.6353024298835744, "grad_norm": 0.32480886578559875, "learning_rate": 1.544851130077185e-05, "loss": 0.537, "step": 20681 }, { "epoch": 0.6353331490185237, "grad_norm": 0.3687842786312103, "learning_rate": 1.5448106050514235e-05, "loss": 0.5925, "step": 20682 }, { "epoch": 0.6353638681534728, "grad_norm": 0.35520657896995544, "learning_rate": 1.544770078753219e-05, "loss": 0.5778, "step": 20683 }, { "epoch": 0.6353945872884219, "grad_norm": 0.4000779390335083, "learning_rate": 1.5447295511826658e-05, "loss": 0.6062, "step": 20684 }, { "epoch": 0.6354253064233711, "grad_norm": 0.3560211658477783, "learning_rate": 1.5446890223398586e-05, "loss": 0.6061, "step": 20685 }, { "epoch": 0.6354560255583203, "grad_norm": 0.3718532919883728, "learning_rate": 1.5446484922248922e-05, "loss": 0.5723, "step": 20686 }, { "epoch": 0.6354867446932695, "grad_norm": 0.34510546922683716, "learning_rate": 1.544607960837861e-05, "loss": 0.5603, "step": 20687 }, { "epoch": 0.6355174638282186, "grad_norm": 0.4259403944015503, "learning_rate": 1.5445674281788602e-05, "loss": 0.5856, "step": 20688 }, { "epoch": 0.6355481829631677, "grad_norm": 0.3514156639575958, "learning_rate": 1.544526894247984e-05, "loss": 0.6007, "step": 20689 }, { "epoch": 0.635578902098117, "grad_norm": 0.34669044613838196, "learning_rate": 1.5444863590453275e-05, "loss": 0.5555, "step": 20690 }, { "epoch": 0.6356096212330661, "grad_norm": 0.40064260363578796, "learning_rate": 1.5444458225709848e-05, "loss": 0.6062, "step": 20691 }, { "epoch": 0.6356403403680152, "grad_norm": 0.3235289752483368, "learning_rate": 1.544405284825051e-05, "loss": 0.5803, "step": 20692 }, { "epoch": 0.6356710595029644, "grad_norm": 0.38601207733154297, "learning_rate": 1.54436474580762e-05, "loss": 0.5844, "step": 20693 }, { "epoch": 0.6357017786379136, "grad_norm": 0.35080596804618835, "learning_rate": 1.544324205518788e-05, "loss": 0.548, "step": 20694 }, { "epoch": 0.6357324977728627, "grad_norm": 0.3823280930519104, "learning_rate": 1.5442836639586483e-05, "loss": 0.5961, "step": 20695 }, { "epoch": 0.6357632169078119, "grad_norm": 0.40195712447166443, "learning_rate": 1.5442431211272963e-05, "loss": 0.5391, "step": 20696 }, { "epoch": 0.635793936042761, "grad_norm": 0.3560311198234558, "learning_rate": 1.5442025770248263e-05, "loss": 0.6462, "step": 20697 }, { "epoch": 0.6358246551777101, "grad_norm": 0.35367000102996826, "learning_rate": 1.5441620316513332e-05, "loss": 0.5506, "step": 20698 }, { "epoch": 0.6358553743126594, "grad_norm": 0.4651823937892914, "learning_rate": 1.544121485006912e-05, "loss": 0.5981, "step": 20699 }, { "epoch": 0.6358860934476085, "grad_norm": 0.3703811764717102, "learning_rate": 1.5440809370916565e-05, "loss": 0.5399, "step": 20700 }, { "epoch": 0.6359168125825577, "grad_norm": 0.8316811323165894, "learning_rate": 1.5440403879056624e-05, "loss": 0.5265, "step": 20701 }, { "epoch": 0.6359475317175068, "grad_norm": 0.39461830258369446, "learning_rate": 1.543999837449024e-05, "loss": 0.5181, "step": 20702 }, { "epoch": 0.635978250852456, "grad_norm": 0.36780497431755066, "learning_rate": 1.5439592857218358e-05, "loss": 0.5954, "step": 20703 }, { "epoch": 0.6360089699874052, "grad_norm": 0.5289907455444336, "learning_rate": 1.5439187327241927e-05, "loss": 0.5227, "step": 20704 }, { "epoch": 0.6360396891223543, "grad_norm": 0.3992893695831299, "learning_rate": 1.5438781784561896e-05, "loss": 0.5889, "step": 20705 }, { "epoch": 0.6360704082573034, "grad_norm": 0.3405161499977112, "learning_rate": 1.543837622917921e-05, "loss": 0.4899, "step": 20706 }, { "epoch": 0.6361011273922527, "grad_norm": 0.3286382555961609, "learning_rate": 1.5437970661094814e-05, "loss": 0.5427, "step": 20707 }, { "epoch": 0.6361318465272018, "grad_norm": 0.40151527523994446, "learning_rate": 1.543756508030966e-05, "loss": 0.6525, "step": 20708 }, { "epoch": 0.6361625656621509, "grad_norm": 0.416187584400177, "learning_rate": 1.543715948682469e-05, "loss": 0.4958, "step": 20709 }, { "epoch": 0.6361932847971001, "grad_norm": 0.3576178550720215, "learning_rate": 1.543675388064086e-05, "loss": 0.5564, "step": 20710 }, { "epoch": 0.6362240039320493, "grad_norm": 0.3249701261520386, "learning_rate": 1.5436348261759108e-05, "loss": 0.5354, "step": 20711 }, { "epoch": 0.6362547230669985, "grad_norm": 0.4750104248523712, "learning_rate": 1.5435942630180384e-05, "loss": 0.5314, "step": 20712 }, { "epoch": 0.6362854422019476, "grad_norm": 0.35425230860710144, "learning_rate": 1.543553698590564e-05, "loss": 0.5812, "step": 20713 }, { "epoch": 0.6363161613368967, "grad_norm": 0.8439845442771912, "learning_rate": 1.5435131328935815e-05, "loss": 0.557, "step": 20714 }, { "epoch": 0.636346880471846, "grad_norm": 0.3976151943206787, "learning_rate": 1.5434725659271863e-05, "loss": 0.6146, "step": 20715 }, { "epoch": 0.6363775996067951, "grad_norm": 0.43389877676963806, "learning_rate": 1.543431997691473e-05, "loss": 0.6527, "step": 20716 }, { "epoch": 0.6364083187417442, "grad_norm": 0.3755555748939514, "learning_rate": 1.5433914281865365e-05, "loss": 0.5669, "step": 20717 }, { "epoch": 0.6364390378766934, "grad_norm": 0.3850994408130646, "learning_rate": 1.543350857412471e-05, "loss": 0.5758, "step": 20718 }, { "epoch": 0.6364697570116425, "grad_norm": 0.32519200444221497, "learning_rate": 1.5433102853693718e-05, "loss": 0.5734, "step": 20719 }, { "epoch": 0.6365004761465917, "grad_norm": 0.35938090085983276, "learning_rate": 1.5432697120573338e-05, "loss": 0.5271, "step": 20720 }, { "epoch": 0.6365311952815409, "grad_norm": 0.354297012090683, "learning_rate": 1.543229137476451e-05, "loss": 0.5496, "step": 20721 }, { "epoch": 0.63656191441649, "grad_norm": 0.3233354091644287, "learning_rate": 1.543188561626819e-05, "loss": 0.5185, "step": 20722 }, { "epoch": 0.6365926335514392, "grad_norm": 0.3785257339477539, "learning_rate": 1.5431479845085316e-05, "loss": 0.6376, "step": 20723 }, { "epoch": 0.6366233526863884, "grad_norm": 0.3564755916595459, "learning_rate": 1.5431074061216844e-05, "loss": 0.5143, "step": 20724 }, { "epoch": 0.6366540718213375, "grad_norm": 0.32328638434410095, "learning_rate": 1.5430668264663722e-05, "loss": 0.4431, "step": 20725 }, { "epoch": 0.6366847909562867, "grad_norm": 0.3565138280391693, "learning_rate": 1.543026245542689e-05, "loss": 0.5393, "step": 20726 }, { "epoch": 0.6367155100912358, "grad_norm": 0.3640054166316986, "learning_rate": 1.5429856633507307e-05, "loss": 0.5501, "step": 20727 }, { "epoch": 0.636746229226185, "grad_norm": 0.3780773878097534, "learning_rate": 1.5429450798905913e-05, "loss": 0.6163, "step": 20728 }, { "epoch": 0.6367769483611342, "grad_norm": 0.3750295341014862, "learning_rate": 1.5429044951623655e-05, "loss": 0.5645, "step": 20729 }, { "epoch": 0.6368076674960833, "grad_norm": 0.36474609375, "learning_rate": 1.5428639091661484e-05, "loss": 0.5589, "step": 20730 }, { "epoch": 0.6368383866310324, "grad_norm": 0.38259291648864746, "learning_rate": 1.5428233219020348e-05, "loss": 0.6142, "step": 20731 }, { "epoch": 0.6368691057659817, "grad_norm": 0.3590189516544342, "learning_rate": 1.542782733370119e-05, "loss": 0.5284, "step": 20732 }, { "epoch": 0.6368998249009308, "grad_norm": 0.375313401222229, "learning_rate": 1.5427421435704968e-05, "loss": 0.5501, "step": 20733 }, { "epoch": 0.6369305440358799, "grad_norm": 0.33337441086769104, "learning_rate": 1.542701552503262e-05, "loss": 0.5717, "step": 20734 }, { "epoch": 0.6369612631708291, "grad_norm": 0.3405343294143677, "learning_rate": 1.5426609601685104e-05, "loss": 0.5309, "step": 20735 }, { "epoch": 0.6369919823057782, "grad_norm": 0.4539313316345215, "learning_rate": 1.542620366566336e-05, "loss": 0.5388, "step": 20736 }, { "epoch": 0.6370227014407275, "grad_norm": 0.3602193593978882, "learning_rate": 1.5425797716968337e-05, "loss": 0.5627, "step": 20737 }, { "epoch": 0.6370534205756766, "grad_norm": 0.37307775020599365, "learning_rate": 1.5425391755600984e-05, "loss": 0.5472, "step": 20738 }, { "epoch": 0.6370841397106257, "grad_norm": 0.34480804204940796, "learning_rate": 1.5424985781562248e-05, "loss": 0.4821, "step": 20739 }, { "epoch": 0.637114858845575, "grad_norm": 0.3704259693622589, "learning_rate": 1.5424579794853082e-05, "loss": 0.5804, "step": 20740 }, { "epoch": 0.6371455779805241, "grad_norm": 0.3935807943344116, "learning_rate": 1.5424173795474428e-05, "loss": 0.4997, "step": 20741 }, { "epoch": 0.6371762971154732, "grad_norm": 0.3182840347290039, "learning_rate": 1.542376778342724e-05, "loss": 0.5532, "step": 20742 }, { "epoch": 0.6372070162504224, "grad_norm": 0.31924566626548767, "learning_rate": 1.5423361758712465e-05, "loss": 0.5298, "step": 20743 }, { "epoch": 0.6372377353853715, "grad_norm": 0.3846641182899475, "learning_rate": 1.5422955721331047e-05, "loss": 0.6265, "step": 20744 }, { "epoch": 0.6372684545203207, "grad_norm": 0.334900438785553, "learning_rate": 1.542254967128394e-05, "loss": 0.5522, "step": 20745 }, { "epoch": 0.6372991736552699, "grad_norm": 0.39805877208709717, "learning_rate": 1.5422143608572085e-05, "loss": 0.5738, "step": 20746 }, { "epoch": 0.637329892790219, "grad_norm": 0.3536311089992523, "learning_rate": 1.542173753319644e-05, "loss": 0.6673, "step": 20747 }, { "epoch": 0.6373606119251682, "grad_norm": 0.34713104367256165, "learning_rate": 1.5421331445157944e-05, "loss": 0.6164, "step": 20748 }, { "epoch": 0.6373913310601174, "grad_norm": 0.33841443061828613, "learning_rate": 1.542092534445755e-05, "loss": 0.5898, "step": 20749 }, { "epoch": 0.6374220501950665, "grad_norm": 0.39818644523620605, "learning_rate": 1.542051923109621e-05, "loss": 0.5448, "step": 20750 }, { "epoch": 0.6374527693300157, "grad_norm": 0.3755238652229309, "learning_rate": 1.5420113105074865e-05, "loss": 0.5395, "step": 20751 }, { "epoch": 0.6374834884649648, "grad_norm": 0.35733336210250854, "learning_rate": 1.541970696639447e-05, "loss": 0.5338, "step": 20752 }, { "epoch": 0.637514207599914, "grad_norm": 0.32510673999786377, "learning_rate": 1.541930081505597e-05, "loss": 0.5563, "step": 20753 }, { "epoch": 0.6375449267348632, "grad_norm": 0.3711097240447998, "learning_rate": 1.5418894651060314e-05, "loss": 0.5849, "step": 20754 }, { "epoch": 0.6375756458698123, "grad_norm": 0.34897685050964355, "learning_rate": 1.541848847440845e-05, "loss": 0.5304, "step": 20755 }, { "epoch": 0.6376063650047614, "grad_norm": 0.42798200249671936, "learning_rate": 1.541808228510133e-05, "loss": 0.5718, "step": 20756 }, { "epoch": 0.6376370841397107, "grad_norm": 2.154426336288452, "learning_rate": 1.54176760831399e-05, "loss": 0.546, "step": 20757 }, { "epoch": 0.6376678032746598, "grad_norm": 0.4037649929523468, "learning_rate": 1.5417269868525108e-05, "loss": 0.5783, "step": 20758 }, { "epoch": 0.6376985224096089, "grad_norm": 0.41507789492607117, "learning_rate": 1.5416863641257904e-05, "loss": 0.5569, "step": 20759 }, { "epoch": 0.6377292415445581, "grad_norm": 0.3546105921268463, "learning_rate": 1.541645740133924e-05, "loss": 0.5439, "step": 20760 }, { "epoch": 0.6377599606795072, "grad_norm": 0.374881774187088, "learning_rate": 1.5416051148770057e-05, "loss": 0.5702, "step": 20761 }, { "epoch": 0.6377906798144565, "grad_norm": 0.420716792345047, "learning_rate": 1.541564488355131e-05, "loss": 0.5774, "step": 20762 }, { "epoch": 0.6378213989494056, "grad_norm": 0.3426470160484314, "learning_rate": 1.5415238605683948e-05, "loss": 0.5308, "step": 20763 }, { "epoch": 0.6378521180843547, "grad_norm": 0.3916623592376709, "learning_rate": 1.5414832315168916e-05, "loss": 0.6849, "step": 20764 }, { "epoch": 0.6378828372193039, "grad_norm": 0.3270256817340851, "learning_rate": 1.5414426012007163e-05, "loss": 0.5459, "step": 20765 }, { "epoch": 0.6379135563542531, "grad_norm": 0.3480682075023651, "learning_rate": 1.5414019696199643e-05, "loss": 0.4922, "step": 20766 }, { "epoch": 0.6379442754892022, "grad_norm": 0.40008991956710815, "learning_rate": 1.5413613367747302e-05, "loss": 0.5978, "step": 20767 }, { "epoch": 0.6379749946241514, "grad_norm": 0.35763034224510193, "learning_rate": 1.5413207026651088e-05, "loss": 0.5022, "step": 20768 }, { "epoch": 0.6380057137591005, "grad_norm": 0.47359034419059753, "learning_rate": 1.5412800672911948e-05, "loss": 0.5115, "step": 20769 }, { "epoch": 0.6380364328940497, "grad_norm": 0.34236940741539, "learning_rate": 1.5412394306530834e-05, "loss": 0.5682, "step": 20770 }, { "epoch": 0.6380671520289989, "grad_norm": 0.38796931505203247, "learning_rate": 1.54119879275087e-05, "loss": 0.6241, "step": 20771 }, { "epoch": 0.638097871163948, "grad_norm": 0.3342176675796509, "learning_rate": 1.5411581535846488e-05, "loss": 0.5712, "step": 20772 }, { "epoch": 0.6381285902988972, "grad_norm": 0.3565404415130615, "learning_rate": 1.541117513154515e-05, "loss": 0.5355, "step": 20773 }, { "epoch": 0.6381593094338464, "grad_norm": 0.42896947264671326, "learning_rate": 1.541076871460563e-05, "loss": 0.587, "step": 20774 }, { "epoch": 0.6381900285687955, "grad_norm": 0.3301527202129364, "learning_rate": 1.541036228502889e-05, "loss": 0.4794, "step": 20775 }, { "epoch": 0.6382207477037447, "grad_norm": 0.35773420333862305, "learning_rate": 1.540995584281587e-05, "loss": 0.5426, "step": 20776 }, { "epoch": 0.6382514668386938, "grad_norm": 0.35400286316871643, "learning_rate": 1.5409549387967517e-05, "loss": 0.4602, "step": 20777 }, { "epoch": 0.638282185973643, "grad_norm": 0.35787689685821533, "learning_rate": 1.5409142920484784e-05, "loss": 0.5616, "step": 20778 }, { "epoch": 0.6383129051085922, "grad_norm": 0.6055699586868286, "learning_rate": 1.5408736440368616e-05, "loss": 0.5556, "step": 20779 }, { "epoch": 0.6383436242435413, "grad_norm": 0.38203462958335876, "learning_rate": 1.5408329947619972e-05, "loss": 0.6023, "step": 20780 }, { "epoch": 0.6383743433784904, "grad_norm": 0.3609989285469055, "learning_rate": 1.5407923442239793e-05, "loss": 0.6627, "step": 20781 }, { "epoch": 0.6384050625134396, "grad_norm": 0.4683116674423218, "learning_rate": 1.5407516924229035e-05, "loss": 0.5121, "step": 20782 }, { "epoch": 0.6384357816483888, "grad_norm": 0.39630070328712463, "learning_rate": 1.540711039358864e-05, "loss": 0.7149, "step": 20783 }, { "epoch": 0.6384665007833379, "grad_norm": 0.3756365478038788, "learning_rate": 1.5406703850319558e-05, "loss": 0.5426, "step": 20784 }, { "epoch": 0.6384972199182871, "grad_norm": 0.5582771897315979, "learning_rate": 1.5406297294422744e-05, "loss": 0.5218, "step": 20785 }, { "epoch": 0.6385279390532362, "grad_norm": 0.38374239206314087, "learning_rate": 1.540589072589915e-05, "loss": 0.589, "step": 20786 }, { "epoch": 0.6385586581881855, "grad_norm": 0.35624638199806213, "learning_rate": 1.5405484144749718e-05, "loss": 0.5646, "step": 20787 }, { "epoch": 0.6385893773231346, "grad_norm": 0.4281298518180847, "learning_rate": 1.5405077550975396e-05, "loss": 0.5883, "step": 20788 }, { "epoch": 0.6386200964580837, "grad_norm": 0.3845742344856262, "learning_rate": 1.540467094457714e-05, "loss": 0.6353, "step": 20789 }, { "epoch": 0.6386508155930329, "grad_norm": 0.5202739834785461, "learning_rate": 1.5404264325555896e-05, "loss": 0.5497, "step": 20790 }, { "epoch": 0.6386815347279821, "grad_norm": 0.3463848829269409, "learning_rate": 1.5403857693912622e-05, "loss": 0.5718, "step": 20791 }, { "epoch": 0.6387122538629312, "grad_norm": 0.35784783959388733, "learning_rate": 1.5403451049648252e-05, "loss": 0.5668, "step": 20792 }, { "epoch": 0.6387429729978804, "grad_norm": 0.3366399109363556, "learning_rate": 1.540304439276375e-05, "loss": 0.5938, "step": 20793 }, { "epoch": 0.6387736921328295, "grad_norm": 0.3377671241760254, "learning_rate": 1.540263772326006e-05, "loss": 0.6155, "step": 20794 }, { "epoch": 0.6388044112677786, "grad_norm": 0.3631085753440857, "learning_rate": 1.5402231041138135e-05, "loss": 0.5054, "step": 20795 }, { "epoch": 0.6388351304027279, "grad_norm": 0.3519534468650818, "learning_rate": 1.5401824346398916e-05, "loss": 0.5655, "step": 20796 }, { "epoch": 0.638865849537677, "grad_norm": 0.4018741250038147, "learning_rate": 1.5401417639043364e-05, "loss": 0.6043, "step": 20797 }, { "epoch": 0.6388965686726262, "grad_norm": 0.36169248819351196, "learning_rate": 1.540101091907242e-05, "loss": 0.6538, "step": 20798 }, { "epoch": 0.6389272878075754, "grad_norm": 0.3719204068183899, "learning_rate": 1.5400604186487043e-05, "loss": 0.6352, "step": 20799 }, { "epoch": 0.6389580069425245, "grad_norm": 0.3760837912559509, "learning_rate": 1.5400197441288174e-05, "loss": 0.5543, "step": 20800 }, { "epoch": 0.6389887260774737, "grad_norm": 0.4423889219760895, "learning_rate": 1.5399790683476767e-05, "loss": 0.5414, "step": 20801 }, { "epoch": 0.6390194452124228, "grad_norm": 0.3917571008205414, "learning_rate": 1.5399383913053772e-05, "loss": 0.6224, "step": 20802 }, { "epoch": 0.6390501643473719, "grad_norm": 0.3391745388507843, "learning_rate": 1.539897713002014e-05, "loss": 0.583, "step": 20803 }, { "epoch": 0.6390808834823212, "grad_norm": 0.41930949687957764, "learning_rate": 1.539857033437682e-05, "loss": 0.5317, "step": 20804 }, { "epoch": 0.6391116026172703, "grad_norm": 0.407981276512146, "learning_rate": 1.5398163526124757e-05, "loss": 0.5955, "step": 20805 }, { "epoch": 0.6391423217522194, "grad_norm": 0.3955775201320648, "learning_rate": 1.539775670526491e-05, "loss": 0.6376, "step": 20806 }, { "epoch": 0.6391730408871686, "grad_norm": 0.3503192663192749, "learning_rate": 1.5397349871798225e-05, "loss": 0.6108, "step": 20807 }, { "epoch": 0.6392037600221178, "grad_norm": 0.34872543811798096, "learning_rate": 1.5396943025725654e-05, "loss": 0.5565, "step": 20808 }, { "epoch": 0.6392344791570669, "grad_norm": 0.3625309467315674, "learning_rate": 1.5396536167048144e-05, "loss": 0.5641, "step": 20809 }, { "epoch": 0.6392651982920161, "grad_norm": 0.3512546718120575, "learning_rate": 1.5396129295766645e-05, "loss": 0.648, "step": 20810 }, { "epoch": 0.6392959174269652, "grad_norm": 0.380602091550827, "learning_rate": 1.5395722411882113e-05, "loss": 0.5424, "step": 20811 }, { "epoch": 0.6393266365619145, "grad_norm": 0.3533155620098114, "learning_rate": 1.5395315515395492e-05, "loss": 0.5646, "step": 20812 }, { "epoch": 0.6393573556968636, "grad_norm": 0.4151459336280823, "learning_rate": 1.5394908606307738e-05, "loss": 0.464, "step": 20813 }, { "epoch": 0.6393880748318127, "grad_norm": 0.37673863768577576, "learning_rate": 1.5394501684619793e-05, "loss": 0.5918, "step": 20814 }, { "epoch": 0.6394187939667619, "grad_norm": 0.35735154151916504, "learning_rate": 1.5394094750332616e-05, "loss": 0.5879, "step": 20815 }, { "epoch": 0.639449513101711, "grad_norm": 0.32787731289863586, "learning_rate": 1.539368780344715e-05, "loss": 0.5694, "step": 20816 }, { "epoch": 0.6394802322366602, "grad_norm": 0.38867661356925964, "learning_rate": 1.5393280843964356e-05, "loss": 0.6178, "step": 20817 }, { "epoch": 0.6395109513716094, "grad_norm": 0.3139607310295105, "learning_rate": 1.5392873871885174e-05, "loss": 0.5276, "step": 20818 }, { "epoch": 0.6395416705065585, "grad_norm": 0.8744598627090454, "learning_rate": 1.5392466887210558e-05, "loss": 0.6235, "step": 20819 }, { "epoch": 0.6395723896415076, "grad_norm": 0.36900168657302856, "learning_rate": 1.5392059889941464e-05, "loss": 0.5813, "step": 20820 }, { "epoch": 0.6396031087764569, "grad_norm": 0.39651721715927124, "learning_rate": 1.5391652880078832e-05, "loss": 0.6226, "step": 20821 }, { "epoch": 0.639633827911406, "grad_norm": 0.4150601327419281, "learning_rate": 1.539124585762362e-05, "loss": 0.4746, "step": 20822 }, { "epoch": 0.6396645470463552, "grad_norm": 0.3916657269001007, "learning_rate": 1.5390838822576774e-05, "loss": 0.6371, "step": 20823 }, { "epoch": 0.6396952661813043, "grad_norm": 0.3985803723335266, "learning_rate": 1.5390431774939248e-05, "loss": 0.6138, "step": 20824 }, { "epoch": 0.6397259853162535, "grad_norm": 0.38621610403060913, "learning_rate": 1.5390024714711994e-05, "loss": 0.6171, "step": 20825 }, { "epoch": 0.6397567044512027, "grad_norm": 0.35168588161468506, "learning_rate": 1.5389617641895963e-05, "loss": 0.6149, "step": 20826 }, { "epoch": 0.6397874235861518, "grad_norm": 0.3802066147327423, "learning_rate": 1.53892105564921e-05, "loss": 0.4938, "step": 20827 }, { "epoch": 0.6398181427211009, "grad_norm": 0.38327112793922424, "learning_rate": 1.538880345850136e-05, "loss": 0.5722, "step": 20828 }, { "epoch": 0.6398488618560502, "grad_norm": 0.35668909549713135, "learning_rate": 1.5388396347924698e-05, "loss": 0.5104, "step": 20829 }, { "epoch": 0.6398795809909993, "grad_norm": 0.324063241481781, "learning_rate": 1.538798922476305e-05, "loss": 0.4872, "step": 20830 }, { "epoch": 0.6399103001259484, "grad_norm": 0.3448141813278198, "learning_rate": 1.538758208901739e-05, "loss": 0.5741, "step": 20831 }, { "epoch": 0.6399410192608976, "grad_norm": 0.36655572056770325, "learning_rate": 1.538717494068864e-05, "loss": 0.5778, "step": 20832 }, { "epoch": 0.6399717383958468, "grad_norm": 0.31668636202812195, "learning_rate": 1.5386767779777782e-05, "loss": 0.5966, "step": 20833 }, { "epoch": 0.640002457530796, "grad_norm": 0.3346656858921051, "learning_rate": 1.5386360606285743e-05, "loss": 0.5643, "step": 20834 }, { "epoch": 0.6400331766657451, "grad_norm": 0.5087994337081909, "learning_rate": 1.5385953420213485e-05, "loss": 0.6153, "step": 20835 }, { "epoch": 0.6400638958006942, "grad_norm": 0.36429840326309204, "learning_rate": 1.5385546221561955e-05, "loss": 0.5531, "step": 20836 }, { "epoch": 0.6400946149356435, "grad_norm": 0.4043413996696472, "learning_rate": 1.5385139010332107e-05, "loss": 0.4874, "step": 20837 }, { "epoch": 0.6401253340705926, "grad_norm": 0.36924898624420166, "learning_rate": 1.538473178652489e-05, "loss": 0.4554, "step": 20838 }, { "epoch": 0.6401560532055417, "grad_norm": 0.3485294282436371, "learning_rate": 1.538432455014126e-05, "loss": 0.6355, "step": 20839 }, { "epoch": 0.6401867723404909, "grad_norm": 0.353496789932251, "learning_rate": 1.5383917301182158e-05, "loss": 0.5197, "step": 20840 }, { "epoch": 0.64021749147544, "grad_norm": 0.3431825041770935, "learning_rate": 1.5383510039648545e-05, "loss": 0.4991, "step": 20841 }, { "epoch": 0.6402482106103892, "grad_norm": 0.3692113757133484, "learning_rate": 1.5383102765541365e-05, "loss": 0.5504, "step": 20842 }, { "epoch": 0.6402789297453384, "grad_norm": 0.36240479350090027, "learning_rate": 1.5382695478861577e-05, "loss": 0.5895, "step": 20843 }, { "epoch": 0.6403096488802875, "grad_norm": 0.5612323880195618, "learning_rate": 1.5382288179610122e-05, "loss": 0.6117, "step": 20844 }, { "epoch": 0.6403403680152366, "grad_norm": 0.4030100107192993, "learning_rate": 1.5381880867787963e-05, "loss": 0.5857, "step": 20845 }, { "epoch": 0.6403710871501859, "grad_norm": 0.3628098666667938, "learning_rate": 1.5381473543396038e-05, "loss": 0.5478, "step": 20846 }, { "epoch": 0.640401806285135, "grad_norm": 0.34048354625701904, "learning_rate": 1.5381066206435314e-05, "loss": 0.5063, "step": 20847 }, { "epoch": 0.6404325254200842, "grad_norm": 0.34463179111480713, "learning_rate": 1.538065885690673e-05, "loss": 0.5652, "step": 20848 }, { "epoch": 0.6404632445550333, "grad_norm": 0.3524990975856781, "learning_rate": 1.5380251494811242e-05, "loss": 0.5592, "step": 20849 }, { "epoch": 0.6404939636899825, "grad_norm": 0.3656536638736725, "learning_rate": 1.53798441201498e-05, "loss": 0.6148, "step": 20850 }, { "epoch": 0.6405246828249317, "grad_norm": 0.32302892208099365, "learning_rate": 1.5379436732923358e-05, "loss": 0.5275, "step": 20851 }, { "epoch": 0.6405554019598808, "grad_norm": 0.4332520067691803, "learning_rate": 1.5379029333132866e-05, "loss": 0.5539, "step": 20852 }, { "epoch": 0.6405861210948299, "grad_norm": 0.3678499758243561, "learning_rate": 1.537862192077927e-05, "loss": 0.5246, "step": 20853 }, { "epoch": 0.6406168402297792, "grad_norm": 0.6043170690536499, "learning_rate": 1.5378214495863536e-05, "loss": 0.5469, "step": 20854 }, { "epoch": 0.6406475593647283, "grad_norm": 0.38966259360313416, "learning_rate": 1.5377807058386597e-05, "loss": 0.5233, "step": 20855 }, { "epoch": 0.6406782784996774, "grad_norm": 0.3550439178943634, "learning_rate": 1.537739960834942e-05, "loss": 0.5986, "step": 20856 }, { "epoch": 0.6407089976346266, "grad_norm": 0.3994467854499817, "learning_rate": 1.537699214575295e-05, "loss": 0.6188, "step": 20857 }, { "epoch": 0.6407397167695758, "grad_norm": 0.33483171463012695, "learning_rate": 1.537658467059814e-05, "loss": 0.5402, "step": 20858 }, { "epoch": 0.640770435904525, "grad_norm": 0.3513179123401642, "learning_rate": 1.5376177182885937e-05, "loss": 0.4559, "step": 20859 }, { "epoch": 0.6408011550394741, "grad_norm": 0.36502605676651, "learning_rate": 1.53757696826173e-05, "loss": 0.62, "step": 20860 }, { "epoch": 0.6408318741744232, "grad_norm": 0.3467021584510803, "learning_rate": 1.5375362169793177e-05, "loss": 0.5364, "step": 20861 }, { "epoch": 0.6408625933093725, "grad_norm": 0.38371577858924866, "learning_rate": 1.537495464441452e-05, "loss": 0.5055, "step": 20862 }, { "epoch": 0.6408933124443216, "grad_norm": 0.3472099006175995, "learning_rate": 1.537454710648228e-05, "loss": 0.6678, "step": 20863 }, { "epoch": 0.6409240315792707, "grad_norm": 0.3693546950817108, "learning_rate": 1.5374139555997413e-05, "loss": 0.5687, "step": 20864 }, { "epoch": 0.6409547507142199, "grad_norm": 0.36298009753227234, "learning_rate": 1.5373731992960864e-05, "loss": 0.5841, "step": 20865 }, { "epoch": 0.640985469849169, "grad_norm": 0.3719865679740906, "learning_rate": 1.537332441737359e-05, "loss": 0.6207, "step": 20866 }, { "epoch": 0.6410161889841182, "grad_norm": 0.36504945158958435, "learning_rate": 1.5372916829236545e-05, "loss": 0.6039, "step": 20867 }, { "epoch": 0.6410469081190674, "grad_norm": 0.3743671178817749, "learning_rate": 1.537250922855067e-05, "loss": 0.5676, "step": 20868 }, { "epoch": 0.6410776272540165, "grad_norm": 0.3298199474811554, "learning_rate": 1.537210161531693e-05, "loss": 0.5368, "step": 20869 }, { "epoch": 0.6411083463889656, "grad_norm": 0.3886088728904724, "learning_rate": 1.537169398953627e-05, "loss": 0.543, "step": 20870 }, { "epoch": 0.6411390655239149, "grad_norm": 0.3698728680610657, "learning_rate": 1.5371286351209643e-05, "loss": 0.5459, "step": 20871 }, { "epoch": 0.641169784658864, "grad_norm": 0.3116261661052704, "learning_rate": 1.5370878700338003e-05, "loss": 0.5102, "step": 20872 }, { "epoch": 0.6412005037938132, "grad_norm": 0.34786030650138855, "learning_rate": 1.53704710369223e-05, "loss": 0.5471, "step": 20873 }, { "epoch": 0.6412312229287623, "grad_norm": 0.3540440797805786, "learning_rate": 1.537006336096349e-05, "loss": 0.5652, "step": 20874 }, { "epoch": 0.6412619420637115, "grad_norm": 0.36045390367507935, "learning_rate": 1.536965567246252e-05, "loss": 0.5836, "step": 20875 }, { "epoch": 0.6412926611986607, "grad_norm": 0.35088762640953064, "learning_rate": 1.5369247971420342e-05, "loss": 0.5743, "step": 20876 }, { "epoch": 0.6413233803336098, "grad_norm": 0.4171912372112274, "learning_rate": 1.536884025783791e-05, "loss": 0.5131, "step": 20877 }, { "epoch": 0.6413540994685589, "grad_norm": 0.4174949526786804, "learning_rate": 1.536843253171618e-05, "loss": 0.599, "step": 20878 }, { "epoch": 0.6413848186035082, "grad_norm": 0.38767898082733154, "learning_rate": 1.5368024793056096e-05, "loss": 0.4437, "step": 20879 }, { "epoch": 0.6414155377384573, "grad_norm": 0.3775469660758972, "learning_rate": 1.5367617041858618e-05, "loss": 0.5954, "step": 20880 }, { "epoch": 0.6414462568734064, "grad_norm": 0.3442866802215576, "learning_rate": 1.5367209278124693e-05, "loss": 0.4426, "step": 20881 }, { "epoch": 0.6414769760083556, "grad_norm": 0.3539390563964844, "learning_rate": 1.536680150185528e-05, "loss": 0.5028, "step": 20882 }, { "epoch": 0.6415076951433047, "grad_norm": 0.32517609000205994, "learning_rate": 1.5366393713051325e-05, "loss": 0.5355, "step": 20883 }, { "epoch": 0.641538414278254, "grad_norm": 0.3497147858142853, "learning_rate": 1.5365985911713786e-05, "loss": 0.5731, "step": 20884 }, { "epoch": 0.6415691334132031, "grad_norm": 0.3645322322845459, "learning_rate": 1.536557809784361e-05, "loss": 0.534, "step": 20885 }, { "epoch": 0.6415998525481522, "grad_norm": 0.3056846857070923, "learning_rate": 1.5365170271441755e-05, "loss": 0.4847, "step": 20886 }, { "epoch": 0.6416305716831014, "grad_norm": 0.36134231090545654, "learning_rate": 1.5364762432509163e-05, "loss": 0.5569, "step": 20887 }, { "epoch": 0.6416612908180506, "grad_norm": 0.32816246151924133, "learning_rate": 1.53643545810468e-05, "loss": 0.557, "step": 20888 }, { "epoch": 0.6416920099529997, "grad_norm": 0.3362811803817749, "learning_rate": 1.5363946717055606e-05, "loss": 0.5905, "step": 20889 }, { "epoch": 0.6417227290879489, "grad_norm": 0.3576391339302063, "learning_rate": 1.5363538840536544e-05, "loss": 0.599, "step": 20890 }, { "epoch": 0.641753448222898, "grad_norm": 0.34165647625923157, "learning_rate": 1.5363130951490562e-05, "loss": 0.5034, "step": 20891 }, { "epoch": 0.6417841673578472, "grad_norm": 0.39356979727745056, "learning_rate": 1.5362723049918614e-05, "loss": 0.4751, "step": 20892 }, { "epoch": 0.6418148864927964, "grad_norm": 0.35495203733444214, "learning_rate": 1.536231513582165e-05, "loss": 0.5561, "step": 20893 }, { "epoch": 0.6418456056277455, "grad_norm": 0.35927629470825195, "learning_rate": 1.5361907209200623e-05, "loss": 0.5392, "step": 20894 }, { "epoch": 0.6418763247626946, "grad_norm": 0.4135212004184723, "learning_rate": 1.536149927005649e-05, "loss": 0.4691, "step": 20895 }, { "epoch": 0.6419070438976439, "grad_norm": 0.33481723070144653, "learning_rate": 1.53610913183902e-05, "loss": 0.5204, "step": 20896 }, { "epoch": 0.641937763032593, "grad_norm": 0.39213618636131287, "learning_rate": 1.5360683354202707e-05, "loss": 0.6237, "step": 20897 }, { "epoch": 0.6419684821675422, "grad_norm": 0.3819192349910736, "learning_rate": 1.5360275377494965e-05, "loss": 0.5602, "step": 20898 }, { "epoch": 0.6419992013024913, "grad_norm": 0.44466233253479004, "learning_rate": 1.5359867388267924e-05, "loss": 0.5725, "step": 20899 }, { "epoch": 0.6420299204374404, "grad_norm": 0.3888411521911621, "learning_rate": 1.535945938652254e-05, "loss": 0.5982, "step": 20900 }, { "epoch": 0.6420606395723897, "grad_norm": 0.4260590672492981, "learning_rate": 1.535905137225976e-05, "loss": 0.4823, "step": 20901 }, { "epoch": 0.6420913587073388, "grad_norm": 0.36727961897850037, "learning_rate": 1.5358643345480544e-05, "loss": 0.6115, "step": 20902 }, { "epoch": 0.6421220778422879, "grad_norm": 0.36011600494384766, "learning_rate": 1.5358235306185846e-05, "loss": 0.5766, "step": 20903 }, { "epoch": 0.6421527969772372, "grad_norm": 0.3337893784046173, "learning_rate": 1.535782725437661e-05, "loss": 0.5962, "step": 20904 }, { "epoch": 0.6421835161121863, "grad_norm": 0.3691105544567108, "learning_rate": 1.53574191900538e-05, "loss": 0.6058, "step": 20905 }, { "epoch": 0.6422142352471354, "grad_norm": 0.35306116938591003, "learning_rate": 1.5357011113218353e-05, "loss": 0.5731, "step": 20906 }, { "epoch": 0.6422449543820846, "grad_norm": 0.3464304804801941, "learning_rate": 1.5356603023871238e-05, "loss": 0.5436, "step": 20907 }, { "epoch": 0.6422756735170337, "grad_norm": 0.3951212465763092, "learning_rate": 1.535619492201341e-05, "loss": 0.5562, "step": 20908 }, { "epoch": 0.642306392651983, "grad_norm": 0.32665568590164185, "learning_rate": 1.5355786807645805e-05, "loss": 0.5791, "step": 20909 }, { "epoch": 0.6423371117869321, "grad_norm": 0.3780112862586975, "learning_rate": 1.5355378680769387e-05, "loss": 0.5513, "step": 20910 }, { "epoch": 0.6423678309218812, "grad_norm": 0.3381659984588623, "learning_rate": 1.535497054138511e-05, "loss": 0.5473, "step": 20911 }, { "epoch": 0.6423985500568304, "grad_norm": 0.3771618604660034, "learning_rate": 1.5354562389493928e-05, "loss": 0.5769, "step": 20912 }, { "epoch": 0.6424292691917796, "grad_norm": 0.3657260239124298, "learning_rate": 1.5354154225096783e-05, "loss": 0.5345, "step": 20913 }, { "epoch": 0.6424599883267287, "grad_norm": 0.3492717146873474, "learning_rate": 1.5353746048194648e-05, "loss": 0.5194, "step": 20914 }, { "epoch": 0.6424907074616779, "grad_norm": 0.3742915987968445, "learning_rate": 1.535333785878846e-05, "loss": 0.572, "step": 20915 }, { "epoch": 0.642521426596627, "grad_norm": 0.3834552466869354, "learning_rate": 1.5352929656879173e-05, "loss": 0.5389, "step": 20916 }, { "epoch": 0.6425521457315762, "grad_norm": 0.4887505769729614, "learning_rate": 1.535252144246775e-05, "loss": 0.625, "step": 20917 }, { "epoch": 0.6425828648665254, "grad_norm": 0.37432119250297546, "learning_rate": 1.5352113215555137e-05, "loss": 0.5669, "step": 20918 }, { "epoch": 0.6426135840014745, "grad_norm": 0.35921257734298706, "learning_rate": 1.5351704976142295e-05, "loss": 0.565, "step": 20919 }, { "epoch": 0.6426443031364237, "grad_norm": 0.4020228683948517, "learning_rate": 1.5351296724230167e-05, "loss": 0.5776, "step": 20920 }, { "epoch": 0.6426750222713729, "grad_norm": 0.3747492730617523, "learning_rate": 1.5350888459819712e-05, "loss": 0.5728, "step": 20921 }, { "epoch": 0.642705741406322, "grad_norm": 0.368836373090744, "learning_rate": 1.535048018291189e-05, "loss": 0.5963, "step": 20922 }, { "epoch": 0.6427364605412712, "grad_norm": 0.3211019039154053, "learning_rate": 1.5350071893507637e-05, "loss": 0.5372, "step": 20923 }, { "epoch": 0.6427671796762203, "grad_norm": 0.3805239796638489, "learning_rate": 1.5349663591607928e-05, "loss": 0.5645, "step": 20924 }, { "epoch": 0.6427978988111694, "grad_norm": 0.3502267301082611, "learning_rate": 1.53492552772137e-05, "loss": 0.5388, "step": 20925 }, { "epoch": 0.6428286179461187, "grad_norm": 0.34087899327278137, "learning_rate": 1.5348846950325915e-05, "loss": 0.5452, "step": 20926 }, { "epoch": 0.6428593370810678, "grad_norm": 0.3199111223220825, "learning_rate": 1.5348438610945523e-05, "loss": 0.4818, "step": 20927 }, { "epoch": 0.6428900562160169, "grad_norm": 0.3485901653766632, "learning_rate": 1.534803025907348e-05, "loss": 0.6056, "step": 20928 }, { "epoch": 0.6429207753509661, "grad_norm": 0.3199564516544342, "learning_rate": 1.5347621894710737e-05, "loss": 0.5179, "step": 20929 }, { "epoch": 0.6429514944859153, "grad_norm": 0.40650683641433716, "learning_rate": 1.534721351785825e-05, "loss": 0.5352, "step": 20930 }, { "epoch": 0.6429822136208644, "grad_norm": 0.3668935000896454, "learning_rate": 1.5346805128516972e-05, "loss": 0.532, "step": 20931 }, { "epoch": 0.6430129327558136, "grad_norm": 0.36821943521499634, "learning_rate": 1.534639672668786e-05, "loss": 0.5684, "step": 20932 }, { "epoch": 0.6430436518907627, "grad_norm": 0.35517168045043945, "learning_rate": 1.5345988312371865e-05, "loss": 0.5325, "step": 20933 }, { "epoch": 0.643074371025712, "grad_norm": 0.3482302725315094, "learning_rate": 1.5345579885569938e-05, "loss": 0.5399, "step": 20934 }, { "epoch": 0.6431050901606611, "grad_norm": 0.3526201546192169, "learning_rate": 1.5345171446283036e-05, "loss": 0.5457, "step": 20935 }, { "epoch": 0.6431358092956102, "grad_norm": 0.36651378870010376, "learning_rate": 1.5344762994512112e-05, "loss": 0.6171, "step": 20936 }, { "epoch": 0.6431665284305594, "grad_norm": 0.41763150691986084, "learning_rate": 1.5344354530258125e-05, "loss": 0.6012, "step": 20937 }, { "epoch": 0.6431972475655086, "grad_norm": 0.3550092577934265, "learning_rate": 1.5343946053522023e-05, "loss": 0.5514, "step": 20938 }, { "epoch": 0.6432279667004577, "grad_norm": 0.33637434244155884, "learning_rate": 1.534353756430476e-05, "loss": 0.6186, "step": 20939 }, { "epoch": 0.6432586858354069, "grad_norm": 0.3596220314502716, "learning_rate": 1.5343129062607297e-05, "loss": 0.5342, "step": 20940 }, { "epoch": 0.643289404970356, "grad_norm": 0.3487268090248108, "learning_rate": 1.5342720548430573e-05, "loss": 0.5348, "step": 20941 }, { "epoch": 0.6433201241053051, "grad_norm": 0.4124956429004669, "learning_rate": 1.5342312021775564e-05, "loss": 0.6236, "step": 20942 }, { "epoch": 0.6433508432402544, "grad_norm": 0.3736782670021057, "learning_rate": 1.5341903482643203e-05, "loss": 0.5236, "step": 20943 }, { "epoch": 0.6433815623752035, "grad_norm": 0.4179568290710449, "learning_rate": 1.5341494931034457e-05, "loss": 0.657, "step": 20944 }, { "epoch": 0.6434122815101527, "grad_norm": 0.37498927116394043, "learning_rate": 1.5341086366950275e-05, "loss": 0.5836, "step": 20945 }, { "epoch": 0.6434430006451018, "grad_norm": 0.3792065978050232, "learning_rate": 1.5340677790391614e-05, "loss": 0.5367, "step": 20946 }, { "epoch": 0.643473719780051, "grad_norm": 0.3634234368801117, "learning_rate": 1.5340269201359428e-05, "loss": 0.5228, "step": 20947 }, { "epoch": 0.6435044389150002, "grad_norm": 0.3433651626110077, "learning_rate": 1.533986059985467e-05, "loss": 0.5879, "step": 20948 }, { "epoch": 0.6435351580499493, "grad_norm": 0.3351402282714844, "learning_rate": 1.5339451985878297e-05, "loss": 0.542, "step": 20949 }, { "epoch": 0.6435658771848984, "grad_norm": 0.31583377718925476, "learning_rate": 1.5339043359431253e-05, "loss": 0.5073, "step": 20950 }, { "epoch": 0.6435965963198477, "grad_norm": 0.34151533246040344, "learning_rate": 1.5338634720514508e-05, "loss": 0.6071, "step": 20951 }, { "epoch": 0.6436273154547968, "grad_norm": 0.340353786945343, "learning_rate": 1.5338226069129004e-05, "loss": 0.6473, "step": 20952 }, { "epoch": 0.6436580345897459, "grad_norm": 0.3504771888256073, "learning_rate": 1.53378174052757e-05, "loss": 0.5377, "step": 20953 }, { "epoch": 0.6436887537246951, "grad_norm": 0.4145713448524475, "learning_rate": 1.5337408728955553e-05, "loss": 0.6044, "step": 20954 }, { "epoch": 0.6437194728596443, "grad_norm": 0.4213389456272125, "learning_rate": 1.5337000040169517e-05, "loss": 0.5941, "step": 20955 }, { "epoch": 0.6437501919945934, "grad_norm": 0.3491220474243164, "learning_rate": 1.5336591338918543e-05, "loss": 0.56, "step": 20956 }, { "epoch": 0.6437809111295426, "grad_norm": 0.32232803106307983, "learning_rate": 1.5336182625203585e-05, "loss": 0.5392, "step": 20957 }, { "epoch": 0.6438116302644917, "grad_norm": 0.4804140329360962, "learning_rate": 1.5335773899025604e-05, "loss": 0.5981, "step": 20958 }, { "epoch": 0.643842349399441, "grad_norm": 0.3941435217857361, "learning_rate": 1.5335365160385546e-05, "loss": 0.5443, "step": 20959 }, { "epoch": 0.6438730685343901, "grad_norm": 0.32931315898895264, "learning_rate": 1.5334956409284373e-05, "loss": 0.5901, "step": 20960 }, { "epoch": 0.6439037876693392, "grad_norm": 0.3866810202598572, "learning_rate": 1.5334547645723037e-05, "loss": 0.5797, "step": 20961 }, { "epoch": 0.6439345068042884, "grad_norm": 0.375496506690979, "learning_rate": 1.533413886970249e-05, "loss": 0.4977, "step": 20962 }, { "epoch": 0.6439652259392376, "grad_norm": 0.35723400115966797, "learning_rate": 1.533373008122369e-05, "loss": 0.571, "step": 20963 }, { "epoch": 0.6439959450741867, "grad_norm": 0.5616766810417175, "learning_rate": 1.5333321280287595e-05, "loss": 0.5447, "step": 20964 }, { "epoch": 0.6440266642091359, "grad_norm": 0.4586467444896698, "learning_rate": 1.5332912466895148e-05, "loss": 0.6426, "step": 20965 }, { "epoch": 0.644057383344085, "grad_norm": 0.36803698539733887, "learning_rate": 1.5332503641047315e-05, "loss": 0.6273, "step": 20966 }, { "epoch": 0.6440881024790341, "grad_norm": 0.36828169226646423, "learning_rate": 1.533209480274505e-05, "loss": 0.4956, "step": 20967 }, { "epoch": 0.6441188216139834, "grad_norm": 0.37871161103248596, "learning_rate": 1.53316859519893e-05, "loss": 0.5496, "step": 20968 }, { "epoch": 0.6441495407489325, "grad_norm": 0.3682262599468231, "learning_rate": 1.5331277088781034e-05, "loss": 0.5923, "step": 20969 }, { "epoch": 0.6441802598838817, "grad_norm": 0.3739127218723297, "learning_rate": 1.533086821312119e-05, "loss": 0.5535, "step": 20970 }, { "epoch": 0.6442109790188308, "grad_norm": 0.36176133155822754, "learning_rate": 1.5330459325010732e-05, "loss": 0.5567, "step": 20971 }, { "epoch": 0.64424169815378, "grad_norm": 0.3365798592567444, "learning_rate": 1.5330050424450613e-05, "loss": 0.5749, "step": 20972 }, { "epoch": 0.6442724172887292, "grad_norm": 0.38414496183395386, "learning_rate": 1.5329641511441792e-05, "loss": 0.4606, "step": 20973 }, { "epoch": 0.6443031364236783, "grad_norm": 0.38018038868904114, "learning_rate": 1.532923258598522e-05, "loss": 0.5871, "step": 20974 }, { "epoch": 0.6443338555586274, "grad_norm": 0.3654167950153351, "learning_rate": 1.5328823648081853e-05, "loss": 0.5692, "step": 20975 }, { "epoch": 0.6443645746935767, "grad_norm": 0.4189678728580475, "learning_rate": 1.532841469773265e-05, "loss": 0.566, "step": 20976 }, { "epoch": 0.6443952938285258, "grad_norm": 0.3532921373844147, "learning_rate": 1.5328005734938555e-05, "loss": 0.5512, "step": 20977 }, { "epoch": 0.6444260129634749, "grad_norm": 0.4898541271686554, "learning_rate": 1.5327596759700535e-05, "loss": 0.5924, "step": 20978 }, { "epoch": 0.6444567320984241, "grad_norm": 0.3392971158027649, "learning_rate": 1.532718777201954e-05, "loss": 0.6062, "step": 20979 }, { "epoch": 0.6444874512333733, "grad_norm": 0.35596346855163574, "learning_rate": 1.5326778771896527e-05, "loss": 0.6037, "step": 20980 }, { "epoch": 0.6445181703683224, "grad_norm": 0.34132859110832214, "learning_rate": 1.5326369759332448e-05, "loss": 0.5471, "step": 20981 }, { "epoch": 0.6445488895032716, "grad_norm": 0.3714083433151245, "learning_rate": 1.5325960734328264e-05, "loss": 0.6321, "step": 20982 }, { "epoch": 0.6445796086382207, "grad_norm": 0.348080575466156, "learning_rate": 1.5325551696884923e-05, "loss": 0.5503, "step": 20983 }, { "epoch": 0.64461032777317, "grad_norm": 0.35637056827545166, "learning_rate": 1.5325142647003388e-05, "loss": 0.5492, "step": 20984 }, { "epoch": 0.6446410469081191, "grad_norm": 0.3944288194179535, "learning_rate": 1.5324733584684607e-05, "loss": 0.5675, "step": 20985 }, { "epoch": 0.6446717660430682, "grad_norm": 0.40230047702789307, "learning_rate": 1.532432450992954e-05, "loss": 0.5545, "step": 20986 }, { "epoch": 0.6447024851780174, "grad_norm": 0.3403817415237427, "learning_rate": 1.532391542273914e-05, "loss": 0.5691, "step": 20987 }, { "epoch": 0.6447332043129665, "grad_norm": 0.3405209481716156, "learning_rate": 1.532350632311436e-05, "loss": 0.5402, "step": 20988 }, { "epoch": 0.6447639234479157, "grad_norm": 0.3776181638240814, "learning_rate": 1.5323097211056163e-05, "loss": 0.5386, "step": 20989 }, { "epoch": 0.6447946425828649, "grad_norm": 0.38848719000816345, "learning_rate": 1.5322688086565505e-05, "loss": 0.5229, "step": 20990 }, { "epoch": 0.644825361717814, "grad_norm": 0.3658437430858612, "learning_rate": 1.5322278949643332e-05, "loss": 0.5875, "step": 20991 }, { "epoch": 0.6448560808527631, "grad_norm": 0.4088321328163147, "learning_rate": 1.5321869800290608e-05, "loss": 0.4693, "step": 20992 }, { "epoch": 0.6448867999877124, "grad_norm": 0.35241207480430603, "learning_rate": 1.5321460638508282e-05, "loss": 0.5631, "step": 20993 }, { "epoch": 0.6449175191226615, "grad_norm": 0.41194894909858704, "learning_rate": 1.5321051464297316e-05, "loss": 0.5396, "step": 20994 }, { "epoch": 0.6449482382576107, "grad_norm": 0.37471991777420044, "learning_rate": 1.5320642277658662e-05, "loss": 0.4863, "step": 20995 }, { "epoch": 0.6449789573925598, "grad_norm": 0.3904237151145935, "learning_rate": 1.5320233078593276e-05, "loss": 0.585, "step": 20996 }, { "epoch": 0.645009676527509, "grad_norm": 0.4080604314804077, "learning_rate": 1.5319823867102114e-05, "loss": 0.6125, "step": 20997 }, { "epoch": 0.6450403956624582, "grad_norm": 0.3524417281150818, "learning_rate": 1.5319414643186133e-05, "loss": 0.5139, "step": 20998 }, { "epoch": 0.6450711147974073, "grad_norm": 0.39282363653182983, "learning_rate": 1.5319005406846286e-05, "loss": 0.5862, "step": 20999 }, { "epoch": 0.6451018339323564, "grad_norm": 0.37743422389030457, "learning_rate": 1.5318596158083533e-05, "loss": 0.5646, "step": 21000 }, { "epoch": 0.6451325530673057, "grad_norm": 0.34742310643196106, "learning_rate": 1.5318186896898824e-05, "loss": 0.5233, "step": 21001 }, { "epoch": 0.6451632722022548, "grad_norm": 0.35940808057785034, "learning_rate": 1.531777762329312e-05, "loss": 0.4975, "step": 21002 }, { "epoch": 0.6451939913372039, "grad_norm": 0.36026981472969055, "learning_rate": 1.5317368337267373e-05, "loss": 0.4613, "step": 21003 }, { "epoch": 0.6452247104721531, "grad_norm": 0.38685715198516846, "learning_rate": 1.531695903882254e-05, "loss": 0.5905, "step": 21004 }, { "epoch": 0.6452554296071022, "grad_norm": 0.36566805839538574, "learning_rate": 1.5316549727959582e-05, "loss": 0.552, "step": 21005 }, { "epoch": 0.6452861487420514, "grad_norm": 0.3765190541744232, "learning_rate": 1.531614040467945e-05, "loss": 0.5634, "step": 21006 }, { "epoch": 0.6453168678770006, "grad_norm": 0.42519429326057434, "learning_rate": 1.5315731068983096e-05, "loss": 0.5338, "step": 21007 }, { "epoch": 0.6453475870119497, "grad_norm": 0.3869919180870056, "learning_rate": 1.5315321720871486e-05, "loss": 0.5041, "step": 21008 }, { "epoch": 0.645378306146899, "grad_norm": 0.36877205967903137, "learning_rate": 1.5314912360345567e-05, "loss": 0.5554, "step": 21009 }, { "epoch": 0.6454090252818481, "grad_norm": 0.3693174719810486, "learning_rate": 1.5314502987406303e-05, "loss": 0.5226, "step": 21010 }, { "epoch": 0.6454397444167972, "grad_norm": 0.38518235087394714, "learning_rate": 1.531409360205464e-05, "loss": 0.6441, "step": 21011 }, { "epoch": 0.6454704635517464, "grad_norm": 0.3417920172214508, "learning_rate": 1.5313684204291548e-05, "loss": 0.5518, "step": 21012 }, { "epoch": 0.6455011826866955, "grad_norm": 0.32412490248680115, "learning_rate": 1.5313274794117972e-05, "loss": 0.5029, "step": 21013 }, { "epoch": 0.6455319018216447, "grad_norm": 0.36117231845855713, "learning_rate": 1.5312865371534873e-05, "loss": 0.63, "step": 21014 }, { "epoch": 0.6455626209565939, "grad_norm": 0.40169623494148254, "learning_rate": 1.53124559365432e-05, "loss": 0.63, "step": 21015 }, { "epoch": 0.645593340091543, "grad_norm": 0.390857070684433, "learning_rate": 1.5312046489143923e-05, "loss": 0.6665, "step": 21016 }, { "epoch": 0.6456240592264921, "grad_norm": 0.442899614572525, "learning_rate": 1.5311637029337988e-05, "loss": 0.5241, "step": 21017 }, { "epoch": 0.6456547783614414, "grad_norm": 0.35355377197265625, "learning_rate": 1.5311227557126354e-05, "loss": 0.5393, "step": 21018 }, { "epoch": 0.6456854974963905, "grad_norm": 0.35533440113067627, "learning_rate": 1.5310818072509976e-05, "loss": 0.5431, "step": 21019 }, { "epoch": 0.6457162166313397, "grad_norm": 0.359896183013916, "learning_rate": 1.531040857548981e-05, "loss": 0.5569, "step": 21020 }, { "epoch": 0.6457469357662888, "grad_norm": 0.3652799129486084, "learning_rate": 1.530999906606682e-05, "loss": 0.5601, "step": 21021 }, { "epoch": 0.645777654901238, "grad_norm": 0.40050986409187317, "learning_rate": 1.5309589544241948e-05, "loss": 0.5816, "step": 21022 }, { "epoch": 0.6458083740361872, "grad_norm": 0.36075711250305176, "learning_rate": 1.5309180010016166e-05, "loss": 0.5801, "step": 21023 }, { "epoch": 0.6458390931711363, "grad_norm": 0.383246511220932, "learning_rate": 1.5308770463390417e-05, "loss": 0.5274, "step": 21024 }, { "epoch": 0.6458698123060854, "grad_norm": 0.33581051230430603, "learning_rate": 1.5308360904365664e-05, "loss": 0.5112, "step": 21025 }, { "epoch": 0.6459005314410347, "grad_norm": 0.37318626046180725, "learning_rate": 1.5307951332942867e-05, "loss": 0.575, "step": 21026 }, { "epoch": 0.6459312505759838, "grad_norm": 0.3774493634700775, "learning_rate": 1.530754174912298e-05, "loss": 0.5493, "step": 21027 }, { "epoch": 0.6459619697109329, "grad_norm": 0.3477663993835449, "learning_rate": 1.5307132152906956e-05, "loss": 0.6598, "step": 21028 }, { "epoch": 0.6459926888458821, "grad_norm": 0.39382797479629517, "learning_rate": 1.5306722544295753e-05, "loss": 0.6182, "step": 21029 }, { "epoch": 0.6460234079808312, "grad_norm": 0.34387707710266113, "learning_rate": 1.5306312923290333e-05, "loss": 0.57, "step": 21030 }, { "epoch": 0.6460541271157805, "grad_norm": 0.3556790351867676, "learning_rate": 1.5305903289891643e-05, "loss": 0.4468, "step": 21031 }, { "epoch": 0.6460848462507296, "grad_norm": 0.3460606038570404, "learning_rate": 1.530549364410065e-05, "loss": 0.5962, "step": 21032 }, { "epoch": 0.6461155653856787, "grad_norm": 0.3530220687389374, "learning_rate": 1.5305083985918304e-05, "loss": 0.5943, "step": 21033 }, { "epoch": 0.646146284520628, "grad_norm": 0.341777503490448, "learning_rate": 1.5304674315345563e-05, "loss": 0.4825, "step": 21034 }, { "epoch": 0.6461770036555771, "grad_norm": 0.34201759099960327, "learning_rate": 1.5304264632383387e-05, "loss": 0.4961, "step": 21035 }, { "epoch": 0.6462077227905262, "grad_norm": 0.38698264956474304, "learning_rate": 1.530385493703273e-05, "loss": 0.5735, "step": 21036 }, { "epoch": 0.6462384419254754, "grad_norm": 0.3451104462146759, "learning_rate": 1.5303445229294553e-05, "loss": 0.5428, "step": 21037 }, { "epoch": 0.6462691610604245, "grad_norm": 0.4078007638454437, "learning_rate": 1.5303035509169802e-05, "loss": 0.5225, "step": 21038 }, { "epoch": 0.6462998801953737, "grad_norm": 0.3543389141559601, "learning_rate": 1.5302625776659444e-05, "loss": 0.6107, "step": 21039 }, { "epoch": 0.6463305993303229, "grad_norm": 0.35852688550949097, "learning_rate": 1.5302216031764438e-05, "loss": 0.5434, "step": 21040 }, { "epoch": 0.646361318465272, "grad_norm": 0.3610747754573822, "learning_rate": 1.530180627448573e-05, "loss": 0.5789, "step": 21041 }, { "epoch": 0.6463920376002211, "grad_norm": 0.34643131494522095, "learning_rate": 1.5301396504824286e-05, "loss": 0.6153, "step": 21042 }, { "epoch": 0.6464227567351704, "grad_norm": 0.3491239547729492, "learning_rate": 1.5300986722781054e-05, "loss": 0.5323, "step": 21043 }, { "epoch": 0.6464534758701195, "grad_norm": 0.33727267384529114, "learning_rate": 1.5300576928357006e-05, "loss": 0.6049, "step": 21044 }, { "epoch": 0.6464841950050687, "grad_norm": 0.3956087827682495, "learning_rate": 1.5300167121553083e-05, "loss": 0.531, "step": 21045 }, { "epoch": 0.6465149141400178, "grad_norm": 0.38314035534858704, "learning_rate": 1.5299757302370255e-05, "loss": 0.5321, "step": 21046 }, { "epoch": 0.646545633274967, "grad_norm": 0.3673381209373474, "learning_rate": 1.5299347470809474e-05, "loss": 0.6051, "step": 21047 }, { "epoch": 0.6465763524099162, "grad_norm": 0.34769871830940247, "learning_rate": 1.5298937626871694e-05, "loss": 0.5119, "step": 21048 }, { "epoch": 0.6466070715448653, "grad_norm": 0.3635618984699249, "learning_rate": 1.5298527770557876e-05, "loss": 0.5631, "step": 21049 }, { "epoch": 0.6466377906798144, "grad_norm": 0.3592568635940552, "learning_rate": 1.5298117901868978e-05, "loss": 0.6246, "step": 21050 }, { "epoch": 0.6466685098147636, "grad_norm": 0.44311055541038513, "learning_rate": 1.529770802080595e-05, "loss": 0.6018, "step": 21051 }, { "epoch": 0.6466992289497128, "grad_norm": 0.32642507553100586, "learning_rate": 1.5297298127369762e-05, "loss": 0.5602, "step": 21052 }, { "epoch": 0.6467299480846619, "grad_norm": 0.34433677792549133, "learning_rate": 1.529688822156136e-05, "loss": 0.4747, "step": 21053 }, { "epoch": 0.6467606672196111, "grad_norm": 0.36533817648887634, "learning_rate": 1.529647830338171e-05, "loss": 0.543, "step": 21054 }, { "epoch": 0.6467913863545602, "grad_norm": 0.3118935823440552, "learning_rate": 1.529606837283176e-05, "loss": 0.6023, "step": 21055 }, { "epoch": 0.6468221054895095, "grad_norm": 0.3961502015590668, "learning_rate": 1.5295658429912475e-05, "loss": 0.5481, "step": 21056 }, { "epoch": 0.6468528246244586, "grad_norm": 0.37988021969795227, "learning_rate": 1.5295248474624812e-05, "loss": 0.6167, "step": 21057 }, { "epoch": 0.6468835437594077, "grad_norm": 0.3486245274543762, "learning_rate": 1.529483850696972e-05, "loss": 0.5175, "step": 21058 }, { "epoch": 0.6469142628943569, "grad_norm": 0.3644658029079437, "learning_rate": 1.5294428526948165e-05, "loss": 0.5458, "step": 21059 }, { "epoch": 0.6469449820293061, "grad_norm": 0.3319699466228485, "learning_rate": 1.5294018534561106e-05, "loss": 0.5899, "step": 21060 }, { "epoch": 0.6469757011642552, "grad_norm": 0.35019388794898987, "learning_rate": 1.5293608529809494e-05, "loss": 0.5093, "step": 21061 }, { "epoch": 0.6470064202992044, "grad_norm": 0.34817495942115784, "learning_rate": 1.529319851269429e-05, "loss": 0.6231, "step": 21062 }, { "epoch": 0.6470371394341535, "grad_norm": 0.35597026348114014, "learning_rate": 1.529278848321645e-05, "loss": 0.4674, "step": 21063 }, { "epoch": 0.6470678585691026, "grad_norm": 0.46832388639450073, "learning_rate": 1.529237844137694e-05, "loss": 0.4954, "step": 21064 }, { "epoch": 0.6470985777040519, "grad_norm": 0.3815400302410126, "learning_rate": 1.52919683871767e-05, "loss": 0.5408, "step": 21065 }, { "epoch": 0.647129296839001, "grad_norm": 0.35776036977767944, "learning_rate": 1.5291558320616705e-05, "loss": 0.5501, "step": 21066 }, { "epoch": 0.6471600159739501, "grad_norm": 0.3446243107318878, "learning_rate": 1.5291148241697905e-05, "loss": 0.569, "step": 21067 }, { "epoch": 0.6471907351088994, "grad_norm": 0.3718982934951782, "learning_rate": 1.5290738150421257e-05, "loss": 0.5225, "step": 21068 }, { "epoch": 0.6472214542438485, "grad_norm": 0.33712413907051086, "learning_rate": 1.529032804678772e-05, "loss": 0.5125, "step": 21069 }, { "epoch": 0.6472521733787977, "grad_norm": 0.4483018219470978, "learning_rate": 1.528991793079826e-05, "loss": 0.5148, "step": 21070 }, { "epoch": 0.6472828925137468, "grad_norm": 0.41643697023391724, "learning_rate": 1.528950780245382e-05, "loss": 0.6332, "step": 21071 }, { "epoch": 0.6473136116486959, "grad_norm": 0.3428291082382202, "learning_rate": 1.5289097661755367e-05, "loss": 0.5674, "step": 21072 }, { "epoch": 0.6473443307836452, "grad_norm": 0.43564745783805847, "learning_rate": 1.5288687508703854e-05, "loss": 0.5165, "step": 21073 }, { "epoch": 0.6473750499185943, "grad_norm": 1.7530477046966553, "learning_rate": 1.5288277343300246e-05, "loss": 0.6084, "step": 21074 }, { "epoch": 0.6474057690535434, "grad_norm": 0.4651877284049988, "learning_rate": 1.5287867165545494e-05, "loss": 0.5259, "step": 21075 }, { "epoch": 0.6474364881884926, "grad_norm": 0.35585036873817444, "learning_rate": 1.528745697544056e-05, "loss": 0.6144, "step": 21076 }, { "epoch": 0.6474672073234418, "grad_norm": 0.3212881088256836, "learning_rate": 1.52870467729864e-05, "loss": 0.5196, "step": 21077 }, { "epoch": 0.6474979264583909, "grad_norm": 0.36482104659080505, "learning_rate": 1.5286636558183974e-05, "loss": 0.532, "step": 21078 }, { "epoch": 0.6475286455933401, "grad_norm": 0.3578993082046509, "learning_rate": 1.528622633103424e-05, "loss": 0.6466, "step": 21079 }, { "epoch": 0.6475593647282892, "grad_norm": 0.34349480271339417, "learning_rate": 1.5285816091538155e-05, "loss": 0.5535, "step": 21080 }, { "epoch": 0.6475900838632385, "grad_norm": 0.505435049533844, "learning_rate": 1.5285405839696674e-05, "loss": 0.5909, "step": 21081 }, { "epoch": 0.6476208029981876, "grad_norm": 0.351590633392334, "learning_rate": 1.5284995575510762e-05, "loss": 0.583, "step": 21082 }, { "epoch": 0.6476515221331367, "grad_norm": 0.3633093535900116, "learning_rate": 1.5284585298981375e-05, "loss": 0.564, "step": 21083 }, { "epoch": 0.6476822412680859, "grad_norm": 0.40705040097236633, "learning_rate": 1.5284175010109467e-05, "loss": 0.5188, "step": 21084 }, { "epoch": 0.647712960403035, "grad_norm": 0.3733178973197937, "learning_rate": 1.5283764708896e-05, "loss": 0.5606, "step": 21085 }, { "epoch": 0.6477436795379842, "grad_norm": 0.38366010785102844, "learning_rate": 1.5283354395341933e-05, "loss": 0.5804, "step": 21086 }, { "epoch": 0.6477743986729334, "grad_norm": 0.352815717458725, "learning_rate": 1.528294406944822e-05, "loss": 0.5702, "step": 21087 }, { "epoch": 0.6478051178078825, "grad_norm": 0.35607782006263733, "learning_rate": 1.5282533731215823e-05, "loss": 0.6011, "step": 21088 }, { "epoch": 0.6478358369428316, "grad_norm": 0.3563075661659241, "learning_rate": 1.52821233806457e-05, "loss": 0.5646, "step": 21089 }, { "epoch": 0.6478665560777809, "grad_norm": 0.37785565853118896, "learning_rate": 1.528171301773881e-05, "loss": 0.5322, "step": 21090 }, { "epoch": 0.64789727521273, "grad_norm": 0.5396628975868225, "learning_rate": 1.528130264249611e-05, "loss": 0.5781, "step": 21091 }, { "epoch": 0.6479279943476791, "grad_norm": 0.38104113936424255, "learning_rate": 1.528089225491856e-05, "loss": 0.6065, "step": 21092 }, { "epoch": 0.6479587134826283, "grad_norm": 0.3751753270626068, "learning_rate": 1.5280481855007117e-05, "loss": 0.5791, "step": 21093 }, { "epoch": 0.6479894326175775, "grad_norm": 0.3350427448749542, "learning_rate": 1.528007144276274e-05, "loss": 0.5738, "step": 21094 }, { "epoch": 0.6480201517525267, "grad_norm": 0.4106416404247284, "learning_rate": 1.5279661018186382e-05, "loss": 0.5113, "step": 21095 }, { "epoch": 0.6480508708874758, "grad_norm": 0.34743404388427734, "learning_rate": 1.527925058127901e-05, "loss": 0.5549, "step": 21096 }, { "epoch": 0.6480815900224249, "grad_norm": 0.3584442734718323, "learning_rate": 1.5278840132041584e-05, "loss": 0.5575, "step": 21097 }, { "epoch": 0.6481123091573742, "grad_norm": 1.0756334066390991, "learning_rate": 1.5278429670475054e-05, "loss": 0.6611, "step": 21098 }, { "epoch": 0.6481430282923233, "grad_norm": 0.36513200402259827, "learning_rate": 1.527801919658038e-05, "loss": 0.5914, "step": 21099 }, { "epoch": 0.6481737474272724, "grad_norm": 0.33125272393226624, "learning_rate": 1.5277608710358533e-05, "loss": 0.5564, "step": 21100 }, { "epoch": 0.6482044665622216, "grad_norm": 0.3366023600101471, "learning_rate": 1.5277198211810453e-05, "loss": 0.5452, "step": 21101 }, { "epoch": 0.6482351856971708, "grad_norm": 0.34793949127197266, "learning_rate": 1.527678770093711e-05, "loss": 0.5389, "step": 21102 }, { "epoch": 0.6482659048321199, "grad_norm": 0.3430945575237274, "learning_rate": 1.5276377177739465e-05, "loss": 0.5772, "step": 21103 }, { "epoch": 0.6482966239670691, "grad_norm": 0.32431560754776, "learning_rate": 1.527596664221847e-05, "loss": 0.506, "step": 21104 }, { "epoch": 0.6483273431020182, "grad_norm": 0.3477567732334137, "learning_rate": 1.5275556094375086e-05, "loss": 0.5228, "step": 21105 }, { "epoch": 0.6483580622369675, "grad_norm": 0.36967164278030396, "learning_rate": 1.527514553421027e-05, "loss": 0.6038, "step": 21106 }, { "epoch": 0.6483887813719166, "grad_norm": 0.34005457162857056, "learning_rate": 1.5274734961724987e-05, "loss": 0.4552, "step": 21107 }, { "epoch": 0.6484195005068657, "grad_norm": 0.3599511682987213, "learning_rate": 1.5274324376920195e-05, "loss": 0.5516, "step": 21108 }, { "epoch": 0.6484502196418149, "grad_norm": 0.32037338614463806, "learning_rate": 1.5273913779796843e-05, "loss": 0.5481, "step": 21109 }, { "epoch": 0.648480938776764, "grad_norm": 0.433799684047699, "learning_rate": 1.52735031703559e-05, "loss": 0.5316, "step": 21110 }, { "epoch": 0.6485116579117132, "grad_norm": 0.3411268889904022, "learning_rate": 1.5273092548598323e-05, "loss": 0.6092, "step": 21111 }, { "epoch": 0.6485423770466624, "grad_norm": 0.37603479623794556, "learning_rate": 1.5272681914525066e-05, "loss": 0.598, "step": 21112 }, { "epoch": 0.6485730961816115, "grad_norm": 0.3470586836338043, "learning_rate": 1.52722712681371e-05, "loss": 0.5358, "step": 21113 }, { "epoch": 0.6486038153165606, "grad_norm": 0.38413700461387634, "learning_rate": 1.527186060943537e-05, "loss": 0.6116, "step": 21114 }, { "epoch": 0.6486345344515099, "grad_norm": 0.35378944873809814, "learning_rate": 1.527144993842084e-05, "loss": 0.5722, "step": 21115 }, { "epoch": 0.648665253586459, "grad_norm": 0.3735957741737366, "learning_rate": 1.5271039255094475e-05, "loss": 0.5657, "step": 21116 }, { "epoch": 0.6486959727214082, "grad_norm": 0.33983147144317627, "learning_rate": 1.5270628559457227e-05, "loss": 0.5065, "step": 21117 }, { "epoch": 0.6487266918563573, "grad_norm": 0.35105741024017334, "learning_rate": 1.527021785151006e-05, "loss": 0.5303, "step": 21118 }, { "epoch": 0.6487574109913065, "grad_norm": 0.35426560044288635, "learning_rate": 1.526980713125393e-05, "loss": 0.5777, "step": 21119 }, { "epoch": 0.6487881301262557, "grad_norm": 0.37125492095947266, "learning_rate": 1.5269396398689798e-05, "loss": 0.4753, "step": 21120 }, { "epoch": 0.6488188492612048, "grad_norm": 0.40633946657180786, "learning_rate": 1.526898565381862e-05, "loss": 0.5954, "step": 21121 }, { "epoch": 0.6488495683961539, "grad_norm": 0.35663580894470215, "learning_rate": 1.526857489664136e-05, "loss": 0.5367, "step": 21122 }, { "epoch": 0.6488802875311032, "grad_norm": 0.3527800738811493, "learning_rate": 1.526816412715898e-05, "loss": 0.553, "step": 21123 }, { "epoch": 0.6489110066660523, "grad_norm": 0.3627687692642212, "learning_rate": 1.526775334537243e-05, "loss": 0.5455, "step": 21124 }, { "epoch": 0.6489417258010014, "grad_norm": 0.3883250653743744, "learning_rate": 1.5267342551282673e-05, "loss": 0.5409, "step": 21125 }, { "epoch": 0.6489724449359506, "grad_norm": 0.5203734636306763, "learning_rate": 1.5266931744890673e-05, "loss": 0.6979, "step": 21126 }, { "epoch": 0.6490031640708998, "grad_norm": 0.333779513835907, "learning_rate": 1.5266520926197383e-05, "loss": 0.5519, "step": 21127 }, { "epoch": 0.6490338832058489, "grad_norm": 0.3810933232307434, "learning_rate": 1.5266110095203766e-05, "loss": 0.5758, "step": 21128 }, { "epoch": 0.6490646023407981, "grad_norm": 0.35956528782844543, "learning_rate": 1.5265699251910782e-05, "loss": 0.5832, "step": 21129 }, { "epoch": 0.6490953214757472, "grad_norm": 0.3597363531589508, "learning_rate": 1.5265288396319392e-05, "loss": 0.4548, "step": 21130 }, { "epoch": 0.6491260406106965, "grad_norm": 0.5373230576515198, "learning_rate": 1.526487752843055e-05, "loss": 0.5914, "step": 21131 }, { "epoch": 0.6491567597456456, "grad_norm": 0.3682372272014618, "learning_rate": 1.5264466648245217e-05, "loss": 0.5942, "step": 21132 }, { "epoch": 0.6491874788805947, "grad_norm": 0.3866724371910095, "learning_rate": 1.526405575576436e-05, "loss": 0.6074, "step": 21133 }, { "epoch": 0.6492181980155439, "grad_norm": 0.3760939836502075, "learning_rate": 1.5263644850988928e-05, "loss": 0.5538, "step": 21134 }, { "epoch": 0.649248917150493, "grad_norm": 0.39537909626960754, "learning_rate": 1.5263233933919886e-05, "loss": 0.5886, "step": 21135 }, { "epoch": 0.6492796362854422, "grad_norm": 0.3362131714820862, "learning_rate": 1.5262823004558198e-05, "loss": 0.5293, "step": 21136 }, { "epoch": 0.6493103554203914, "grad_norm": 0.3889015018939972, "learning_rate": 1.5262412062904818e-05, "loss": 0.6232, "step": 21137 }, { "epoch": 0.6493410745553405, "grad_norm": 0.3733552098274231, "learning_rate": 1.5262001108960706e-05, "loss": 0.5098, "step": 21138 }, { "epoch": 0.6493717936902896, "grad_norm": 0.36628004908561707, "learning_rate": 1.5261590142726823e-05, "loss": 0.5179, "step": 21139 }, { "epoch": 0.6494025128252389, "grad_norm": 0.4027087688446045, "learning_rate": 1.5261179164204128e-05, "loss": 0.56, "step": 21140 }, { "epoch": 0.649433231960188, "grad_norm": 0.3608711361885071, "learning_rate": 1.5260768173393584e-05, "loss": 0.5189, "step": 21141 }, { "epoch": 0.6494639510951372, "grad_norm": 0.34560272097587585, "learning_rate": 1.5260357170296146e-05, "loss": 0.5644, "step": 21142 }, { "epoch": 0.6494946702300863, "grad_norm": 0.428324431180954, "learning_rate": 1.525994615491278e-05, "loss": 0.5665, "step": 21143 }, { "epoch": 0.6495253893650355, "grad_norm": 0.3784872591495514, "learning_rate": 1.5259535127244438e-05, "loss": 0.5886, "step": 21144 }, { "epoch": 0.6495561084999847, "grad_norm": 0.4633501470088959, "learning_rate": 1.5259124087292088e-05, "loss": 0.5553, "step": 21145 }, { "epoch": 0.6495868276349338, "grad_norm": 0.37394100427627563, "learning_rate": 1.5258713035056682e-05, "loss": 0.5901, "step": 21146 }, { "epoch": 0.6496175467698829, "grad_norm": 0.3423556089401245, "learning_rate": 1.5258301970539186e-05, "loss": 0.5744, "step": 21147 }, { "epoch": 0.6496482659048322, "grad_norm": 0.40909406542778015, "learning_rate": 1.525789089374056e-05, "loss": 0.5686, "step": 21148 }, { "epoch": 0.6496789850397813, "grad_norm": 0.36684271693229675, "learning_rate": 1.5257479804661764e-05, "loss": 0.4966, "step": 21149 }, { "epoch": 0.6497097041747304, "grad_norm": 0.3600979745388031, "learning_rate": 1.5257068703303754e-05, "loss": 0.5424, "step": 21150 }, { "epoch": 0.6497404233096796, "grad_norm": 0.36789947748184204, "learning_rate": 1.5256657589667493e-05, "loss": 0.5976, "step": 21151 }, { "epoch": 0.6497711424446287, "grad_norm": 0.3653792142868042, "learning_rate": 1.5256246463753943e-05, "loss": 0.4861, "step": 21152 }, { "epoch": 0.6498018615795779, "grad_norm": 0.35519087314605713, "learning_rate": 1.5255835325564059e-05, "loss": 0.5723, "step": 21153 }, { "epoch": 0.6498325807145271, "grad_norm": 0.3895490765571594, "learning_rate": 1.5255424175098807e-05, "loss": 0.6271, "step": 21154 }, { "epoch": 0.6498632998494762, "grad_norm": 0.399873286485672, "learning_rate": 1.5255013012359143e-05, "loss": 0.5207, "step": 21155 }, { "epoch": 0.6498940189844254, "grad_norm": 0.33470451831817627, "learning_rate": 1.5254601837346032e-05, "loss": 0.5508, "step": 21156 }, { "epoch": 0.6499247381193746, "grad_norm": 0.44321173429489136, "learning_rate": 1.525419065006043e-05, "loss": 0.6064, "step": 21157 }, { "epoch": 0.6499554572543237, "grad_norm": 0.3807515501976013, "learning_rate": 1.52537794505033e-05, "loss": 0.5958, "step": 21158 }, { "epoch": 0.6499861763892729, "grad_norm": 0.34488555788993835, "learning_rate": 1.5253368238675602e-05, "loss": 0.4996, "step": 21159 }, { "epoch": 0.650016895524222, "grad_norm": 0.37688541412353516, "learning_rate": 1.5252957014578291e-05, "loss": 0.6285, "step": 21160 }, { "epoch": 0.6500476146591712, "grad_norm": 0.3827815055847168, "learning_rate": 1.5252545778212335e-05, "loss": 0.5656, "step": 21161 }, { "epoch": 0.6500783337941204, "grad_norm": 0.384733647108078, "learning_rate": 1.525213452957869e-05, "loss": 0.5602, "step": 21162 }, { "epoch": 0.6501090529290695, "grad_norm": 0.36378607153892517, "learning_rate": 1.525172326867832e-05, "loss": 0.5124, "step": 21163 }, { "epoch": 0.6501397720640186, "grad_norm": 0.33082979917526245, "learning_rate": 1.5251311995512184e-05, "loss": 0.4463, "step": 21164 }, { "epoch": 0.6501704911989679, "grad_norm": 0.3333810567855835, "learning_rate": 1.525090071008124e-05, "loss": 0.5209, "step": 21165 }, { "epoch": 0.650201210333917, "grad_norm": 0.3513714373111725, "learning_rate": 1.525048941238645e-05, "loss": 0.5693, "step": 21166 }, { "epoch": 0.6502319294688662, "grad_norm": 0.3698136806488037, "learning_rate": 1.5250078102428779e-05, "loss": 0.586, "step": 21167 }, { "epoch": 0.6502626486038153, "grad_norm": 0.36513081192970276, "learning_rate": 1.524966678020918e-05, "loss": 0.5535, "step": 21168 }, { "epoch": 0.6502933677387644, "grad_norm": 0.36800143122673035, "learning_rate": 1.5249255445728622e-05, "loss": 0.5564, "step": 21169 }, { "epoch": 0.6503240868737137, "grad_norm": 0.31189021468162537, "learning_rate": 1.5248844098988059e-05, "loss": 0.46, "step": 21170 }, { "epoch": 0.6503548060086628, "grad_norm": 0.38094401359558105, "learning_rate": 1.5248432739988447e-05, "loss": 0.6047, "step": 21171 }, { "epoch": 0.6503855251436119, "grad_norm": 0.35802143812179565, "learning_rate": 1.5248021368730763e-05, "loss": 0.6221, "step": 21172 }, { "epoch": 0.6504162442785612, "grad_norm": 0.3509170114994049, "learning_rate": 1.5247609985215955e-05, "loss": 0.5095, "step": 21173 }, { "epoch": 0.6504469634135103, "grad_norm": 0.3608590364456177, "learning_rate": 1.524719858944499e-05, "loss": 0.6339, "step": 21174 }, { "epoch": 0.6504776825484594, "grad_norm": 0.34945669770240784, "learning_rate": 1.5246787181418825e-05, "loss": 0.552, "step": 21175 }, { "epoch": 0.6505084016834086, "grad_norm": 0.3786931335926056, "learning_rate": 1.5246375761138421e-05, "loss": 0.5368, "step": 21176 }, { "epoch": 0.6505391208183577, "grad_norm": 0.35752072930336, "learning_rate": 1.524596432860474e-05, "loss": 0.5962, "step": 21177 }, { "epoch": 0.6505698399533069, "grad_norm": 0.3636552691459656, "learning_rate": 1.5245552883818743e-05, "loss": 0.6091, "step": 21178 }, { "epoch": 0.6506005590882561, "grad_norm": 0.4017771780490875, "learning_rate": 1.5245141426781394e-05, "loss": 0.5984, "step": 21179 }, { "epoch": 0.6506312782232052, "grad_norm": 0.4169272482395172, "learning_rate": 1.5244729957493647e-05, "loss": 0.5848, "step": 21180 }, { "epoch": 0.6506619973581544, "grad_norm": 0.3711049258708954, "learning_rate": 1.524431847595647e-05, "loss": 0.6139, "step": 21181 }, { "epoch": 0.6506927164931036, "grad_norm": 0.38444459438323975, "learning_rate": 1.5243906982170818e-05, "loss": 0.6079, "step": 21182 }, { "epoch": 0.6507234356280527, "grad_norm": 0.39567333459854126, "learning_rate": 1.5243495476137655e-05, "loss": 0.5967, "step": 21183 }, { "epoch": 0.6507541547630019, "grad_norm": 0.34809330105781555, "learning_rate": 1.5243083957857945e-05, "loss": 0.4426, "step": 21184 }, { "epoch": 0.650784873897951, "grad_norm": 0.45033055543899536, "learning_rate": 1.5242672427332642e-05, "loss": 0.5408, "step": 21185 }, { "epoch": 0.6508155930329002, "grad_norm": 0.4507579207420349, "learning_rate": 1.524226088456272e-05, "loss": 0.6388, "step": 21186 }, { "epoch": 0.6508463121678494, "grad_norm": 0.37707382440567017, "learning_rate": 1.5241849329549121e-05, "loss": 0.5986, "step": 21187 }, { "epoch": 0.6508770313027985, "grad_norm": 0.39538252353668213, "learning_rate": 1.5241437762292823e-05, "loss": 0.6042, "step": 21188 }, { "epoch": 0.6509077504377476, "grad_norm": 0.33935999870300293, "learning_rate": 1.5241026182794777e-05, "loss": 0.5264, "step": 21189 }, { "epoch": 0.6509384695726969, "grad_norm": 0.3221813440322876, "learning_rate": 1.5240614591055951e-05, "loss": 0.4949, "step": 21190 }, { "epoch": 0.650969188707646, "grad_norm": 0.46106764674186707, "learning_rate": 1.5240202987077305e-05, "loss": 0.597, "step": 21191 }, { "epoch": 0.6509999078425952, "grad_norm": 0.345548540353775, "learning_rate": 1.5239791370859796e-05, "loss": 0.6411, "step": 21192 }, { "epoch": 0.6510306269775443, "grad_norm": 0.36963701248168945, "learning_rate": 1.523937974240439e-05, "loss": 0.5271, "step": 21193 }, { "epoch": 0.6510613461124934, "grad_norm": 0.33349621295928955, "learning_rate": 1.5238968101712047e-05, "loss": 0.5589, "step": 21194 }, { "epoch": 0.6510920652474427, "grad_norm": 0.3784375488758087, "learning_rate": 1.5238556448783727e-05, "loss": 0.5654, "step": 21195 }, { "epoch": 0.6511227843823918, "grad_norm": 0.4361644983291626, "learning_rate": 1.5238144783620393e-05, "loss": 0.5511, "step": 21196 }, { "epoch": 0.6511535035173409, "grad_norm": 0.3269464373588562, "learning_rate": 1.5237733106223007e-05, "loss": 0.5356, "step": 21197 }, { "epoch": 0.6511842226522901, "grad_norm": 0.33510851860046387, "learning_rate": 1.5237321416592528e-05, "loss": 0.5017, "step": 21198 }, { "epoch": 0.6512149417872393, "grad_norm": 0.38178780674934387, "learning_rate": 1.5236909714729919e-05, "loss": 0.5973, "step": 21199 }, { "epoch": 0.6512456609221884, "grad_norm": 0.3066851496696472, "learning_rate": 1.5236498000636142e-05, "loss": 0.5665, "step": 21200 }, { "epoch": 0.6512763800571376, "grad_norm": 0.3607138693332672, "learning_rate": 1.5236086274312158e-05, "loss": 0.6057, "step": 21201 }, { "epoch": 0.6513070991920867, "grad_norm": 0.3537081480026245, "learning_rate": 1.5235674535758928e-05, "loss": 0.623, "step": 21202 }, { "epoch": 0.6513378183270359, "grad_norm": 0.4000770151615143, "learning_rate": 1.5235262784977419e-05, "loss": 0.6122, "step": 21203 }, { "epoch": 0.6513685374619851, "grad_norm": 0.38868939876556396, "learning_rate": 1.5234851021968584e-05, "loss": 0.5906, "step": 21204 }, { "epoch": 0.6513992565969342, "grad_norm": 0.3384988605976105, "learning_rate": 1.5234439246733389e-05, "loss": 0.5657, "step": 21205 }, { "epoch": 0.6514299757318834, "grad_norm": 0.3289218544960022, "learning_rate": 1.5234027459272798e-05, "loss": 0.4906, "step": 21206 }, { "epoch": 0.6514606948668326, "grad_norm": 0.3533121347427368, "learning_rate": 1.5233615659587763e-05, "loss": 0.6142, "step": 21207 }, { "epoch": 0.6514914140017817, "grad_norm": 0.3462236523628235, "learning_rate": 1.523320384767926e-05, "loss": 0.509, "step": 21208 }, { "epoch": 0.6515221331367309, "grad_norm": 0.3846917450428009, "learning_rate": 1.5232792023548243e-05, "loss": 0.494, "step": 21209 }, { "epoch": 0.65155285227168, "grad_norm": 0.3785015642642975, "learning_rate": 1.5232380187195674e-05, "loss": 0.5748, "step": 21210 }, { "epoch": 0.6515835714066291, "grad_norm": 0.3926629424095154, "learning_rate": 1.5231968338622519e-05, "loss": 0.5207, "step": 21211 }, { "epoch": 0.6516142905415784, "grad_norm": 0.40358924865722656, "learning_rate": 1.5231556477829732e-05, "loss": 0.607, "step": 21212 }, { "epoch": 0.6516450096765275, "grad_norm": 0.374072790145874, "learning_rate": 1.5231144604818285e-05, "loss": 0.5213, "step": 21213 }, { "epoch": 0.6516757288114766, "grad_norm": 0.3935735821723938, "learning_rate": 1.5230732719589128e-05, "loss": 0.5309, "step": 21214 }, { "epoch": 0.6517064479464258, "grad_norm": 0.3700404465198517, "learning_rate": 1.5230320822143232e-05, "loss": 0.5491, "step": 21215 }, { "epoch": 0.651737167081375, "grad_norm": 0.33136528730392456, "learning_rate": 1.5229908912481555e-05, "loss": 0.5275, "step": 21216 }, { "epoch": 0.6517678862163242, "grad_norm": 0.34724855422973633, "learning_rate": 1.5229496990605064e-05, "loss": 0.6178, "step": 21217 }, { "epoch": 0.6517986053512733, "grad_norm": 0.37827226519584656, "learning_rate": 1.5229085056514714e-05, "loss": 0.5618, "step": 21218 }, { "epoch": 0.6518293244862224, "grad_norm": 0.3388160169124603, "learning_rate": 1.5228673110211473e-05, "loss": 0.5478, "step": 21219 }, { "epoch": 0.6518600436211717, "grad_norm": 0.34789344668388367, "learning_rate": 1.5228261151696297e-05, "loss": 0.5348, "step": 21220 }, { "epoch": 0.6518907627561208, "grad_norm": 0.3379877507686615, "learning_rate": 1.5227849180970157e-05, "loss": 0.5215, "step": 21221 }, { "epoch": 0.6519214818910699, "grad_norm": 0.3299976885318756, "learning_rate": 1.522743719803401e-05, "loss": 0.4988, "step": 21222 }, { "epoch": 0.6519522010260191, "grad_norm": 0.38278698921203613, "learning_rate": 1.5227025202888814e-05, "loss": 0.6276, "step": 21223 }, { "epoch": 0.6519829201609683, "grad_norm": 0.36548030376434326, "learning_rate": 1.5226613195535538e-05, "loss": 0.5138, "step": 21224 }, { "epoch": 0.6520136392959174, "grad_norm": 0.36305928230285645, "learning_rate": 1.5226201175975143e-05, "loss": 0.5087, "step": 21225 }, { "epoch": 0.6520443584308666, "grad_norm": 0.3938352167606354, "learning_rate": 1.5225789144208587e-05, "loss": 0.5316, "step": 21226 }, { "epoch": 0.6520750775658157, "grad_norm": 0.42634835839271545, "learning_rate": 1.5225377100236838e-05, "loss": 0.6, "step": 21227 }, { "epoch": 0.652105796700765, "grad_norm": 0.326777845621109, "learning_rate": 1.5224965044060853e-05, "loss": 0.6054, "step": 21228 }, { "epoch": 0.6521365158357141, "grad_norm": 0.3648871183395386, "learning_rate": 1.5224552975681604e-05, "loss": 0.5507, "step": 21229 }, { "epoch": 0.6521672349706632, "grad_norm": 0.3970741331577301, "learning_rate": 1.5224140895100039e-05, "loss": 0.5987, "step": 21230 }, { "epoch": 0.6521979541056124, "grad_norm": 0.5752553343772888, "learning_rate": 1.5223728802317132e-05, "loss": 0.5753, "step": 21231 }, { "epoch": 0.6522286732405616, "grad_norm": 0.34485694766044617, "learning_rate": 1.5223316697333839e-05, "loss": 0.5643, "step": 21232 }, { "epoch": 0.6522593923755107, "grad_norm": 0.39095208048820496, "learning_rate": 1.522290458015113e-05, "loss": 0.621, "step": 21233 }, { "epoch": 0.6522901115104599, "grad_norm": 0.3806520998477936, "learning_rate": 1.5222492450769957e-05, "loss": 0.5274, "step": 21234 }, { "epoch": 0.652320830645409, "grad_norm": 0.3421679437160492, "learning_rate": 1.5222080309191291e-05, "loss": 0.5138, "step": 21235 }, { "epoch": 0.6523515497803581, "grad_norm": 0.3755084276199341, "learning_rate": 1.5221668155416091e-05, "loss": 0.493, "step": 21236 }, { "epoch": 0.6523822689153074, "grad_norm": 0.3706680238246918, "learning_rate": 1.5221255989445323e-05, "loss": 0.6324, "step": 21237 }, { "epoch": 0.6524129880502565, "grad_norm": 0.3706282079219818, "learning_rate": 1.5220843811279943e-05, "loss": 0.5624, "step": 21238 }, { "epoch": 0.6524437071852056, "grad_norm": 0.36842653155326843, "learning_rate": 1.522043162092092e-05, "loss": 0.6023, "step": 21239 }, { "epoch": 0.6524744263201548, "grad_norm": 0.38241443037986755, "learning_rate": 1.5220019418369214e-05, "loss": 0.5162, "step": 21240 }, { "epoch": 0.652505145455104, "grad_norm": 0.3187280297279358, "learning_rate": 1.5219607203625785e-05, "loss": 0.4879, "step": 21241 }, { "epoch": 0.6525358645900532, "grad_norm": 0.34186357259750366, "learning_rate": 1.5219194976691604e-05, "loss": 0.5547, "step": 21242 }, { "epoch": 0.6525665837250023, "grad_norm": 0.4546814560890198, "learning_rate": 1.5218782737567625e-05, "loss": 0.5406, "step": 21243 }, { "epoch": 0.6525973028599514, "grad_norm": 0.36709558963775635, "learning_rate": 1.5218370486254816e-05, "loss": 0.5475, "step": 21244 }, { "epoch": 0.6526280219949007, "grad_norm": 0.35779911279678345, "learning_rate": 1.5217958222754136e-05, "loss": 0.5281, "step": 21245 }, { "epoch": 0.6526587411298498, "grad_norm": 0.34381788969039917, "learning_rate": 1.5217545947066552e-05, "loss": 0.6097, "step": 21246 }, { "epoch": 0.6526894602647989, "grad_norm": 0.3810712397098541, "learning_rate": 1.5217133659193026e-05, "loss": 0.6026, "step": 21247 }, { "epoch": 0.6527201793997481, "grad_norm": 0.38310810923576355, "learning_rate": 1.521672135913452e-05, "loss": 0.6051, "step": 21248 }, { "epoch": 0.6527508985346973, "grad_norm": 0.3571401536464691, "learning_rate": 1.5216309046891998e-05, "loss": 0.6091, "step": 21249 }, { "epoch": 0.6527816176696464, "grad_norm": 0.3265821039676666, "learning_rate": 1.5215896722466417e-05, "loss": 0.5606, "step": 21250 }, { "epoch": 0.6528123368045956, "grad_norm": 0.3505452871322632, "learning_rate": 1.5215484385858749e-05, "loss": 0.5649, "step": 21251 }, { "epoch": 0.6528430559395447, "grad_norm": 0.2990560829639435, "learning_rate": 1.5215072037069951e-05, "loss": 0.4978, "step": 21252 }, { "epoch": 0.652873775074494, "grad_norm": 0.35248392820358276, "learning_rate": 1.5214659676100988e-05, "loss": 0.5824, "step": 21253 }, { "epoch": 0.6529044942094431, "grad_norm": 0.3761134743690491, "learning_rate": 1.5214247302952825e-05, "loss": 0.5814, "step": 21254 }, { "epoch": 0.6529352133443922, "grad_norm": 0.3670865595340729, "learning_rate": 1.5213834917626424e-05, "loss": 0.537, "step": 21255 }, { "epoch": 0.6529659324793414, "grad_norm": 0.3877096474170685, "learning_rate": 1.5213422520122746e-05, "loss": 0.5634, "step": 21256 }, { "epoch": 0.6529966516142905, "grad_norm": 0.3734114170074463, "learning_rate": 1.5213010110442755e-05, "loss": 0.5622, "step": 21257 }, { "epoch": 0.6530273707492397, "grad_norm": 0.3824617266654968, "learning_rate": 1.5212597688587416e-05, "loss": 0.5786, "step": 21258 }, { "epoch": 0.6530580898841889, "grad_norm": 0.3273296356201172, "learning_rate": 1.521218525455769e-05, "loss": 0.5034, "step": 21259 }, { "epoch": 0.653088809019138, "grad_norm": 0.44981035590171814, "learning_rate": 1.5211772808354542e-05, "loss": 0.573, "step": 21260 }, { "epoch": 0.6531195281540871, "grad_norm": 0.36316850781440735, "learning_rate": 1.5211360349978932e-05, "loss": 0.5088, "step": 21261 }, { "epoch": 0.6531502472890364, "grad_norm": 0.3592377007007599, "learning_rate": 1.5210947879431828e-05, "loss": 0.5363, "step": 21262 }, { "epoch": 0.6531809664239855, "grad_norm": 0.36521923542022705, "learning_rate": 1.5210535396714191e-05, "loss": 0.5729, "step": 21263 }, { "epoch": 0.6532116855589346, "grad_norm": 0.3922928273677826, "learning_rate": 1.5210122901826984e-05, "loss": 0.5619, "step": 21264 }, { "epoch": 0.6532424046938838, "grad_norm": 0.32539695501327515, "learning_rate": 1.5209710394771176e-05, "loss": 0.5049, "step": 21265 }, { "epoch": 0.653273123828833, "grad_norm": 0.3704848885536194, "learning_rate": 1.5209297875547719e-05, "loss": 0.4964, "step": 21266 }, { "epoch": 0.6533038429637822, "grad_norm": 0.38023149967193604, "learning_rate": 1.5208885344157587e-05, "loss": 0.6916, "step": 21267 }, { "epoch": 0.6533345620987313, "grad_norm": 0.3372463285923004, "learning_rate": 1.5208472800601737e-05, "loss": 0.5244, "step": 21268 }, { "epoch": 0.6533652812336804, "grad_norm": 0.3408006429672241, "learning_rate": 1.5208060244881138e-05, "loss": 0.5001, "step": 21269 }, { "epoch": 0.6533960003686297, "grad_norm": 0.3811284303665161, "learning_rate": 1.5207647676996746e-05, "loss": 0.5898, "step": 21270 }, { "epoch": 0.6534267195035788, "grad_norm": 0.34707513451576233, "learning_rate": 1.5207235096949533e-05, "loss": 0.5367, "step": 21271 }, { "epoch": 0.6534574386385279, "grad_norm": 0.3497793674468994, "learning_rate": 1.5206822504740457e-05, "loss": 0.4749, "step": 21272 }, { "epoch": 0.6534881577734771, "grad_norm": 0.36589452624320984, "learning_rate": 1.5206409900370487e-05, "loss": 0.6019, "step": 21273 }, { "epoch": 0.6535188769084262, "grad_norm": 0.39004573225975037, "learning_rate": 1.520599728384058e-05, "loss": 0.5042, "step": 21274 }, { "epoch": 0.6535495960433754, "grad_norm": 0.3582098186016083, "learning_rate": 1.5205584655151702e-05, "loss": 0.5993, "step": 21275 }, { "epoch": 0.6535803151783246, "grad_norm": 0.32554054260253906, "learning_rate": 1.520517201430482e-05, "loss": 0.4254, "step": 21276 }, { "epoch": 0.6536110343132737, "grad_norm": 0.38111183047294617, "learning_rate": 1.5204759361300892e-05, "loss": 0.5358, "step": 21277 }, { "epoch": 0.653641753448223, "grad_norm": 0.38375672698020935, "learning_rate": 1.5204346696140886e-05, "loss": 0.5679, "step": 21278 }, { "epoch": 0.6536724725831721, "grad_norm": 0.37328851222991943, "learning_rate": 1.5203934018825766e-05, "loss": 0.5343, "step": 21279 }, { "epoch": 0.6537031917181212, "grad_norm": 0.33833566308021545, "learning_rate": 1.5203521329356492e-05, "loss": 0.6177, "step": 21280 }, { "epoch": 0.6537339108530704, "grad_norm": 0.456587016582489, "learning_rate": 1.5203108627734034e-05, "loss": 0.5771, "step": 21281 }, { "epoch": 0.6537646299880195, "grad_norm": 0.4157136082649231, "learning_rate": 1.5202695913959349e-05, "loss": 0.5732, "step": 21282 }, { "epoch": 0.6537953491229687, "grad_norm": 0.34203657507896423, "learning_rate": 1.5202283188033408e-05, "loss": 0.6026, "step": 21283 }, { "epoch": 0.6538260682579179, "grad_norm": 0.3782574534416199, "learning_rate": 1.520187044995717e-05, "loss": 0.5549, "step": 21284 }, { "epoch": 0.653856787392867, "grad_norm": 0.3188110589981079, "learning_rate": 1.52014576997316e-05, "loss": 0.5532, "step": 21285 }, { "epoch": 0.6538875065278161, "grad_norm": 0.40121740102767944, "learning_rate": 1.520104493735766e-05, "loss": 0.6014, "step": 21286 }, { "epoch": 0.6539182256627654, "grad_norm": 0.362213671207428, "learning_rate": 1.520063216283632e-05, "loss": 0.6257, "step": 21287 }, { "epoch": 0.6539489447977145, "grad_norm": 0.33908703923225403, "learning_rate": 1.5200219376168539e-05, "loss": 0.615, "step": 21288 }, { "epoch": 0.6539796639326636, "grad_norm": 0.38679787516593933, "learning_rate": 1.5199806577355283e-05, "loss": 0.5819, "step": 21289 }, { "epoch": 0.6540103830676128, "grad_norm": 0.33308082818984985, "learning_rate": 1.5199393766397514e-05, "loss": 0.5402, "step": 21290 }, { "epoch": 0.654041102202562, "grad_norm": 0.4583079218864441, "learning_rate": 1.51989809432962e-05, "loss": 0.6401, "step": 21291 }, { "epoch": 0.6540718213375112, "grad_norm": 0.4423460066318512, "learning_rate": 1.5198568108052303e-05, "loss": 0.597, "step": 21292 }, { "epoch": 0.6541025404724603, "grad_norm": 0.3427680432796478, "learning_rate": 1.5198155260666785e-05, "loss": 0.5168, "step": 21293 }, { "epoch": 0.6541332596074094, "grad_norm": 0.3829866945743561, "learning_rate": 1.5197742401140614e-05, "loss": 0.5833, "step": 21294 }, { "epoch": 0.6541639787423587, "grad_norm": 0.3797175884246826, "learning_rate": 1.5197329529474751e-05, "loss": 0.5674, "step": 21295 }, { "epoch": 0.6541946978773078, "grad_norm": 0.37866777181625366, "learning_rate": 1.5196916645670163e-05, "loss": 0.5696, "step": 21296 }, { "epoch": 0.6542254170122569, "grad_norm": 0.35549554228782654, "learning_rate": 1.5196503749727814e-05, "loss": 0.5515, "step": 21297 }, { "epoch": 0.6542561361472061, "grad_norm": 0.3419491946697235, "learning_rate": 1.5196090841648665e-05, "loss": 0.5257, "step": 21298 }, { "epoch": 0.6542868552821552, "grad_norm": 0.34124502539634705, "learning_rate": 1.5195677921433683e-05, "loss": 0.5131, "step": 21299 }, { "epoch": 0.6543175744171044, "grad_norm": 0.36015501618385315, "learning_rate": 1.5195264989083833e-05, "loss": 0.6274, "step": 21300 }, { "epoch": 0.6543482935520536, "grad_norm": 0.4206088185310364, "learning_rate": 1.519485204460008e-05, "loss": 0.6502, "step": 21301 }, { "epoch": 0.6543790126870027, "grad_norm": 0.39417484402656555, "learning_rate": 1.5194439087983389e-05, "loss": 0.6225, "step": 21302 }, { "epoch": 0.654409731821952, "grad_norm": 0.4232264459133148, "learning_rate": 1.5194026119234719e-05, "loss": 0.604, "step": 21303 }, { "epoch": 0.6544404509569011, "grad_norm": 0.3604901134967804, "learning_rate": 1.519361313835504e-05, "loss": 0.5869, "step": 21304 }, { "epoch": 0.6544711700918502, "grad_norm": 0.4860617220401764, "learning_rate": 1.5193200145345315e-05, "loss": 0.654, "step": 21305 }, { "epoch": 0.6545018892267994, "grad_norm": 0.552847683429718, "learning_rate": 1.5192787140206507e-05, "loss": 0.6204, "step": 21306 }, { "epoch": 0.6545326083617485, "grad_norm": 0.33214879035949707, "learning_rate": 1.5192374122939581e-05, "loss": 0.5637, "step": 21307 }, { "epoch": 0.6545633274966977, "grad_norm": 0.3869237005710602, "learning_rate": 1.5191961093545508e-05, "loss": 0.5709, "step": 21308 }, { "epoch": 0.6545940466316469, "grad_norm": 0.33624953031539917, "learning_rate": 1.5191548052025243e-05, "loss": 0.5412, "step": 21309 }, { "epoch": 0.654624765766596, "grad_norm": 0.4052075743675232, "learning_rate": 1.5191134998379755e-05, "loss": 0.6068, "step": 21310 }, { "epoch": 0.6546554849015451, "grad_norm": 0.38116681575775146, "learning_rate": 1.5190721932610012e-05, "loss": 0.5822, "step": 21311 }, { "epoch": 0.6546862040364944, "grad_norm": 0.3540833592414856, "learning_rate": 1.519030885471697e-05, "loss": 0.5364, "step": 21312 }, { "epoch": 0.6547169231714435, "grad_norm": 0.44599059224128723, "learning_rate": 1.51898957647016e-05, "loss": 0.503, "step": 21313 }, { "epoch": 0.6547476423063926, "grad_norm": 0.35608598589897156, "learning_rate": 1.5189482662564872e-05, "loss": 0.5945, "step": 21314 }, { "epoch": 0.6547783614413418, "grad_norm": 0.3844536542892456, "learning_rate": 1.518906954830774e-05, "loss": 0.4812, "step": 21315 }, { "epoch": 0.654809080576291, "grad_norm": 0.3706144690513611, "learning_rate": 1.5188656421931174e-05, "loss": 0.5512, "step": 21316 }, { "epoch": 0.6548397997112402, "grad_norm": 0.3958333730697632, "learning_rate": 1.5188243283436137e-05, "loss": 0.5213, "step": 21317 }, { "epoch": 0.6548705188461893, "grad_norm": 0.3822380304336548, "learning_rate": 1.5187830132823598e-05, "loss": 0.5795, "step": 21318 }, { "epoch": 0.6549012379811384, "grad_norm": 0.3616386651992798, "learning_rate": 1.5187416970094519e-05, "loss": 0.6099, "step": 21319 }, { "epoch": 0.6549319571160876, "grad_norm": 0.4148938059806824, "learning_rate": 1.5187003795249866e-05, "loss": 0.5945, "step": 21320 }, { "epoch": 0.6549626762510368, "grad_norm": 0.3439237177371979, "learning_rate": 1.5186590608290601e-05, "loss": 0.6031, "step": 21321 }, { "epoch": 0.6549933953859859, "grad_norm": 0.3367989659309387, "learning_rate": 1.5186177409217693e-05, "loss": 0.5675, "step": 21322 }, { "epoch": 0.6550241145209351, "grad_norm": 0.3483068645000458, "learning_rate": 1.5185764198032106e-05, "loss": 0.549, "step": 21323 }, { "epoch": 0.6550548336558842, "grad_norm": 0.3423927426338196, "learning_rate": 1.5185350974734804e-05, "loss": 0.5682, "step": 21324 }, { "epoch": 0.6550855527908334, "grad_norm": 0.4201408624649048, "learning_rate": 1.5184937739326752e-05, "loss": 0.5283, "step": 21325 }, { "epoch": 0.6551162719257826, "grad_norm": 0.4295091927051544, "learning_rate": 1.518452449180892e-05, "loss": 0.5266, "step": 21326 }, { "epoch": 0.6551469910607317, "grad_norm": 0.3739658296108246, "learning_rate": 1.5184111232182264e-05, "loss": 0.5916, "step": 21327 }, { "epoch": 0.6551777101956809, "grad_norm": 0.354027658700943, "learning_rate": 1.5183697960447757e-05, "loss": 0.5962, "step": 21328 }, { "epoch": 0.6552084293306301, "grad_norm": 0.3570646643638611, "learning_rate": 1.5183284676606358e-05, "loss": 0.587, "step": 21329 }, { "epoch": 0.6552391484655792, "grad_norm": 0.4249320328235626, "learning_rate": 1.5182871380659038e-05, "loss": 0.5683, "step": 21330 }, { "epoch": 0.6552698676005284, "grad_norm": 0.32747507095336914, "learning_rate": 1.518245807260676e-05, "loss": 0.6393, "step": 21331 }, { "epoch": 0.6553005867354775, "grad_norm": 0.3451526463031769, "learning_rate": 1.518204475245049e-05, "loss": 0.5564, "step": 21332 }, { "epoch": 0.6553313058704267, "grad_norm": 0.3794431984424591, "learning_rate": 1.518163142019119e-05, "loss": 0.5298, "step": 21333 }, { "epoch": 0.6553620250053759, "grad_norm": 0.42740288376808167, "learning_rate": 1.518121807582983e-05, "loss": 0.5005, "step": 21334 }, { "epoch": 0.655392744140325, "grad_norm": 0.42671099305152893, "learning_rate": 1.5180804719367372e-05, "loss": 0.5742, "step": 21335 }, { "epoch": 0.6554234632752741, "grad_norm": 0.32646438479423523, "learning_rate": 1.5180391350804783e-05, "loss": 0.5607, "step": 21336 }, { "epoch": 0.6554541824102234, "grad_norm": 0.34121397137641907, "learning_rate": 1.5179977970143029e-05, "loss": 0.5349, "step": 21337 }, { "epoch": 0.6554849015451725, "grad_norm": 0.3382683992385864, "learning_rate": 1.5179564577383072e-05, "loss": 0.5635, "step": 21338 }, { "epoch": 0.6555156206801217, "grad_norm": 0.42304089665412903, "learning_rate": 1.5179151172525886e-05, "loss": 0.5801, "step": 21339 }, { "epoch": 0.6555463398150708, "grad_norm": 0.3641531467437744, "learning_rate": 1.5178737755572424e-05, "loss": 0.5277, "step": 21340 }, { "epoch": 0.6555770589500199, "grad_norm": 0.402485728263855, "learning_rate": 1.5178324326523663e-05, "loss": 0.5766, "step": 21341 }, { "epoch": 0.6556077780849692, "grad_norm": 0.43885186314582825, "learning_rate": 1.5177910885380563e-05, "loss": 0.6454, "step": 21342 }, { "epoch": 0.6556384972199183, "grad_norm": 0.37003961205482483, "learning_rate": 1.517749743214409e-05, "loss": 0.5419, "step": 21343 }, { "epoch": 0.6556692163548674, "grad_norm": 0.34169864654541016, "learning_rate": 1.517708396681521e-05, "loss": 0.5698, "step": 21344 }, { "epoch": 0.6556999354898166, "grad_norm": 0.3646971881389618, "learning_rate": 1.517667048939489e-05, "loss": 0.5374, "step": 21345 }, { "epoch": 0.6557306546247658, "grad_norm": 0.37150439620018005, "learning_rate": 1.5176256999884093e-05, "loss": 0.5694, "step": 21346 }, { "epoch": 0.6557613737597149, "grad_norm": 0.4174787700176239, "learning_rate": 1.5175843498283786e-05, "loss": 0.5069, "step": 21347 }, { "epoch": 0.6557920928946641, "grad_norm": 0.31925278902053833, "learning_rate": 1.5175429984594936e-05, "loss": 0.5447, "step": 21348 }, { "epoch": 0.6558228120296132, "grad_norm": 0.32446563243865967, "learning_rate": 1.5175016458818506e-05, "loss": 0.482, "step": 21349 }, { "epoch": 0.6558535311645624, "grad_norm": 0.33477210998535156, "learning_rate": 1.5174602920955468e-05, "loss": 0.6458, "step": 21350 }, { "epoch": 0.6558842502995116, "grad_norm": 0.38155731558799744, "learning_rate": 1.5174189371006778e-05, "loss": 0.5897, "step": 21351 }, { "epoch": 0.6559149694344607, "grad_norm": 0.3606787323951721, "learning_rate": 1.5173775808973413e-05, "loss": 0.5602, "step": 21352 }, { "epoch": 0.6559456885694099, "grad_norm": 0.3643347918987274, "learning_rate": 1.5173362234856328e-05, "loss": 0.6095, "step": 21353 }, { "epoch": 0.655976407704359, "grad_norm": 0.38778337836265564, "learning_rate": 1.5172948648656496e-05, "loss": 0.5164, "step": 21354 }, { "epoch": 0.6560071268393082, "grad_norm": 0.397091269493103, "learning_rate": 1.517253505037488e-05, "loss": 0.5621, "step": 21355 }, { "epoch": 0.6560378459742574, "grad_norm": 0.3706255257129669, "learning_rate": 1.5172121440012451e-05, "loss": 0.5083, "step": 21356 }, { "epoch": 0.6560685651092065, "grad_norm": 0.36097967624664307, "learning_rate": 1.517170781757017e-05, "loss": 0.5223, "step": 21357 }, { "epoch": 0.6560992842441556, "grad_norm": 0.3413541615009308, "learning_rate": 1.5171294183049e-05, "loss": 0.5218, "step": 21358 }, { "epoch": 0.6561300033791049, "grad_norm": 0.3670789897441864, "learning_rate": 1.5170880536449917e-05, "loss": 0.5455, "step": 21359 }, { "epoch": 0.656160722514054, "grad_norm": 0.36502763628959656, "learning_rate": 1.5170466877773879e-05, "loss": 0.5231, "step": 21360 }, { "epoch": 0.6561914416490031, "grad_norm": 0.3550271689891815, "learning_rate": 1.5170053207021856e-05, "loss": 0.501, "step": 21361 }, { "epoch": 0.6562221607839523, "grad_norm": 0.3810182511806488, "learning_rate": 1.5169639524194811e-05, "loss": 0.5058, "step": 21362 }, { "epoch": 0.6562528799189015, "grad_norm": 0.33492833375930786, "learning_rate": 1.516922582929371e-05, "loss": 0.5493, "step": 21363 }, { "epoch": 0.6562835990538507, "grad_norm": 0.334085077047348, "learning_rate": 1.5168812122319522e-05, "loss": 0.5119, "step": 21364 }, { "epoch": 0.6563143181887998, "grad_norm": 0.3719846308231354, "learning_rate": 1.5168398403273214e-05, "loss": 0.5219, "step": 21365 }, { "epoch": 0.6563450373237489, "grad_norm": 0.3981349766254425, "learning_rate": 1.516798467215575e-05, "loss": 0.5098, "step": 21366 }, { "epoch": 0.6563757564586982, "grad_norm": 0.37250784039497375, "learning_rate": 1.5167570928968096e-05, "loss": 0.5796, "step": 21367 }, { "epoch": 0.6564064755936473, "grad_norm": 0.3397180438041687, "learning_rate": 1.5167157173711223e-05, "loss": 0.5512, "step": 21368 }, { "epoch": 0.6564371947285964, "grad_norm": 0.4083602726459503, "learning_rate": 1.5166743406386087e-05, "loss": 0.5605, "step": 21369 }, { "epoch": 0.6564679138635456, "grad_norm": 0.32612818479537964, "learning_rate": 1.5166329626993666e-05, "loss": 0.5256, "step": 21370 }, { "epoch": 0.6564986329984948, "grad_norm": 0.41936948895454407, "learning_rate": 1.5165915835534922e-05, "loss": 0.5875, "step": 21371 }, { "epoch": 0.6565293521334439, "grad_norm": 0.44222545623779297, "learning_rate": 1.5165502032010815e-05, "loss": 0.5521, "step": 21372 }, { "epoch": 0.6565600712683931, "grad_norm": 0.34852221608161926, "learning_rate": 1.516508821642232e-05, "loss": 0.5653, "step": 21373 }, { "epoch": 0.6565907904033422, "grad_norm": 0.33705440163612366, "learning_rate": 1.5164674388770402e-05, "loss": 0.5569, "step": 21374 }, { "epoch": 0.6566215095382913, "grad_norm": 0.37520697712898254, "learning_rate": 1.5164260549056025e-05, "loss": 0.5301, "step": 21375 }, { "epoch": 0.6566522286732406, "grad_norm": 0.4241773188114166, "learning_rate": 1.5163846697280158e-05, "loss": 0.5811, "step": 21376 }, { "epoch": 0.6566829478081897, "grad_norm": 0.34913191199302673, "learning_rate": 1.5163432833443766e-05, "loss": 0.5661, "step": 21377 }, { "epoch": 0.6567136669431389, "grad_norm": 0.3589656949043274, "learning_rate": 1.5163018957547819e-05, "loss": 0.5697, "step": 21378 }, { "epoch": 0.656744386078088, "grad_norm": 0.3666149377822876, "learning_rate": 1.5162605069593275e-05, "loss": 0.5653, "step": 21379 }, { "epoch": 0.6567751052130372, "grad_norm": 0.33648064732551575, "learning_rate": 1.516219116958111e-05, "loss": 0.5913, "step": 21380 }, { "epoch": 0.6568058243479864, "grad_norm": 0.35224977135658264, "learning_rate": 1.5161777257512285e-05, "loss": 0.4804, "step": 21381 }, { "epoch": 0.6568365434829355, "grad_norm": 0.3747677206993103, "learning_rate": 1.5161363333387771e-05, "loss": 0.6009, "step": 21382 }, { "epoch": 0.6568672626178846, "grad_norm": 0.3011206388473511, "learning_rate": 1.5160949397208531e-05, "loss": 0.4922, "step": 21383 }, { "epoch": 0.6568979817528339, "grad_norm": 0.31312501430511475, "learning_rate": 1.5160535448975534e-05, "loss": 0.4839, "step": 21384 }, { "epoch": 0.656928700887783, "grad_norm": 0.3819100558757782, "learning_rate": 1.5160121488689745e-05, "loss": 0.5523, "step": 21385 }, { "epoch": 0.6569594200227321, "grad_norm": 0.347486674785614, "learning_rate": 1.515970751635213e-05, "loss": 0.6206, "step": 21386 }, { "epoch": 0.6569901391576813, "grad_norm": 0.3591974973678589, "learning_rate": 1.5159293531963662e-05, "loss": 0.5763, "step": 21387 }, { "epoch": 0.6570208582926305, "grad_norm": 0.4106321334838867, "learning_rate": 1.51588795355253e-05, "loss": 0.6494, "step": 21388 }, { "epoch": 0.6570515774275797, "grad_norm": 0.41878724098205566, "learning_rate": 1.5158465527038018e-05, "loss": 0.6044, "step": 21389 }, { "epoch": 0.6570822965625288, "grad_norm": 0.45128133893013, "learning_rate": 1.5158051506502776e-05, "loss": 0.5811, "step": 21390 }, { "epoch": 0.6571130156974779, "grad_norm": 0.33213844895362854, "learning_rate": 1.5157637473920548e-05, "loss": 0.4924, "step": 21391 }, { "epoch": 0.6571437348324272, "grad_norm": 0.3574727773666382, "learning_rate": 1.5157223429292294e-05, "loss": 0.6311, "step": 21392 }, { "epoch": 0.6571744539673763, "grad_norm": 0.34079107642173767, "learning_rate": 1.5156809372618985e-05, "loss": 0.5239, "step": 21393 }, { "epoch": 0.6572051731023254, "grad_norm": 0.34001481533050537, "learning_rate": 1.5156395303901587e-05, "loss": 0.5005, "step": 21394 }, { "epoch": 0.6572358922372746, "grad_norm": 0.3560900092124939, "learning_rate": 1.5155981223141072e-05, "loss": 0.5807, "step": 21395 }, { "epoch": 0.6572666113722238, "grad_norm": 0.37193045020103455, "learning_rate": 1.5155567130338402e-05, "loss": 0.6064, "step": 21396 }, { "epoch": 0.6572973305071729, "grad_norm": 0.35850316286087036, "learning_rate": 1.5155153025494543e-05, "loss": 0.5276, "step": 21397 }, { "epoch": 0.6573280496421221, "grad_norm": 0.5291918516159058, "learning_rate": 1.5154738908610465e-05, "loss": 0.5162, "step": 21398 }, { "epoch": 0.6573587687770712, "grad_norm": 0.4553773105144501, "learning_rate": 1.515432477968713e-05, "loss": 0.5663, "step": 21399 }, { "epoch": 0.6573894879120203, "grad_norm": 0.37874406576156616, "learning_rate": 1.5153910638725516e-05, "loss": 0.5667, "step": 21400 }, { "epoch": 0.6574202070469696, "grad_norm": 0.35394057631492615, "learning_rate": 1.515349648572658e-05, "loss": 0.4824, "step": 21401 }, { "epoch": 0.6574509261819187, "grad_norm": 0.38720422983169556, "learning_rate": 1.5153082320691295e-05, "loss": 0.5867, "step": 21402 }, { "epoch": 0.6574816453168679, "grad_norm": 0.37196874618530273, "learning_rate": 1.5152668143620624e-05, "loss": 0.5795, "step": 21403 }, { "epoch": 0.657512364451817, "grad_norm": 0.3900143504142761, "learning_rate": 1.5152253954515537e-05, "loss": 0.5065, "step": 21404 }, { "epoch": 0.6575430835867662, "grad_norm": 0.40589484572410583, "learning_rate": 1.5151839753377e-05, "loss": 0.5513, "step": 21405 }, { "epoch": 0.6575738027217154, "grad_norm": 0.368908166885376, "learning_rate": 1.5151425540205984e-05, "loss": 0.616, "step": 21406 }, { "epoch": 0.6576045218566645, "grad_norm": 0.3660963773727417, "learning_rate": 1.5151011315003455e-05, "loss": 0.5618, "step": 21407 }, { "epoch": 0.6576352409916136, "grad_norm": 0.3742905557155609, "learning_rate": 1.5150597077770374e-05, "loss": 0.5235, "step": 21408 }, { "epoch": 0.6576659601265629, "grad_norm": 0.3400203585624695, "learning_rate": 1.5150182828507719e-05, "loss": 0.5459, "step": 21409 }, { "epoch": 0.657696679261512, "grad_norm": 0.35715004801750183, "learning_rate": 1.514976856721645e-05, "loss": 0.5893, "step": 21410 }, { "epoch": 0.6577273983964611, "grad_norm": 0.3420814871788025, "learning_rate": 1.5149354293897538e-05, "loss": 0.5797, "step": 21411 }, { "epoch": 0.6577581175314103, "grad_norm": 0.40958234667778015, "learning_rate": 1.514894000855195e-05, "loss": 0.6128, "step": 21412 }, { "epoch": 0.6577888366663595, "grad_norm": 0.41843804717063904, "learning_rate": 1.514852571118065e-05, "loss": 0.6083, "step": 21413 }, { "epoch": 0.6578195558013087, "grad_norm": 0.37203067541122437, "learning_rate": 1.5148111401784613e-05, "loss": 0.5738, "step": 21414 }, { "epoch": 0.6578502749362578, "grad_norm": 0.3540958762168884, "learning_rate": 1.5147697080364798e-05, "loss": 0.6415, "step": 21415 }, { "epoch": 0.6578809940712069, "grad_norm": 0.39587607979774475, "learning_rate": 1.5147282746922178e-05, "loss": 0.5833, "step": 21416 }, { "epoch": 0.6579117132061562, "grad_norm": 0.347836971282959, "learning_rate": 1.5146868401457721e-05, "loss": 0.5804, "step": 21417 }, { "epoch": 0.6579424323411053, "grad_norm": 0.39250636100769043, "learning_rate": 1.5146454043972395e-05, "loss": 0.5471, "step": 21418 }, { "epoch": 0.6579731514760544, "grad_norm": 0.368876576423645, "learning_rate": 1.5146039674467164e-05, "loss": 0.5734, "step": 21419 }, { "epoch": 0.6580038706110036, "grad_norm": 0.3617180585861206, "learning_rate": 1.5145625292942999e-05, "loss": 0.5985, "step": 21420 }, { "epoch": 0.6580345897459527, "grad_norm": 0.44370341300964355, "learning_rate": 1.5145210899400864e-05, "loss": 0.6021, "step": 21421 }, { "epoch": 0.6580653088809019, "grad_norm": 0.3637896478176117, "learning_rate": 1.5144796493841731e-05, "loss": 0.6323, "step": 21422 }, { "epoch": 0.6580960280158511, "grad_norm": 0.3441605269908905, "learning_rate": 1.5144382076266568e-05, "loss": 0.6214, "step": 21423 }, { "epoch": 0.6581267471508002, "grad_norm": 0.7899920344352722, "learning_rate": 1.5143967646676339e-05, "loss": 0.6146, "step": 21424 }, { "epoch": 0.6581574662857494, "grad_norm": 0.42392557859420776, "learning_rate": 1.5143553205072016e-05, "loss": 0.6125, "step": 21425 }, { "epoch": 0.6581881854206986, "grad_norm": 0.3676445782184601, "learning_rate": 1.5143138751454562e-05, "loss": 0.5234, "step": 21426 }, { "epoch": 0.6582189045556477, "grad_norm": 0.3689061403274536, "learning_rate": 1.5142724285824953e-05, "loss": 0.5382, "step": 21427 }, { "epoch": 0.6582496236905969, "grad_norm": 0.36866438388824463, "learning_rate": 1.5142309808184147e-05, "loss": 0.5926, "step": 21428 }, { "epoch": 0.658280342825546, "grad_norm": 0.4071403741836548, "learning_rate": 1.5141895318533123e-05, "loss": 0.643, "step": 21429 }, { "epoch": 0.6583110619604952, "grad_norm": 0.44519367814064026, "learning_rate": 1.5141480816872843e-05, "loss": 0.5567, "step": 21430 }, { "epoch": 0.6583417810954444, "grad_norm": 0.354015052318573, "learning_rate": 1.514106630320427e-05, "loss": 0.5278, "step": 21431 }, { "epoch": 0.6583725002303935, "grad_norm": 0.3890515863895416, "learning_rate": 1.5140651777528383e-05, "loss": 0.5562, "step": 21432 }, { "epoch": 0.6584032193653426, "grad_norm": 0.3459175229072571, "learning_rate": 1.514023723984614e-05, "loss": 0.5879, "step": 21433 }, { "epoch": 0.6584339385002919, "grad_norm": 0.37181705236434937, "learning_rate": 1.5139822690158519e-05, "loss": 0.5838, "step": 21434 }, { "epoch": 0.658464657635241, "grad_norm": 0.4267209470272064, "learning_rate": 1.5139408128466481e-05, "loss": 0.5512, "step": 21435 }, { "epoch": 0.6584953767701901, "grad_norm": 0.39904215931892395, "learning_rate": 1.5138993554770998e-05, "loss": 0.581, "step": 21436 }, { "epoch": 0.6585260959051393, "grad_norm": 0.33324140310287476, "learning_rate": 1.5138578969073032e-05, "loss": 0.5296, "step": 21437 }, { "epoch": 0.6585568150400885, "grad_norm": 0.35187578201293945, "learning_rate": 1.513816437137356e-05, "loss": 0.5413, "step": 21438 }, { "epoch": 0.6585875341750377, "grad_norm": 0.36013263463974, "learning_rate": 1.5137749761673544e-05, "loss": 0.5316, "step": 21439 }, { "epoch": 0.6586182533099868, "grad_norm": 0.3480548858642578, "learning_rate": 1.5137335139973957e-05, "loss": 0.5678, "step": 21440 }, { "epoch": 0.6586489724449359, "grad_norm": 0.35707542300224304, "learning_rate": 1.5136920506275765e-05, "loss": 0.6635, "step": 21441 }, { "epoch": 0.6586796915798852, "grad_norm": 0.5650831460952759, "learning_rate": 1.5136505860579935e-05, "loss": 0.5524, "step": 21442 }, { "epoch": 0.6587104107148343, "grad_norm": 0.3627931773662567, "learning_rate": 1.5136091202887438e-05, "loss": 0.6128, "step": 21443 }, { "epoch": 0.6587411298497834, "grad_norm": 0.36739397048950195, "learning_rate": 1.5135676533199237e-05, "loss": 0.5079, "step": 21444 }, { "epoch": 0.6587718489847326, "grad_norm": 0.37765148282051086, "learning_rate": 1.5135261851516307e-05, "loss": 0.6047, "step": 21445 }, { "epoch": 0.6588025681196817, "grad_norm": 0.37004679441452026, "learning_rate": 1.5134847157839616e-05, "loss": 0.6279, "step": 21446 }, { "epoch": 0.6588332872546309, "grad_norm": 0.3430337905883789, "learning_rate": 1.513443245217013e-05, "loss": 0.4933, "step": 21447 }, { "epoch": 0.6588640063895801, "grad_norm": 0.34481894969940186, "learning_rate": 1.513401773450882e-05, "loss": 0.5713, "step": 21448 }, { "epoch": 0.6588947255245292, "grad_norm": 0.34981468319892883, "learning_rate": 1.513360300485665e-05, "loss": 0.6, "step": 21449 }, { "epoch": 0.6589254446594784, "grad_norm": 0.38890519738197327, "learning_rate": 1.5133188263214595e-05, "loss": 0.5689, "step": 21450 }, { "epoch": 0.6589561637944276, "grad_norm": 0.36562806367874146, "learning_rate": 1.5132773509583618e-05, "loss": 0.5704, "step": 21451 }, { "epoch": 0.6589868829293767, "grad_norm": 0.3667706549167633, "learning_rate": 1.513235874396469e-05, "loss": 0.5444, "step": 21452 }, { "epoch": 0.6590176020643259, "grad_norm": 0.32318028807640076, "learning_rate": 1.5131943966358779e-05, "loss": 0.5362, "step": 21453 }, { "epoch": 0.659048321199275, "grad_norm": 0.33237916231155396, "learning_rate": 1.5131529176766857e-05, "loss": 0.6092, "step": 21454 }, { "epoch": 0.6590790403342242, "grad_norm": 0.35832616686820984, "learning_rate": 1.513111437518989e-05, "loss": 0.5556, "step": 21455 }, { "epoch": 0.6591097594691734, "grad_norm": 0.320765882730484, "learning_rate": 1.5130699561628848e-05, "loss": 0.5916, "step": 21456 }, { "epoch": 0.6591404786041225, "grad_norm": 0.35263916850090027, "learning_rate": 1.5130284736084693e-05, "loss": 0.5543, "step": 21457 }, { "epoch": 0.6591711977390716, "grad_norm": 0.32966575026512146, "learning_rate": 1.5129869898558406e-05, "loss": 0.4536, "step": 21458 }, { "epoch": 0.6592019168740209, "grad_norm": 0.377752423286438, "learning_rate": 1.512945504905095e-05, "loss": 0.618, "step": 21459 }, { "epoch": 0.65923263600897, "grad_norm": 0.32686948776245117, "learning_rate": 1.512904018756329e-05, "loss": 0.4796, "step": 21460 }, { "epoch": 0.6592633551439191, "grad_norm": 0.3874197006225586, "learning_rate": 1.5128625314096396e-05, "loss": 0.5212, "step": 21461 }, { "epoch": 0.6592940742788683, "grad_norm": 0.32616671919822693, "learning_rate": 1.5128210428651243e-05, "loss": 0.5218, "step": 21462 }, { "epoch": 0.6593247934138174, "grad_norm": 0.36737462878227234, "learning_rate": 1.5127795531228796e-05, "loss": 0.5372, "step": 21463 }, { "epoch": 0.6593555125487667, "grad_norm": 0.4097331166267395, "learning_rate": 1.5127380621830026e-05, "loss": 0.585, "step": 21464 }, { "epoch": 0.6593862316837158, "grad_norm": 0.3632565140724182, "learning_rate": 1.5126965700455895e-05, "loss": 0.5499, "step": 21465 }, { "epoch": 0.6594169508186649, "grad_norm": 0.4267031252384186, "learning_rate": 1.5126550767107384e-05, "loss": 0.5868, "step": 21466 }, { "epoch": 0.6594476699536141, "grad_norm": 0.5180332660675049, "learning_rate": 1.512613582178545e-05, "loss": 0.5838, "step": 21467 }, { "epoch": 0.6594783890885633, "grad_norm": 0.32962167263031006, "learning_rate": 1.5125720864491072e-05, "loss": 0.4804, "step": 21468 }, { "epoch": 0.6595091082235124, "grad_norm": 0.36834126710891724, "learning_rate": 1.5125305895225215e-05, "loss": 0.5709, "step": 21469 }, { "epoch": 0.6595398273584616, "grad_norm": 0.3667164146900177, "learning_rate": 1.5124890913988846e-05, "loss": 0.5428, "step": 21470 }, { "epoch": 0.6595705464934107, "grad_norm": 0.44555899500846863, "learning_rate": 1.5124475920782934e-05, "loss": 0.5359, "step": 21471 }, { "epoch": 0.6596012656283599, "grad_norm": 0.36030012369155884, "learning_rate": 1.5124060915608455e-05, "loss": 0.4663, "step": 21472 }, { "epoch": 0.6596319847633091, "grad_norm": 0.3763372600078583, "learning_rate": 1.5123645898466372e-05, "loss": 0.5758, "step": 21473 }, { "epoch": 0.6596627038982582, "grad_norm": 0.34288156032562256, "learning_rate": 1.5123230869357661e-05, "loss": 0.537, "step": 21474 }, { "epoch": 0.6596934230332074, "grad_norm": 0.33631911873817444, "learning_rate": 1.5122815828283281e-05, "loss": 0.559, "step": 21475 }, { "epoch": 0.6597241421681566, "grad_norm": 0.4178950786590576, "learning_rate": 1.5122400775244207e-05, "loss": 0.5422, "step": 21476 }, { "epoch": 0.6597548613031057, "grad_norm": 0.5344930291175842, "learning_rate": 1.5121985710241411e-05, "loss": 0.5794, "step": 21477 }, { "epoch": 0.6597855804380549, "grad_norm": 0.3561966121196747, "learning_rate": 1.5121570633275857e-05, "loss": 0.5308, "step": 21478 }, { "epoch": 0.659816299573004, "grad_norm": 0.3865406811237335, "learning_rate": 1.5121155544348521e-05, "loss": 0.5534, "step": 21479 }, { "epoch": 0.6598470187079531, "grad_norm": 0.3584230840206146, "learning_rate": 1.5120740443460364e-05, "loss": 0.5832, "step": 21480 }, { "epoch": 0.6598777378429024, "grad_norm": 0.3842792510986328, "learning_rate": 1.512032533061236e-05, "loss": 0.4825, "step": 21481 }, { "epoch": 0.6599084569778515, "grad_norm": 0.3396380841732025, "learning_rate": 1.5119910205805482e-05, "loss": 0.5423, "step": 21482 }, { "epoch": 0.6599391761128006, "grad_norm": 0.3795052170753479, "learning_rate": 1.5119495069040693e-05, "loss": 0.5786, "step": 21483 }, { "epoch": 0.6599698952477498, "grad_norm": 0.3311948776245117, "learning_rate": 1.5119079920318968e-05, "loss": 0.5828, "step": 21484 }, { "epoch": 0.660000614382699, "grad_norm": 0.34203943610191345, "learning_rate": 1.5118664759641273e-05, "loss": 0.5361, "step": 21485 }, { "epoch": 0.6600313335176481, "grad_norm": 0.3582075834274292, "learning_rate": 1.5118249587008582e-05, "loss": 0.5407, "step": 21486 }, { "epoch": 0.6600620526525973, "grad_norm": 0.34971901774406433, "learning_rate": 1.5117834402421858e-05, "loss": 0.5561, "step": 21487 }, { "epoch": 0.6600927717875464, "grad_norm": 0.476724773645401, "learning_rate": 1.5117419205882075e-05, "loss": 0.5831, "step": 21488 }, { "epoch": 0.6601234909224957, "grad_norm": 0.40123459696769714, "learning_rate": 1.51170039973902e-05, "loss": 0.5032, "step": 21489 }, { "epoch": 0.6601542100574448, "grad_norm": 0.3576597571372986, "learning_rate": 1.511658877694721e-05, "loss": 0.5312, "step": 21490 }, { "epoch": 0.6601849291923939, "grad_norm": 0.3642289638519287, "learning_rate": 1.5116173544554067e-05, "loss": 0.5875, "step": 21491 }, { "epoch": 0.6602156483273431, "grad_norm": 0.32313698530197144, "learning_rate": 1.5115758300211743e-05, "loss": 0.4595, "step": 21492 }, { "epoch": 0.6602463674622923, "grad_norm": 0.3468239903450012, "learning_rate": 1.5115343043921211e-05, "loss": 0.632, "step": 21493 }, { "epoch": 0.6602770865972414, "grad_norm": 0.34924691915512085, "learning_rate": 1.5114927775683432e-05, "loss": 0.6122, "step": 21494 }, { "epoch": 0.6603078057321906, "grad_norm": 0.3270723521709442, "learning_rate": 1.5114512495499387e-05, "loss": 0.5633, "step": 21495 }, { "epoch": 0.6603385248671397, "grad_norm": 0.3391047716140747, "learning_rate": 1.5114097203370039e-05, "loss": 0.6022, "step": 21496 }, { "epoch": 0.6603692440020889, "grad_norm": 0.6554144024848938, "learning_rate": 1.5113681899296361e-05, "loss": 0.5499, "step": 21497 }, { "epoch": 0.6603999631370381, "grad_norm": 0.3462245464324951, "learning_rate": 1.5113266583279322e-05, "loss": 0.5023, "step": 21498 }, { "epoch": 0.6604306822719872, "grad_norm": 0.6329216361045837, "learning_rate": 1.511285125531989e-05, "loss": 0.4611, "step": 21499 }, { "epoch": 0.6604614014069364, "grad_norm": 0.3910367786884308, "learning_rate": 1.5112435915419037e-05, "loss": 0.5845, "step": 21500 }, { "epoch": 0.6604921205418856, "grad_norm": 0.33518749475479126, "learning_rate": 1.5112020563577732e-05, "loss": 0.5734, "step": 21501 }, { "epoch": 0.6605228396768347, "grad_norm": 0.3216128945350647, "learning_rate": 1.5111605199796949e-05, "loss": 0.5047, "step": 21502 }, { "epoch": 0.6605535588117839, "grad_norm": 0.36419418454170227, "learning_rate": 1.5111189824077652e-05, "loss": 0.5297, "step": 21503 }, { "epoch": 0.660584277946733, "grad_norm": 0.34529975056648254, "learning_rate": 1.5110774436420817e-05, "loss": 0.6229, "step": 21504 }, { "epoch": 0.6606149970816821, "grad_norm": 0.3820805549621582, "learning_rate": 1.5110359036827408e-05, "loss": 0.6057, "step": 21505 }, { "epoch": 0.6606457162166314, "grad_norm": 0.3473156988620758, "learning_rate": 1.5109943625298402e-05, "loss": 0.5177, "step": 21506 }, { "epoch": 0.6606764353515805, "grad_norm": 0.4181376099586487, "learning_rate": 1.5109528201834765e-05, "loss": 0.5444, "step": 21507 }, { "epoch": 0.6607071544865296, "grad_norm": 0.3636975586414337, "learning_rate": 1.5109112766437468e-05, "loss": 0.6047, "step": 21508 }, { "epoch": 0.6607378736214788, "grad_norm": 0.3657945692539215, "learning_rate": 1.510869731910748e-05, "loss": 0.5486, "step": 21509 }, { "epoch": 0.660768592756428, "grad_norm": 0.45839935541152954, "learning_rate": 1.5108281859845774e-05, "loss": 0.4623, "step": 21510 }, { "epoch": 0.6607993118913771, "grad_norm": 0.3842880129814148, "learning_rate": 1.5107866388653318e-05, "loss": 0.5302, "step": 21511 }, { "epoch": 0.6608300310263263, "grad_norm": 0.352143257856369, "learning_rate": 1.5107450905531084e-05, "loss": 0.6085, "step": 21512 }, { "epoch": 0.6608607501612754, "grad_norm": 0.3804492950439453, "learning_rate": 1.5107035410480042e-05, "loss": 0.5967, "step": 21513 }, { "epoch": 0.6608914692962247, "grad_norm": 0.3281829059123993, "learning_rate": 1.510661990350116e-05, "loss": 0.6196, "step": 21514 }, { "epoch": 0.6609221884311738, "grad_norm": 0.6636606454849243, "learning_rate": 1.5106204384595415e-05, "loss": 0.5403, "step": 21515 }, { "epoch": 0.6609529075661229, "grad_norm": 0.34422165155410767, "learning_rate": 1.510578885376377e-05, "loss": 0.5328, "step": 21516 }, { "epoch": 0.6609836267010721, "grad_norm": 0.644050121307373, "learning_rate": 1.5105373311007199e-05, "loss": 0.6059, "step": 21517 }, { "epoch": 0.6610143458360213, "grad_norm": 0.35089996457099915, "learning_rate": 1.510495775632667e-05, "loss": 0.5824, "step": 21518 }, { "epoch": 0.6610450649709704, "grad_norm": 0.40813079476356506, "learning_rate": 1.5104542189723157e-05, "loss": 0.4322, "step": 21519 }, { "epoch": 0.6610757841059196, "grad_norm": 0.3864327371120453, "learning_rate": 1.5104126611197632e-05, "loss": 0.6251, "step": 21520 }, { "epoch": 0.6611065032408687, "grad_norm": 0.34733736515045166, "learning_rate": 1.5103711020751058e-05, "loss": 0.6066, "step": 21521 }, { "epoch": 0.6611372223758178, "grad_norm": 0.3628052771091461, "learning_rate": 1.5103295418384414e-05, "loss": 0.5451, "step": 21522 }, { "epoch": 0.6611679415107671, "grad_norm": 0.3566264510154724, "learning_rate": 1.5102879804098665e-05, "loss": 0.5392, "step": 21523 }, { "epoch": 0.6611986606457162, "grad_norm": 0.38928407430648804, "learning_rate": 1.5102464177894786e-05, "loss": 0.5276, "step": 21524 }, { "epoch": 0.6612293797806654, "grad_norm": 0.40123140811920166, "learning_rate": 1.5102048539773743e-05, "loss": 0.5468, "step": 21525 }, { "epoch": 0.6612600989156145, "grad_norm": 0.3632533848285675, "learning_rate": 1.5101632889736514e-05, "loss": 0.5972, "step": 21526 }, { "epoch": 0.6612908180505637, "grad_norm": 0.3901345133781433, "learning_rate": 1.510121722778406e-05, "loss": 0.5456, "step": 21527 }, { "epoch": 0.6613215371855129, "grad_norm": 0.4128034710884094, "learning_rate": 1.5100801553917359e-05, "loss": 0.5467, "step": 21528 }, { "epoch": 0.661352256320462, "grad_norm": 0.3460666537284851, "learning_rate": 1.5100385868137379e-05, "loss": 0.5532, "step": 21529 }, { "epoch": 0.6613829754554111, "grad_norm": 0.3721611797809601, "learning_rate": 1.5099970170445093e-05, "loss": 0.5464, "step": 21530 }, { "epoch": 0.6614136945903604, "grad_norm": 0.38341841101646423, "learning_rate": 1.509955446084147e-05, "loss": 0.599, "step": 21531 }, { "epoch": 0.6614444137253095, "grad_norm": 0.3615029454231262, "learning_rate": 1.5099138739327482e-05, "loss": 0.5446, "step": 21532 }, { "epoch": 0.6614751328602586, "grad_norm": 0.3712552785873413, "learning_rate": 1.5098723005904097e-05, "loss": 0.5463, "step": 21533 }, { "epoch": 0.6615058519952078, "grad_norm": 0.3422149121761322, "learning_rate": 1.509830726057229e-05, "loss": 0.5177, "step": 21534 }, { "epoch": 0.661536571130157, "grad_norm": 0.33942079544067383, "learning_rate": 1.5097891503333029e-05, "loss": 0.5657, "step": 21535 }, { "epoch": 0.6615672902651062, "grad_norm": 0.36343878507614136, "learning_rate": 1.5097475734187288e-05, "loss": 0.5677, "step": 21536 }, { "epoch": 0.6615980094000553, "grad_norm": 0.368473082780838, "learning_rate": 1.5097059953136036e-05, "loss": 0.5664, "step": 21537 }, { "epoch": 0.6616287285350044, "grad_norm": 0.3874472379684448, "learning_rate": 1.5096644160180245e-05, "loss": 0.5684, "step": 21538 }, { "epoch": 0.6616594476699537, "grad_norm": 0.4110534191131592, "learning_rate": 1.5096228355320883e-05, "loss": 0.5265, "step": 21539 }, { "epoch": 0.6616901668049028, "grad_norm": 0.35469913482666016, "learning_rate": 1.5095812538558926e-05, "loss": 0.5981, "step": 21540 }, { "epoch": 0.6617208859398519, "grad_norm": 0.3796463906764984, "learning_rate": 1.5095396709895342e-05, "loss": 0.5732, "step": 21541 }, { "epoch": 0.6617516050748011, "grad_norm": 0.38311338424682617, "learning_rate": 1.5094980869331104e-05, "loss": 0.5554, "step": 21542 }, { "epoch": 0.6617823242097503, "grad_norm": 0.3557189106941223, "learning_rate": 1.5094565016867183e-05, "loss": 0.5043, "step": 21543 }, { "epoch": 0.6618130433446994, "grad_norm": 0.352128267288208, "learning_rate": 1.5094149152504547e-05, "loss": 0.5257, "step": 21544 }, { "epoch": 0.6618437624796486, "grad_norm": 0.5612567663192749, "learning_rate": 1.5093733276244174e-05, "loss": 0.5631, "step": 21545 }, { "epoch": 0.6618744816145977, "grad_norm": 0.36795616149902344, "learning_rate": 1.5093317388087028e-05, "loss": 0.545, "step": 21546 }, { "epoch": 0.6619052007495468, "grad_norm": 0.35727813839912415, "learning_rate": 1.5092901488034084e-05, "loss": 0.5668, "step": 21547 }, { "epoch": 0.6619359198844961, "grad_norm": 0.3263242244720459, "learning_rate": 1.5092485576086314e-05, "loss": 0.4948, "step": 21548 }, { "epoch": 0.6619666390194452, "grad_norm": 0.3677486777305603, "learning_rate": 1.5092069652244688e-05, "loss": 0.4949, "step": 21549 }, { "epoch": 0.6619973581543944, "grad_norm": 0.3929874002933502, "learning_rate": 1.5091653716510176e-05, "loss": 0.5939, "step": 21550 }, { "epoch": 0.6620280772893435, "grad_norm": 0.37451937794685364, "learning_rate": 1.5091237768883753e-05, "loss": 0.5494, "step": 21551 }, { "epoch": 0.6620587964242927, "grad_norm": 0.3882429301738739, "learning_rate": 1.5090821809366387e-05, "loss": 0.6649, "step": 21552 }, { "epoch": 0.6620895155592419, "grad_norm": 0.35535311698913574, "learning_rate": 1.5090405837959052e-05, "loss": 0.5668, "step": 21553 }, { "epoch": 0.662120234694191, "grad_norm": 0.383810818195343, "learning_rate": 1.508998985466272e-05, "loss": 0.6452, "step": 21554 }, { "epoch": 0.6621509538291401, "grad_norm": 0.36246246099472046, "learning_rate": 1.5089573859478356e-05, "loss": 0.6017, "step": 21555 }, { "epoch": 0.6621816729640894, "grad_norm": 0.3712989389896393, "learning_rate": 1.5089157852406945e-05, "loss": 0.5307, "step": 21556 }, { "epoch": 0.6622123920990385, "grad_norm": 0.333763986825943, "learning_rate": 1.5088741833449443e-05, "loss": 0.5459, "step": 21557 }, { "epoch": 0.6622431112339876, "grad_norm": 0.3229312598705292, "learning_rate": 1.5088325802606836e-05, "loss": 0.5292, "step": 21558 }, { "epoch": 0.6622738303689368, "grad_norm": 0.3811585009098053, "learning_rate": 1.5087909759880082e-05, "loss": 0.5832, "step": 21559 }, { "epoch": 0.662304549503886, "grad_norm": 0.3944050371646881, "learning_rate": 1.5087493705270162e-05, "loss": 0.554, "step": 21560 }, { "epoch": 0.6623352686388352, "grad_norm": 0.33598533272743225, "learning_rate": 1.5087077638778044e-05, "loss": 0.6026, "step": 21561 }, { "epoch": 0.6623659877737843, "grad_norm": 0.37476402521133423, "learning_rate": 1.5086661560404705e-05, "loss": 0.6106, "step": 21562 }, { "epoch": 0.6623967069087334, "grad_norm": 0.35951852798461914, "learning_rate": 1.5086245470151114e-05, "loss": 0.6211, "step": 21563 }, { "epoch": 0.6624274260436827, "grad_norm": 0.357248455286026, "learning_rate": 1.5085829368018234e-05, "loss": 0.5287, "step": 21564 }, { "epoch": 0.6624581451786318, "grad_norm": 0.3782288730144501, "learning_rate": 1.5085413254007048e-05, "loss": 0.5353, "step": 21565 }, { "epoch": 0.6624888643135809, "grad_norm": 0.3370269238948822, "learning_rate": 1.5084997128118523e-05, "loss": 0.5482, "step": 21566 }, { "epoch": 0.6625195834485301, "grad_norm": 0.3584156930446625, "learning_rate": 1.5084580990353637e-05, "loss": 0.5758, "step": 21567 }, { "epoch": 0.6625503025834792, "grad_norm": 0.3736041486263275, "learning_rate": 1.5084164840713352e-05, "loss": 0.5827, "step": 21568 }, { "epoch": 0.6625810217184284, "grad_norm": 0.412290096282959, "learning_rate": 1.5083748679198645e-05, "loss": 0.617, "step": 21569 }, { "epoch": 0.6626117408533776, "grad_norm": 0.33453238010406494, "learning_rate": 1.5083332505810489e-05, "loss": 0.5543, "step": 21570 }, { "epoch": 0.6626424599883267, "grad_norm": 0.355253666639328, "learning_rate": 1.5082916320549856e-05, "loss": 0.5821, "step": 21571 }, { "epoch": 0.6626731791232758, "grad_norm": 0.3291566073894501, "learning_rate": 1.5082500123417713e-05, "loss": 0.506, "step": 21572 }, { "epoch": 0.6627038982582251, "grad_norm": 0.3212006688117981, "learning_rate": 1.508208391441504e-05, "loss": 0.5476, "step": 21573 }, { "epoch": 0.6627346173931742, "grad_norm": 0.3448812663555145, "learning_rate": 1.5081667693542805e-05, "loss": 0.5605, "step": 21574 }, { "epoch": 0.6627653365281234, "grad_norm": 0.41192397475242615, "learning_rate": 1.5081251460801976e-05, "loss": 0.576, "step": 21575 }, { "epoch": 0.6627960556630725, "grad_norm": 0.349031001329422, "learning_rate": 1.5080835216193534e-05, "loss": 0.5835, "step": 21576 }, { "epoch": 0.6628267747980217, "grad_norm": 0.42823368310928345, "learning_rate": 1.5080418959718444e-05, "loss": 0.6146, "step": 21577 }, { "epoch": 0.6628574939329709, "grad_norm": 0.36819255352020264, "learning_rate": 1.5080002691377682e-05, "loss": 0.5851, "step": 21578 }, { "epoch": 0.66288821306792, "grad_norm": 0.34732291102409363, "learning_rate": 1.507958641117222e-05, "loss": 0.5752, "step": 21579 }, { "epoch": 0.6629189322028691, "grad_norm": 0.35374489426612854, "learning_rate": 1.5079170119103028e-05, "loss": 0.5308, "step": 21580 }, { "epoch": 0.6629496513378184, "grad_norm": 0.4164109528064728, "learning_rate": 1.5078753815171081e-05, "loss": 0.551, "step": 21581 }, { "epoch": 0.6629803704727675, "grad_norm": 0.3827609717845917, "learning_rate": 1.5078337499377345e-05, "loss": 0.6196, "step": 21582 }, { "epoch": 0.6630110896077166, "grad_norm": 0.4115902781486511, "learning_rate": 1.5077921171722802e-05, "loss": 0.5881, "step": 21583 }, { "epoch": 0.6630418087426658, "grad_norm": 0.3617217242717743, "learning_rate": 1.5077504832208419e-05, "loss": 0.6296, "step": 21584 }, { "epoch": 0.663072527877615, "grad_norm": 0.37603461742401123, "learning_rate": 1.5077088480835165e-05, "loss": 0.5163, "step": 21585 }, { "epoch": 0.6631032470125642, "grad_norm": 0.6554247736930847, "learning_rate": 1.5076672117604019e-05, "loss": 0.549, "step": 21586 }, { "epoch": 0.6631339661475133, "grad_norm": 0.36480477452278137, "learning_rate": 1.5076255742515953e-05, "loss": 0.5725, "step": 21587 }, { "epoch": 0.6631646852824624, "grad_norm": 0.3351720869541168, "learning_rate": 1.5075839355571931e-05, "loss": 0.5371, "step": 21588 }, { "epoch": 0.6631954044174116, "grad_norm": 0.320401668548584, "learning_rate": 1.5075422956772938e-05, "loss": 0.5172, "step": 21589 }, { "epoch": 0.6632261235523608, "grad_norm": 0.33446434140205383, "learning_rate": 1.5075006546119934e-05, "loss": 0.5315, "step": 21590 }, { "epoch": 0.6632568426873099, "grad_norm": 0.35716623067855835, "learning_rate": 1.5074590123613902e-05, "loss": 0.5455, "step": 21591 }, { "epoch": 0.6632875618222591, "grad_norm": 0.4136248528957367, "learning_rate": 1.5074173689255812e-05, "loss": 0.5756, "step": 21592 }, { "epoch": 0.6633182809572082, "grad_norm": 0.34331175684928894, "learning_rate": 1.5073757243046631e-05, "loss": 0.5459, "step": 21593 }, { "epoch": 0.6633490000921574, "grad_norm": 0.39397427439689636, "learning_rate": 1.5073340784987338e-05, "loss": 0.5662, "step": 21594 }, { "epoch": 0.6633797192271066, "grad_norm": 0.3501124382019043, "learning_rate": 1.50729243150789e-05, "loss": 0.4766, "step": 21595 }, { "epoch": 0.6634104383620557, "grad_norm": 0.5666484832763672, "learning_rate": 1.5072507833322299e-05, "loss": 0.5312, "step": 21596 }, { "epoch": 0.6634411574970048, "grad_norm": 0.3735029697418213, "learning_rate": 1.5072091339718497e-05, "loss": 0.6209, "step": 21597 }, { "epoch": 0.6634718766319541, "grad_norm": 0.3322620689868927, "learning_rate": 1.5071674834268475e-05, "loss": 0.5336, "step": 21598 }, { "epoch": 0.6635025957669032, "grad_norm": 0.3423052132129669, "learning_rate": 1.50712583169732e-05, "loss": 0.5875, "step": 21599 }, { "epoch": 0.6635333149018524, "grad_norm": 0.3563271164894104, "learning_rate": 1.5070841787833645e-05, "loss": 0.5209, "step": 21600 }, { "epoch": 0.6635640340368015, "grad_norm": 0.33694881200790405, "learning_rate": 1.5070425246850789e-05, "loss": 0.5113, "step": 21601 }, { "epoch": 0.6635947531717507, "grad_norm": 0.33374300599098206, "learning_rate": 1.5070008694025595e-05, "loss": 0.5505, "step": 21602 }, { "epoch": 0.6636254723066999, "grad_norm": 0.3436729311943054, "learning_rate": 1.5069592129359048e-05, "loss": 0.533, "step": 21603 }, { "epoch": 0.663656191441649, "grad_norm": 0.36643296480178833, "learning_rate": 1.5069175552852109e-05, "loss": 0.6328, "step": 21604 }, { "epoch": 0.6636869105765981, "grad_norm": 0.35808756947517395, "learning_rate": 1.506875896450576e-05, "loss": 0.5481, "step": 21605 }, { "epoch": 0.6637176297115474, "grad_norm": 0.3508319854736328, "learning_rate": 1.506834236432097e-05, "loss": 0.5693, "step": 21606 }, { "epoch": 0.6637483488464965, "grad_norm": 0.3835712671279907, "learning_rate": 1.5067925752298713e-05, "loss": 0.5093, "step": 21607 }, { "epoch": 0.6637790679814456, "grad_norm": 0.3622341752052307, "learning_rate": 1.5067509128439962e-05, "loss": 0.6029, "step": 21608 }, { "epoch": 0.6638097871163948, "grad_norm": 0.34362319111824036, "learning_rate": 1.5067092492745686e-05, "loss": 0.545, "step": 21609 }, { "epoch": 0.6638405062513439, "grad_norm": 0.3492479622364044, "learning_rate": 1.5066675845216862e-05, "loss": 0.541, "step": 21610 }, { "epoch": 0.6638712253862932, "grad_norm": 0.337037056684494, "learning_rate": 1.5066259185854465e-05, "loss": 0.5497, "step": 21611 }, { "epoch": 0.6639019445212423, "grad_norm": 0.4026355743408203, "learning_rate": 1.5065842514659466e-05, "loss": 0.5216, "step": 21612 }, { "epoch": 0.6639326636561914, "grad_norm": 0.36301618814468384, "learning_rate": 1.5065425831632836e-05, "loss": 0.5869, "step": 21613 }, { "epoch": 0.6639633827911406, "grad_norm": 0.34598711133003235, "learning_rate": 1.5065009136775552e-05, "loss": 0.5923, "step": 21614 }, { "epoch": 0.6639941019260898, "grad_norm": 0.3762885630130768, "learning_rate": 1.5064592430088585e-05, "loss": 0.6073, "step": 21615 }, { "epoch": 0.6640248210610389, "grad_norm": 0.38768115639686584, "learning_rate": 1.5064175711572908e-05, "loss": 0.545, "step": 21616 }, { "epoch": 0.6640555401959881, "grad_norm": 0.3569813370704651, "learning_rate": 1.5063758981229497e-05, "loss": 0.4813, "step": 21617 }, { "epoch": 0.6640862593309372, "grad_norm": 0.40241539478302, "learning_rate": 1.506334223905932e-05, "loss": 0.6126, "step": 21618 }, { "epoch": 0.6641169784658864, "grad_norm": 0.3639895021915436, "learning_rate": 1.5062925485063357e-05, "loss": 0.5452, "step": 21619 }, { "epoch": 0.6641476976008356, "grad_norm": 0.3397344946861267, "learning_rate": 1.5062508719242574e-05, "loss": 0.5878, "step": 21620 }, { "epoch": 0.6641784167357847, "grad_norm": 0.4240603744983673, "learning_rate": 1.5062091941597952e-05, "loss": 0.5348, "step": 21621 }, { "epoch": 0.6642091358707339, "grad_norm": 0.3661581575870514, "learning_rate": 1.5061675152130456e-05, "loss": 0.6315, "step": 21622 }, { "epoch": 0.664239855005683, "grad_norm": 0.3687221109867096, "learning_rate": 1.5061258350841073e-05, "loss": 0.5309, "step": 21623 }, { "epoch": 0.6642705741406322, "grad_norm": 0.3438507914543152, "learning_rate": 1.5060841537730759e-05, "loss": 0.5149, "step": 21624 }, { "epoch": 0.6643012932755814, "grad_norm": 0.39039236307144165, "learning_rate": 1.5060424712800502e-05, "loss": 0.4969, "step": 21625 }, { "epoch": 0.6643320124105305, "grad_norm": 0.34760305285453796, "learning_rate": 1.5060007876051265e-05, "loss": 0.5042, "step": 21626 }, { "epoch": 0.6643627315454796, "grad_norm": 0.3589724004268646, "learning_rate": 1.5059591027484026e-05, "loss": 0.5096, "step": 21627 }, { "epoch": 0.6643934506804289, "grad_norm": 0.36252009868621826, "learning_rate": 1.5059174167099762e-05, "loss": 0.6162, "step": 21628 }, { "epoch": 0.664424169815378, "grad_norm": 0.4116900563240051, "learning_rate": 1.5058757294899441e-05, "loss": 0.5358, "step": 21629 }, { "epoch": 0.6644548889503271, "grad_norm": 0.4957659840583801, "learning_rate": 1.505834041088404e-05, "loss": 0.5205, "step": 21630 }, { "epoch": 0.6644856080852763, "grad_norm": 0.3651176989078522, "learning_rate": 1.5057923515054533e-05, "loss": 0.5058, "step": 21631 }, { "epoch": 0.6645163272202255, "grad_norm": 0.4462648630142212, "learning_rate": 1.5057506607411893e-05, "loss": 0.5434, "step": 21632 }, { "epoch": 0.6645470463551746, "grad_norm": 0.31999707221984863, "learning_rate": 1.5057089687957092e-05, "loss": 0.5257, "step": 21633 }, { "epoch": 0.6645777654901238, "grad_norm": 0.3676610291004181, "learning_rate": 1.5056672756691103e-05, "loss": 0.5752, "step": 21634 }, { "epoch": 0.6646084846250729, "grad_norm": 0.6018003225326538, "learning_rate": 1.5056255813614904e-05, "loss": 0.6288, "step": 21635 }, { "epoch": 0.6646392037600222, "grad_norm": 0.4123040437698364, "learning_rate": 1.5055838858729464e-05, "loss": 0.5427, "step": 21636 }, { "epoch": 0.6646699228949713, "grad_norm": 0.3513493537902832, "learning_rate": 1.5055421892035763e-05, "loss": 0.4983, "step": 21637 }, { "epoch": 0.6647006420299204, "grad_norm": 0.5258155465126038, "learning_rate": 1.5055004913534768e-05, "loss": 0.4856, "step": 21638 }, { "epoch": 0.6647313611648696, "grad_norm": 0.679106593132019, "learning_rate": 1.5054587923227456e-05, "loss": 0.5823, "step": 21639 }, { "epoch": 0.6647620802998188, "grad_norm": 0.38326624035835266, "learning_rate": 1.5054170921114801e-05, "loss": 0.5694, "step": 21640 }, { "epoch": 0.6647927994347679, "grad_norm": 0.3624212443828583, "learning_rate": 1.5053753907197778e-05, "loss": 0.5434, "step": 21641 }, { "epoch": 0.6648235185697171, "grad_norm": 0.38261833786964417, "learning_rate": 1.5053336881477361e-05, "loss": 0.533, "step": 21642 }, { "epoch": 0.6648542377046662, "grad_norm": 0.3750227391719818, "learning_rate": 1.5052919843954521e-05, "loss": 0.6285, "step": 21643 }, { "epoch": 0.6648849568396153, "grad_norm": 0.3817739486694336, "learning_rate": 1.5052502794630233e-05, "loss": 0.6018, "step": 21644 }, { "epoch": 0.6649156759745646, "grad_norm": 0.33327579498291016, "learning_rate": 1.5052085733505472e-05, "loss": 0.4935, "step": 21645 }, { "epoch": 0.6649463951095137, "grad_norm": 0.37058955430984497, "learning_rate": 1.5051668660581213e-05, "loss": 0.5271, "step": 21646 }, { "epoch": 0.6649771142444629, "grad_norm": 0.35666996240615845, "learning_rate": 1.5051251575858428e-05, "loss": 0.6061, "step": 21647 }, { "epoch": 0.665007833379412, "grad_norm": 0.3766808807849884, "learning_rate": 1.5050834479338093e-05, "loss": 0.581, "step": 21648 }, { "epoch": 0.6650385525143612, "grad_norm": 0.33393359184265137, "learning_rate": 1.5050417371021184e-05, "loss": 0.5227, "step": 21649 }, { "epoch": 0.6650692716493104, "grad_norm": 0.3431656062602997, "learning_rate": 1.5050000250908667e-05, "loss": 0.5618, "step": 21650 }, { "epoch": 0.6650999907842595, "grad_norm": 0.3742406368255615, "learning_rate": 1.5049583119001528e-05, "loss": 0.5448, "step": 21651 }, { "epoch": 0.6651307099192086, "grad_norm": 0.3461693227291107, "learning_rate": 1.5049165975300728e-05, "loss": 0.5487, "step": 21652 }, { "epoch": 0.6651614290541579, "grad_norm": 0.41576817631721497, "learning_rate": 1.5048748819807253e-05, "loss": 0.5659, "step": 21653 }, { "epoch": 0.665192148189107, "grad_norm": 0.3280407786369324, "learning_rate": 1.5048331652522071e-05, "loss": 0.5495, "step": 21654 }, { "epoch": 0.6652228673240561, "grad_norm": 0.33416685461997986, "learning_rate": 1.5047914473446157e-05, "loss": 0.5906, "step": 21655 }, { "epoch": 0.6652535864590053, "grad_norm": 0.3554842472076416, "learning_rate": 1.5047497282580487e-05, "loss": 0.6461, "step": 21656 }, { "epoch": 0.6652843055939545, "grad_norm": 0.34325146675109863, "learning_rate": 1.5047080079926035e-05, "loss": 0.4978, "step": 21657 }, { "epoch": 0.6653150247289036, "grad_norm": 0.4154335856437683, "learning_rate": 1.5046662865483775e-05, "loss": 0.5873, "step": 21658 }, { "epoch": 0.6653457438638528, "grad_norm": 0.35909712314605713, "learning_rate": 1.5046245639254684e-05, "loss": 0.5293, "step": 21659 }, { "epoch": 0.6653764629988019, "grad_norm": 0.409972220659256, "learning_rate": 1.504582840123973e-05, "loss": 0.6084, "step": 21660 }, { "epoch": 0.6654071821337512, "grad_norm": 0.36478105187416077, "learning_rate": 1.5045411151439892e-05, "loss": 0.5628, "step": 21661 }, { "epoch": 0.6654379012687003, "grad_norm": 0.3946358263492584, "learning_rate": 1.5044993889856145e-05, "loss": 0.7012, "step": 21662 }, { "epoch": 0.6654686204036494, "grad_norm": 0.3686845302581787, "learning_rate": 1.5044576616489462e-05, "loss": 0.5984, "step": 21663 }, { "epoch": 0.6654993395385986, "grad_norm": 0.3810825049877167, "learning_rate": 1.5044159331340815e-05, "loss": 0.6059, "step": 21664 }, { "epoch": 0.6655300586735478, "grad_norm": 0.35699236392974854, "learning_rate": 1.5043742034411186e-05, "loss": 0.509, "step": 21665 }, { "epoch": 0.6655607778084969, "grad_norm": 0.3459860682487488, "learning_rate": 1.5043324725701545e-05, "loss": 0.5643, "step": 21666 }, { "epoch": 0.6655914969434461, "grad_norm": 0.34433501958847046, "learning_rate": 1.5042907405212866e-05, "loss": 0.5793, "step": 21667 }, { "epoch": 0.6656222160783952, "grad_norm": 0.3607727885246277, "learning_rate": 1.5042490072946123e-05, "loss": 0.5596, "step": 21668 }, { "epoch": 0.6656529352133443, "grad_norm": 0.3364073634147644, "learning_rate": 1.5042072728902293e-05, "loss": 0.5173, "step": 21669 }, { "epoch": 0.6656836543482936, "grad_norm": 0.3266844153404236, "learning_rate": 1.5041655373082349e-05, "loss": 0.5471, "step": 21670 }, { "epoch": 0.6657143734832427, "grad_norm": 0.37523025274276733, "learning_rate": 1.5041238005487269e-05, "loss": 0.6243, "step": 21671 }, { "epoch": 0.6657450926181919, "grad_norm": 0.33527135848999023, "learning_rate": 1.5040820626118025e-05, "loss": 0.5947, "step": 21672 }, { "epoch": 0.665775811753141, "grad_norm": 0.3366093337535858, "learning_rate": 1.5040403234975594e-05, "loss": 0.5776, "step": 21673 }, { "epoch": 0.6658065308880902, "grad_norm": 0.33059191703796387, "learning_rate": 1.5039985832060946e-05, "loss": 0.4785, "step": 21674 }, { "epoch": 0.6658372500230394, "grad_norm": 0.39183685183525085, "learning_rate": 1.5039568417375061e-05, "loss": 0.5937, "step": 21675 }, { "epoch": 0.6658679691579885, "grad_norm": 0.4475761651992798, "learning_rate": 1.503915099091891e-05, "loss": 0.5392, "step": 21676 }, { "epoch": 0.6658986882929376, "grad_norm": 0.42780929803848267, "learning_rate": 1.5038733552693475e-05, "loss": 0.6717, "step": 21677 }, { "epoch": 0.6659294074278869, "grad_norm": 0.3453823924064636, "learning_rate": 1.503831610269972e-05, "loss": 0.6271, "step": 21678 }, { "epoch": 0.665960126562836, "grad_norm": 0.45835667848587036, "learning_rate": 1.5037898640938628e-05, "loss": 0.6258, "step": 21679 }, { "epoch": 0.6659908456977851, "grad_norm": 0.37534379959106445, "learning_rate": 1.5037481167411172e-05, "loss": 0.6343, "step": 21680 }, { "epoch": 0.6660215648327343, "grad_norm": 0.34359586238861084, "learning_rate": 1.5037063682118327e-05, "loss": 0.5571, "step": 21681 }, { "epoch": 0.6660522839676835, "grad_norm": 0.34556257724761963, "learning_rate": 1.5036646185061066e-05, "loss": 0.501, "step": 21682 }, { "epoch": 0.6660830031026326, "grad_norm": 0.33729276061058044, "learning_rate": 1.5036228676240368e-05, "loss": 0.5216, "step": 21683 }, { "epoch": 0.6661137222375818, "grad_norm": 0.32963991165161133, "learning_rate": 1.5035811155657208e-05, "loss": 0.4839, "step": 21684 }, { "epoch": 0.6661444413725309, "grad_norm": 0.38368701934814453, "learning_rate": 1.5035393623312558e-05, "loss": 0.6033, "step": 21685 }, { "epoch": 0.6661751605074802, "grad_norm": 0.40855297446250916, "learning_rate": 1.5034976079207393e-05, "loss": 0.6373, "step": 21686 }, { "epoch": 0.6662058796424293, "grad_norm": 0.37873756885528564, "learning_rate": 1.5034558523342693e-05, "loss": 0.5647, "step": 21687 }, { "epoch": 0.6662365987773784, "grad_norm": 0.3444276750087738, "learning_rate": 1.5034140955719426e-05, "loss": 0.4988, "step": 21688 }, { "epoch": 0.6662673179123276, "grad_norm": 0.3572183847427368, "learning_rate": 1.5033723376338574e-05, "loss": 0.5558, "step": 21689 }, { "epoch": 0.6662980370472767, "grad_norm": 0.36273282766342163, "learning_rate": 1.5033305785201107e-05, "loss": 0.488, "step": 21690 }, { "epoch": 0.6663287561822259, "grad_norm": 0.3687743842601776, "learning_rate": 1.5032888182308006e-05, "loss": 0.5985, "step": 21691 }, { "epoch": 0.6663594753171751, "grad_norm": 0.38204535841941833, "learning_rate": 1.503247056766024e-05, "loss": 0.546, "step": 21692 }, { "epoch": 0.6663901944521242, "grad_norm": 0.3436932861804962, "learning_rate": 1.5032052941258788e-05, "loss": 0.5472, "step": 21693 }, { "epoch": 0.6664209135870733, "grad_norm": 0.5076892375946045, "learning_rate": 1.5031635303104626e-05, "loss": 0.5907, "step": 21694 }, { "epoch": 0.6664516327220226, "grad_norm": 0.5274530053138733, "learning_rate": 1.5031217653198731e-05, "loss": 0.5656, "step": 21695 }, { "epoch": 0.6664823518569717, "grad_norm": 0.40041810274124146, "learning_rate": 1.5030799991542071e-05, "loss": 0.5624, "step": 21696 }, { "epoch": 0.6665130709919209, "grad_norm": 0.4410339295864105, "learning_rate": 1.5030382318135627e-05, "loss": 0.5284, "step": 21697 }, { "epoch": 0.66654379012687, "grad_norm": 0.33938995003700256, "learning_rate": 1.5029964632980378e-05, "loss": 0.5628, "step": 21698 }, { "epoch": 0.6665745092618192, "grad_norm": 0.33439287543296814, "learning_rate": 1.502954693607729e-05, "loss": 0.5518, "step": 21699 }, { "epoch": 0.6666052283967684, "grad_norm": 0.34746503829956055, "learning_rate": 1.5029129227427346e-05, "loss": 0.5578, "step": 21700 }, { "epoch": 0.6666359475317175, "grad_norm": 0.3382933437824249, "learning_rate": 1.502871150703152e-05, "loss": 0.4721, "step": 21701 }, { "epoch": 0.6666666666666666, "grad_norm": 0.3578824996948242, "learning_rate": 1.5028293774890788e-05, "loss": 0.5597, "step": 21702 }, { "epoch": 0.6666973858016159, "grad_norm": 0.4088318943977356, "learning_rate": 1.5027876031006124e-05, "loss": 0.5942, "step": 21703 }, { "epoch": 0.666728104936565, "grad_norm": 0.37355363368988037, "learning_rate": 1.5027458275378504e-05, "loss": 0.586, "step": 21704 }, { "epoch": 0.6667588240715141, "grad_norm": 0.3585800230503082, "learning_rate": 1.5027040508008903e-05, "loss": 0.5087, "step": 21705 }, { "epoch": 0.6667895432064633, "grad_norm": 0.33573850989341736, "learning_rate": 1.5026622728898298e-05, "loss": 0.4794, "step": 21706 }, { "epoch": 0.6668202623414125, "grad_norm": 0.3794483244419098, "learning_rate": 1.5026204938047669e-05, "loss": 0.5396, "step": 21707 }, { "epoch": 0.6668509814763616, "grad_norm": 0.4079286754131317, "learning_rate": 1.502578713545798e-05, "loss": 0.6279, "step": 21708 }, { "epoch": 0.6668817006113108, "grad_norm": 0.4086962044239044, "learning_rate": 1.5025369321130221e-05, "loss": 0.5956, "step": 21709 }, { "epoch": 0.6669124197462599, "grad_norm": 0.369299978017807, "learning_rate": 1.5024951495065354e-05, "loss": 0.5852, "step": 21710 }, { "epoch": 0.6669431388812092, "grad_norm": 0.33618709444999695, "learning_rate": 1.5024533657264369e-05, "loss": 0.5621, "step": 21711 }, { "epoch": 0.6669738580161583, "grad_norm": 0.3600922226905823, "learning_rate": 1.5024115807728232e-05, "loss": 0.5149, "step": 21712 }, { "epoch": 0.6670045771511074, "grad_norm": 0.3525899052619934, "learning_rate": 1.5023697946457919e-05, "loss": 0.4314, "step": 21713 }, { "epoch": 0.6670352962860566, "grad_norm": 0.370338499546051, "learning_rate": 1.5023280073454412e-05, "loss": 0.5703, "step": 21714 }, { "epoch": 0.6670660154210057, "grad_norm": 0.33940330147743225, "learning_rate": 1.502286218871868e-05, "loss": 0.4854, "step": 21715 }, { "epoch": 0.6670967345559549, "grad_norm": 0.3449198603630066, "learning_rate": 1.5022444292251707e-05, "loss": 0.5629, "step": 21716 }, { "epoch": 0.6671274536909041, "grad_norm": 0.34013885259628296, "learning_rate": 1.502202638405446e-05, "loss": 0.5578, "step": 21717 }, { "epoch": 0.6671581728258532, "grad_norm": 0.3264106214046478, "learning_rate": 1.5021608464127923e-05, "loss": 0.531, "step": 21718 }, { "epoch": 0.6671888919608023, "grad_norm": 0.4003428816795349, "learning_rate": 1.5021190532473068e-05, "loss": 0.5657, "step": 21719 }, { "epoch": 0.6672196110957516, "grad_norm": 0.38584426045417786, "learning_rate": 1.5020772589090871e-05, "loss": 0.5488, "step": 21720 }, { "epoch": 0.6672503302307007, "grad_norm": 0.37046703696250916, "learning_rate": 1.502035463398231e-05, "loss": 0.5607, "step": 21721 }, { "epoch": 0.6672810493656499, "grad_norm": 0.3687343895435333, "learning_rate": 1.501993666714836e-05, "loss": 0.5513, "step": 21722 }, { "epoch": 0.667311768500599, "grad_norm": 0.36429670453071594, "learning_rate": 1.5019518688589997e-05, "loss": 0.6065, "step": 21723 }, { "epoch": 0.6673424876355482, "grad_norm": 0.3768196403980255, "learning_rate": 1.5019100698308195e-05, "loss": 0.6075, "step": 21724 }, { "epoch": 0.6673732067704974, "grad_norm": 0.3622683882713318, "learning_rate": 1.5018682696303937e-05, "loss": 0.5044, "step": 21725 }, { "epoch": 0.6674039259054465, "grad_norm": 0.34609630703926086, "learning_rate": 1.5018264682578193e-05, "loss": 0.5593, "step": 21726 }, { "epoch": 0.6674346450403956, "grad_norm": 0.35423561930656433, "learning_rate": 1.5017846657131943e-05, "loss": 0.5165, "step": 21727 }, { "epoch": 0.6674653641753449, "grad_norm": 0.33571431040763855, "learning_rate": 1.5017428619966162e-05, "loss": 0.546, "step": 21728 }, { "epoch": 0.667496083310294, "grad_norm": 0.363196462392807, "learning_rate": 1.5017010571081825e-05, "loss": 0.5924, "step": 21729 }, { "epoch": 0.6675268024452431, "grad_norm": 0.3292766213417053, "learning_rate": 1.5016592510479907e-05, "loss": 0.5594, "step": 21730 }, { "epoch": 0.6675575215801923, "grad_norm": 0.3669489324092865, "learning_rate": 1.5016174438161391e-05, "loss": 0.5542, "step": 21731 }, { "epoch": 0.6675882407151414, "grad_norm": 0.3560146689414978, "learning_rate": 1.5015756354127247e-05, "loss": 0.521, "step": 21732 }, { "epoch": 0.6676189598500907, "grad_norm": 0.3333108425140381, "learning_rate": 1.5015338258378451e-05, "loss": 0.4795, "step": 21733 }, { "epoch": 0.6676496789850398, "grad_norm": 0.5607941746711731, "learning_rate": 1.5014920150915986e-05, "loss": 0.5856, "step": 21734 }, { "epoch": 0.6676803981199889, "grad_norm": 0.34721919894218445, "learning_rate": 1.5014502031740824e-05, "loss": 0.5259, "step": 21735 }, { "epoch": 0.6677111172549381, "grad_norm": 0.3745097219944, "learning_rate": 1.5014083900853943e-05, "loss": 0.5869, "step": 21736 }, { "epoch": 0.6677418363898873, "grad_norm": 0.3787437677383423, "learning_rate": 1.5013665758256318e-05, "loss": 0.6489, "step": 21737 }, { "epoch": 0.6677725555248364, "grad_norm": 0.35840171575546265, "learning_rate": 1.5013247603948924e-05, "loss": 0.537, "step": 21738 }, { "epoch": 0.6678032746597856, "grad_norm": 0.3575096130371094, "learning_rate": 1.5012829437932743e-05, "loss": 0.5817, "step": 21739 }, { "epoch": 0.6678339937947347, "grad_norm": 0.37597915530204773, "learning_rate": 1.5012411260208747e-05, "loss": 0.5285, "step": 21740 }, { "epoch": 0.6678647129296839, "grad_norm": 0.367623895406723, "learning_rate": 1.5011993070777918e-05, "loss": 0.4941, "step": 21741 }, { "epoch": 0.6678954320646331, "grad_norm": 0.35672321915626526, "learning_rate": 1.5011574869641225e-05, "loss": 0.6337, "step": 21742 }, { "epoch": 0.6679261511995822, "grad_norm": 0.3565579652786255, "learning_rate": 1.5011156656799653e-05, "loss": 0.5415, "step": 21743 }, { "epoch": 0.6679568703345313, "grad_norm": 0.33502480387687683, "learning_rate": 1.501073843225417e-05, "loss": 0.4422, "step": 21744 }, { "epoch": 0.6679875894694806, "grad_norm": 0.5020123720169067, "learning_rate": 1.501032019600576e-05, "loss": 0.6403, "step": 21745 }, { "epoch": 0.6680183086044297, "grad_norm": 0.33876320719718933, "learning_rate": 1.5009901948055397e-05, "loss": 0.5043, "step": 21746 }, { "epoch": 0.6680490277393789, "grad_norm": 0.5350170135498047, "learning_rate": 1.500948368840406e-05, "loss": 0.5482, "step": 21747 }, { "epoch": 0.668079746874328, "grad_norm": 0.389327734708786, "learning_rate": 1.5009065417052724e-05, "loss": 0.5366, "step": 21748 }, { "epoch": 0.6681104660092771, "grad_norm": 0.36329859495162964, "learning_rate": 1.5008647134002363e-05, "loss": 0.6022, "step": 21749 }, { "epoch": 0.6681411851442264, "grad_norm": 0.41659992933273315, "learning_rate": 1.5008228839253957e-05, "loss": 0.5494, "step": 21750 }, { "epoch": 0.6681719042791755, "grad_norm": 0.3734131455421448, "learning_rate": 1.5007810532808482e-05, "loss": 0.5063, "step": 21751 }, { "epoch": 0.6682026234141246, "grad_norm": 0.34192565083503723, "learning_rate": 1.500739221466692e-05, "loss": 0.6118, "step": 21752 }, { "epoch": 0.6682333425490738, "grad_norm": 0.341243177652359, "learning_rate": 1.500697388483024e-05, "loss": 0.5535, "step": 21753 }, { "epoch": 0.668264061684023, "grad_norm": 0.35031405091285706, "learning_rate": 1.5006555543299426e-05, "loss": 0.5992, "step": 21754 }, { "epoch": 0.6682947808189721, "grad_norm": 0.3425159156322479, "learning_rate": 1.5006137190075447e-05, "loss": 0.56, "step": 21755 }, { "epoch": 0.6683254999539213, "grad_norm": 0.41344985365867615, "learning_rate": 1.5005718825159287e-05, "loss": 0.6072, "step": 21756 }, { "epoch": 0.6683562190888704, "grad_norm": 0.6898861527442932, "learning_rate": 1.5005300448551925e-05, "loss": 0.641, "step": 21757 }, { "epoch": 0.6683869382238197, "grad_norm": 0.4472046196460724, "learning_rate": 1.500488206025433e-05, "loss": 0.5943, "step": 21758 }, { "epoch": 0.6684176573587688, "grad_norm": 0.36931687593460083, "learning_rate": 1.5004463660267485e-05, "loss": 0.577, "step": 21759 }, { "epoch": 0.6684483764937179, "grad_norm": 0.3610285520553589, "learning_rate": 1.5004045248592363e-05, "loss": 0.52, "step": 21760 }, { "epoch": 0.6684790956286671, "grad_norm": 0.36286208033561707, "learning_rate": 1.5003626825229947e-05, "loss": 0.5752, "step": 21761 }, { "epoch": 0.6685098147636163, "grad_norm": 0.3811629116535187, "learning_rate": 1.5003208390181212e-05, "loss": 0.5677, "step": 21762 }, { "epoch": 0.6685405338985654, "grad_norm": 0.35765811800956726, "learning_rate": 1.5002789943447131e-05, "loss": 0.4894, "step": 21763 }, { "epoch": 0.6685712530335146, "grad_norm": 0.364870548248291, "learning_rate": 1.5002371485028689e-05, "loss": 0.5523, "step": 21764 }, { "epoch": 0.6686019721684637, "grad_norm": 0.36567142605781555, "learning_rate": 1.5001953014926853e-05, "loss": 0.5942, "step": 21765 }, { "epoch": 0.6686326913034129, "grad_norm": 0.3593689203262329, "learning_rate": 1.5001534533142612e-05, "loss": 0.579, "step": 21766 }, { "epoch": 0.6686634104383621, "grad_norm": 0.3108760118484497, "learning_rate": 1.5001116039676936e-05, "loss": 0.506, "step": 21767 }, { "epoch": 0.6686941295733112, "grad_norm": 0.3535699248313904, "learning_rate": 1.5000697534530804e-05, "loss": 0.5828, "step": 21768 }, { "epoch": 0.6687248487082603, "grad_norm": 0.34123626351356506, "learning_rate": 1.5000279017705191e-05, "loss": 0.61, "step": 21769 }, { "epoch": 0.6687555678432096, "grad_norm": 0.35584235191345215, "learning_rate": 1.499986048920108e-05, "loss": 0.5795, "step": 21770 }, { "epoch": 0.6687862869781587, "grad_norm": 0.4122330844402313, "learning_rate": 1.4999441949019446e-05, "loss": 0.6153, "step": 21771 }, { "epoch": 0.6688170061131079, "grad_norm": 0.3653135299682617, "learning_rate": 1.4999023397161265e-05, "loss": 0.6013, "step": 21772 }, { "epoch": 0.668847725248057, "grad_norm": 0.41242989897727966, "learning_rate": 1.4998604833627517e-05, "loss": 0.5865, "step": 21773 }, { "epoch": 0.6688784443830061, "grad_norm": 0.394336074590683, "learning_rate": 1.4998186258419174e-05, "loss": 0.5328, "step": 21774 }, { "epoch": 0.6689091635179554, "grad_norm": 0.3939213752746582, "learning_rate": 1.4997767671537223e-05, "loss": 0.5484, "step": 21775 }, { "epoch": 0.6689398826529045, "grad_norm": 0.46171364188194275, "learning_rate": 1.4997349072982634e-05, "loss": 0.5277, "step": 21776 }, { "epoch": 0.6689706017878536, "grad_norm": 0.4208122491836548, "learning_rate": 1.4996930462756389e-05, "loss": 0.5446, "step": 21777 }, { "epoch": 0.6690013209228028, "grad_norm": 0.38486745953559875, "learning_rate": 1.4996511840859462e-05, "loss": 0.5771, "step": 21778 }, { "epoch": 0.669032040057752, "grad_norm": 0.35256117582321167, "learning_rate": 1.4996093207292834e-05, "loss": 0.4941, "step": 21779 }, { "epoch": 0.6690627591927011, "grad_norm": 0.39707016944885254, "learning_rate": 1.499567456205748e-05, "loss": 0.5577, "step": 21780 }, { "epoch": 0.6690934783276503, "grad_norm": 0.38282251358032227, "learning_rate": 1.4995255905154382e-05, "loss": 0.7034, "step": 21781 }, { "epoch": 0.6691241974625994, "grad_norm": 0.33049169182777405, "learning_rate": 1.4994837236584514e-05, "loss": 0.5427, "step": 21782 }, { "epoch": 0.6691549165975487, "grad_norm": 0.32842907309532166, "learning_rate": 1.4994418556348854e-05, "loss": 0.5423, "step": 21783 }, { "epoch": 0.6691856357324978, "grad_norm": 0.3443058431148529, "learning_rate": 1.4993999864448381e-05, "loss": 0.5238, "step": 21784 }, { "epoch": 0.6692163548674469, "grad_norm": 0.35747402906417847, "learning_rate": 1.499358116088407e-05, "loss": 0.5104, "step": 21785 }, { "epoch": 0.6692470740023961, "grad_norm": 0.6501975655555725, "learning_rate": 1.4993162445656905e-05, "loss": 0.4955, "step": 21786 }, { "epoch": 0.6692777931373453, "grad_norm": 0.3517424762248993, "learning_rate": 1.499274371876786e-05, "loss": 0.5777, "step": 21787 }, { "epoch": 0.6693085122722944, "grad_norm": 0.40006858110427856, "learning_rate": 1.4992324980217912e-05, "loss": 0.6148, "step": 21788 }, { "epoch": 0.6693392314072436, "grad_norm": 0.37253817915916443, "learning_rate": 1.499190623000804e-05, "loss": 0.6133, "step": 21789 }, { "epoch": 0.6693699505421927, "grad_norm": 0.40567854046821594, "learning_rate": 1.4991487468139223e-05, "loss": 0.5105, "step": 21790 }, { "epoch": 0.6694006696771418, "grad_norm": 0.38822075724601746, "learning_rate": 1.4991068694612438e-05, "loss": 0.5973, "step": 21791 }, { "epoch": 0.6694313888120911, "grad_norm": 0.3723812997341156, "learning_rate": 1.4990649909428663e-05, "loss": 0.5247, "step": 21792 }, { "epoch": 0.6694621079470402, "grad_norm": 0.35154983401298523, "learning_rate": 1.4990231112588877e-05, "loss": 0.5693, "step": 21793 }, { "epoch": 0.6694928270819893, "grad_norm": 0.35957157611846924, "learning_rate": 1.498981230409406e-05, "loss": 0.4811, "step": 21794 }, { "epoch": 0.6695235462169385, "grad_norm": 0.3814113736152649, "learning_rate": 1.4989393483945186e-05, "loss": 0.6171, "step": 21795 }, { "epoch": 0.6695542653518877, "grad_norm": 0.36948996782302856, "learning_rate": 1.4988974652143235e-05, "loss": 0.5859, "step": 21796 }, { "epoch": 0.6695849844868369, "grad_norm": 0.3916812837123871, "learning_rate": 1.4988555808689186e-05, "loss": 0.5954, "step": 21797 }, { "epoch": 0.669615703621786, "grad_norm": 0.3475976884365082, "learning_rate": 1.4988136953584013e-05, "loss": 0.5099, "step": 21798 }, { "epoch": 0.6696464227567351, "grad_norm": 0.3409101963043213, "learning_rate": 1.4987718086828703e-05, "loss": 0.5395, "step": 21799 }, { "epoch": 0.6696771418916844, "grad_norm": 0.37721702456474304, "learning_rate": 1.4987299208424229e-05, "loss": 0.6263, "step": 21800 }, { "epoch": 0.6697078610266335, "grad_norm": 0.3559037148952484, "learning_rate": 1.4986880318371566e-05, "loss": 0.3815, "step": 21801 }, { "epoch": 0.6697385801615826, "grad_norm": 0.3871418535709381, "learning_rate": 1.4986461416671698e-05, "loss": 0.5912, "step": 21802 }, { "epoch": 0.6697692992965318, "grad_norm": 0.3460095524787903, "learning_rate": 1.49860425033256e-05, "loss": 0.5368, "step": 21803 }, { "epoch": 0.669800018431481, "grad_norm": 0.37192922830581665, "learning_rate": 1.4985623578334253e-05, "loss": 0.5891, "step": 21804 }, { "epoch": 0.6698307375664301, "grad_norm": 0.38272279500961304, "learning_rate": 1.498520464169863e-05, "loss": 0.5964, "step": 21805 }, { "epoch": 0.6698614567013793, "grad_norm": 0.36618033051490784, "learning_rate": 1.4984785693419717e-05, "loss": 0.5457, "step": 21806 }, { "epoch": 0.6698921758363284, "grad_norm": 0.3672919273376465, "learning_rate": 1.4984366733498488e-05, "loss": 0.5463, "step": 21807 }, { "epoch": 0.6699228949712777, "grad_norm": 0.3146987557411194, "learning_rate": 1.4983947761935922e-05, "loss": 0.5283, "step": 21808 }, { "epoch": 0.6699536141062268, "grad_norm": 0.3271990418434143, "learning_rate": 1.4983528778732996e-05, "loss": 0.5741, "step": 21809 }, { "epoch": 0.6699843332411759, "grad_norm": 0.3811798393726349, "learning_rate": 1.4983109783890692e-05, "loss": 0.5312, "step": 21810 }, { "epoch": 0.6700150523761251, "grad_norm": 0.33606886863708496, "learning_rate": 1.4982690777409989e-05, "loss": 0.5879, "step": 21811 }, { "epoch": 0.6700457715110743, "grad_norm": 0.3319818675518036, "learning_rate": 1.4982271759291862e-05, "loss": 0.5648, "step": 21812 }, { "epoch": 0.6700764906460234, "grad_norm": 0.37068116664886475, "learning_rate": 1.4981852729537294e-05, "loss": 0.5777, "step": 21813 }, { "epoch": 0.6701072097809726, "grad_norm": 0.3822195827960968, "learning_rate": 1.4981433688147258e-05, "loss": 0.6525, "step": 21814 }, { "epoch": 0.6701379289159217, "grad_norm": 0.3225240409374237, "learning_rate": 1.4981014635122737e-05, "loss": 0.4856, "step": 21815 }, { "epoch": 0.6701686480508708, "grad_norm": 0.36982864141464233, "learning_rate": 1.498059557046471e-05, "loss": 0.4751, "step": 21816 }, { "epoch": 0.6701993671858201, "grad_norm": 0.3358457684516907, "learning_rate": 1.498017649417415e-05, "loss": 0.508, "step": 21817 }, { "epoch": 0.6702300863207692, "grad_norm": 0.3784545063972473, "learning_rate": 1.4979757406252042e-05, "loss": 0.57, "step": 21818 }, { "epoch": 0.6702608054557183, "grad_norm": 0.3468027412891388, "learning_rate": 1.4979338306699363e-05, "loss": 0.5397, "step": 21819 }, { "epoch": 0.6702915245906675, "grad_norm": 0.45492056012153625, "learning_rate": 1.4978919195517092e-05, "loss": 0.594, "step": 21820 }, { "epoch": 0.6703222437256167, "grad_norm": 0.33153235912323, "learning_rate": 1.4978500072706205e-05, "loss": 0.5363, "step": 21821 }, { "epoch": 0.6703529628605659, "grad_norm": 0.3736344277858734, "learning_rate": 1.4978080938267688e-05, "loss": 0.489, "step": 21822 }, { "epoch": 0.670383681995515, "grad_norm": 0.6026903390884399, "learning_rate": 1.4977661792202511e-05, "loss": 0.5089, "step": 21823 }, { "epoch": 0.6704144011304641, "grad_norm": 0.3650552034378052, "learning_rate": 1.497724263451166e-05, "loss": 0.5141, "step": 21824 }, { "epoch": 0.6704451202654134, "grad_norm": 0.3497178554534912, "learning_rate": 1.4976823465196109e-05, "loss": 0.567, "step": 21825 }, { "epoch": 0.6704758394003625, "grad_norm": 0.35937079787254333, "learning_rate": 1.4976404284256843e-05, "loss": 0.6388, "step": 21826 }, { "epoch": 0.6705065585353116, "grad_norm": 0.36280468106269836, "learning_rate": 1.4975985091694831e-05, "loss": 0.61, "step": 21827 }, { "epoch": 0.6705372776702608, "grad_norm": 0.36528778076171875, "learning_rate": 1.4975565887511063e-05, "loss": 0.5428, "step": 21828 }, { "epoch": 0.67056799680521, "grad_norm": 0.36561641097068787, "learning_rate": 1.4975146671706513e-05, "loss": 0.5456, "step": 21829 }, { "epoch": 0.6705987159401591, "grad_norm": 0.5630608797073364, "learning_rate": 1.4974727444282159e-05, "loss": 0.4733, "step": 21830 }, { "epoch": 0.6706294350751083, "grad_norm": 0.3779500722885132, "learning_rate": 1.4974308205238984e-05, "loss": 0.5519, "step": 21831 }, { "epoch": 0.6706601542100574, "grad_norm": 0.3415282368659973, "learning_rate": 1.4973888954577962e-05, "loss": 0.4875, "step": 21832 }, { "epoch": 0.6706908733450067, "grad_norm": 0.3942824602127075, "learning_rate": 1.4973469692300077e-05, "loss": 0.6109, "step": 21833 }, { "epoch": 0.6707215924799558, "grad_norm": 0.3671298325061798, "learning_rate": 1.4973050418406306e-05, "loss": 0.5652, "step": 21834 }, { "epoch": 0.6707523116149049, "grad_norm": 0.3818517029285431, "learning_rate": 1.4972631132897627e-05, "loss": 0.5009, "step": 21835 }, { "epoch": 0.6707830307498541, "grad_norm": 0.3498857915401459, "learning_rate": 1.4972211835775023e-05, "loss": 0.5232, "step": 21836 }, { "epoch": 0.6708137498848032, "grad_norm": 0.3200263977050781, "learning_rate": 1.4971792527039468e-05, "loss": 0.522, "step": 21837 }, { "epoch": 0.6708444690197524, "grad_norm": 0.3588224947452545, "learning_rate": 1.4971373206691948e-05, "loss": 0.5043, "step": 21838 }, { "epoch": 0.6708751881547016, "grad_norm": 0.35425055027008057, "learning_rate": 1.4970953874733436e-05, "loss": 0.5411, "step": 21839 }, { "epoch": 0.6709059072896507, "grad_norm": 0.36200302839279175, "learning_rate": 1.4970534531164915e-05, "loss": 0.5579, "step": 21840 }, { "epoch": 0.6709366264245998, "grad_norm": 0.41114452481269836, "learning_rate": 1.4970115175987361e-05, "loss": 0.4858, "step": 21841 }, { "epoch": 0.6709673455595491, "grad_norm": 0.3624226152896881, "learning_rate": 1.496969580920176e-05, "loss": 0.5445, "step": 21842 }, { "epoch": 0.6709980646944982, "grad_norm": 0.3647700846195221, "learning_rate": 1.4969276430809084e-05, "loss": 0.5729, "step": 21843 }, { "epoch": 0.6710287838294474, "grad_norm": 0.4315674602985382, "learning_rate": 1.4968857040810319e-05, "loss": 0.5385, "step": 21844 }, { "epoch": 0.6710595029643965, "grad_norm": 0.3766828775405884, "learning_rate": 1.496843763920644e-05, "loss": 0.5549, "step": 21845 }, { "epoch": 0.6710902220993457, "grad_norm": 0.3474023640155792, "learning_rate": 1.4968018225998425e-05, "loss": 0.571, "step": 21846 }, { "epoch": 0.6711209412342949, "grad_norm": 0.35568177700042725, "learning_rate": 1.496759880118726e-05, "loss": 0.5642, "step": 21847 }, { "epoch": 0.671151660369244, "grad_norm": 0.46426820755004883, "learning_rate": 1.4967179364773918e-05, "loss": 0.5285, "step": 21848 }, { "epoch": 0.6711823795041931, "grad_norm": 0.3894895315170288, "learning_rate": 1.4966759916759387e-05, "loss": 0.5148, "step": 21849 }, { "epoch": 0.6712130986391424, "grad_norm": 0.36672693490982056, "learning_rate": 1.4966340457144634e-05, "loss": 0.6858, "step": 21850 }, { "epoch": 0.6712438177740915, "grad_norm": 0.41600480675697327, "learning_rate": 1.4965920985930652e-05, "loss": 0.5883, "step": 21851 }, { "epoch": 0.6712745369090406, "grad_norm": 0.49586841464042664, "learning_rate": 1.4965501503118413e-05, "loss": 0.5237, "step": 21852 }, { "epoch": 0.6713052560439898, "grad_norm": 0.34260469675064087, "learning_rate": 1.4965082008708897e-05, "loss": 0.5355, "step": 21853 }, { "epoch": 0.671335975178939, "grad_norm": 0.3998924195766449, "learning_rate": 1.4964662502703089e-05, "loss": 0.5634, "step": 21854 }, { "epoch": 0.6713666943138881, "grad_norm": 0.3667650818824768, "learning_rate": 1.496424298510196e-05, "loss": 0.5272, "step": 21855 }, { "epoch": 0.6713974134488373, "grad_norm": 0.3857392966747284, "learning_rate": 1.4963823455906499e-05, "loss": 0.5467, "step": 21856 }, { "epoch": 0.6714281325837864, "grad_norm": 0.4109492003917694, "learning_rate": 1.496340391511768e-05, "loss": 0.574, "step": 21857 }, { "epoch": 0.6714588517187356, "grad_norm": 0.3473782241344452, "learning_rate": 1.4962984362736483e-05, "loss": 0.5469, "step": 21858 }, { "epoch": 0.6714895708536848, "grad_norm": 0.3486780822277069, "learning_rate": 1.4962564798763892e-05, "loss": 0.5288, "step": 21859 }, { "epoch": 0.6715202899886339, "grad_norm": 0.3672081530094147, "learning_rate": 1.4962145223200883e-05, "loss": 0.5937, "step": 21860 }, { "epoch": 0.6715510091235831, "grad_norm": 0.43399733304977417, "learning_rate": 1.4961725636048437e-05, "loss": 0.6297, "step": 21861 }, { "epoch": 0.6715817282585322, "grad_norm": 0.34485486149787903, "learning_rate": 1.4961306037307534e-05, "loss": 0.6229, "step": 21862 }, { "epoch": 0.6716124473934814, "grad_norm": 0.4011688530445099, "learning_rate": 1.4960886426979157e-05, "loss": 0.5065, "step": 21863 }, { "epoch": 0.6716431665284306, "grad_norm": 0.3807830214500427, "learning_rate": 1.496046680506428e-05, "loss": 0.5837, "step": 21864 }, { "epoch": 0.6716738856633797, "grad_norm": 0.3692626655101776, "learning_rate": 1.4960047171563887e-05, "loss": 0.5179, "step": 21865 }, { "epoch": 0.6717046047983288, "grad_norm": 0.3689953088760376, "learning_rate": 1.4959627526478956e-05, "loss": 0.5501, "step": 21866 }, { "epoch": 0.6717353239332781, "grad_norm": 0.3764823377132416, "learning_rate": 1.4959207869810473e-05, "loss": 0.5762, "step": 21867 }, { "epoch": 0.6717660430682272, "grad_norm": 0.3269963264465332, "learning_rate": 1.4958788201559408e-05, "loss": 0.53, "step": 21868 }, { "epoch": 0.6717967622031764, "grad_norm": 0.34063270688056946, "learning_rate": 1.4958368521726754e-05, "loss": 0.5416, "step": 21869 }, { "epoch": 0.6718274813381255, "grad_norm": 0.3388739824295044, "learning_rate": 1.495794883031348e-05, "loss": 0.5515, "step": 21870 }, { "epoch": 0.6718582004730747, "grad_norm": 0.3790370523929596, "learning_rate": 1.495752912732057e-05, "loss": 0.5823, "step": 21871 }, { "epoch": 0.6718889196080239, "grad_norm": 0.3345538377761841, "learning_rate": 1.4957109412749005e-05, "loss": 0.5536, "step": 21872 }, { "epoch": 0.671919638742973, "grad_norm": 0.36834093928337097, "learning_rate": 1.4956689686599764e-05, "loss": 0.6051, "step": 21873 }, { "epoch": 0.6719503578779221, "grad_norm": 0.3695923089981079, "learning_rate": 1.4956269948873829e-05, "loss": 0.5466, "step": 21874 }, { "epoch": 0.6719810770128714, "grad_norm": 0.40504056215286255, "learning_rate": 1.4955850199572179e-05, "loss": 0.5909, "step": 21875 }, { "epoch": 0.6720117961478205, "grad_norm": 0.5484756827354431, "learning_rate": 1.4955430438695795e-05, "loss": 0.5632, "step": 21876 }, { "epoch": 0.6720425152827696, "grad_norm": 0.3953056037425995, "learning_rate": 1.4955010666245654e-05, "loss": 0.5631, "step": 21877 }, { "epoch": 0.6720732344177188, "grad_norm": 0.38316425681114197, "learning_rate": 1.4954590882222746e-05, "loss": 0.5196, "step": 21878 }, { "epoch": 0.6721039535526679, "grad_norm": 0.4177243113517761, "learning_rate": 1.495417108662804e-05, "loss": 0.5492, "step": 21879 }, { "epoch": 0.6721346726876171, "grad_norm": 0.35973554849624634, "learning_rate": 1.4953751279462526e-05, "loss": 0.6413, "step": 21880 }, { "epoch": 0.6721653918225663, "grad_norm": 0.3249121904373169, "learning_rate": 1.4953331460727176e-05, "loss": 0.4898, "step": 21881 }, { "epoch": 0.6721961109575154, "grad_norm": 0.341777503490448, "learning_rate": 1.4952911630422976e-05, "loss": 0.4997, "step": 21882 }, { "epoch": 0.6722268300924646, "grad_norm": 0.36944061517715454, "learning_rate": 1.4952491788550902e-05, "loss": 0.5157, "step": 21883 }, { "epoch": 0.6722575492274138, "grad_norm": 0.3697746992111206, "learning_rate": 1.4952071935111939e-05, "loss": 0.58, "step": 21884 }, { "epoch": 0.6722882683623629, "grad_norm": 0.3673689067363739, "learning_rate": 1.4951652070107065e-05, "loss": 0.5972, "step": 21885 }, { "epoch": 0.6723189874973121, "grad_norm": 0.3601202368736267, "learning_rate": 1.4951232193537267e-05, "loss": 0.5738, "step": 21886 }, { "epoch": 0.6723497066322612, "grad_norm": 0.3280943036079407, "learning_rate": 1.4950812305403515e-05, "loss": 0.5529, "step": 21887 }, { "epoch": 0.6723804257672104, "grad_norm": 0.3702241778373718, "learning_rate": 1.49503924057068e-05, "loss": 0.6007, "step": 21888 }, { "epoch": 0.6724111449021596, "grad_norm": 0.357453316450119, "learning_rate": 1.4949972494448093e-05, "loss": 0.5214, "step": 21889 }, { "epoch": 0.6724418640371087, "grad_norm": 0.3648562729358673, "learning_rate": 1.4949552571628382e-05, "loss": 0.5628, "step": 21890 }, { "epoch": 0.6724725831720578, "grad_norm": 0.37655097246170044, "learning_rate": 1.4949132637248644e-05, "loss": 0.572, "step": 21891 }, { "epoch": 0.6725033023070071, "grad_norm": 0.37352022528648376, "learning_rate": 1.4948712691309864e-05, "loss": 0.4803, "step": 21892 }, { "epoch": 0.6725340214419562, "grad_norm": 0.36312398314476013, "learning_rate": 1.4948292733813015e-05, "loss": 0.6125, "step": 21893 }, { "epoch": 0.6725647405769054, "grad_norm": 0.3802487552165985, "learning_rate": 1.4947872764759087e-05, "loss": 0.5973, "step": 21894 }, { "epoch": 0.6725954597118545, "grad_norm": 0.3438098728656769, "learning_rate": 1.4947452784149054e-05, "loss": 0.5228, "step": 21895 }, { "epoch": 0.6726261788468036, "grad_norm": 0.36823153495788574, "learning_rate": 1.4947032791983902e-05, "loss": 0.5678, "step": 21896 }, { "epoch": 0.6726568979817529, "grad_norm": 0.40206441283226013, "learning_rate": 1.494661278826461e-05, "loss": 0.5055, "step": 21897 }, { "epoch": 0.672687617116702, "grad_norm": 0.4220505356788635, "learning_rate": 1.4946192772992155e-05, "loss": 0.5943, "step": 21898 }, { "epoch": 0.6727183362516511, "grad_norm": 0.3443201780319214, "learning_rate": 1.4945772746167524e-05, "loss": 0.5297, "step": 21899 }, { "epoch": 0.6727490553866003, "grad_norm": 0.4021417796611786, "learning_rate": 1.4945352707791691e-05, "loss": 0.5652, "step": 21900 }, { "epoch": 0.6727797745215495, "grad_norm": 0.3758036494255066, "learning_rate": 1.4944932657865646e-05, "loss": 0.5682, "step": 21901 }, { "epoch": 0.6728104936564986, "grad_norm": 0.34770312905311584, "learning_rate": 1.4944512596390362e-05, "loss": 0.5556, "step": 21902 }, { "epoch": 0.6728412127914478, "grad_norm": 0.3889213502407074, "learning_rate": 1.4944092523366826e-05, "loss": 0.5112, "step": 21903 }, { "epoch": 0.6728719319263969, "grad_norm": 0.39988040924072266, "learning_rate": 1.4943672438796017e-05, "loss": 0.5631, "step": 21904 }, { "epoch": 0.6729026510613461, "grad_norm": 0.3434937596321106, "learning_rate": 1.4943252342678914e-05, "loss": 0.5266, "step": 21905 }, { "epoch": 0.6729333701962953, "grad_norm": 0.30382752418518066, "learning_rate": 1.4942832235016502e-05, "loss": 0.5203, "step": 21906 }, { "epoch": 0.6729640893312444, "grad_norm": 0.35641032457351685, "learning_rate": 1.4942412115809757e-05, "loss": 0.4929, "step": 21907 }, { "epoch": 0.6729948084661936, "grad_norm": 0.44042426347732544, "learning_rate": 1.4941991985059667e-05, "loss": 0.64, "step": 21908 }, { "epoch": 0.6730255276011428, "grad_norm": 0.39741331338882446, "learning_rate": 1.4941571842767206e-05, "loss": 0.5509, "step": 21909 }, { "epoch": 0.6730562467360919, "grad_norm": 0.40061479806900024, "learning_rate": 1.4941151688933361e-05, "loss": 0.612, "step": 21910 }, { "epoch": 0.6730869658710411, "grad_norm": 0.3666587769985199, "learning_rate": 1.494073152355911e-05, "loss": 0.5336, "step": 21911 }, { "epoch": 0.6731176850059902, "grad_norm": 0.347756028175354, "learning_rate": 1.4940311346645437e-05, "loss": 0.5707, "step": 21912 }, { "epoch": 0.6731484041409393, "grad_norm": 0.3638160526752472, "learning_rate": 1.4939891158193321e-05, "loss": 0.5078, "step": 21913 }, { "epoch": 0.6731791232758886, "grad_norm": 0.36156684160232544, "learning_rate": 1.4939470958203746e-05, "loss": 0.566, "step": 21914 }, { "epoch": 0.6732098424108377, "grad_norm": 0.3524653911590576, "learning_rate": 1.493905074667769e-05, "loss": 0.5814, "step": 21915 }, { "epoch": 0.6732405615457868, "grad_norm": 0.3495686650276184, "learning_rate": 1.4938630523616135e-05, "loss": 0.5015, "step": 21916 }, { "epoch": 0.673271280680736, "grad_norm": 0.3950243294239044, "learning_rate": 1.4938210289020065e-05, "loss": 0.5554, "step": 21917 }, { "epoch": 0.6733019998156852, "grad_norm": 0.37011972069740295, "learning_rate": 1.4937790042890459e-05, "loss": 0.5089, "step": 21918 }, { "epoch": 0.6733327189506344, "grad_norm": 0.34794119000434875, "learning_rate": 1.4937369785228298e-05, "loss": 0.5411, "step": 21919 }, { "epoch": 0.6733634380855835, "grad_norm": 0.35719773173332214, "learning_rate": 1.4936949516034567e-05, "loss": 0.5777, "step": 21920 }, { "epoch": 0.6733941572205326, "grad_norm": 0.3707546591758728, "learning_rate": 1.4936529235310246e-05, "loss": 0.5258, "step": 21921 }, { "epoch": 0.6734248763554819, "grad_norm": 0.37664976716041565, "learning_rate": 1.4936108943056315e-05, "loss": 0.5095, "step": 21922 }, { "epoch": 0.673455595490431, "grad_norm": 0.3792751431465149, "learning_rate": 1.4935688639273758e-05, "loss": 0.5924, "step": 21923 }, { "epoch": 0.6734863146253801, "grad_norm": 0.36416709423065186, "learning_rate": 1.4935268323963556e-05, "loss": 0.497, "step": 21924 }, { "epoch": 0.6735170337603293, "grad_norm": 0.386753648519516, "learning_rate": 1.4934847997126687e-05, "loss": 0.549, "step": 21925 }, { "epoch": 0.6735477528952785, "grad_norm": 0.3501449525356293, "learning_rate": 1.493442765876414e-05, "loss": 0.5246, "step": 21926 }, { "epoch": 0.6735784720302276, "grad_norm": 0.3628770112991333, "learning_rate": 1.493400730887689e-05, "loss": 0.5809, "step": 21927 }, { "epoch": 0.6736091911651768, "grad_norm": 0.3720833361148834, "learning_rate": 1.493358694746592e-05, "loss": 0.5293, "step": 21928 }, { "epoch": 0.6736399103001259, "grad_norm": 0.3926211893558502, "learning_rate": 1.4933166574532216e-05, "loss": 0.5458, "step": 21929 }, { "epoch": 0.6736706294350752, "grad_norm": 0.44198694825172424, "learning_rate": 1.493274619007676e-05, "loss": 0.4841, "step": 21930 }, { "epoch": 0.6737013485700243, "grad_norm": 0.4014030992984772, "learning_rate": 1.4932325794100525e-05, "loss": 0.5954, "step": 21931 }, { "epoch": 0.6737320677049734, "grad_norm": 0.37059536576271057, "learning_rate": 1.49319053866045e-05, "loss": 0.5696, "step": 21932 }, { "epoch": 0.6737627868399226, "grad_norm": 0.4080284833908081, "learning_rate": 1.4931484967589667e-05, "loss": 0.5529, "step": 21933 }, { "epoch": 0.6737935059748718, "grad_norm": 0.4272373914718628, "learning_rate": 1.4931064537057005e-05, "loss": 0.503, "step": 21934 }, { "epoch": 0.6738242251098209, "grad_norm": 0.3457905054092407, "learning_rate": 1.4930644095007499e-05, "loss": 0.6019, "step": 21935 }, { "epoch": 0.6738549442447701, "grad_norm": 0.37062689661979675, "learning_rate": 1.4930223641442126e-05, "loss": 0.5697, "step": 21936 }, { "epoch": 0.6738856633797192, "grad_norm": 0.38152849674224854, "learning_rate": 1.4929803176361872e-05, "loss": 0.5684, "step": 21937 }, { "epoch": 0.6739163825146683, "grad_norm": 0.35052046179771423, "learning_rate": 1.4929382699767723e-05, "loss": 0.5678, "step": 21938 }, { "epoch": 0.6739471016496176, "grad_norm": 0.3200049102306366, "learning_rate": 1.4928962211660652e-05, "loss": 0.5267, "step": 21939 }, { "epoch": 0.6739778207845667, "grad_norm": 0.35475748777389526, "learning_rate": 1.4928541712041647e-05, "loss": 0.6058, "step": 21940 }, { "epoch": 0.6740085399195158, "grad_norm": 0.3759993016719818, "learning_rate": 1.4928121200911688e-05, "loss": 0.525, "step": 21941 }, { "epoch": 0.674039259054465, "grad_norm": 0.37516549229621887, "learning_rate": 1.4927700678271759e-05, "loss": 0.5718, "step": 21942 }, { "epoch": 0.6740699781894142, "grad_norm": 0.3706417381763458, "learning_rate": 1.492728014412284e-05, "loss": 0.517, "step": 21943 }, { "epoch": 0.6741006973243634, "grad_norm": 0.3344963490962982, "learning_rate": 1.4926859598465915e-05, "loss": 0.5091, "step": 21944 }, { "epoch": 0.6741314164593125, "grad_norm": 0.34625300765037537, "learning_rate": 1.4926439041301963e-05, "loss": 0.5404, "step": 21945 }, { "epoch": 0.6741621355942616, "grad_norm": 0.33875221014022827, "learning_rate": 1.492601847263197e-05, "loss": 0.5638, "step": 21946 }, { "epoch": 0.6741928547292109, "grad_norm": 0.35900476574897766, "learning_rate": 1.4925597892456919e-05, "loss": 0.5569, "step": 21947 }, { "epoch": 0.67422357386416, "grad_norm": 0.383734792470932, "learning_rate": 1.4925177300777787e-05, "loss": 0.601, "step": 21948 }, { "epoch": 0.6742542929991091, "grad_norm": 0.4184529483318329, "learning_rate": 1.4924756697595562e-05, "loss": 0.6112, "step": 21949 }, { "epoch": 0.6742850121340583, "grad_norm": 0.3358507454395294, "learning_rate": 1.4924336082911221e-05, "loss": 0.5324, "step": 21950 }, { "epoch": 0.6743157312690075, "grad_norm": 0.42552074790000916, "learning_rate": 1.4923915456725753e-05, "loss": 0.5867, "step": 21951 }, { "epoch": 0.6743464504039566, "grad_norm": 0.3441917896270752, "learning_rate": 1.4923494819040132e-05, "loss": 0.5644, "step": 21952 }, { "epoch": 0.6743771695389058, "grad_norm": 0.3634584844112396, "learning_rate": 1.4923074169855348e-05, "loss": 0.5581, "step": 21953 }, { "epoch": 0.6744078886738549, "grad_norm": 0.5366572737693787, "learning_rate": 1.4922653509172377e-05, "loss": 0.5898, "step": 21954 }, { "epoch": 0.6744386078088042, "grad_norm": 0.3975207209587097, "learning_rate": 1.492223283699221e-05, "loss": 0.597, "step": 21955 }, { "epoch": 0.6744693269437533, "grad_norm": 0.37039101123809814, "learning_rate": 1.492181215331582e-05, "loss": 0.5928, "step": 21956 }, { "epoch": 0.6745000460787024, "grad_norm": 0.3359372019767761, "learning_rate": 1.4921391458144196e-05, "loss": 0.6435, "step": 21957 }, { "epoch": 0.6745307652136516, "grad_norm": 0.3487991690635681, "learning_rate": 1.4920970751478318e-05, "loss": 0.5747, "step": 21958 }, { "epoch": 0.6745614843486007, "grad_norm": 0.36599844694137573, "learning_rate": 1.492055003331917e-05, "loss": 0.595, "step": 21959 }, { "epoch": 0.6745922034835499, "grad_norm": 0.3594282865524292, "learning_rate": 1.4920129303667734e-05, "loss": 0.5295, "step": 21960 }, { "epoch": 0.6746229226184991, "grad_norm": 0.3228270411491394, "learning_rate": 1.4919708562524991e-05, "loss": 0.5278, "step": 21961 }, { "epoch": 0.6746536417534482, "grad_norm": 0.40170028805732727, "learning_rate": 1.4919287809891927e-05, "loss": 0.6406, "step": 21962 }, { "epoch": 0.6746843608883973, "grad_norm": 0.3816932737827301, "learning_rate": 1.491886704576952e-05, "loss": 0.518, "step": 21963 }, { "epoch": 0.6747150800233466, "grad_norm": 0.3733949363231659, "learning_rate": 1.4918446270158757e-05, "loss": 0.5932, "step": 21964 }, { "epoch": 0.6747457991582957, "grad_norm": 0.35410141944885254, "learning_rate": 1.4918025483060618e-05, "loss": 0.4947, "step": 21965 }, { "epoch": 0.6747765182932448, "grad_norm": 0.3735851049423218, "learning_rate": 1.491760468447609e-05, "loss": 0.617, "step": 21966 }, { "epoch": 0.674807237428194, "grad_norm": 0.35266318917274475, "learning_rate": 1.4917183874406154e-05, "loss": 0.5322, "step": 21967 }, { "epoch": 0.6748379565631432, "grad_norm": 0.33741629123687744, "learning_rate": 1.4916763052851786e-05, "loss": 0.5491, "step": 21968 }, { "epoch": 0.6748686756980924, "grad_norm": 0.3796861171722412, "learning_rate": 1.4916342219813977e-05, "loss": 0.5263, "step": 21969 }, { "epoch": 0.6748993948330415, "grad_norm": 0.3536703586578369, "learning_rate": 1.4915921375293709e-05, "loss": 0.5279, "step": 21970 }, { "epoch": 0.6749301139679906, "grad_norm": 0.3972707986831665, "learning_rate": 1.4915500519291963e-05, "loss": 0.6131, "step": 21971 }, { "epoch": 0.6749608331029399, "grad_norm": 0.4502725303173065, "learning_rate": 1.491507965180972e-05, "loss": 0.5785, "step": 21972 }, { "epoch": 0.674991552237889, "grad_norm": 0.3894447684288025, "learning_rate": 1.4914658772847967e-05, "loss": 0.5937, "step": 21973 }, { "epoch": 0.6750222713728381, "grad_norm": 0.40534600615501404, "learning_rate": 1.4914237882407682e-05, "loss": 0.5238, "step": 21974 }, { "epoch": 0.6750529905077873, "grad_norm": 0.37768471240997314, "learning_rate": 1.4913816980489854e-05, "loss": 0.5944, "step": 21975 }, { "epoch": 0.6750837096427365, "grad_norm": 0.34300222992897034, "learning_rate": 1.4913396067095464e-05, "loss": 0.4815, "step": 21976 }, { "epoch": 0.6751144287776856, "grad_norm": 0.373392790555954, "learning_rate": 1.4912975142225491e-05, "loss": 0.591, "step": 21977 }, { "epoch": 0.6751451479126348, "grad_norm": 0.37239471077919006, "learning_rate": 1.4912554205880927e-05, "loss": 0.6172, "step": 21978 }, { "epoch": 0.6751758670475839, "grad_norm": 0.3485792875289917, "learning_rate": 1.4912133258062744e-05, "loss": 0.5133, "step": 21979 }, { "epoch": 0.6752065861825332, "grad_norm": 0.3725620210170746, "learning_rate": 1.4911712298771934e-05, "loss": 0.5452, "step": 21980 }, { "epoch": 0.6752373053174823, "grad_norm": 0.3273075222969055, "learning_rate": 1.4911291328009476e-05, "loss": 0.5583, "step": 21981 }, { "epoch": 0.6752680244524314, "grad_norm": 0.364582896232605, "learning_rate": 1.4910870345776357e-05, "loss": 0.5009, "step": 21982 }, { "epoch": 0.6752987435873806, "grad_norm": 0.31916651129722595, "learning_rate": 1.4910449352073552e-05, "loss": 0.4682, "step": 21983 }, { "epoch": 0.6753294627223297, "grad_norm": 0.655091404914856, "learning_rate": 1.4910028346902052e-05, "loss": 0.5204, "step": 21984 }, { "epoch": 0.6753601818572789, "grad_norm": 0.3338433802127838, "learning_rate": 1.4909607330262842e-05, "loss": 0.5349, "step": 21985 }, { "epoch": 0.6753909009922281, "grad_norm": 0.38108620047569275, "learning_rate": 1.4909186302156893e-05, "loss": 0.529, "step": 21986 }, { "epoch": 0.6754216201271772, "grad_norm": 0.35736867785453796, "learning_rate": 1.4908765262585202e-05, "loss": 0.5344, "step": 21987 }, { "epoch": 0.6754523392621263, "grad_norm": 0.3761395215988159, "learning_rate": 1.4908344211548747e-05, "loss": 0.5391, "step": 21988 }, { "epoch": 0.6754830583970756, "grad_norm": 0.3662125766277313, "learning_rate": 1.490792314904851e-05, "loss": 0.4397, "step": 21989 }, { "epoch": 0.6755137775320247, "grad_norm": 0.3439617156982422, "learning_rate": 1.4907502075085474e-05, "loss": 0.5308, "step": 21990 }, { "epoch": 0.6755444966669738, "grad_norm": 0.34499675035476685, "learning_rate": 1.4907080989660628e-05, "loss": 0.5873, "step": 21991 }, { "epoch": 0.675575215801923, "grad_norm": 0.4523753821849823, "learning_rate": 1.4906659892774946e-05, "loss": 0.4978, "step": 21992 }, { "epoch": 0.6756059349368722, "grad_norm": 0.3327869772911072, "learning_rate": 1.4906238784429422e-05, "loss": 0.6029, "step": 21993 }, { "epoch": 0.6756366540718214, "grad_norm": 0.3492189645767212, "learning_rate": 1.4905817664625033e-05, "loss": 0.5125, "step": 21994 }, { "epoch": 0.6756673732067705, "grad_norm": 0.3759780824184418, "learning_rate": 1.4905396533362764e-05, "loss": 0.5728, "step": 21995 }, { "epoch": 0.6756980923417196, "grad_norm": 0.3445685803890228, "learning_rate": 1.49049753906436e-05, "loss": 0.5216, "step": 21996 }, { "epoch": 0.6757288114766689, "grad_norm": 0.3378606140613556, "learning_rate": 1.4904554236468523e-05, "loss": 0.5527, "step": 21997 }, { "epoch": 0.675759530611618, "grad_norm": 0.3522692322731018, "learning_rate": 1.4904133070838516e-05, "loss": 0.5508, "step": 21998 }, { "epoch": 0.6757902497465671, "grad_norm": 0.47690990567207336, "learning_rate": 1.4903711893754564e-05, "loss": 0.5499, "step": 21999 }, { "epoch": 0.6758209688815163, "grad_norm": 0.3717408776283264, "learning_rate": 1.490329070521765e-05, "loss": 0.5064, "step": 22000 }, { "epoch": 0.6758516880164654, "grad_norm": 0.29992109537124634, "learning_rate": 1.490286950522876e-05, "loss": 0.5479, "step": 22001 }, { "epoch": 0.6758824071514146, "grad_norm": 0.34636056423187256, "learning_rate": 1.4902448293788876e-05, "loss": 0.6443, "step": 22002 }, { "epoch": 0.6759131262863638, "grad_norm": 0.35563305020332336, "learning_rate": 1.4902027070898981e-05, "loss": 0.5228, "step": 22003 }, { "epoch": 0.6759438454213129, "grad_norm": 0.39106741547584534, "learning_rate": 1.4901605836560058e-05, "loss": 0.6054, "step": 22004 }, { "epoch": 0.6759745645562621, "grad_norm": 0.3728815019130707, "learning_rate": 1.4901184590773094e-05, "loss": 0.5574, "step": 22005 }, { "epoch": 0.6760052836912113, "grad_norm": 0.4282878637313843, "learning_rate": 1.490076333353907e-05, "loss": 0.6158, "step": 22006 }, { "epoch": 0.6760360028261604, "grad_norm": 0.33841943740844727, "learning_rate": 1.490034206485897e-05, "loss": 0.6216, "step": 22007 }, { "epoch": 0.6760667219611096, "grad_norm": 0.3763122260570526, "learning_rate": 1.4899920784733782e-05, "loss": 0.5562, "step": 22008 }, { "epoch": 0.6760974410960587, "grad_norm": 0.785895586013794, "learning_rate": 1.4899499493164484e-05, "loss": 0.6146, "step": 22009 }, { "epoch": 0.6761281602310079, "grad_norm": 0.39162376523017883, "learning_rate": 1.4899078190152064e-05, "loss": 0.5529, "step": 22010 }, { "epoch": 0.6761588793659571, "grad_norm": 0.3615739643573761, "learning_rate": 1.48986568756975e-05, "loss": 0.5247, "step": 22011 }, { "epoch": 0.6761895985009062, "grad_norm": 0.3341018855571747, "learning_rate": 1.4898235549801788e-05, "loss": 0.5847, "step": 22012 }, { "epoch": 0.6762203176358553, "grad_norm": 0.3398255407810211, "learning_rate": 1.48978142124659e-05, "loss": 0.5842, "step": 22013 }, { "epoch": 0.6762510367708046, "grad_norm": 0.3261178135871887, "learning_rate": 1.489739286369083e-05, "loss": 0.5295, "step": 22014 }, { "epoch": 0.6762817559057537, "grad_norm": 0.35671305656433105, "learning_rate": 1.4896971503477552e-05, "loss": 0.5536, "step": 22015 }, { "epoch": 0.6763124750407028, "grad_norm": 0.38038477301597595, "learning_rate": 1.4896550131827057e-05, "loss": 0.4416, "step": 22016 }, { "epoch": 0.676343194175652, "grad_norm": 0.4290097951889038, "learning_rate": 1.4896128748740326e-05, "loss": 0.6288, "step": 22017 }, { "epoch": 0.6763739133106011, "grad_norm": 0.3331545889377594, "learning_rate": 1.4895707354218348e-05, "loss": 0.5328, "step": 22018 }, { "epoch": 0.6764046324455504, "grad_norm": 0.34939754009246826, "learning_rate": 1.4895285948262099e-05, "loss": 0.5378, "step": 22019 }, { "epoch": 0.6764353515804995, "grad_norm": 0.3516475260257721, "learning_rate": 1.489486453087257e-05, "loss": 0.5061, "step": 22020 }, { "epoch": 0.6764660707154486, "grad_norm": 0.3165721595287323, "learning_rate": 1.4894443102050745e-05, "loss": 0.527, "step": 22021 }, { "epoch": 0.6764967898503979, "grad_norm": 0.37126755714416504, "learning_rate": 1.4894021661797602e-05, "loss": 0.5453, "step": 22022 }, { "epoch": 0.676527508985347, "grad_norm": 0.4275590479373932, "learning_rate": 1.4893600210114131e-05, "loss": 0.507, "step": 22023 }, { "epoch": 0.6765582281202961, "grad_norm": 0.41957661509513855, "learning_rate": 1.4893178747001316e-05, "loss": 0.6138, "step": 22024 }, { "epoch": 0.6765889472552453, "grad_norm": 0.3699117600917816, "learning_rate": 1.489275727246014e-05, "loss": 0.6002, "step": 22025 }, { "epoch": 0.6766196663901944, "grad_norm": 0.3787708580493927, "learning_rate": 1.4892335786491585e-05, "loss": 0.5015, "step": 22026 }, { "epoch": 0.6766503855251436, "grad_norm": 0.3470847010612488, "learning_rate": 1.4891914289096643e-05, "loss": 0.5485, "step": 22027 }, { "epoch": 0.6766811046600928, "grad_norm": 0.4015912413597107, "learning_rate": 1.4891492780276291e-05, "loss": 0.6577, "step": 22028 }, { "epoch": 0.6767118237950419, "grad_norm": 0.43208736181259155, "learning_rate": 1.4891071260031513e-05, "loss": 0.6261, "step": 22029 }, { "epoch": 0.6767425429299911, "grad_norm": 0.34447383880615234, "learning_rate": 1.4890649728363303e-05, "loss": 0.5354, "step": 22030 }, { "epoch": 0.6767732620649403, "grad_norm": 0.3563314974308014, "learning_rate": 1.4890228185272633e-05, "loss": 0.5184, "step": 22031 }, { "epoch": 0.6768039811998894, "grad_norm": 0.35011857748031616, "learning_rate": 1.4889806630760497e-05, "loss": 0.5792, "step": 22032 }, { "epoch": 0.6768347003348386, "grad_norm": 0.36940446496009827, "learning_rate": 1.4889385064827874e-05, "loss": 0.6201, "step": 22033 }, { "epoch": 0.6768654194697877, "grad_norm": 0.375895231962204, "learning_rate": 1.4888963487475753e-05, "loss": 0.6249, "step": 22034 }, { "epoch": 0.6768961386047369, "grad_norm": 0.3702835142612457, "learning_rate": 1.4888541898705115e-05, "loss": 0.5728, "step": 22035 }, { "epoch": 0.6769268577396861, "grad_norm": 0.4068738520145416, "learning_rate": 1.4888120298516949e-05, "loss": 0.5439, "step": 22036 }, { "epoch": 0.6769575768746352, "grad_norm": 0.36504966020584106, "learning_rate": 1.4887698686912234e-05, "loss": 0.5757, "step": 22037 }, { "epoch": 0.6769882960095843, "grad_norm": 0.42260459065437317, "learning_rate": 1.4887277063891955e-05, "loss": 0.611, "step": 22038 }, { "epoch": 0.6770190151445336, "grad_norm": 0.43044397234916687, "learning_rate": 1.4886855429457104e-05, "loss": 0.5111, "step": 22039 }, { "epoch": 0.6770497342794827, "grad_norm": 0.34634727239608765, "learning_rate": 1.4886433783608656e-05, "loss": 0.5027, "step": 22040 }, { "epoch": 0.6770804534144319, "grad_norm": 0.39839503169059753, "learning_rate": 1.4886012126347604e-05, "loss": 0.5025, "step": 22041 }, { "epoch": 0.677111172549381, "grad_norm": 0.37853187322616577, "learning_rate": 1.4885590457674928e-05, "loss": 0.593, "step": 22042 }, { "epoch": 0.6771418916843301, "grad_norm": 0.4178082048892975, "learning_rate": 1.4885168777591616e-05, "loss": 0.7145, "step": 22043 }, { "epoch": 0.6771726108192794, "grad_norm": 0.33137279748916626, "learning_rate": 1.4884747086098648e-05, "loss": 0.6007, "step": 22044 }, { "epoch": 0.6772033299542285, "grad_norm": 0.3378792107105255, "learning_rate": 1.4884325383197015e-05, "loss": 0.5545, "step": 22045 }, { "epoch": 0.6772340490891776, "grad_norm": 0.36839744448661804, "learning_rate": 1.4883903668887697e-05, "loss": 0.5727, "step": 22046 }, { "epoch": 0.6772647682241268, "grad_norm": 0.4636010229587555, "learning_rate": 1.488348194317168e-05, "loss": 0.5866, "step": 22047 }, { "epoch": 0.677295487359076, "grad_norm": 0.42446765303611755, "learning_rate": 1.4883060206049953e-05, "loss": 0.5507, "step": 22048 }, { "epoch": 0.6773262064940251, "grad_norm": 0.36833158135414124, "learning_rate": 1.4882638457523496e-05, "loss": 0.6148, "step": 22049 }, { "epoch": 0.6773569256289743, "grad_norm": 0.38636332750320435, "learning_rate": 1.4882216697593296e-05, "loss": 0.5501, "step": 22050 }, { "epoch": 0.6773876447639234, "grad_norm": 0.3685530424118042, "learning_rate": 1.4881794926260337e-05, "loss": 0.558, "step": 22051 }, { "epoch": 0.6774183638988726, "grad_norm": 0.3602665066719055, "learning_rate": 1.4881373143525609e-05, "loss": 0.5897, "step": 22052 }, { "epoch": 0.6774490830338218, "grad_norm": 0.3439449369907379, "learning_rate": 1.4880951349390089e-05, "loss": 0.517, "step": 22053 }, { "epoch": 0.6774798021687709, "grad_norm": 0.39522913098335266, "learning_rate": 1.4880529543854767e-05, "loss": 0.5498, "step": 22054 }, { "epoch": 0.6775105213037201, "grad_norm": 0.3335849642753601, "learning_rate": 1.488010772692063e-05, "loss": 0.5869, "step": 22055 }, { "epoch": 0.6775412404386693, "grad_norm": 0.39707332849502563, "learning_rate": 1.4879685898588658e-05, "loss": 0.5557, "step": 22056 }, { "epoch": 0.6775719595736184, "grad_norm": 0.36184823513031006, "learning_rate": 1.487926405885984e-05, "loss": 0.494, "step": 22057 }, { "epoch": 0.6776026787085676, "grad_norm": 0.35812410712242126, "learning_rate": 1.4878842207735158e-05, "loss": 0.4948, "step": 22058 }, { "epoch": 0.6776333978435167, "grad_norm": 0.37705615162849426, "learning_rate": 1.48784203452156e-05, "loss": 0.5516, "step": 22059 }, { "epoch": 0.6776641169784658, "grad_norm": 0.4065684676170349, "learning_rate": 1.4877998471302152e-05, "loss": 0.5342, "step": 22060 }, { "epoch": 0.6776948361134151, "grad_norm": 0.3690575659275055, "learning_rate": 1.48775765859958e-05, "loss": 0.5672, "step": 22061 }, { "epoch": 0.6777255552483642, "grad_norm": 0.36962929368019104, "learning_rate": 1.4877154689297521e-05, "loss": 0.4878, "step": 22062 }, { "epoch": 0.6777562743833133, "grad_norm": 0.4202807545661926, "learning_rate": 1.4876732781208311e-05, "loss": 0.6179, "step": 22063 }, { "epoch": 0.6777869935182625, "grad_norm": 0.3546825647354126, "learning_rate": 1.4876310861729151e-05, "loss": 0.5451, "step": 22064 }, { "epoch": 0.6778177126532117, "grad_norm": 0.3424154222011566, "learning_rate": 1.4875888930861023e-05, "loss": 0.5797, "step": 22065 }, { "epoch": 0.6778484317881609, "grad_norm": 0.3719116151332855, "learning_rate": 1.4875466988604918e-05, "loss": 0.6302, "step": 22066 }, { "epoch": 0.67787915092311, "grad_norm": 0.3858247995376587, "learning_rate": 1.4875045034961819e-05, "loss": 0.4758, "step": 22067 }, { "epoch": 0.6779098700580591, "grad_norm": 0.3224858045578003, "learning_rate": 1.4874623069932715e-05, "loss": 0.606, "step": 22068 }, { "epoch": 0.6779405891930084, "grad_norm": 0.3494868576526642, "learning_rate": 1.4874201093518584e-05, "loss": 0.5202, "step": 22069 }, { "epoch": 0.6779713083279575, "grad_norm": 0.3432576358318329, "learning_rate": 1.4873779105720419e-05, "loss": 0.5347, "step": 22070 }, { "epoch": 0.6780020274629066, "grad_norm": 0.34298667311668396, "learning_rate": 1.48733571065392e-05, "loss": 0.5191, "step": 22071 }, { "epoch": 0.6780327465978558, "grad_norm": 0.3300562798976898, "learning_rate": 1.4872935095975918e-05, "loss": 0.5551, "step": 22072 }, { "epoch": 0.678063465732805, "grad_norm": 0.35696059465408325, "learning_rate": 1.4872513074031553e-05, "loss": 0.6009, "step": 22073 }, { "epoch": 0.6780941848677541, "grad_norm": 0.38155168294906616, "learning_rate": 1.4872091040707095e-05, "loss": 0.5421, "step": 22074 }, { "epoch": 0.6781249040027033, "grad_norm": 0.407748281955719, "learning_rate": 1.4871668996003528e-05, "loss": 0.5752, "step": 22075 }, { "epoch": 0.6781556231376524, "grad_norm": 0.3444710969924927, "learning_rate": 1.4871246939921838e-05, "loss": 0.5926, "step": 22076 }, { "epoch": 0.6781863422726016, "grad_norm": 0.3999914526939392, "learning_rate": 1.487082487246301e-05, "loss": 0.5738, "step": 22077 }, { "epoch": 0.6782170614075508, "grad_norm": 0.3425615131855011, "learning_rate": 1.487040279362803e-05, "loss": 0.5416, "step": 22078 }, { "epoch": 0.6782477805424999, "grad_norm": 0.37862515449523926, "learning_rate": 1.4869980703417886e-05, "loss": 0.5082, "step": 22079 }, { "epoch": 0.6782784996774491, "grad_norm": 0.3513711094856262, "learning_rate": 1.4869558601833557e-05, "loss": 0.5622, "step": 22080 }, { "epoch": 0.6783092188123983, "grad_norm": 0.35556334257125854, "learning_rate": 1.4869136488876041e-05, "loss": 0.5974, "step": 22081 }, { "epoch": 0.6783399379473474, "grad_norm": 0.35760143399238586, "learning_rate": 1.4868714364546313e-05, "loss": 0.5676, "step": 22082 }, { "epoch": 0.6783706570822966, "grad_norm": 0.39501118659973145, "learning_rate": 1.486829222884536e-05, "loss": 0.5663, "step": 22083 }, { "epoch": 0.6784013762172457, "grad_norm": 0.46661633253097534, "learning_rate": 1.4867870081774173e-05, "loss": 0.5456, "step": 22084 }, { "epoch": 0.6784320953521948, "grad_norm": 0.40184640884399414, "learning_rate": 1.4867447923333736e-05, "loss": 0.5752, "step": 22085 }, { "epoch": 0.6784628144871441, "grad_norm": 0.4102281332015991, "learning_rate": 1.4867025753525035e-05, "loss": 0.5721, "step": 22086 }, { "epoch": 0.6784935336220932, "grad_norm": 0.35518795251846313, "learning_rate": 1.4866603572349054e-05, "loss": 0.5175, "step": 22087 }, { "epoch": 0.6785242527570423, "grad_norm": 0.35827556252479553, "learning_rate": 1.4866181379806781e-05, "loss": 0.6128, "step": 22088 }, { "epoch": 0.6785549718919915, "grad_norm": 0.3861767649650574, "learning_rate": 1.48657591758992e-05, "loss": 0.5446, "step": 22089 }, { "epoch": 0.6785856910269407, "grad_norm": 0.37139710783958435, "learning_rate": 1.48653369606273e-05, "loss": 0.622, "step": 22090 }, { "epoch": 0.6786164101618899, "grad_norm": 0.38762643933296204, "learning_rate": 1.486491473399207e-05, "loss": 0.4933, "step": 22091 }, { "epoch": 0.678647129296839, "grad_norm": 0.3300459384918213, "learning_rate": 1.4864492495994485e-05, "loss": 0.5641, "step": 22092 }, { "epoch": 0.6786778484317881, "grad_norm": 0.3642865717411041, "learning_rate": 1.4864070246635542e-05, "loss": 0.5273, "step": 22093 }, { "epoch": 0.6787085675667374, "grad_norm": 0.3671911656856537, "learning_rate": 1.4863647985916221e-05, "loss": 0.5956, "step": 22094 }, { "epoch": 0.6787392867016865, "grad_norm": 1.3949490785598755, "learning_rate": 1.4863225713837513e-05, "loss": 0.5867, "step": 22095 }, { "epoch": 0.6787700058366356, "grad_norm": 0.3950279653072357, "learning_rate": 1.48628034304004e-05, "loss": 0.6068, "step": 22096 }, { "epoch": 0.6788007249715848, "grad_norm": 0.43435680866241455, "learning_rate": 1.486238113560587e-05, "loss": 0.5242, "step": 22097 }, { "epoch": 0.678831444106534, "grad_norm": 0.352478951215744, "learning_rate": 1.4861958829454907e-05, "loss": 0.5038, "step": 22098 }, { "epoch": 0.6788621632414831, "grad_norm": 0.36525556445121765, "learning_rate": 1.4861536511948504e-05, "loss": 0.5749, "step": 22099 }, { "epoch": 0.6788928823764323, "grad_norm": 0.3951285183429718, "learning_rate": 1.4861114183087641e-05, "loss": 0.5331, "step": 22100 }, { "epoch": 0.6789236015113814, "grad_norm": 0.3826413154602051, "learning_rate": 1.4860691842873304e-05, "loss": 0.5408, "step": 22101 }, { "epoch": 0.6789543206463305, "grad_norm": 0.38105905055999756, "learning_rate": 1.4860269491306485e-05, "loss": 0.536, "step": 22102 }, { "epoch": 0.6789850397812798, "grad_norm": 0.3793348968029022, "learning_rate": 1.4859847128388165e-05, "loss": 0.6254, "step": 22103 }, { "epoch": 0.6790157589162289, "grad_norm": 0.3544193506240845, "learning_rate": 1.4859424754119334e-05, "loss": 0.5156, "step": 22104 }, { "epoch": 0.6790464780511781, "grad_norm": 0.39324551820755005, "learning_rate": 1.4859002368500977e-05, "loss": 0.5053, "step": 22105 }, { "epoch": 0.6790771971861272, "grad_norm": 0.35126134753227234, "learning_rate": 1.4858579971534081e-05, "loss": 0.5595, "step": 22106 }, { "epoch": 0.6791079163210764, "grad_norm": 0.3784853518009186, "learning_rate": 1.4858157563219632e-05, "loss": 0.5381, "step": 22107 }, { "epoch": 0.6791386354560256, "grad_norm": 0.33681240677833557, "learning_rate": 1.4857735143558616e-05, "loss": 0.5426, "step": 22108 }, { "epoch": 0.6791693545909747, "grad_norm": 0.3299916088581085, "learning_rate": 1.4857312712552022e-05, "loss": 0.559, "step": 22109 }, { "epoch": 0.6792000737259238, "grad_norm": 0.43774479627609253, "learning_rate": 1.4856890270200833e-05, "loss": 0.5921, "step": 22110 }, { "epoch": 0.6792307928608731, "grad_norm": 0.9641984701156616, "learning_rate": 1.4856467816506038e-05, "loss": 0.5802, "step": 22111 }, { "epoch": 0.6792615119958222, "grad_norm": 0.3762319087982178, "learning_rate": 1.4856045351468621e-05, "loss": 0.5702, "step": 22112 }, { "epoch": 0.6792922311307713, "grad_norm": 0.4951172471046448, "learning_rate": 1.4855622875089574e-05, "loss": 0.6075, "step": 22113 }, { "epoch": 0.6793229502657205, "grad_norm": 0.40628930926322937, "learning_rate": 1.4855200387369881e-05, "loss": 0.5483, "step": 22114 }, { "epoch": 0.6793536694006697, "grad_norm": 0.35693085193634033, "learning_rate": 1.4854777888310527e-05, "loss": 0.5006, "step": 22115 }, { "epoch": 0.6793843885356189, "grad_norm": 0.46579375863075256, "learning_rate": 1.48543553779125e-05, "loss": 0.5678, "step": 22116 }, { "epoch": 0.679415107670568, "grad_norm": 0.4174996614456177, "learning_rate": 1.485393285617679e-05, "loss": 0.551, "step": 22117 }, { "epoch": 0.6794458268055171, "grad_norm": 0.36591702699661255, "learning_rate": 1.4853510323104379e-05, "loss": 0.6163, "step": 22118 }, { "epoch": 0.6794765459404664, "grad_norm": 0.35869690775871277, "learning_rate": 1.4853087778696253e-05, "loss": 0.5581, "step": 22119 }, { "epoch": 0.6795072650754155, "grad_norm": 0.38964685797691345, "learning_rate": 1.4852665222953404e-05, "loss": 0.6359, "step": 22120 }, { "epoch": 0.6795379842103646, "grad_norm": 0.36953893303871155, "learning_rate": 1.4852242655876815e-05, "loss": 0.5004, "step": 22121 }, { "epoch": 0.6795687033453138, "grad_norm": 0.47858649492263794, "learning_rate": 1.4851820077467476e-05, "loss": 0.5282, "step": 22122 }, { "epoch": 0.679599422480263, "grad_norm": 0.4109785556793213, "learning_rate": 1.4851397487726374e-05, "loss": 0.4915, "step": 22123 }, { "epoch": 0.6796301416152121, "grad_norm": 0.3261522352695465, "learning_rate": 1.485097488665449e-05, "loss": 0.5368, "step": 22124 }, { "epoch": 0.6796608607501613, "grad_norm": 0.36818981170654297, "learning_rate": 1.4850552274252819e-05, "loss": 0.5734, "step": 22125 }, { "epoch": 0.6796915798851104, "grad_norm": 0.37101539969444275, "learning_rate": 1.4850129650522342e-05, "loss": 0.6249, "step": 22126 }, { "epoch": 0.6797222990200597, "grad_norm": 0.35864776372909546, "learning_rate": 1.484970701546405e-05, "loss": 0.5758, "step": 22127 }, { "epoch": 0.6797530181550088, "grad_norm": 0.349310964345932, "learning_rate": 1.4849284369078926e-05, "loss": 0.5472, "step": 22128 }, { "epoch": 0.6797837372899579, "grad_norm": 0.3612159490585327, "learning_rate": 1.4848861711367964e-05, "loss": 0.6167, "step": 22129 }, { "epoch": 0.6798144564249071, "grad_norm": 0.35629311203956604, "learning_rate": 1.4848439042332144e-05, "loss": 0.5174, "step": 22130 }, { "epoch": 0.6798451755598562, "grad_norm": 0.44505611062049866, "learning_rate": 1.4848016361972457e-05, "loss": 0.4916, "step": 22131 }, { "epoch": 0.6798758946948054, "grad_norm": 0.4399089217185974, "learning_rate": 1.4847593670289888e-05, "loss": 0.4548, "step": 22132 }, { "epoch": 0.6799066138297546, "grad_norm": 0.33396437764167786, "learning_rate": 1.4847170967285427e-05, "loss": 0.4833, "step": 22133 }, { "epoch": 0.6799373329647037, "grad_norm": 0.37625035643577576, "learning_rate": 1.484674825296006e-05, "loss": 0.5843, "step": 22134 }, { "epoch": 0.6799680520996528, "grad_norm": 0.4175543487071991, "learning_rate": 1.4846325527314771e-05, "loss": 0.5639, "step": 22135 }, { "epoch": 0.6799987712346021, "grad_norm": 0.37658369541168213, "learning_rate": 1.4845902790350555e-05, "loss": 0.5273, "step": 22136 }, { "epoch": 0.6800294903695512, "grad_norm": 0.42550191283226013, "learning_rate": 1.484548004206839e-05, "loss": 0.5923, "step": 22137 }, { "epoch": 0.6800602095045003, "grad_norm": 0.3247193992137909, "learning_rate": 1.484505728246927e-05, "loss": 0.5517, "step": 22138 }, { "epoch": 0.6800909286394495, "grad_norm": 0.34617891907691956, "learning_rate": 1.4844634511554176e-05, "loss": 0.5397, "step": 22139 }, { "epoch": 0.6801216477743987, "grad_norm": 0.3463086783885956, "learning_rate": 1.4844211729324105e-05, "loss": 0.5716, "step": 22140 }, { "epoch": 0.6801523669093479, "grad_norm": 0.38315537571907043, "learning_rate": 1.484378893578004e-05, "loss": 0.5841, "step": 22141 }, { "epoch": 0.680183086044297, "grad_norm": 0.35016703605651855, "learning_rate": 1.4843366130922965e-05, "loss": 0.5333, "step": 22142 }, { "epoch": 0.6802138051792461, "grad_norm": 0.3545159101486206, "learning_rate": 1.4842943314753871e-05, "loss": 0.5501, "step": 22143 }, { "epoch": 0.6802445243141954, "grad_norm": 0.43917107582092285, "learning_rate": 1.4842520487273743e-05, "loss": 0.5548, "step": 22144 }, { "epoch": 0.6802752434491445, "grad_norm": 0.32861924171447754, "learning_rate": 1.4842097648483572e-05, "loss": 0.506, "step": 22145 }, { "epoch": 0.6803059625840936, "grad_norm": 0.3602423369884491, "learning_rate": 1.4841674798384345e-05, "loss": 0.5807, "step": 22146 }, { "epoch": 0.6803366817190428, "grad_norm": 0.38858675956726074, "learning_rate": 1.4841251936977047e-05, "loss": 0.5573, "step": 22147 }, { "epoch": 0.6803674008539919, "grad_norm": 0.4082488715648651, "learning_rate": 1.4840829064262665e-05, "loss": 0.563, "step": 22148 }, { "epoch": 0.6803981199889411, "grad_norm": 0.37086814641952515, "learning_rate": 1.4840406180242193e-05, "loss": 0.5837, "step": 22149 }, { "epoch": 0.6804288391238903, "grad_norm": 0.3493087887763977, "learning_rate": 1.483998328491661e-05, "loss": 0.5697, "step": 22150 }, { "epoch": 0.6804595582588394, "grad_norm": 0.33861762285232544, "learning_rate": 1.483956037828691e-05, "loss": 0.592, "step": 22151 }, { "epoch": 0.6804902773937886, "grad_norm": 0.3955620527267456, "learning_rate": 1.483913746035408e-05, "loss": 0.5215, "step": 22152 }, { "epoch": 0.6805209965287378, "grad_norm": 0.38833898305892944, "learning_rate": 1.4838714531119105e-05, "loss": 0.5411, "step": 22153 }, { "epoch": 0.6805517156636869, "grad_norm": 0.3675617575645447, "learning_rate": 1.4838291590582974e-05, "loss": 0.5551, "step": 22154 }, { "epoch": 0.6805824347986361, "grad_norm": 0.3962963819503784, "learning_rate": 1.4837868638746673e-05, "loss": 0.542, "step": 22155 }, { "epoch": 0.6806131539335852, "grad_norm": 0.37194663286209106, "learning_rate": 1.4837445675611195e-05, "loss": 0.5595, "step": 22156 }, { "epoch": 0.6806438730685344, "grad_norm": 0.3636319041252136, "learning_rate": 1.4837022701177523e-05, "loss": 0.5857, "step": 22157 }, { "epoch": 0.6806745922034836, "grad_norm": 0.40092724561691284, "learning_rate": 1.4836599715446648e-05, "loss": 0.631, "step": 22158 }, { "epoch": 0.6807053113384327, "grad_norm": 0.33721405267715454, "learning_rate": 1.4836176718419557e-05, "loss": 0.5273, "step": 22159 }, { "epoch": 0.6807360304733818, "grad_norm": 0.37753498554229736, "learning_rate": 1.4835753710097236e-05, "loss": 0.6287, "step": 22160 }, { "epoch": 0.6807667496083311, "grad_norm": 0.35327214002609253, "learning_rate": 1.4835330690480676e-05, "loss": 0.5957, "step": 22161 }, { "epoch": 0.6807974687432802, "grad_norm": 0.43082863092422485, "learning_rate": 1.4834907659570863e-05, "loss": 0.5198, "step": 22162 }, { "epoch": 0.6808281878782293, "grad_norm": 0.37729859352111816, "learning_rate": 1.4834484617368786e-05, "loss": 0.5864, "step": 22163 }, { "epoch": 0.6808589070131785, "grad_norm": 0.3935149013996124, "learning_rate": 1.4834061563875431e-05, "loss": 0.5474, "step": 22164 }, { "epoch": 0.6808896261481276, "grad_norm": 0.37700724601745605, "learning_rate": 1.483363849909179e-05, "loss": 0.5837, "step": 22165 }, { "epoch": 0.6809203452830769, "grad_norm": 0.3355126678943634, "learning_rate": 1.4833215423018845e-05, "loss": 0.5207, "step": 22166 }, { "epoch": 0.680951064418026, "grad_norm": 0.38831827044487, "learning_rate": 1.4832792335657591e-05, "loss": 0.5219, "step": 22167 }, { "epoch": 0.6809817835529751, "grad_norm": 0.33946067094802856, "learning_rate": 1.483236923700901e-05, "loss": 0.5658, "step": 22168 }, { "epoch": 0.6810125026879243, "grad_norm": 0.39119675755500793, "learning_rate": 1.4831946127074097e-05, "loss": 0.5625, "step": 22169 }, { "epoch": 0.6810432218228735, "grad_norm": 0.3245536983013153, "learning_rate": 1.4831523005853836e-05, "loss": 0.5771, "step": 22170 }, { "epoch": 0.6810739409578226, "grad_norm": 0.3521042764186859, "learning_rate": 1.4831099873349212e-05, "loss": 0.574, "step": 22171 }, { "epoch": 0.6811046600927718, "grad_norm": 0.4059060215950012, "learning_rate": 1.483067672956122e-05, "loss": 0.615, "step": 22172 }, { "epoch": 0.6811353792277209, "grad_norm": 0.3792570233345032, "learning_rate": 1.4830253574490843e-05, "loss": 0.5603, "step": 22173 }, { "epoch": 0.6811660983626701, "grad_norm": 0.3655550479888916, "learning_rate": 1.4829830408139074e-05, "loss": 0.5132, "step": 22174 }, { "epoch": 0.6811968174976193, "grad_norm": 0.3231363296508789, "learning_rate": 1.4829407230506896e-05, "loss": 0.4987, "step": 22175 }, { "epoch": 0.6812275366325684, "grad_norm": 0.3765471279621124, "learning_rate": 1.48289840415953e-05, "loss": 0.5764, "step": 22176 }, { "epoch": 0.6812582557675176, "grad_norm": 0.36155396699905396, "learning_rate": 1.4828560841405277e-05, "loss": 0.5342, "step": 22177 }, { "epoch": 0.6812889749024668, "grad_norm": 0.3669131398200989, "learning_rate": 1.482813762993781e-05, "loss": 0.5438, "step": 22178 }, { "epoch": 0.6813196940374159, "grad_norm": 0.7130789756774902, "learning_rate": 1.4827714407193893e-05, "loss": 0.5511, "step": 22179 }, { "epoch": 0.6813504131723651, "grad_norm": 0.40458452701568604, "learning_rate": 1.4827291173174512e-05, "loss": 0.5676, "step": 22180 }, { "epoch": 0.6813811323073142, "grad_norm": 0.3640938997268677, "learning_rate": 1.4826867927880655e-05, "loss": 0.4875, "step": 22181 }, { "epoch": 0.6814118514422633, "grad_norm": 0.39882224798202515, "learning_rate": 1.4826444671313309e-05, "loss": 0.5493, "step": 22182 }, { "epoch": 0.6814425705772126, "grad_norm": 0.3683452606201172, "learning_rate": 1.4826021403473468e-05, "loss": 0.5725, "step": 22183 }, { "epoch": 0.6814732897121617, "grad_norm": 0.38578706979751587, "learning_rate": 1.4825598124362113e-05, "loss": 0.5526, "step": 22184 }, { "epoch": 0.6815040088471108, "grad_norm": 0.36815759539604187, "learning_rate": 1.4825174833980238e-05, "loss": 0.5357, "step": 22185 }, { "epoch": 0.68153472798206, "grad_norm": 0.32993149757385254, "learning_rate": 1.4824751532328832e-05, "loss": 0.602, "step": 22186 }, { "epoch": 0.6815654471170092, "grad_norm": 0.3609321415424347, "learning_rate": 1.4824328219408879e-05, "loss": 0.5709, "step": 22187 }, { "epoch": 0.6815961662519583, "grad_norm": 0.3687096834182739, "learning_rate": 1.4823904895221373e-05, "loss": 0.5898, "step": 22188 }, { "epoch": 0.6816268853869075, "grad_norm": 0.3951222896575928, "learning_rate": 1.4823481559767296e-05, "loss": 0.6349, "step": 22189 }, { "epoch": 0.6816576045218566, "grad_norm": 0.3481368124485016, "learning_rate": 1.4823058213047645e-05, "loss": 0.5876, "step": 22190 }, { "epoch": 0.6816883236568059, "grad_norm": 0.3755500316619873, "learning_rate": 1.4822634855063403e-05, "loss": 0.5774, "step": 22191 }, { "epoch": 0.681719042791755, "grad_norm": 0.3498498499393463, "learning_rate": 1.4822211485815562e-05, "loss": 0.5526, "step": 22192 }, { "epoch": 0.6817497619267041, "grad_norm": 0.36222022771835327, "learning_rate": 1.4821788105305108e-05, "loss": 0.561, "step": 22193 }, { "epoch": 0.6817804810616533, "grad_norm": 0.37609559297561646, "learning_rate": 1.4821364713533031e-05, "loss": 0.5953, "step": 22194 }, { "epoch": 0.6818112001966025, "grad_norm": 0.34140267968177795, "learning_rate": 1.4820941310500321e-05, "loss": 0.5005, "step": 22195 }, { "epoch": 0.6818419193315516, "grad_norm": 0.4103870689868927, "learning_rate": 1.4820517896207965e-05, "loss": 0.6046, "step": 22196 }, { "epoch": 0.6818726384665008, "grad_norm": 0.3942112624645233, "learning_rate": 1.4820094470656953e-05, "loss": 0.7537, "step": 22197 }, { "epoch": 0.6819033576014499, "grad_norm": 0.3822396397590637, "learning_rate": 1.4819671033848272e-05, "loss": 0.5579, "step": 22198 }, { "epoch": 0.681934076736399, "grad_norm": 0.41991814970970154, "learning_rate": 1.4819247585782916e-05, "loss": 0.6714, "step": 22199 }, { "epoch": 0.6819647958713483, "grad_norm": 0.36395299434661865, "learning_rate": 1.4818824126461868e-05, "loss": 0.5357, "step": 22200 }, { "epoch": 0.6819955150062974, "grad_norm": 0.4017806053161621, "learning_rate": 1.4818400655886122e-05, "loss": 0.5972, "step": 22201 }, { "epoch": 0.6820262341412466, "grad_norm": 0.37514379620552063, "learning_rate": 1.4817977174056663e-05, "loss": 0.5416, "step": 22202 }, { "epoch": 0.6820569532761958, "grad_norm": 0.41145485639572144, "learning_rate": 1.4817553680974484e-05, "loss": 0.5539, "step": 22203 }, { "epoch": 0.6820876724111449, "grad_norm": 0.5077033638954163, "learning_rate": 1.481713017664057e-05, "loss": 0.4546, "step": 22204 }, { "epoch": 0.6821183915460941, "grad_norm": 0.35223135352134705, "learning_rate": 1.4816706661055913e-05, "loss": 0.6412, "step": 22205 }, { "epoch": 0.6821491106810432, "grad_norm": 0.36534014344215393, "learning_rate": 1.4816283134221502e-05, "loss": 0.5827, "step": 22206 }, { "epoch": 0.6821798298159923, "grad_norm": 0.35061728954315186, "learning_rate": 1.4815859596138322e-05, "loss": 0.5458, "step": 22207 }, { "epoch": 0.6822105489509416, "grad_norm": 0.34773802757263184, "learning_rate": 1.481543604680737e-05, "loss": 0.5485, "step": 22208 }, { "epoch": 0.6822412680858907, "grad_norm": 0.3850644826889038, "learning_rate": 1.4815012486229627e-05, "loss": 0.5964, "step": 22209 }, { "epoch": 0.6822719872208398, "grad_norm": 0.3674179017543793, "learning_rate": 1.481458891440609e-05, "loss": 0.5002, "step": 22210 }, { "epoch": 0.682302706355789, "grad_norm": 0.3804352581501007, "learning_rate": 1.481416533133774e-05, "loss": 0.5598, "step": 22211 }, { "epoch": 0.6823334254907382, "grad_norm": 0.5574365258216858, "learning_rate": 1.481374173702557e-05, "loss": 0.54, "step": 22212 }, { "epoch": 0.6823641446256873, "grad_norm": 0.35065412521362305, "learning_rate": 1.4813318131470572e-05, "loss": 0.5554, "step": 22213 }, { "epoch": 0.6823948637606365, "grad_norm": 0.343891978263855, "learning_rate": 1.4812894514673735e-05, "loss": 0.5602, "step": 22214 }, { "epoch": 0.6824255828955856, "grad_norm": 0.3854948878288269, "learning_rate": 1.4812470886636048e-05, "loss": 0.5233, "step": 22215 }, { "epoch": 0.6824563020305349, "grad_norm": 0.3765072822570801, "learning_rate": 1.4812047247358495e-05, "loss": 0.6034, "step": 22216 }, { "epoch": 0.682487021165484, "grad_norm": 0.3754224181175232, "learning_rate": 1.4811623596842073e-05, "loss": 0.561, "step": 22217 }, { "epoch": 0.6825177403004331, "grad_norm": 0.3579805791378021, "learning_rate": 1.4811199935087764e-05, "loss": 0.5979, "step": 22218 }, { "epoch": 0.6825484594353823, "grad_norm": 0.3612287938594818, "learning_rate": 1.4810776262096565e-05, "loss": 0.5857, "step": 22219 }, { "epoch": 0.6825791785703315, "grad_norm": 0.4651626944541931, "learning_rate": 1.481035257786946e-05, "loss": 0.5721, "step": 22220 }, { "epoch": 0.6826098977052806, "grad_norm": 0.33850157260894775, "learning_rate": 1.4809928882407442e-05, "loss": 0.5379, "step": 22221 }, { "epoch": 0.6826406168402298, "grad_norm": 0.4019697606563568, "learning_rate": 1.4809505175711501e-05, "loss": 0.5744, "step": 22222 }, { "epoch": 0.6826713359751789, "grad_norm": 0.3800772726535797, "learning_rate": 1.4809081457782622e-05, "loss": 0.59, "step": 22223 }, { "epoch": 0.682702055110128, "grad_norm": 0.3500741422176361, "learning_rate": 1.4808657728621799e-05, "loss": 0.5555, "step": 22224 }, { "epoch": 0.6827327742450773, "grad_norm": 0.3243032395839691, "learning_rate": 1.4808233988230016e-05, "loss": 0.5556, "step": 22225 }, { "epoch": 0.6827634933800264, "grad_norm": 0.3588520586490631, "learning_rate": 1.480781023660827e-05, "loss": 0.545, "step": 22226 }, { "epoch": 0.6827942125149756, "grad_norm": 0.40104860067367554, "learning_rate": 1.4807386473757548e-05, "loss": 0.5169, "step": 22227 }, { "epoch": 0.6828249316499247, "grad_norm": 0.3433799147605896, "learning_rate": 1.480696269967884e-05, "loss": 0.5169, "step": 22228 }, { "epoch": 0.6828556507848739, "grad_norm": 0.346200168132782, "learning_rate": 1.4806538914373132e-05, "loss": 0.5942, "step": 22229 }, { "epoch": 0.6828863699198231, "grad_norm": 0.37860584259033203, "learning_rate": 1.4806115117841416e-05, "loss": 0.5297, "step": 22230 }, { "epoch": 0.6829170890547722, "grad_norm": 0.3422708213329315, "learning_rate": 1.4805691310084684e-05, "loss": 0.4955, "step": 22231 }, { "epoch": 0.6829478081897213, "grad_norm": 0.356072336435318, "learning_rate": 1.4805267491103924e-05, "loss": 0.5867, "step": 22232 }, { "epoch": 0.6829785273246706, "grad_norm": 0.35442855954170227, "learning_rate": 1.4804843660900127e-05, "loss": 0.4711, "step": 22233 }, { "epoch": 0.6830092464596197, "grad_norm": 0.34832268953323364, "learning_rate": 1.4804419819474283e-05, "loss": 0.5151, "step": 22234 }, { "epoch": 0.6830399655945688, "grad_norm": 0.3589058518409729, "learning_rate": 1.4803995966827381e-05, "loss": 0.5061, "step": 22235 }, { "epoch": 0.683070684729518, "grad_norm": 0.36883097887039185, "learning_rate": 1.4803572102960409e-05, "loss": 0.6039, "step": 22236 }, { "epoch": 0.6831014038644672, "grad_norm": 0.3689591586589813, "learning_rate": 1.480314822787436e-05, "loss": 0.575, "step": 22237 }, { "epoch": 0.6831321229994164, "grad_norm": 0.37607231736183167, "learning_rate": 1.4802724341570222e-05, "loss": 0.6259, "step": 22238 }, { "epoch": 0.6831628421343655, "grad_norm": 0.36341506242752075, "learning_rate": 1.4802300444048987e-05, "loss": 0.5016, "step": 22239 }, { "epoch": 0.6831935612693146, "grad_norm": 0.3764829933643341, "learning_rate": 1.4801876535311644e-05, "loss": 0.5596, "step": 22240 }, { "epoch": 0.6832242804042639, "grad_norm": 0.39373353123664856, "learning_rate": 1.4801452615359181e-05, "loss": 0.5507, "step": 22241 }, { "epoch": 0.683254999539213, "grad_norm": 0.41016581654548645, "learning_rate": 1.4801028684192594e-05, "loss": 0.6361, "step": 22242 }, { "epoch": 0.6832857186741621, "grad_norm": 0.34405845403671265, "learning_rate": 1.4800604741812862e-05, "loss": 0.5253, "step": 22243 }, { "epoch": 0.6833164378091113, "grad_norm": 0.39178839325904846, "learning_rate": 1.480018078822099e-05, "loss": 0.5305, "step": 22244 }, { "epoch": 0.6833471569440605, "grad_norm": 0.34684786200523376, "learning_rate": 1.4799756823417956e-05, "loss": 0.4938, "step": 22245 }, { "epoch": 0.6833778760790096, "grad_norm": 0.41800457239151, "learning_rate": 1.4799332847404757e-05, "loss": 0.6106, "step": 22246 }, { "epoch": 0.6834085952139588, "grad_norm": 0.41450586915016174, "learning_rate": 1.479890886018238e-05, "loss": 0.5652, "step": 22247 }, { "epoch": 0.6834393143489079, "grad_norm": 0.3535746932029724, "learning_rate": 1.4798484861751817e-05, "loss": 0.5841, "step": 22248 }, { "epoch": 0.683470033483857, "grad_norm": 0.4018350839614868, "learning_rate": 1.4798060852114056e-05, "loss": 0.5545, "step": 22249 }, { "epoch": 0.6835007526188063, "grad_norm": 0.3481876850128174, "learning_rate": 1.4797636831270088e-05, "loss": 0.5666, "step": 22250 }, { "epoch": 0.6835314717537554, "grad_norm": 0.3334049880504608, "learning_rate": 1.4797212799220908e-05, "loss": 0.5201, "step": 22251 }, { "epoch": 0.6835621908887046, "grad_norm": 0.38042202591896057, "learning_rate": 1.4796788755967497e-05, "loss": 0.5854, "step": 22252 }, { "epoch": 0.6835929100236537, "grad_norm": 0.384789377450943, "learning_rate": 1.4796364701510857e-05, "loss": 0.5371, "step": 22253 }, { "epoch": 0.6836236291586029, "grad_norm": 0.3517173230648041, "learning_rate": 1.4795940635851968e-05, "loss": 0.4959, "step": 22254 }, { "epoch": 0.6836543482935521, "grad_norm": 0.33009183406829834, "learning_rate": 1.4795516558991827e-05, "loss": 0.4779, "step": 22255 }, { "epoch": 0.6836850674285012, "grad_norm": 0.3506251275539398, "learning_rate": 1.4795092470931423e-05, "loss": 0.5585, "step": 22256 }, { "epoch": 0.6837157865634503, "grad_norm": 0.3336748480796814, "learning_rate": 1.4794668371671743e-05, "loss": 0.5779, "step": 22257 }, { "epoch": 0.6837465056983996, "grad_norm": 0.4002193510532379, "learning_rate": 1.4794244261213782e-05, "loss": 0.5897, "step": 22258 }, { "epoch": 0.6837772248333487, "grad_norm": 0.38220927119255066, "learning_rate": 1.4793820139558526e-05, "loss": 0.4615, "step": 22259 }, { "epoch": 0.6838079439682978, "grad_norm": 0.347542941570282, "learning_rate": 1.479339600670697e-05, "loss": 0.5394, "step": 22260 }, { "epoch": 0.683838663103247, "grad_norm": 0.3968759775161743, "learning_rate": 1.4792971862660102e-05, "loss": 0.4995, "step": 22261 }, { "epoch": 0.6838693822381962, "grad_norm": 0.3786665201187134, "learning_rate": 1.4792547707418917e-05, "loss": 0.555, "step": 22262 }, { "epoch": 0.6839001013731454, "grad_norm": 0.32154178619384766, "learning_rate": 1.4792123540984398e-05, "loss": 0.5566, "step": 22263 }, { "epoch": 0.6839308205080945, "grad_norm": 0.3642008304595947, "learning_rate": 1.479169936335754e-05, "loss": 0.5194, "step": 22264 }, { "epoch": 0.6839615396430436, "grad_norm": 0.32767942547798157, "learning_rate": 1.4791275174539335e-05, "loss": 0.5782, "step": 22265 }, { "epoch": 0.6839922587779929, "grad_norm": 0.35843732953071594, "learning_rate": 1.479085097453077e-05, "loss": 0.5592, "step": 22266 }, { "epoch": 0.684022977912942, "grad_norm": 0.3200001120567322, "learning_rate": 1.4790426763332842e-05, "loss": 0.5301, "step": 22267 }, { "epoch": 0.6840536970478911, "grad_norm": 0.38590767979621887, "learning_rate": 1.4790002540946536e-05, "loss": 0.5899, "step": 22268 }, { "epoch": 0.6840844161828403, "grad_norm": 0.35063958168029785, "learning_rate": 1.4789578307372842e-05, "loss": 0.4849, "step": 22269 }, { "epoch": 0.6841151353177894, "grad_norm": 0.35643404722213745, "learning_rate": 1.4789154062612757e-05, "loss": 0.5792, "step": 22270 }, { "epoch": 0.6841458544527386, "grad_norm": 0.33064237236976624, "learning_rate": 1.4788729806667266e-05, "loss": 0.5279, "step": 22271 }, { "epoch": 0.6841765735876878, "grad_norm": 0.3766935467720032, "learning_rate": 1.4788305539537362e-05, "loss": 0.4976, "step": 22272 }, { "epoch": 0.6842072927226369, "grad_norm": 0.3716386556625366, "learning_rate": 1.478788126122404e-05, "loss": 0.5359, "step": 22273 }, { "epoch": 0.684238011857586, "grad_norm": 0.3464394807815552, "learning_rate": 1.4787456971728284e-05, "loss": 0.5225, "step": 22274 }, { "epoch": 0.6842687309925353, "grad_norm": 0.3254960775375366, "learning_rate": 1.4787032671051087e-05, "loss": 0.5565, "step": 22275 }, { "epoch": 0.6842994501274844, "grad_norm": 0.37534868717193604, "learning_rate": 1.4786608359193442e-05, "loss": 0.5125, "step": 22276 }, { "epoch": 0.6843301692624336, "grad_norm": 0.3865952789783478, "learning_rate": 1.4786184036156337e-05, "loss": 0.544, "step": 22277 }, { "epoch": 0.6843608883973827, "grad_norm": 0.35770416259765625, "learning_rate": 1.4785759701940767e-05, "loss": 0.5685, "step": 22278 }, { "epoch": 0.6843916075323319, "grad_norm": 0.34681347012519836, "learning_rate": 1.4785335356547719e-05, "loss": 0.5804, "step": 22279 }, { "epoch": 0.6844223266672811, "grad_norm": 0.40937232971191406, "learning_rate": 1.4784910999978189e-05, "loss": 0.5749, "step": 22280 }, { "epoch": 0.6844530458022302, "grad_norm": 0.3323512077331543, "learning_rate": 1.4784486632233161e-05, "loss": 0.5181, "step": 22281 }, { "epoch": 0.6844837649371793, "grad_norm": 0.4001200497150421, "learning_rate": 1.4784062253313635e-05, "loss": 0.5718, "step": 22282 }, { "epoch": 0.6845144840721286, "grad_norm": 0.41735535860061646, "learning_rate": 1.4783637863220595e-05, "loss": 0.5937, "step": 22283 }, { "epoch": 0.6845452032070777, "grad_norm": 0.3670138120651245, "learning_rate": 1.4783213461955032e-05, "loss": 0.6421, "step": 22284 }, { "epoch": 0.6845759223420268, "grad_norm": 0.4336550235748291, "learning_rate": 1.4782789049517944e-05, "loss": 0.5967, "step": 22285 }, { "epoch": 0.684606641476976, "grad_norm": 0.3548167943954468, "learning_rate": 1.4782364625910316e-05, "loss": 0.5894, "step": 22286 }, { "epoch": 0.6846373606119251, "grad_norm": 0.435347318649292, "learning_rate": 1.4781940191133146e-05, "loss": 0.5318, "step": 22287 }, { "epoch": 0.6846680797468744, "grad_norm": 0.3556582033634186, "learning_rate": 1.4781515745187415e-05, "loss": 0.5668, "step": 22288 }, { "epoch": 0.6846987988818235, "grad_norm": 0.3755463659763336, "learning_rate": 1.4781091288074123e-05, "loss": 0.5674, "step": 22289 }, { "epoch": 0.6847295180167726, "grad_norm": 0.3355221748352051, "learning_rate": 1.4780666819794261e-05, "loss": 0.4824, "step": 22290 }, { "epoch": 0.6847602371517219, "grad_norm": 0.3381446301937103, "learning_rate": 1.4780242340348812e-05, "loss": 0.5293, "step": 22291 }, { "epoch": 0.684790956286671, "grad_norm": 0.3731398284435272, "learning_rate": 1.4779817849738777e-05, "loss": 0.5788, "step": 22292 }, { "epoch": 0.6848216754216201, "grad_norm": 0.39404022693634033, "learning_rate": 1.4779393347965142e-05, "loss": 0.589, "step": 22293 }, { "epoch": 0.6848523945565693, "grad_norm": 0.33856692910194397, "learning_rate": 1.4778968835028899e-05, "loss": 0.5579, "step": 22294 }, { "epoch": 0.6848831136915184, "grad_norm": 0.3255302608013153, "learning_rate": 1.4778544310931043e-05, "loss": 0.5106, "step": 22295 }, { "epoch": 0.6849138328264676, "grad_norm": 0.34864169359207153, "learning_rate": 1.4778119775672561e-05, "loss": 0.5566, "step": 22296 }, { "epoch": 0.6849445519614168, "grad_norm": 0.3541164994239807, "learning_rate": 1.4777695229254447e-05, "loss": 0.4879, "step": 22297 }, { "epoch": 0.6849752710963659, "grad_norm": 0.36169862747192383, "learning_rate": 1.4777270671677694e-05, "loss": 0.5486, "step": 22298 }, { "epoch": 0.685005990231315, "grad_norm": 0.3894304037094116, "learning_rate": 1.477684610294329e-05, "loss": 0.5268, "step": 22299 }, { "epoch": 0.6850367093662643, "grad_norm": 0.34811967611312866, "learning_rate": 1.4776421523052229e-05, "loss": 0.5462, "step": 22300 }, { "epoch": 0.6850674285012134, "grad_norm": 0.4495047926902771, "learning_rate": 1.4775996932005501e-05, "loss": 0.5674, "step": 22301 }, { "epoch": 0.6850981476361626, "grad_norm": 0.3368246257305145, "learning_rate": 1.4775572329804097e-05, "loss": 0.4841, "step": 22302 }, { "epoch": 0.6851288667711117, "grad_norm": 0.37233966588974, "learning_rate": 1.4775147716449014e-05, "loss": 0.5522, "step": 22303 }, { "epoch": 0.6851595859060609, "grad_norm": 0.6251071691513062, "learning_rate": 1.4774723091941236e-05, "loss": 0.5913, "step": 22304 }, { "epoch": 0.6851903050410101, "grad_norm": 0.36423981189727783, "learning_rate": 1.4774298456281764e-05, "loss": 0.5519, "step": 22305 }, { "epoch": 0.6852210241759592, "grad_norm": 0.3686211109161377, "learning_rate": 1.477387380947158e-05, "loss": 0.4992, "step": 22306 }, { "epoch": 0.6852517433109083, "grad_norm": 0.3721068501472473, "learning_rate": 1.4773449151511682e-05, "loss": 0.531, "step": 22307 }, { "epoch": 0.6852824624458576, "grad_norm": 0.43197715282440186, "learning_rate": 1.4773024482403063e-05, "loss": 0.6337, "step": 22308 }, { "epoch": 0.6853131815808067, "grad_norm": 0.4323098957538605, "learning_rate": 1.4772599802146708e-05, "loss": 0.5893, "step": 22309 }, { "epoch": 0.6853439007157558, "grad_norm": 0.36336231231689453, "learning_rate": 1.4772175110743614e-05, "loss": 0.5851, "step": 22310 }, { "epoch": 0.685374619850705, "grad_norm": 0.3443112075328827, "learning_rate": 1.4771750408194772e-05, "loss": 0.5561, "step": 22311 }, { "epoch": 0.6854053389856541, "grad_norm": 0.3817789852619171, "learning_rate": 1.4771325694501172e-05, "loss": 0.6055, "step": 22312 }, { "epoch": 0.6854360581206034, "grad_norm": 0.346361368894577, "learning_rate": 1.4770900969663809e-05, "loss": 0.482, "step": 22313 }, { "epoch": 0.6854667772555525, "grad_norm": 0.36099985241889954, "learning_rate": 1.4770476233683674e-05, "loss": 0.5278, "step": 22314 }, { "epoch": 0.6854974963905016, "grad_norm": 0.4062071144580841, "learning_rate": 1.4770051486561758e-05, "loss": 0.5235, "step": 22315 }, { "epoch": 0.6855282155254508, "grad_norm": 0.3940903842449188, "learning_rate": 1.4769626728299056e-05, "loss": 0.5417, "step": 22316 }, { "epoch": 0.6855589346604, "grad_norm": 0.3900858759880066, "learning_rate": 1.4769201958896554e-05, "loss": 0.5621, "step": 22317 }, { "epoch": 0.6855896537953491, "grad_norm": 0.5134332180023193, "learning_rate": 1.4768777178355249e-05, "loss": 0.5572, "step": 22318 }, { "epoch": 0.6856203729302983, "grad_norm": 0.37263745069503784, "learning_rate": 1.4768352386676132e-05, "loss": 0.4788, "step": 22319 }, { "epoch": 0.6856510920652474, "grad_norm": 0.4311971068382263, "learning_rate": 1.4767927583860195e-05, "loss": 0.6085, "step": 22320 }, { "epoch": 0.6856818112001966, "grad_norm": 0.38544154167175293, "learning_rate": 1.4767502769908428e-05, "loss": 0.576, "step": 22321 }, { "epoch": 0.6857125303351458, "grad_norm": 0.44126421213150024, "learning_rate": 1.4767077944821829e-05, "loss": 0.5586, "step": 22322 }, { "epoch": 0.6857432494700949, "grad_norm": 0.39214202761650085, "learning_rate": 1.4766653108601386e-05, "loss": 0.5284, "step": 22323 }, { "epoch": 0.6857739686050441, "grad_norm": 0.3679179847240448, "learning_rate": 1.476622826124809e-05, "loss": 0.5344, "step": 22324 }, { "epoch": 0.6858046877399933, "grad_norm": 0.3631734848022461, "learning_rate": 1.4765803402762935e-05, "loss": 0.5351, "step": 22325 }, { "epoch": 0.6858354068749424, "grad_norm": 0.3324226140975952, "learning_rate": 1.4765378533146915e-05, "loss": 0.536, "step": 22326 }, { "epoch": 0.6858661260098916, "grad_norm": 0.36465033888816833, "learning_rate": 1.476495365240102e-05, "loss": 0.5581, "step": 22327 }, { "epoch": 0.6858968451448407, "grad_norm": 0.33881503343582153, "learning_rate": 1.4764528760526244e-05, "loss": 0.5393, "step": 22328 }, { "epoch": 0.6859275642797898, "grad_norm": 0.37398549914360046, "learning_rate": 1.4764103857523575e-05, "loss": 0.5427, "step": 22329 }, { "epoch": 0.6859582834147391, "grad_norm": 0.39993977546691895, "learning_rate": 1.4763678943394011e-05, "loss": 0.5407, "step": 22330 }, { "epoch": 0.6859890025496882, "grad_norm": 0.3893808126449585, "learning_rate": 1.4763254018138542e-05, "loss": 0.5853, "step": 22331 }, { "epoch": 0.6860197216846373, "grad_norm": 0.3862139880657196, "learning_rate": 1.476282908175816e-05, "loss": 0.5498, "step": 22332 }, { "epoch": 0.6860504408195865, "grad_norm": 0.3720901310443878, "learning_rate": 1.4762404134253859e-05, "loss": 0.5615, "step": 22333 }, { "epoch": 0.6860811599545357, "grad_norm": 0.3489682674407959, "learning_rate": 1.4761979175626631e-05, "loss": 0.5308, "step": 22334 }, { "epoch": 0.6861118790894848, "grad_norm": 0.4014577269554138, "learning_rate": 1.4761554205877467e-05, "loss": 0.5744, "step": 22335 }, { "epoch": 0.686142598224434, "grad_norm": 0.40987345576286316, "learning_rate": 1.476112922500736e-05, "loss": 0.5706, "step": 22336 }, { "epoch": 0.6861733173593831, "grad_norm": 0.37753501534461975, "learning_rate": 1.4760704233017307e-05, "loss": 0.5785, "step": 22337 }, { "epoch": 0.6862040364943324, "grad_norm": 0.5889684557914734, "learning_rate": 1.4760279229908292e-05, "loss": 0.5153, "step": 22338 }, { "epoch": 0.6862347556292815, "grad_norm": 0.42323732376098633, "learning_rate": 1.4759854215681312e-05, "loss": 0.5657, "step": 22339 }, { "epoch": 0.6862654747642306, "grad_norm": 0.3398694396018982, "learning_rate": 1.4759429190337362e-05, "loss": 0.5085, "step": 22340 }, { "epoch": 0.6862961938991798, "grad_norm": 0.3879204988479614, "learning_rate": 1.4759004153877434e-05, "loss": 0.5734, "step": 22341 }, { "epoch": 0.686326913034129, "grad_norm": 0.3882334530353546, "learning_rate": 1.475857910630252e-05, "loss": 0.4921, "step": 22342 }, { "epoch": 0.6863576321690781, "grad_norm": 0.3473150134086609, "learning_rate": 1.4758154047613609e-05, "loss": 0.5583, "step": 22343 }, { "epoch": 0.6863883513040273, "grad_norm": 0.345386803150177, "learning_rate": 1.4757728977811698e-05, "loss": 0.5745, "step": 22344 }, { "epoch": 0.6864190704389764, "grad_norm": 0.35682782530784607, "learning_rate": 1.4757303896897779e-05, "loss": 0.5647, "step": 22345 }, { "epoch": 0.6864497895739256, "grad_norm": 0.3969445824623108, "learning_rate": 1.4756878804872845e-05, "loss": 0.5764, "step": 22346 }, { "epoch": 0.6864805087088748, "grad_norm": 0.4185381233692169, "learning_rate": 1.4756453701737887e-05, "loss": 0.5992, "step": 22347 }, { "epoch": 0.6865112278438239, "grad_norm": 0.3749343752861023, "learning_rate": 1.4756028587493903e-05, "loss": 0.5768, "step": 22348 }, { "epoch": 0.6865419469787731, "grad_norm": 0.3482033610343933, "learning_rate": 1.4755603462141877e-05, "loss": 0.5251, "step": 22349 }, { "epoch": 0.6865726661137223, "grad_norm": 0.4630645513534546, "learning_rate": 1.475517832568281e-05, "loss": 0.5183, "step": 22350 }, { "epoch": 0.6866033852486714, "grad_norm": 0.3697907030582428, "learning_rate": 1.475475317811769e-05, "loss": 0.5781, "step": 22351 }, { "epoch": 0.6866341043836206, "grad_norm": 0.3608085811138153, "learning_rate": 1.4754328019447514e-05, "loss": 0.513, "step": 22352 }, { "epoch": 0.6866648235185697, "grad_norm": 0.3353351056575775, "learning_rate": 1.4753902849673271e-05, "loss": 0.5338, "step": 22353 }, { "epoch": 0.6866955426535188, "grad_norm": 0.336324542760849, "learning_rate": 1.4753477668795957e-05, "loss": 0.5581, "step": 22354 }, { "epoch": 0.6867262617884681, "grad_norm": 0.3491520881652832, "learning_rate": 1.4753052476816565e-05, "loss": 0.5609, "step": 22355 }, { "epoch": 0.6867569809234172, "grad_norm": 0.3609473705291748, "learning_rate": 1.4752627273736084e-05, "loss": 0.4806, "step": 22356 }, { "epoch": 0.6867877000583663, "grad_norm": 0.3369573652744293, "learning_rate": 1.4752202059555511e-05, "loss": 0.5395, "step": 22357 }, { "epoch": 0.6868184191933155, "grad_norm": 0.36505231261253357, "learning_rate": 1.475177683427584e-05, "loss": 0.5871, "step": 22358 }, { "epoch": 0.6868491383282647, "grad_norm": 0.382931113243103, "learning_rate": 1.475135159789806e-05, "loss": 0.5042, "step": 22359 }, { "epoch": 0.6868798574632138, "grad_norm": 0.3991295397281647, "learning_rate": 1.4750926350423169e-05, "loss": 0.5804, "step": 22360 }, { "epoch": 0.686910576598163, "grad_norm": 0.41785621643066406, "learning_rate": 1.4750501091852156e-05, "loss": 0.5193, "step": 22361 }, { "epoch": 0.6869412957331121, "grad_norm": 0.3722258508205414, "learning_rate": 1.4750075822186018e-05, "loss": 0.5708, "step": 22362 }, { "epoch": 0.6869720148680614, "grad_norm": 0.3718307316303253, "learning_rate": 1.4749650541425744e-05, "loss": 0.6225, "step": 22363 }, { "epoch": 0.6870027340030105, "grad_norm": 0.3465326130390167, "learning_rate": 1.4749225249572331e-05, "loss": 0.4983, "step": 22364 }, { "epoch": 0.6870334531379596, "grad_norm": 0.34115779399871826, "learning_rate": 1.4748799946626769e-05, "loss": 0.5223, "step": 22365 }, { "epoch": 0.6870641722729088, "grad_norm": 0.3741704523563385, "learning_rate": 1.4748374632590055e-05, "loss": 0.6011, "step": 22366 }, { "epoch": 0.687094891407858, "grad_norm": 0.3747040927410126, "learning_rate": 1.4747949307463179e-05, "loss": 0.6059, "step": 22367 }, { "epoch": 0.6871256105428071, "grad_norm": 0.43832188844680786, "learning_rate": 1.4747523971247136e-05, "loss": 0.5371, "step": 22368 }, { "epoch": 0.6871563296777563, "grad_norm": 0.31273186206817627, "learning_rate": 1.474709862394292e-05, "loss": 0.4413, "step": 22369 }, { "epoch": 0.6871870488127054, "grad_norm": 0.5333507657051086, "learning_rate": 1.474667326555152e-05, "loss": 0.5738, "step": 22370 }, { "epoch": 0.6872177679476545, "grad_norm": 0.37260785698890686, "learning_rate": 1.474624789607394e-05, "loss": 0.5573, "step": 22371 }, { "epoch": 0.6872484870826038, "grad_norm": 0.3530086278915405, "learning_rate": 1.474582251551116e-05, "loss": 0.5542, "step": 22372 }, { "epoch": 0.6872792062175529, "grad_norm": 0.3579792082309723, "learning_rate": 1.4745397123864184e-05, "loss": 0.5868, "step": 22373 }, { "epoch": 0.6873099253525021, "grad_norm": 0.5266643166542053, "learning_rate": 1.4744971721134001e-05, "loss": 0.5519, "step": 22374 }, { "epoch": 0.6873406444874512, "grad_norm": 0.37177756428718567, "learning_rate": 1.4744546307321602e-05, "loss": 0.6035, "step": 22375 }, { "epoch": 0.6873713636224004, "grad_norm": 0.4077036678791046, "learning_rate": 1.4744120882427986e-05, "loss": 0.5275, "step": 22376 }, { "epoch": 0.6874020827573496, "grad_norm": 0.3727896809577942, "learning_rate": 1.4743695446454145e-05, "loss": 0.6031, "step": 22377 }, { "epoch": 0.6874328018922987, "grad_norm": 0.39038312435150146, "learning_rate": 1.4743269999401073e-05, "loss": 0.543, "step": 22378 }, { "epoch": 0.6874635210272478, "grad_norm": 0.37423965334892273, "learning_rate": 1.474284454126976e-05, "loss": 0.6125, "step": 22379 }, { "epoch": 0.6874942401621971, "grad_norm": 0.39887017011642456, "learning_rate": 1.4742419072061204e-05, "loss": 0.533, "step": 22380 }, { "epoch": 0.6875249592971462, "grad_norm": 0.33520635962486267, "learning_rate": 1.4741993591776395e-05, "loss": 0.566, "step": 22381 }, { "epoch": 0.6875556784320953, "grad_norm": 0.36356282234191895, "learning_rate": 1.4741568100416332e-05, "loss": 0.5403, "step": 22382 }, { "epoch": 0.6875863975670445, "grad_norm": 0.36575838923454285, "learning_rate": 1.4741142597982003e-05, "loss": 0.5699, "step": 22383 }, { "epoch": 0.6876171167019937, "grad_norm": 0.36901339888572693, "learning_rate": 1.4740717084474407e-05, "loss": 0.5662, "step": 22384 }, { "epoch": 0.6876478358369428, "grad_norm": 0.3753169775009155, "learning_rate": 1.4740291559894533e-05, "loss": 0.5556, "step": 22385 }, { "epoch": 0.687678554971892, "grad_norm": 0.3644852042198181, "learning_rate": 1.4739866024243377e-05, "loss": 0.5332, "step": 22386 }, { "epoch": 0.6877092741068411, "grad_norm": 0.3406446874141693, "learning_rate": 1.4739440477521933e-05, "loss": 0.5407, "step": 22387 }, { "epoch": 0.6877399932417904, "grad_norm": 0.34284546971321106, "learning_rate": 1.4739014919731196e-05, "loss": 0.5613, "step": 22388 }, { "epoch": 0.6877707123767395, "grad_norm": 0.3722798526287079, "learning_rate": 1.4738589350872156e-05, "loss": 0.5416, "step": 22389 }, { "epoch": 0.6878014315116886, "grad_norm": 0.33350932598114014, "learning_rate": 1.4738163770945813e-05, "loss": 0.5595, "step": 22390 }, { "epoch": 0.6878321506466378, "grad_norm": 0.3624928891658783, "learning_rate": 1.4737738179953156e-05, "loss": 0.5203, "step": 22391 }, { "epoch": 0.687862869781587, "grad_norm": 0.36742350459098816, "learning_rate": 1.473731257789518e-05, "loss": 0.5475, "step": 22392 }, { "epoch": 0.6878935889165361, "grad_norm": 0.34641027450561523, "learning_rate": 1.4736886964772881e-05, "loss": 0.6441, "step": 22393 }, { "epoch": 0.6879243080514853, "grad_norm": 0.42731547355651855, "learning_rate": 1.4736461340587247e-05, "loss": 0.5197, "step": 22394 }, { "epoch": 0.6879550271864344, "grad_norm": 0.37275049090385437, "learning_rate": 1.4736035705339282e-05, "loss": 0.4977, "step": 22395 }, { "epoch": 0.6879857463213835, "grad_norm": 0.3233494162559509, "learning_rate": 1.4735610059029974e-05, "loss": 0.5202, "step": 22396 }, { "epoch": 0.6880164654563328, "grad_norm": 0.4228224456310272, "learning_rate": 1.4735184401660317e-05, "loss": 0.5778, "step": 22397 }, { "epoch": 0.6880471845912819, "grad_norm": 0.38391566276550293, "learning_rate": 1.473475873323131e-05, "loss": 0.5191, "step": 22398 }, { "epoch": 0.6880779037262311, "grad_norm": 0.3771548569202423, "learning_rate": 1.4734333053743936e-05, "loss": 0.5421, "step": 22399 }, { "epoch": 0.6881086228611802, "grad_norm": 0.3270934224128723, "learning_rate": 1.4733907363199202e-05, "loss": 0.5733, "step": 22400 }, { "epoch": 0.6881393419961294, "grad_norm": 0.3983690142631531, "learning_rate": 1.4733481661598094e-05, "loss": 0.533, "step": 22401 }, { "epoch": 0.6881700611310786, "grad_norm": 0.3734649419784546, "learning_rate": 1.473305594894161e-05, "loss": 0.5793, "step": 22402 }, { "epoch": 0.6882007802660277, "grad_norm": 0.34604260325431824, "learning_rate": 1.4732630225230744e-05, "loss": 0.4953, "step": 22403 }, { "epoch": 0.6882314994009768, "grad_norm": 0.42020168900489807, "learning_rate": 1.473220449046649e-05, "loss": 0.5222, "step": 22404 }, { "epoch": 0.6882622185359261, "grad_norm": 0.3245352804660797, "learning_rate": 1.473177874464984e-05, "loss": 0.5263, "step": 22405 }, { "epoch": 0.6882929376708752, "grad_norm": 0.46783024072647095, "learning_rate": 1.4731352987781793e-05, "loss": 0.4438, "step": 22406 }, { "epoch": 0.6883236568058243, "grad_norm": 0.3699895739555359, "learning_rate": 1.4730927219863338e-05, "loss": 0.482, "step": 22407 }, { "epoch": 0.6883543759407735, "grad_norm": 0.36012160778045654, "learning_rate": 1.4730501440895472e-05, "loss": 0.5542, "step": 22408 }, { "epoch": 0.6883850950757227, "grad_norm": 0.6355479955673218, "learning_rate": 1.4730075650879192e-05, "loss": 0.6191, "step": 22409 }, { "epoch": 0.6884158142106718, "grad_norm": 0.34529560804367065, "learning_rate": 1.4729649849815489e-05, "loss": 0.5903, "step": 22410 }, { "epoch": 0.688446533345621, "grad_norm": 0.37414464354515076, "learning_rate": 1.4729224037705358e-05, "loss": 0.5932, "step": 22411 }, { "epoch": 0.6884772524805701, "grad_norm": 0.3626275956630707, "learning_rate": 1.4728798214549793e-05, "loss": 0.5665, "step": 22412 }, { "epoch": 0.6885079716155194, "grad_norm": 0.5061477422714233, "learning_rate": 1.4728372380349789e-05, "loss": 0.5599, "step": 22413 }, { "epoch": 0.6885386907504685, "grad_norm": 0.35348477959632874, "learning_rate": 1.4727946535106343e-05, "loss": 0.5862, "step": 22414 }, { "epoch": 0.6885694098854176, "grad_norm": 0.416599303483963, "learning_rate": 1.4727520678820446e-05, "loss": 0.5552, "step": 22415 }, { "epoch": 0.6886001290203668, "grad_norm": 0.382624089717865, "learning_rate": 1.4727094811493097e-05, "loss": 0.5434, "step": 22416 }, { "epoch": 0.6886308481553159, "grad_norm": 0.38113531470298767, "learning_rate": 1.4726668933125286e-05, "loss": 0.5368, "step": 22417 }, { "epoch": 0.6886615672902651, "grad_norm": 0.33062174916267395, "learning_rate": 1.4726243043718009e-05, "loss": 0.5089, "step": 22418 }, { "epoch": 0.6886922864252143, "grad_norm": 0.4165412187576294, "learning_rate": 1.4725817143272262e-05, "loss": 0.6295, "step": 22419 }, { "epoch": 0.6887230055601634, "grad_norm": 0.4092147946357727, "learning_rate": 1.472539123178904e-05, "loss": 0.6866, "step": 22420 }, { "epoch": 0.6887537246951125, "grad_norm": 0.4185231328010559, "learning_rate": 1.4724965309269337e-05, "loss": 0.6615, "step": 22421 }, { "epoch": 0.6887844438300618, "grad_norm": 0.38640981912612915, "learning_rate": 1.4724539375714147e-05, "loss": 0.6161, "step": 22422 }, { "epoch": 0.6888151629650109, "grad_norm": 0.32950088381767273, "learning_rate": 1.4724113431124465e-05, "loss": 0.4975, "step": 22423 }, { "epoch": 0.6888458820999601, "grad_norm": 0.39729154109954834, "learning_rate": 1.4723687475501285e-05, "loss": 0.5964, "step": 22424 }, { "epoch": 0.6888766012349092, "grad_norm": 0.5031489133834839, "learning_rate": 1.4723261508845606e-05, "loss": 0.6257, "step": 22425 }, { "epoch": 0.6889073203698584, "grad_norm": 0.36117419600486755, "learning_rate": 1.4722835531158416e-05, "loss": 0.5753, "step": 22426 }, { "epoch": 0.6889380395048076, "grad_norm": 0.38206082582473755, "learning_rate": 1.4722409542440717e-05, "loss": 0.5541, "step": 22427 }, { "epoch": 0.6889687586397567, "grad_norm": 0.37291306257247925, "learning_rate": 1.4721983542693497e-05, "loss": 0.5927, "step": 22428 }, { "epoch": 0.6889994777747058, "grad_norm": 0.3247329592704773, "learning_rate": 1.4721557531917759e-05, "loss": 0.5214, "step": 22429 }, { "epoch": 0.6890301969096551, "grad_norm": 0.35901784896850586, "learning_rate": 1.472113151011449e-05, "loss": 0.5397, "step": 22430 }, { "epoch": 0.6890609160446042, "grad_norm": 0.3870009779930115, "learning_rate": 1.4720705477284688e-05, "loss": 0.4573, "step": 22431 }, { "epoch": 0.6890916351795533, "grad_norm": 0.3321213126182556, "learning_rate": 1.4720279433429354e-05, "loss": 0.4979, "step": 22432 }, { "epoch": 0.6891223543145025, "grad_norm": 0.3658146858215332, "learning_rate": 1.4719853378549474e-05, "loss": 0.5717, "step": 22433 }, { "epoch": 0.6891530734494516, "grad_norm": 0.3413306474685669, "learning_rate": 1.4719427312646048e-05, "loss": 0.6051, "step": 22434 }, { "epoch": 0.6891837925844009, "grad_norm": 0.5235626697540283, "learning_rate": 1.4719001235720067e-05, "loss": 0.6196, "step": 22435 }, { "epoch": 0.68921451171935, "grad_norm": 0.4190088212490082, "learning_rate": 1.4718575147772534e-05, "loss": 0.6052, "step": 22436 }, { "epoch": 0.6892452308542991, "grad_norm": 0.3714768588542938, "learning_rate": 1.4718149048804435e-05, "loss": 0.605, "step": 22437 }, { "epoch": 0.6892759499892483, "grad_norm": 0.3650296628475189, "learning_rate": 1.4717722938816772e-05, "loss": 0.5053, "step": 22438 }, { "epoch": 0.6893066691241975, "grad_norm": 0.3445107638835907, "learning_rate": 1.4717296817810535e-05, "loss": 0.5426, "step": 22439 }, { "epoch": 0.6893373882591466, "grad_norm": 0.31515902280807495, "learning_rate": 1.4716870685786727e-05, "loss": 0.5096, "step": 22440 }, { "epoch": 0.6893681073940958, "grad_norm": 0.4794255793094635, "learning_rate": 1.4716444542746335e-05, "loss": 0.585, "step": 22441 }, { "epoch": 0.6893988265290449, "grad_norm": 0.3724626302719116, "learning_rate": 1.4716018388690353e-05, "loss": 0.5421, "step": 22442 }, { "epoch": 0.6894295456639941, "grad_norm": 0.36503490805625916, "learning_rate": 1.4715592223619788e-05, "loss": 0.5692, "step": 22443 }, { "epoch": 0.6894602647989433, "grad_norm": 0.3977644145488739, "learning_rate": 1.4715166047535622e-05, "loss": 0.6244, "step": 22444 }, { "epoch": 0.6894909839338924, "grad_norm": 0.3643122911453247, "learning_rate": 1.4714739860438863e-05, "loss": 0.4547, "step": 22445 }, { "epoch": 0.6895217030688415, "grad_norm": 0.41290467977523804, "learning_rate": 1.4714313662330492e-05, "loss": 0.494, "step": 22446 }, { "epoch": 0.6895524222037908, "grad_norm": 0.3722561001777649, "learning_rate": 1.4713887453211517e-05, "loss": 0.5384, "step": 22447 }, { "epoch": 0.6895831413387399, "grad_norm": 0.36090177297592163, "learning_rate": 1.4713461233082929e-05, "loss": 0.5796, "step": 22448 }, { "epoch": 0.6896138604736891, "grad_norm": 0.37591812014579773, "learning_rate": 1.471303500194572e-05, "loss": 0.5929, "step": 22449 }, { "epoch": 0.6896445796086382, "grad_norm": 0.3979642391204834, "learning_rate": 1.471260875980089e-05, "loss": 0.6125, "step": 22450 }, { "epoch": 0.6896752987435874, "grad_norm": 0.3705897927284241, "learning_rate": 1.4712182506649432e-05, "loss": 0.6706, "step": 22451 }, { "epoch": 0.6897060178785366, "grad_norm": 0.3809659481048584, "learning_rate": 1.4711756242492348e-05, "loss": 0.6062, "step": 22452 }, { "epoch": 0.6897367370134857, "grad_norm": 0.33459922671318054, "learning_rate": 1.4711329967330623e-05, "loss": 0.5582, "step": 22453 }, { "epoch": 0.6897674561484348, "grad_norm": 0.4103931188583374, "learning_rate": 1.4710903681165261e-05, "loss": 0.6159, "step": 22454 }, { "epoch": 0.689798175283384, "grad_norm": 0.3766176104545593, "learning_rate": 1.471047738399725e-05, "loss": 0.5685, "step": 22455 }, { "epoch": 0.6898288944183332, "grad_norm": 0.36424872279167175, "learning_rate": 1.4710051075827593e-05, "loss": 0.5453, "step": 22456 }, { "epoch": 0.6898596135532823, "grad_norm": 0.3448154926300049, "learning_rate": 1.470962475665728e-05, "loss": 0.565, "step": 22457 }, { "epoch": 0.6898903326882315, "grad_norm": 0.40862980484962463, "learning_rate": 1.4709198426487314e-05, "loss": 0.5954, "step": 22458 }, { "epoch": 0.6899210518231806, "grad_norm": 0.3622902035713196, "learning_rate": 1.4708772085318685e-05, "loss": 0.5507, "step": 22459 }, { "epoch": 0.6899517709581299, "grad_norm": 0.3741076588630676, "learning_rate": 1.4708345733152389e-05, "loss": 0.6218, "step": 22460 }, { "epoch": 0.689982490093079, "grad_norm": 0.41968396306037903, "learning_rate": 1.4707919369989423e-05, "loss": 0.5705, "step": 22461 }, { "epoch": 0.6900132092280281, "grad_norm": 0.6588703393936157, "learning_rate": 1.4707492995830784e-05, "loss": 0.5694, "step": 22462 }, { "epoch": 0.6900439283629773, "grad_norm": 0.36038774251937866, "learning_rate": 1.4707066610677464e-05, "loss": 0.5875, "step": 22463 }, { "epoch": 0.6900746474979265, "grad_norm": 0.4661013185977936, "learning_rate": 1.470664021453046e-05, "loss": 0.5682, "step": 22464 }, { "epoch": 0.6901053666328756, "grad_norm": 0.3658093214035034, "learning_rate": 1.4706213807390772e-05, "loss": 0.5843, "step": 22465 }, { "epoch": 0.6901360857678248, "grad_norm": 0.34051448106765747, "learning_rate": 1.4705787389259392e-05, "loss": 0.588, "step": 22466 }, { "epoch": 0.6901668049027739, "grad_norm": 0.3446017801761627, "learning_rate": 1.4705360960137316e-05, "loss": 0.5528, "step": 22467 }, { "epoch": 0.690197524037723, "grad_norm": 0.3833146393299103, "learning_rate": 1.4704934520025543e-05, "loss": 0.5567, "step": 22468 }, { "epoch": 0.6902282431726723, "grad_norm": 0.37998130917549133, "learning_rate": 1.4704508068925063e-05, "loss": 0.5044, "step": 22469 }, { "epoch": 0.6902589623076214, "grad_norm": 0.3520967960357666, "learning_rate": 1.470408160683688e-05, "loss": 0.542, "step": 22470 }, { "epoch": 0.6902896814425705, "grad_norm": 0.34321096539497375, "learning_rate": 1.4703655133761982e-05, "loss": 0.5128, "step": 22471 }, { "epoch": 0.6903204005775198, "grad_norm": 0.34290727972984314, "learning_rate": 1.4703228649701374e-05, "loss": 0.5452, "step": 22472 }, { "epoch": 0.6903511197124689, "grad_norm": 0.32775747776031494, "learning_rate": 1.4702802154656044e-05, "loss": 0.5196, "step": 22473 }, { "epoch": 0.6903818388474181, "grad_norm": 0.3545583188533783, "learning_rate": 1.470237564862699e-05, "loss": 0.5062, "step": 22474 }, { "epoch": 0.6904125579823672, "grad_norm": 0.37747952342033386, "learning_rate": 1.4701949131615211e-05, "loss": 0.6614, "step": 22475 }, { "epoch": 0.6904432771173163, "grad_norm": 0.36417070031166077, "learning_rate": 1.4701522603621702e-05, "loss": 0.5551, "step": 22476 }, { "epoch": 0.6904739962522656, "grad_norm": 0.36601394414901733, "learning_rate": 1.4701096064647458e-05, "loss": 0.5547, "step": 22477 }, { "epoch": 0.6905047153872147, "grad_norm": 0.3987535834312439, "learning_rate": 1.4700669514693475e-05, "loss": 0.5157, "step": 22478 }, { "epoch": 0.6905354345221638, "grad_norm": 0.37859460711479187, "learning_rate": 1.470024295376075e-05, "loss": 0.5519, "step": 22479 }, { "epoch": 0.690566153657113, "grad_norm": 0.349500834941864, "learning_rate": 1.469981638185028e-05, "loss": 0.4946, "step": 22480 }, { "epoch": 0.6905968727920622, "grad_norm": 0.39733606576919556, "learning_rate": 1.4699389798963062e-05, "loss": 0.4854, "step": 22481 }, { "epoch": 0.6906275919270113, "grad_norm": 0.38659775257110596, "learning_rate": 1.4698963205100087e-05, "loss": 0.6318, "step": 22482 }, { "epoch": 0.6906583110619605, "grad_norm": 0.3505514860153198, "learning_rate": 1.4698536600262358e-05, "loss": 0.5349, "step": 22483 }, { "epoch": 0.6906890301969096, "grad_norm": 0.36934563517570496, "learning_rate": 1.4698109984450869e-05, "loss": 0.5288, "step": 22484 }, { "epoch": 0.6907197493318589, "grad_norm": 0.4001167416572571, "learning_rate": 1.4697683357666614e-05, "loss": 0.5064, "step": 22485 }, { "epoch": 0.690750468466808, "grad_norm": 0.36179304122924805, "learning_rate": 1.4697256719910594e-05, "loss": 0.5552, "step": 22486 }, { "epoch": 0.6907811876017571, "grad_norm": 0.35671380162239075, "learning_rate": 1.46968300711838e-05, "loss": 0.5151, "step": 22487 }, { "epoch": 0.6908119067367063, "grad_norm": 0.3864904046058655, "learning_rate": 1.4696403411487234e-05, "loss": 0.5231, "step": 22488 }, { "epoch": 0.6908426258716555, "grad_norm": 0.3146601915359497, "learning_rate": 1.4695976740821887e-05, "loss": 0.4916, "step": 22489 }, { "epoch": 0.6908733450066046, "grad_norm": 0.3671146631240845, "learning_rate": 1.4695550059188762e-05, "loss": 0.5271, "step": 22490 }, { "epoch": 0.6909040641415538, "grad_norm": 0.35593509674072266, "learning_rate": 1.4695123366588848e-05, "loss": 0.5725, "step": 22491 }, { "epoch": 0.6909347832765029, "grad_norm": 0.35483115911483765, "learning_rate": 1.469469666302315e-05, "loss": 0.5977, "step": 22492 }, { "epoch": 0.690965502411452, "grad_norm": 0.35014358162879944, "learning_rate": 1.4694269948492657e-05, "loss": 0.5414, "step": 22493 }, { "epoch": 0.6909962215464013, "grad_norm": 0.33750632405281067, "learning_rate": 1.469384322299837e-05, "loss": 0.6034, "step": 22494 }, { "epoch": 0.6910269406813504, "grad_norm": 0.39207449555397034, "learning_rate": 1.4693416486541283e-05, "loss": 0.6193, "step": 22495 }, { "epoch": 0.6910576598162995, "grad_norm": 0.3887064456939697, "learning_rate": 1.4692989739122394e-05, "loss": 0.5891, "step": 22496 }, { "epoch": 0.6910883789512487, "grad_norm": 0.4805712103843689, "learning_rate": 1.4692562980742702e-05, "loss": 0.6063, "step": 22497 }, { "epoch": 0.6911190980861979, "grad_norm": 0.3715360164642334, "learning_rate": 1.46921362114032e-05, "loss": 0.5666, "step": 22498 }, { "epoch": 0.6911498172211471, "grad_norm": 0.3495534360408783, "learning_rate": 1.4691709431104886e-05, "loss": 0.619, "step": 22499 }, { "epoch": 0.6911805363560962, "grad_norm": 0.3215441405773163, "learning_rate": 1.4691282639848755e-05, "loss": 0.5745, "step": 22500 }, { "epoch": 0.6912112554910453, "grad_norm": 0.36030057072639465, "learning_rate": 1.4690855837635808e-05, "loss": 0.5049, "step": 22501 }, { "epoch": 0.6912419746259946, "grad_norm": 0.3544063866138458, "learning_rate": 1.4690429024467038e-05, "loss": 0.5462, "step": 22502 }, { "epoch": 0.6912726937609437, "grad_norm": 0.32455918192863464, "learning_rate": 1.4690002200343445e-05, "loss": 0.4127, "step": 22503 }, { "epoch": 0.6913034128958928, "grad_norm": 0.38817670941352844, "learning_rate": 1.4689575365266024e-05, "loss": 0.5416, "step": 22504 }, { "epoch": 0.691334132030842, "grad_norm": 0.34178704023361206, "learning_rate": 1.4689148519235772e-05, "loss": 0.5777, "step": 22505 }, { "epoch": 0.6913648511657912, "grad_norm": 0.3447449207305908, "learning_rate": 1.4688721662253687e-05, "loss": 0.5994, "step": 22506 }, { "epoch": 0.6913955703007403, "grad_norm": 0.36484840512275696, "learning_rate": 1.4688294794320764e-05, "loss": 0.5322, "step": 22507 }, { "epoch": 0.6914262894356895, "grad_norm": 0.36023157835006714, "learning_rate": 1.4687867915438002e-05, "loss": 0.5327, "step": 22508 }, { "epoch": 0.6914570085706386, "grad_norm": 0.3442690968513489, "learning_rate": 1.4687441025606395e-05, "loss": 0.5063, "step": 22509 }, { "epoch": 0.6914877277055879, "grad_norm": 0.34606507420539856, "learning_rate": 1.4687014124826946e-05, "loss": 0.6503, "step": 22510 }, { "epoch": 0.691518446840537, "grad_norm": 0.407153457403183, "learning_rate": 1.4686587213100647e-05, "loss": 0.5836, "step": 22511 }, { "epoch": 0.6915491659754861, "grad_norm": 0.34317201375961304, "learning_rate": 1.4686160290428493e-05, "loss": 0.5248, "step": 22512 }, { "epoch": 0.6915798851104353, "grad_norm": 0.4040686786174774, "learning_rate": 1.4685733356811488e-05, "loss": 0.5582, "step": 22513 }, { "epoch": 0.6916106042453845, "grad_norm": 0.39815759658813477, "learning_rate": 1.4685306412250623e-05, "loss": 0.4964, "step": 22514 }, { "epoch": 0.6916413233803336, "grad_norm": 0.32357627153396606, "learning_rate": 1.4684879456746899e-05, "loss": 0.466, "step": 22515 }, { "epoch": 0.6916720425152828, "grad_norm": 0.3549063503742218, "learning_rate": 1.4684452490301312e-05, "loss": 0.4976, "step": 22516 }, { "epoch": 0.6917027616502319, "grad_norm": 0.36181050539016724, "learning_rate": 1.4684025512914859e-05, "loss": 0.5323, "step": 22517 }, { "epoch": 0.691733480785181, "grad_norm": 0.3314193785190582, "learning_rate": 1.4683598524588537e-05, "loss": 0.5433, "step": 22518 }, { "epoch": 0.6917641999201303, "grad_norm": 0.33947011828422546, "learning_rate": 1.4683171525323343e-05, "loss": 0.5855, "step": 22519 }, { "epoch": 0.6917949190550794, "grad_norm": 0.4131937325000763, "learning_rate": 1.4682744515120276e-05, "loss": 0.5967, "step": 22520 }, { "epoch": 0.6918256381900285, "grad_norm": 0.3657763600349426, "learning_rate": 1.468231749398033e-05, "loss": 0.5616, "step": 22521 }, { "epoch": 0.6918563573249777, "grad_norm": 0.3908522427082062, "learning_rate": 1.4681890461904505e-05, "loss": 0.6054, "step": 22522 }, { "epoch": 0.6918870764599269, "grad_norm": 0.3501698970794678, "learning_rate": 1.4681463418893799e-05, "loss": 0.5656, "step": 22523 }, { "epoch": 0.6919177955948761, "grad_norm": 0.3811292052268982, "learning_rate": 1.4681036364949208e-05, "loss": 0.523, "step": 22524 }, { "epoch": 0.6919485147298252, "grad_norm": 0.41629523038864136, "learning_rate": 1.468060930007173e-05, "loss": 0.594, "step": 22525 }, { "epoch": 0.6919792338647743, "grad_norm": 0.37718406319618225, "learning_rate": 1.468018222426236e-05, "loss": 0.5789, "step": 22526 }, { "epoch": 0.6920099529997236, "grad_norm": 0.3482378423213959, "learning_rate": 1.4679755137522102e-05, "loss": 0.5183, "step": 22527 }, { "epoch": 0.6920406721346727, "grad_norm": 0.37830743193626404, "learning_rate": 1.4679328039851945e-05, "loss": 0.575, "step": 22528 }, { "epoch": 0.6920713912696218, "grad_norm": 0.4054834544658661, "learning_rate": 1.4678900931252892e-05, "loss": 0.6109, "step": 22529 }, { "epoch": 0.692102110404571, "grad_norm": 0.36062583327293396, "learning_rate": 1.4678473811725939e-05, "loss": 0.4959, "step": 22530 }, { "epoch": 0.6921328295395202, "grad_norm": 0.3512282967567444, "learning_rate": 1.4678046681272084e-05, "loss": 0.5233, "step": 22531 }, { "epoch": 0.6921635486744693, "grad_norm": 0.36497050523757935, "learning_rate": 1.4677619539892323e-05, "loss": 0.5708, "step": 22532 }, { "epoch": 0.6921942678094185, "grad_norm": 0.3306076228618622, "learning_rate": 1.4677192387587657e-05, "loss": 0.4927, "step": 22533 }, { "epoch": 0.6922249869443676, "grad_norm": 0.43176382780075073, "learning_rate": 1.467676522435908e-05, "loss": 0.4998, "step": 22534 }, { "epoch": 0.6922557060793169, "grad_norm": 0.3248539865016937, "learning_rate": 1.4676338050207593e-05, "loss": 0.5084, "step": 22535 }, { "epoch": 0.692286425214266, "grad_norm": 0.38297298550605774, "learning_rate": 1.4675910865134189e-05, "loss": 0.5661, "step": 22536 }, { "epoch": 0.6923171443492151, "grad_norm": 0.3519614040851593, "learning_rate": 1.467548366913987e-05, "loss": 0.5414, "step": 22537 }, { "epoch": 0.6923478634841643, "grad_norm": 0.3854628801345825, "learning_rate": 1.4675056462225633e-05, "loss": 0.587, "step": 22538 }, { "epoch": 0.6923785826191134, "grad_norm": 0.337139368057251, "learning_rate": 1.4674629244392473e-05, "loss": 0.596, "step": 22539 }, { "epoch": 0.6924093017540626, "grad_norm": 0.34613510966300964, "learning_rate": 1.467420201564139e-05, "loss": 0.5255, "step": 22540 }, { "epoch": 0.6924400208890118, "grad_norm": 0.3370524048805237, "learning_rate": 1.4673774775973384e-05, "loss": 0.5704, "step": 22541 }, { "epoch": 0.6924707400239609, "grad_norm": 0.37837934494018555, "learning_rate": 1.4673347525389449e-05, "loss": 0.5237, "step": 22542 }, { "epoch": 0.69250145915891, "grad_norm": 0.3654174208641052, "learning_rate": 1.4672920263890587e-05, "loss": 0.5441, "step": 22543 }, { "epoch": 0.6925321782938593, "grad_norm": 0.4050576686859131, "learning_rate": 1.4672492991477791e-05, "loss": 0.5638, "step": 22544 }, { "epoch": 0.6925628974288084, "grad_norm": 0.3716464936733246, "learning_rate": 1.4672065708152064e-05, "loss": 0.5198, "step": 22545 }, { "epoch": 0.6925936165637576, "grad_norm": 0.40844935178756714, "learning_rate": 1.4671638413914397e-05, "loss": 0.5508, "step": 22546 }, { "epoch": 0.6926243356987067, "grad_norm": 0.32956022024154663, "learning_rate": 1.4671211108765794e-05, "loss": 0.5552, "step": 22547 }, { "epoch": 0.6926550548336559, "grad_norm": 0.39809873700141907, "learning_rate": 1.4670783792707253e-05, "loss": 0.5297, "step": 22548 }, { "epoch": 0.6926857739686051, "grad_norm": 0.43176162242889404, "learning_rate": 1.4670356465739767e-05, "loss": 0.6391, "step": 22549 }, { "epoch": 0.6927164931035542, "grad_norm": 0.3664095401763916, "learning_rate": 1.4669929127864339e-05, "loss": 0.586, "step": 22550 }, { "epoch": 0.6927472122385033, "grad_norm": 0.3541267514228821, "learning_rate": 1.4669501779081967e-05, "loss": 0.5522, "step": 22551 }, { "epoch": 0.6927779313734526, "grad_norm": 0.34257569909095764, "learning_rate": 1.4669074419393645e-05, "loss": 0.5439, "step": 22552 }, { "epoch": 0.6928086505084017, "grad_norm": 0.3385346531867981, "learning_rate": 1.4668647048800378e-05, "loss": 0.5745, "step": 22553 }, { "epoch": 0.6928393696433508, "grad_norm": 0.3551054298877716, "learning_rate": 1.4668219667303154e-05, "loss": 0.6038, "step": 22554 }, { "epoch": 0.6928700887783, "grad_norm": 0.4258062243461609, "learning_rate": 1.4667792274902981e-05, "loss": 0.5051, "step": 22555 }, { "epoch": 0.6929008079132492, "grad_norm": 0.3770347535610199, "learning_rate": 1.4667364871600851e-05, "loss": 0.552, "step": 22556 }, { "epoch": 0.6929315270481983, "grad_norm": 0.44987303018569946, "learning_rate": 1.4666937457397766e-05, "loss": 0.6039, "step": 22557 }, { "epoch": 0.6929622461831475, "grad_norm": 0.3540380597114563, "learning_rate": 1.4666510032294718e-05, "loss": 0.6019, "step": 22558 }, { "epoch": 0.6929929653180966, "grad_norm": 0.34906822443008423, "learning_rate": 1.4666082596292712e-05, "loss": 0.5306, "step": 22559 }, { "epoch": 0.6930236844530459, "grad_norm": 0.3426465690135956, "learning_rate": 1.4665655149392749e-05, "loss": 0.5237, "step": 22560 }, { "epoch": 0.693054403587995, "grad_norm": 0.34698736667633057, "learning_rate": 1.4665227691595819e-05, "loss": 0.4998, "step": 22561 }, { "epoch": 0.6930851227229441, "grad_norm": 0.3700084984302521, "learning_rate": 1.4664800222902923e-05, "loss": 0.5698, "step": 22562 }, { "epoch": 0.6931158418578933, "grad_norm": 0.3820263147354126, "learning_rate": 1.4664372743315064e-05, "loss": 0.5721, "step": 22563 }, { "epoch": 0.6931465609928424, "grad_norm": 0.3521619439125061, "learning_rate": 1.4663945252833231e-05, "loss": 0.5731, "step": 22564 }, { "epoch": 0.6931772801277916, "grad_norm": 0.3536050021648407, "learning_rate": 1.4663517751458431e-05, "loss": 0.5496, "step": 22565 }, { "epoch": 0.6932079992627408, "grad_norm": 0.340120792388916, "learning_rate": 1.466309023919166e-05, "loss": 0.5224, "step": 22566 }, { "epoch": 0.6932387183976899, "grad_norm": 0.362618088722229, "learning_rate": 1.4662662716033917e-05, "loss": 0.5888, "step": 22567 }, { "epoch": 0.693269437532639, "grad_norm": 0.3578249216079712, "learning_rate": 1.4662235181986197e-05, "loss": 0.5574, "step": 22568 }, { "epoch": 0.6933001566675883, "grad_norm": 0.34848397970199585, "learning_rate": 1.4661807637049504e-05, "loss": 0.508, "step": 22569 }, { "epoch": 0.6933308758025374, "grad_norm": 0.34799882769584656, "learning_rate": 1.4661380081224832e-05, "loss": 0.6142, "step": 22570 }, { "epoch": 0.6933615949374866, "grad_norm": 0.3645077049732208, "learning_rate": 1.4660952514513182e-05, "loss": 0.5519, "step": 22571 }, { "epoch": 0.6933923140724357, "grad_norm": 0.41492921113967896, "learning_rate": 1.466052493691555e-05, "loss": 0.5442, "step": 22572 }, { "epoch": 0.6934230332073849, "grad_norm": 0.38530921936035156, "learning_rate": 1.466009734843294e-05, "loss": 0.5774, "step": 22573 }, { "epoch": 0.6934537523423341, "grad_norm": 0.36865004897117615, "learning_rate": 1.4659669749066345e-05, "loss": 0.5307, "step": 22574 }, { "epoch": 0.6934844714772832, "grad_norm": 0.3915439248085022, "learning_rate": 1.4659242138816765e-05, "loss": 0.536, "step": 22575 }, { "epoch": 0.6935151906122323, "grad_norm": 0.390075147151947, "learning_rate": 1.46588145176852e-05, "loss": 0.5315, "step": 22576 }, { "epoch": 0.6935459097471816, "grad_norm": 0.32660725712776184, "learning_rate": 1.4658386885672649e-05, "loss": 0.5708, "step": 22577 }, { "epoch": 0.6935766288821307, "grad_norm": 0.35460761189460754, "learning_rate": 1.465795924278011e-05, "loss": 0.5565, "step": 22578 }, { "epoch": 0.6936073480170798, "grad_norm": 0.43957608938217163, "learning_rate": 1.4657531589008583e-05, "loss": 0.5815, "step": 22579 }, { "epoch": 0.693638067152029, "grad_norm": 0.34490275382995605, "learning_rate": 1.4657103924359063e-05, "loss": 0.545, "step": 22580 }, { "epoch": 0.6936687862869781, "grad_norm": 0.414168119430542, "learning_rate": 1.4656676248832555e-05, "loss": 0.5657, "step": 22581 }, { "epoch": 0.6936995054219273, "grad_norm": 0.5001202821731567, "learning_rate": 1.4656248562430051e-05, "loss": 0.6548, "step": 22582 }, { "epoch": 0.6937302245568765, "grad_norm": 0.6297950148582458, "learning_rate": 1.4655820865152555e-05, "loss": 0.5864, "step": 22583 }, { "epoch": 0.6937609436918256, "grad_norm": 0.377959668636322, "learning_rate": 1.4655393157001063e-05, "loss": 0.5584, "step": 22584 }, { "epoch": 0.6937916628267748, "grad_norm": 0.3610199987888336, "learning_rate": 1.4654965437976578e-05, "loss": 0.6301, "step": 22585 }, { "epoch": 0.693822381961724, "grad_norm": 0.3828895688056946, "learning_rate": 1.4654537708080092e-05, "loss": 0.5294, "step": 22586 }, { "epoch": 0.6938531010966731, "grad_norm": 3.4689910411834717, "learning_rate": 1.4654109967312614e-05, "loss": 0.4999, "step": 22587 }, { "epoch": 0.6938838202316223, "grad_norm": 0.38492006063461304, "learning_rate": 1.4653682215675131e-05, "loss": 0.5897, "step": 22588 }, { "epoch": 0.6939145393665714, "grad_norm": 0.39945968985557556, "learning_rate": 1.4653254453168653e-05, "loss": 0.5371, "step": 22589 }, { "epoch": 0.6939452585015206, "grad_norm": 0.3768009841442108, "learning_rate": 1.4652826679794172e-05, "loss": 0.6031, "step": 22590 }, { "epoch": 0.6939759776364698, "grad_norm": 0.39094969630241394, "learning_rate": 1.4652398895552689e-05, "loss": 0.5588, "step": 22591 }, { "epoch": 0.6940066967714189, "grad_norm": 0.3754323720932007, "learning_rate": 1.4651971100445204e-05, "loss": 0.5579, "step": 22592 }, { "epoch": 0.694037415906368, "grad_norm": 0.363925576210022, "learning_rate": 1.4651543294472715e-05, "loss": 0.5404, "step": 22593 }, { "epoch": 0.6940681350413173, "grad_norm": 0.3391965329647064, "learning_rate": 1.465111547763622e-05, "loss": 0.513, "step": 22594 }, { "epoch": 0.6940988541762664, "grad_norm": 0.3773689866065979, "learning_rate": 1.4650687649936722e-05, "loss": 0.5075, "step": 22595 }, { "epoch": 0.6941295733112156, "grad_norm": 0.3840753436088562, "learning_rate": 1.4650259811375219e-05, "loss": 0.5683, "step": 22596 }, { "epoch": 0.6941602924461647, "grad_norm": 0.4615083634853363, "learning_rate": 1.4649831961952709e-05, "loss": 0.5693, "step": 22597 }, { "epoch": 0.6941910115811138, "grad_norm": 0.37923750281333923, "learning_rate": 1.464940410167019e-05, "loss": 0.5496, "step": 22598 }, { "epoch": 0.6942217307160631, "grad_norm": 0.38672181963920593, "learning_rate": 1.4648976230528666e-05, "loss": 0.607, "step": 22599 }, { "epoch": 0.6942524498510122, "grad_norm": 0.39665624499320984, "learning_rate": 1.4648548348529133e-05, "loss": 0.5727, "step": 22600 }, { "epoch": 0.6942831689859613, "grad_norm": 0.33314213156700134, "learning_rate": 1.464812045567259e-05, "loss": 0.5076, "step": 22601 }, { "epoch": 0.6943138881209105, "grad_norm": 0.3515544533729553, "learning_rate": 1.4647692551960035e-05, "loss": 0.6, "step": 22602 }, { "epoch": 0.6943446072558597, "grad_norm": 0.3775416612625122, "learning_rate": 1.4647264637392473e-05, "loss": 0.5295, "step": 22603 }, { "epoch": 0.6943753263908088, "grad_norm": 0.3529481887817383, "learning_rate": 1.4646836711970896e-05, "loss": 0.4984, "step": 22604 }, { "epoch": 0.694406045525758, "grad_norm": 0.3400987982749939, "learning_rate": 1.464640877569631e-05, "loss": 0.6149, "step": 22605 }, { "epoch": 0.6944367646607071, "grad_norm": 0.35183417797088623, "learning_rate": 1.464598082856971e-05, "loss": 0.5823, "step": 22606 }, { "epoch": 0.6944674837956563, "grad_norm": 0.3908868432044983, "learning_rate": 1.46455528705921e-05, "loss": 0.5019, "step": 22607 }, { "epoch": 0.6944982029306055, "grad_norm": 0.3601454794406891, "learning_rate": 1.4645124901764476e-05, "loss": 0.5654, "step": 22608 }, { "epoch": 0.6945289220655546, "grad_norm": 0.3777960538864136, "learning_rate": 1.4644696922087835e-05, "loss": 0.5624, "step": 22609 }, { "epoch": 0.6945596412005038, "grad_norm": 0.36064577102661133, "learning_rate": 1.4644268931563183e-05, "loss": 0.519, "step": 22610 }, { "epoch": 0.694590360335453, "grad_norm": 0.3440132141113281, "learning_rate": 1.4643840930191514e-05, "loss": 0.5612, "step": 22611 }, { "epoch": 0.6946210794704021, "grad_norm": 0.36056798696517944, "learning_rate": 1.4643412917973836e-05, "loss": 0.5336, "step": 22612 }, { "epoch": 0.6946517986053513, "grad_norm": 0.3198976516723633, "learning_rate": 1.4642984894911137e-05, "loss": 0.522, "step": 22613 }, { "epoch": 0.6946825177403004, "grad_norm": 0.3693627715110779, "learning_rate": 1.4642556861004424e-05, "loss": 0.5736, "step": 22614 }, { "epoch": 0.6947132368752496, "grad_norm": 0.38909587264060974, "learning_rate": 1.4642128816254697e-05, "loss": 0.6043, "step": 22615 }, { "epoch": 0.6947439560101988, "grad_norm": 0.3908487558364868, "learning_rate": 1.4641700760662952e-05, "loss": 0.5861, "step": 22616 }, { "epoch": 0.6947746751451479, "grad_norm": 0.38308629393577576, "learning_rate": 1.4641272694230192e-05, "loss": 0.6003, "step": 22617 }, { "epoch": 0.694805394280097, "grad_norm": 0.3647722899913788, "learning_rate": 1.4640844616957415e-05, "loss": 0.5829, "step": 22618 }, { "epoch": 0.6948361134150463, "grad_norm": 0.36078202724456787, "learning_rate": 1.464041652884562e-05, "loss": 0.579, "step": 22619 }, { "epoch": 0.6948668325499954, "grad_norm": 0.3492617607116699, "learning_rate": 1.463998842989581e-05, "loss": 0.572, "step": 22620 }, { "epoch": 0.6948975516849446, "grad_norm": 0.41127699613571167, "learning_rate": 1.4639560320108982e-05, "loss": 0.5581, "step": 22621 }, { "epoch": 0.6949282708198937, "grad_norm": 0.4190850257873535, "learning_rate": 1.4639132199486137e-05, "loss": 0.5591, "step": 22622 }, { "epoch": 0.6949589899548428, "grad_norm": 0.3685346841812134, "learning_rate": 1.4638704068028276e-05, "loss": 0.5567, "step": 22623 }, { "epoch": 0.6949897090897921, "grad_norm": 0.33703821897506714, "learning_rate": 1.4638275925736396e-05, "loss": 0.5537, "step": 22624 }, { "epoch": 0.6950204282247412, "grad_norm": 0.3973979353904724, "learning_rate": 1.46378477726115e-05, "loss": 0.574, "step": 22625 }, { "epoch": 0.6950511473596903, "grad_norm": 0.3690866231918335, "learning_rate": 1.4637419608654586e-05, "loss": 0.5562, "step": 22626 }, { "epoch": 0.6950818664946395, "grad_norm": 0.4480339586734772, "learning_rate": 1.4636991433866655e-05, "loss": 0.5726, "step": 22627 }, { "epoch": 0.6951125856295887, "grad_norm": 0.40986064076423645, "learning_rate": 1.4636563248248704e-05, "loss": 0.5311, "step": 22628 }, { "epoch": 0.6951433047645378, "grad_norm": 0.33583909273147583, "learning_rate": 1.463613505180174e-05, "loss": 0.5422, "step": 22629 }, { "epoch": 0.695174023899487, "grad_norm": 0.4453539252281189, "learning_rate": 1.4635706844526757e-05, "loss": 0.5664, "step": 22630 }, { "epoch": 0.6952047430344361, "grad_norm": 0.37374526262283325, "learning_rate": 1.4635278626424754e-05, "loss": 0.6012, "step": 22631 }, { "epoch": 0.6952354621693854, "grad_norm": 0.40841931104660034, "learning_rate": 1.4634850397496739e-05, "loss": 0.634, "step": 22632 }, { "epoch": 0.6952661813043345, "grad_norm": 0.3801027834415436, "learning_rate": 1.4634422157743703e-05, "loss": 0.5034, "step": 22633 }, { "epoch": 0.6952969004392836, "grad_norm": 0.374788373708725, "learning_rate": 1.4633993907166652e-05, "loss": 0.6216, "step": 22634 }, { "epoch": 0.6953276195742328, "grad_norm": 0.43774136900901794, "learning_rate": 1.4633565645766585e-05, "loss": 0.5648, "step": 22635 }, { "epoch": 0.695358338709182, "grad_norm": 0.8645035028457642, "learning_rate": 1.4633137373544501e-05, "loss": 0.5345, "step": 22636 }, { "epoch": 0.6953890578441311, "grad_norm": 0.3564022183418274, "learning_rate": 1.4632709090501402e-05, "loss": 0.5277, "step": 22637 }, { "epoch": 0.6954197769790803, "grad_norm": 0.3500881791114807, "learning_rate": 1.4632280796638287e-05, "loss": 0.5864, "step": 22638 }, { "epoch": 0.6954504961140294, "grad_norm": 0.45170027017593384, "learning_rate": 1.4631852491956158e-05, "loss": 0.5914, "step": 22639 }, { "epoch": 0.6954812152489785, "grad_norm": 0.3891288638114929, "learning_rate": 1.4631424176456014e-05, "loss": 0.4952, "step": 22640 }, { "epoch": 0.6955119343839278, "grad_norm": 0.3670620918273926, "learning_rate": 1.4630995850138853e-05, "loss": 0.526, "step": 22641 }, { "epoch": 0.6955426535188769, "grad_norm": 0.43476542830467224, "learning_rate": 1.4630567513005677e-05, "loss": 0.5713, "step": 22642 }, { "epoch": 0.695573372653826, "grad_norm": 0.3358655571937561, "learning_rate": 1.4630139165057491e-05, "loss": 0.5453, "step": 22643 }, { "epoch": 0.6956040917887752, "grad_norm": 0.3949541449546814, "learning_rate": 1.462971080629529e-05, "loss": 0.5975, "step": 22644 }, { "epoch": 0.6956348109237244, "grad_norm": 0.3688315153121948, "learning_rate": 1.4629282436720075e-05, "loss": 0.5138, "step": 22645 }, { "epoch": 0.6956655300586736, "grad_norm": 0.3279627859592438, "learning_rate": 1.462885405633285e-05, "loss": 0.5312, "step": 22646 }, { "epoch": 0.6956962491936227, "grad_norm": 0.3474194407463074, "learning_rate": 1.4628425665134611e-05, "loss": 0.4573, "step": 22647 }, { "epoch": 0.6957269683285718, "grad_norm": 0.3881056606769562, "learning_rate": 1.4627997263126362e-05, "loss": 0.5562, "step": 22648 }, { "epoch": 0.6957576874635211, "grad_norm": 0.3973795175552368, "learning_rate": 1.4627568850309099e-05, "loss": 0.5698, "step": 22649 }, { "epoch": 0.6957884065984702, "grad_norm": 0.33754873275756836, "learning_rate": 1.4627140426683828e-05, "loss": 0.5117, "step": 22650 }, { "epoch": 0.6958191257334193, "grad_norm": 0.3602282404899597, "learning_rate": 1.4626711992251547e-05, "loss": 0.64, "step": 22651 }, { "epoch": 0.6958498448683685, "grad_norm": 0.3646886944770813, "learning_rate": 1.4626283547013258e-05, "loss": 0.601, "step": 22652 }, { "epoch": 0.6958805640033177, "grad_norm": 0.34893548488616943, "learning_rate": 1.462585509096996e-05, "loss": 0.5388, "step": 22653 }, { "epoch": 0.6959112831382668, "grad_norm": 0.3727281987667084, "learning_rate": 1.4625426624122654e-05, "loss": 0.5669, "step": 22654 }, { "epoch": 0.695942002273216, "grad_norm": 0.3622271716594696, "learning_rate": 1.4624998146472342e-05, "loss": 0.4585, "step": 22655 }, { "epoch": 0.6959727214081651, "grad_norm": 0.43568500876426697, "learning_rate": 1.4624569658020023e-05, "loss": 0.5503, "step": 22656 }, { "epoch": 0.6960034405431144, "grad_norm": 0.37510064244270325, "learning_rate": 1.46241411587667e-05, "loss": 0.4584, "step": 22657 }, { "epoch": 0.6960341596780635, "grad_norm": 0.3743645250797272, "learning_rate": 1.462371264871337e-05, "loss": 0.4818, "step": 22658 }, { "epoch": 0.6960648788130126, "grad_norm": 0.34617379307746887, "learning_rate": 1.4623284127861039e-05, "loss": 0.5958, "step": 22659 }, { "epoch": 0.6960955979479618, "grad_norm": 0.3243817985057831, "learning_rate": 1.4622855596210705e-05, "loss": 0.5426, "step": 22660 }, { "epoch": 0.696126317082911, "grad_norm": 0.36599379777908325, "learning_rate": 1.4622427053763366e-05, "loss": 0.6229, "step": 22661 }, { "epoch": 0.6961570362178601, "grad_norm": 0.46805647015571594, "learning_rate": 1.4621998500520027e-05, "loss": 0.5551, "step": 22662 }, { "epoch": 0.6961877553528093, "grad_norm": 0.35179540514945984, "learning_rate": 1.4621569936481687e-05, "loss": 0.5514, "step": 22663 }, { "epoch": 0.6962184744877584, "grad_norm": 0.3547077775001526, "learning_rate": 1.4621141361649352e-05, "loss": 0.5203, "step": 22664 }, { "epoch": 0.6962491936227075, "grad_norm": 0.381740540266037, "learning_rate": 1.4620712776024014e-05, "loss": 0.6441, "step": 22665 }, { "epoch": 0.6962799127576568, "grad_norm": 0.38753247261047363, "learning_rate": 1.4620284179606681e-05, "loss": 0.5477, "step": 22666 }, { "epoch": 0.6963106318926059, "grad_norm": 0.3449159264564514, "learning_rate": 1.4619855572398347e-05, "loss": 0.6103, "step": 22667 }, { "epoch": 0.696341351027555, "grad_norm": 0.4511210024356842, "learning_rate": 1.4619426954400022e-05, "loss": 0.5512, "step": 22668 }, { "epoch": 0.6963720701625042, "grad_norm": 0.42165133357048035, "learning_rate": 1.46189983256127e-05, "loss": 0.5097, "step": 22669 }, { "epoch": 0.6964027892974534, "grad_norm": 0.3627742528915405, "learning_rate": 1.4618569686037388e-05, "loss": 0.5565, "step": 22670 }, { "epoch": 0.6964335084324026, "grad_norm": 0.3561030328273773, "learning_rate": 1.4618141035675083e-05, "loss": 0.4741, "step": 22671 }, { "epoch": 0.6964642275673517, "grad_norm": 0.4159592390060425, "learning_rate": 1.4617712374526787e-05, "loss": 0.6798, "step": 22672 }, { "epoch": 0.6964949467023008, "grad_norm": 0.34796103835105896, "learning_rate": 1.4617283702593501e-05, "loss": 0.5387, "step": 22673 }, { "epoch": 0.6965256658372501, "grad_norm": 0.41253191232681274, "learning_rate": 1.4616855019876225e-05, "loss": 0.4672, "step": 22674 }, { "epoch": 0.6965563849721992, "grad_norm": 0.36490434408187866, "learning_rate": 1.4616426326375966e-05, "loss": 0.5496, "step": 22675 }, { "epoch": 0.6965871041071483, "grad_norm": 0.3856409788131714, "learning_rate": 1.4615997622093716e-05, "loss": 0.524, "step": 22676 }, { "epoch": 0.6966178232420975, "grad_norm": 0.3623887002468109, "learning_rate": 1.4615568907030483e-05, "loss": 0.517, "step": 22677 }, { "epoch": 0.6966485423770467, "grad_norm": 0.42046162486076355, "learning_rate": 1.4615140181187269e-05, "loss": 0.5462, "step": 22678 }, { "epoch": 0.6966792615119958, "grad_norm": 0.40576061606407166, "learning_rate": 1.4614711444565069e-05, "loss": 0.5509, "step": 22679 }, { "epoch": 0.696709980646945, "grad_norm": 0.37258023023605347, "learning_rate": 1.461428269716489e-05, "loss": 0.5448, "step": 22680 }, { "epoch": 0.6967406997818941, "grad_norm": 0.32836076617240906, "learning_rate": 1.461385393898773e-05, "loss": 0.5534, "step": 22681 }, { "epoch": 0.6967714189168434, "grad_norm": 0.35674309730529785, "learning_rate": 1.4613425170034595e-05, "loss": 0.4538, "step": 22682 }, { "epoch": 0.6968021380517925, "grad_norm": 0.3475133776664734, "learning_rate": 1.461299639030648e-05, "loss": 0.563, "step": 22683 }, { "epoch": 0.6968328571867416, "grad_norm": 0.36239194869995117, "learning_rate": 1.461256759980439e-05, "loss": 0.5308, "step": 22684 }, { "epoch": 0.6968635763216908, "grad_norm": 0.38623562455177307, "learning_rate": 1.4612138798529328e-05, "loss": 0.5961, "step": 22685 }, { "epoch": 0.69689429545664, "grad_norm": 0.3760462999343872, "learning_rate": 1.4611709986482293e-05, "loss": 0.5703, "step": 22686 }, { "epoch": 0.6969250145915891, "grad_norm": 0.3478698432445526, "learning_rate": 1.4611281163664285e-05, "loss": 0.5445, "step": 22687 }, { "epoch": 0.6969557337265383, "grad_norm": 0.37650299072265625, "learning_rate": 1.461085233007631e-05, "loss": 0.5201, "step": 22688 }, { "epoch": 0.6969864528614874, "grad_norm": 0.3581222891807556, "learning_rate": 1.461042348571937e-05, "loss": 0.6268, "step": 22689 }, { "epoch": 0.6970171719964365, "grad_norm": 0.41234371066093445, "learning_rate": 1.460999463059446e-05, "loss": 0.577, "step": 22690 }, { "epoch": 0.6970478911313858, "grad_norm": 0.36291590332984924, "learning_rate": 1.4609565764702587e-05, "loss": 0.5668, "step": 22691 }, { "epoch": 0.6970786102663349, "grad_norm": 0.35234665870666504, "learning_rate": 1.460913688804475e-05, "loss": 0.5672, "step": 22692 }, { "epoch": 0.697109329401284, "grad_norm": 0.34655192494392395, "learning_rate": 1.4608708000621954e-05, "loss": 0.5324, "step": 22693 }, { "epoch": 0.6971400485362332, "grad_norm": 0.35953179001808167, "learning_rate": 1.4608279102435197e-05, "loss": 0.5237, "step": 22694 }, { "epoch": 0.6971707676711824, "grad_norm": 0.3643888533115387, "learning_rate": 1.4607850193485484e-05, "loss": 0.6192, "step": 22695 }, { "epoch": 0.6972014868061316, "grad_norm": 0.39024725556373596, "learning_rate": 1.4607421273773814e-05, "loss": 0.5714, "step": 22696 }, { "epoch": 0.6972322059410807, "grad_norm": 0.39870572090148926, "learning_rate": 1.4606992343301189e-05, "loss": 0.6163, "step": 22697 }, { "epoch": 0.6972629250760298, "grad_norm": 0.3589004874229431, "learning_rate": 1.4606563402068614e-05, "loss": 0.5551, "step": 22698 }, { "epoch": 0.6972936442109791, "grad_norm": 0.3622085154056549, "learning_rate": 1.4606134450077086e-05, "loss": 0.5284, "step": 22699 }, { "epoch": 0.6973243633459282, "grad_norm": 0.333213746547699, "learning_rate": 1.4605705487327611e-05, "loss": 0.646, "step": 22700 }, { "epoch": 0.6973550824808773, "grad_norm": 0.4033159613609314, "learning_rate": 1.4605276513821187e-05, "loss": 0.6714, "step": 22701 }, { "epoch": 0.6973858016158265, "grad_norm": 0.35060158371925354, "learning_rate": 1.4604847529558822e-05, "loss": 0.5904, "step": 22702 }, { "epoch": 0.6974165207507756, "grad_norm": 0.36272892355918884, "learning_rate": 1.460441853454151e-05, "loss": 0.5895, "step": 22703 }, { "epoch": 0.6974472398857248, "grad_norm": 0.3695494532585144, "learning_rate": 1.4603989528770257e-05, "loss": 0.5151, "step": 22704 }, { "epoch": 0.697477959020674, "grad_norm": 0.3459913730621338, "learning_rate": 1.4603560512246067e-05, "loss": 0.5662, "step": 22705 }, { "epoch": 0.6975086781556231, "grad_norm": 0.43500953912734985, "learning_rate": 1.460313148496994e-05, "loss": 0.5876, "step": 22706 }, { "epoch": 0.6975393972905723, "grad_norm": 0.4001953899860382, "learning_rate": 1.4602702446942876e-05, "loss": 0.5203, "step": 22707 }, { "epoch": 0.6975701164255215, "grad_norm": 0.36471763253211975, "learning_rate": 1.460227339816588e-05, "loss": 0.5722, "step": 22708 }, { "epoch": 0.6976008355604706, "grad_norm": 0.38705340027809143, "learning_rate": 1.4601844338639954e-05, "loss": 0.5281, "step": 22709 }, { "epoch": 0.6976315546954198, "grad_norm": 0.3929666578769684, "learning_rate": 1.4601415268366097e-05, "loss": 0.5084, "step": 22710 }, { "epoch": 0.6976622738303689, "grad_norm": 0.39036452770233154, "learning_rate": 1.4600986187345315e-05, "loss": 0.5137, "step": 22711 }, { "epoch": 0.6976929929653181, "grad_norm": 0.37625637650489807, "learning_rate": 1.4600557095578611e-05, "loss": 0.6306, "step": 22712 }, { "epoch": 0.6977237121002673, "grad_norm": 0.3698633909225464, "learning_rate": 1.4600127993066982e-05, "loss": 0.4965, "step": 22713 }, { "epoch": 0.6977544312352164, "grad_norm": 0.4145824611186981, "learning_rate": 1.4599698879811433e-05, "loss": 0.5887, "step": 22714 }, { "epoch": 0.6977851503701655, "grad_norm": 0.337420791387558, "learning_rate": 1.4599269755812963e-05, "loss": 0.5894, "step": 22715 }, { "epoch": 0.6978158695051148, "grad_norm": 0.36152786016464233, "learning_rate": 1.4598840621072581e-05, "loss": 0.5191, "step": 22716 }, { "epoch": 0.6978465886400639, "grad_norm": 0.4189799427986145, "learning_rate": 1.4598411475591282e-05, "loss": 0.5077, "step": 22717 }, { "epoch": 0.697877307775013, "grad_norm": 0.3214954435825348, "learning_rate": 1.4597982319370077e-05, "loss": 0.4541, "step": 22718 }, { "epoch": 0.6979080269099622, "grad_norm": 0.371396005153656, "learning_rate": 1.459755315240996e-05, "loss": 0.5131, "step": 22719 }, { "epoch": 0.6979387460449114, "grad_norm": 0.4219970107078552, "learning_rate": 1.4597123974711938e-05, "loss": 0.4771, "step": 22720 }, { "epoch": 0.6979694651798606, "grad_norm": 0.47350507974624634, "learning_rate": 1.4596694786277008e-05, "loss": 0.5853, "step": 22721 }, { "epoch": 0.6980001843148097, "grad_norm": 0.39622002840042114, "learning_rate": 1.459626558710618e-05, "loss": 0.5746, "step": 22722 }, { "epoch": 0.6980309034497588, "grad_norm": 0.37910422682762146, "learning_rate": 1.4595836377200451e-05, "loss": 0.4909, "step": 22723 }, { "epoch": 0.698061622584708, "grad_norm": 0.35066360235214233, "learning_rate": 1.4595407156560825e-05, "loss": 0.5154, "step": 22724 }, { "epoch": 0.6980923417196572, "grad_norm": 0.3384469449520111, "learning_rate": 1.4594977925188307e-05, "loss": 0.5187, "step": 22725 }, { "epoch": 0.6981230608546063, "grad_norm": 0.3225082755088806, "learning_rate": 1.4594548683083896e-05, "loss": 0.4921, "step": 22726 }, { "epoch": 0.6981537799895555, "grad_norm": 0.3471711575984955, "learning_rate": 1.4594119430248595e-05, "loss": 0.5922, "step": 22727 }, { "epoch": 0.6981844991245046, "grad_norm": 0.3799887001514435, "learning_rate": 1.4593690166683407e-05, "loss": 0.5803, "step": 22728 }, { "epoch": 0.6982152182594538, "grad_norm": 0.37447619438171387, "learning_rate": 1.4593260892389336e-05, "loss": 0.5482, "step": 22729 }, { "epoch": 0.698245937394403, "grad_norm": 0.3833891451358795, "learning_rate": 1.4592831607367384e-05, "loss": 0.5519, "step": 22730 }, { "epoch": 0.6982766565293521, "grad_norm": 0.3514619469642639, "learning_rate": 1.4592402311618552e-05, "loss": 0.5101, "step": 22731 }, { "epoch": 0.6983073756643013, "grad_norm": 0.4710066318511963, "learning_rate": 1.4591973005143844e-05, "loss": 0.6288, "step": 22732 }, { "epoch": 0.6983380947992505, "grad_norm": 0.3788207173347473, "learning_rate": 1.4591543687944261e-05, "loss": 0.5027, "step": 22733 }, { "epoch": 0.6983688139341996, "grad_norm": 0.39718058705329895, "learning_rate": 1.4591114360020809e-05, "loss": 0.4857, "step": 22734 }, { "epoch": 0.6983995330691488, "grad_norm": 0.4206257164478302, "learning_rate": 1.4590685021374486e-05, "loss": 0.5265, "step": 22735 }, { "epoch": 0.6984302522040979, "grad_norm": 0.3568282127380371, "learning_rate": 1.4590255672006301e-05, "loss": 0.5334, "step": 22736 }, { "epoch": 0.698460971339047, "grad_norm": 0.35557031631469727, "learning_rate": 1.458982631191725e-05, "loss": 0.5345, "step": 22737 }, { "epoch": 0.6984916904739963, "grad_norm": 0.35913217067718506, "learning_rate": 1.4589396941108342e-05, "loss": 0.537, "step": 22738 }, { "epoch": 0.6985224096089454, "grad_norm": 0.34377211332321167, "learning_rate": 1.4588967559580575e-05, "loss": 0.5028, "step": 22739 }, { "epoch": 0.6985531287438945, "grad_norm": 1.811585545539856, "learning_rate": 1.4588538167334953e-05, "loss": 0.645, "step": 22740 }, { "epoch": 0.6985838478788438, "grad_norm": 0.36952003836631775, "learning_rate": 1.4588108764372481e-05, "loss": 0.5584, "step": 22741 }, { "epoch": 0.6986145670137929, "grad_norm": 0.40020951628685, "learning_rate": 1.4587679350694161e-05, "loss": 0.5715, "step": 22742 }, { "epoch": 0.6986452861487421, "grad_norm": 0.38662219047546387, "learning_rate": 1.4587249926300996e-05, "loss": 0.5613, "step": 22743 }, { "epoch": 0.6986760052836912, "grad_norm": 0.3830729126930237, "learning_rate": 1.4586820491193986e-05, "loss": 0.6042, "step": 22744 }, { "epoch": 0.6987067244186403, "grad_norm": 0.37623271346092224, "learning_rate": 1.4586391045374139e-05, "loss": 0.5591, "step": 22745 }, { "epoch": 0.6987374435535896, "grad_norm": 0.33460864424705505, "learning_rate": 1.4585961588842454e-05, "loss": 0.5312, "step": 22746 }, { "epoch": 0.6987681626885387, "grad_norm": 0.33594751358032227, "learning_rate": 1.4585532121599937e-05, "loss": 0.6072, "step": 22747 }, { "epoch": 0.6987988818234878, "grad_norm": 0.35574600100517273, "learning_rate": 1.4585102643647588e-05, "loss": 0.5482, "step": 22748 }, { "epoch": 0.698829600958437, "grad_norm": 0.32053011655807495, "learning_rate": 1.4584673154986411e-05, "loss": 0.5801, "step": 22749 }, { "epoch": 0.6988603200933862, "grad_norm": 0.3883623480796814, "learning_rate": 1.4584243655617412e-05, "loss": 0.5921, "step": 22750 }, { "epoch": 0.6988910392283353, "grad_norm": 0.3597763180732727, "learning_rate": 1.4583814145541588e-05, "loss": 0.61, "step": 22751 }, { "epoch": 0.6989217583632845, "grad_norm": 0.3303324282169342, "learning_rate": 1.4583384624759948e-05, "loss": 0.526, "step": 22752 }, { "epoch": 0.6989524774982336, "grad_norm": 0.44304364919662476, "learning_rate": 1.4582955093273494e-05, "loss": 0.5677, "step": 22753 }, { "epoch": 0.6989831966331828, "grad_norm": 0.38638606667518616, "learning_rate": 1.4582525551083228e-05, "loss": 0.5361, "step": 22754 }, { "epoch": 0.699013915768132, "grad_norm": 0.40742993354797363, "learning_rate": 1.4582095998190153e-05, "loss": 0.606, "step": 22755 }, { "epoch": 0.6990446349030811, "grad_norm": 0.36934345960617065, "learning_rate": 1.4581666434595275e-05, "loss": 0.5567, "step": 22756 }, { "epoch": 0.6990753540380303, "grad_norm": 0.3469395935535431, "learning_rate": 1.458123686029959e-05, "loss": 0.5246, "step": 22757 }, { "epoch": 0.6991060731729795, "grad_norm": 0.38465774059295654, "learning_rate": 1.4580807275304109e-05, "loss": 0.578, "step": 22758 }, { "epoch": 0.6991367923079286, "grad_norm": 0.4461408853530884, "learning_rate": 1.4580377679609833e-05, "loss": 0.4305, "step": 22759 }, { "epoch": 0.6991675114428778, "grad_norm": 0.35109037160873413, "learning_rate": 1.4579948073217764e-05, "loss": 0.5945, "step": 22760 }, { "epoch": 0.6991982305778269, "grad_norm": 0.3634200692176819, "learning_rate": 1.457951845612891e-05, "loss": 0.5184, "step": 22761 }, { "epoch": 0.699228949712776, "grad_norm": 0.32850730419158936, "learning_rate": 1.4579088828344266e-05, "loss": 0.4596, "step": 22762 }, { "epoch": 0.6992596688477253, "grad_norm": 0.3327782154083252, "learning_rate": 1.4578659189864843e-05, "loss": 0.5689, "step": 22763 }, { "epoch": 0.6992903879826744, "grad_norm": 0.4195777475833893, "learning_rate": 1.4578229540691644e-05, "loss": 0.6031, "step": 22764 }, { "epoch": 0.6993211071176235, "grad_norm": 0.3556104600429535, "learning_rate": 1.4577799880825666e-05, "loss": 0.5753, "step": 22765 }, { "epoch": 0.6993518262525728, "grad_norm": 0.3481832444667816, "learning_rate": 1.4577370210267918e-05, "loss": 0.522, "step": 22766 }, { "epoch": 0.6993825453875219, "grad_norm": 0.401644766330719, "learning_rate": 1.4576940529019401e-05, "loss": 0.6031, "step": 22767 }, { "epoch": 0.6994132645224711, "grad_norm": 0.3711177408695221, "learning_rate": 1.4576510837081124e-05, "loss": 0.5714, "step": 22768 }, { "epoch": 0.6994439836574202, "grad_norm": 0.35169747471809387, "learning_rate": 1.457608113445408e-05, "loss": 0.606, "step": 22769 }, { "epoch": 0.6994747027923693, "grad_norm": 0.3524141013622284, "learning_rate": 1.4575651421139283e-05, "loss": 0.5871, "step": 22770 }, { "epoch": 0.6995054219273186, "grad_norm": 0.3903377950191498, "learning_rate": 1.4575221697137731e-05, "loss": 0.5026, "step": 22771 }, { "epoch": 0.6995361410622677, "grad_norm": 0.3366168141365051, "learning_rate": 1.4574791962450433e-05, "loss": 0.524, "step": 22772 }, { "epoch": 0.6995668601972168, "grad_norm": 0.4615647494792938, "learning_rate": 1.4574362217078385e-05, "loss": 0.4752, "step": 22773 }, { "epoch": 0.699597579332166, "grad_norm": 0.39390432834625244, "learning_rate": 1.4573932461022595e-05, "loss": 0.4422, "step": 22774 }, { "epoch": 0.6996282984671152, "grad_norm": 0.3640400767326355, "learning_rate": 1.4573502694284068e-05, "loss": 0.5003, "step": 22775 }, { "epoch": 0.6996590176020643, "grad_norm": 0.3844768702983856, "learning_rate": 1.4573072916863802e-05, "loss": 0.5448, "step": 22776 }, { "epoch": 0.6996897367370135, "grad_norm": 0.4461835026741028, "learning_rate": 1.4572643128762809e-05, "loss": 0.5445, "step": 22777 }, { "epoch": 0.6997204558719626, "grad_norm": 0.3705158531665802, "learning_rate": 1.4572213329982083e-05, "loss": 0.4626, "step": 22778 }, { "epoch": 0.6997511750069118, "grad_norm": 0.33157867193222046, "learning_rate": 1.4571783520522639e-05, "loss": 0.5471, "step": 22779 }, { "epoch": 0.699781894141861, "grad_norm": 0.3805142641067505, "learning_rate": 1.4571353700385475e-05, "loss": 0.593, "step": 22780 }, { "epoch": 0.6998126132768101, "grad_norm": 0.37049517035484314, "learning_rate": 1.4570923869571594e-05, "loss": 0.5478, "step": 22781 }, { "epoch": 0.6998433324117593, "grad_norm": 0.41099077463150024, "learning_rate": 1.4570494028082003e-05, "loss": 0.5382, "step": 22782 }, { "epoch": 0.6998740515467085, "grad_norm": 0.4122641682624817, "learning_rate": 1.4570064175917699e-05, "loss": 0.5214, "step": 22783 }, { "epoch": 0.6999047706816576, "grad_norm": 0.3480283319950104, "learning_rate": 1.4569634313079696e-05, "loss": 0.5508, "step": 22784 }, { "epoch": 0.6999354898166068, "grad_norm": 0.35131773352622986, "learning_rate": 1.4569204439568992e-05, "loss": 0.568, "step": 22785 }, { "epoch": 0.6999662089515559, "grad_norm": 0.3923819065093994, "learning_rate": 1.456877455538659e-05, "loss": 0.6411, "step": 22786 }, { "epoch": 0.699996928086505, "grad_norm": 0.3571188747882843, "learning_rate": 1.4568344660533495e-05, "loss": 0.5781, "step": 22787 }, { "epoch": 0.7000276472214543, "grad_norm": 0.3507484495639801, "learning_rate": 1.4567914755010717e-05, "loss": 0.5243, "step": 22788 }, { "epoch": 0.7000583663564034, "grad_norm": 0.38164252042770386, "learning_rate": 1.456748483881925e-05, "loss": 0.5827, "step": 22789 }, { "epoch": 0.7000890854913525, "grad_norm": 0.3348979353904724, "learning_rate": 1.4567054911960105e-05, "loss": 0.605, "step": 22790 }, { "epoch": 0.7001198046263017, "grad_norm": 0.39877113699913025, "learning_rate": 1.4566624974434285e-05, "loss": 0.5981, "step": 22791 }, { "epoch": 0.7001505237612509, "grad_norm": 0.32018035650253296, "learning_rate": 1.4566195026242793e-05, "loss": 0.521, "step": 22792 }, { "epoch": 0.7001812428962001, "grad_norm": 0.3418984115123749, "learning_rate": 1.4565765067386634e-05, "loss": 0.4489, "step": 22793 }, { "epoch": 0.7002119620311492, "grad_norm": 0.3393201529979706, "learning_rate": 1.4565335097866814e-05, "loss": 0.53, "step": 22794 }, { "epoch": 0.7002426811660983, "grad_norm": 0.399614542722702, "learning_rate": 1.4564905117684329e-05, "loss": 0.5276, "step": 22795 }, { "epoch": 0.7002734003010476, "grad_norm": 0.36597326397895813, "learning_rate": 1.4564475126840191e-05, "loss": 0.5979, "step": 22796 }, { "epoch": 0.7003041194359967, "grad_norm": 0.4022495448589325, "learning_rate": 1.4564045125335407e-05, "loss": 0.5682, "step": 22797 }, { "epoch": 0.7003348385709458, "grad_norm": 0.3792642652988434, "learning_rate": 1.4563615113170973e-05, "loss": 0.5116, "step": 22798 }, { "epoch": 0.700365557705895, "grad_norm": 0.44367289543151855, "learning_rate": 1.4563185090347897e-05, "loss": 0.4908, "step": 22799 }, { "epoch": 0.7003962768408442, "grad_norm": 0.3650529384613037, "learning_rate": 1.4562755056867186e-05, "loss": 0.5907, "step": 22800 }, { "epoch": 0.7004269959757933, "grad_norm": 0.4542345702648163, "learning_rate": 1.4562325012729841e-05, "loss": 0.6416, "step": 22801 }, { "epoch": 0.7004577151107425, "grad_norm": 0.37634986639022827, "learning_rate": 1.4561894957936869e-05, "loss": 0.5926, "step": 22802 }, { "epoch": 0.7004884342456916, "grad_norm": 0.34997987747192383, "learning_rate": 1.4561464892489268e-05, "loss": 0.5422, "step": 22803 }, { "epoch": 0.7005191533806407, "grad_norm": 0.3954198360443115, "learning_rate": 1.456103481638805e-05, "loss": 0.5067, "step": 22804 }, { "epoch": 0.70054987251559, "grad_norm": 0.35792744159698486, "learning_rate": 1.4560604729634216e-05, "loss": 0.6043, "step": 22805 }, { "epoch": 0.7005805916505391, "grad_norm": 0.36221078038215637, "learning_rate": 1.4560174632228772e-05, "loss": 0.6149, "step": 22806 }, { "epoch": 0.7006113107854883, "grad_norm": 0.4583247900009155, "learning_rate": 1.4559744524172722e-05, "loss": 0.6383, "step": 22807 }, { "epoch": 0.7006420299204374, "grad_norm": 0.5182620286941528, "learning_rate": 1.4559314405467069e-05, "loss": 0.5604, "step": 22808 }, { "epoch": 0.7006727490553866, "grad_norm": 0.35575294494628906, "learning_rate": 1.4558884276112819e-05, "loss": 0.6677, "step": 22809 }, { "epoch": 0.7007034681903358, "grad_norm": 0.37107419967651367, "learning_rate": 1.4558454136110978e-05, "loss": 0.5814, "step": 22810 }, { "epoch": 0.7007341873252849, "grad_norm": 0.3991928696632385, "learning_rate": 1.4558023985462549e-05, "loss": 0.587, "step": 22811 }, { "epoch": 0.700764906460234, "grad_norm": 0.3345324993133545, "learning_rate": 1.4557593824168535e-05, "loss": 0.5672, "step": 22812 }, { "epoch": 0.7007956255951833, "grad_norm": 0.32378995418548584, "learning_rate": 1.4557163652229939e-05, "loss": 0.5433, "step": 22813 }, { "epoch": 0.7008263447301324, "grad_norm": 0.3526691496372223, "learning_rate": 1.4556733469647774e-05, "loss": 0.5685, "step": 22814 }, { "epoch": 0.7008570638650815, "grad_norm": 0.3523760735988617, "learning_rate": 1.4556303276423039e-05, "loss": 0.5508, "step": 22815 }, { "epoch": 0.7008877830000307, "grad_norm": 0.3563656806945801, "learning_rate": 1.4555873072556739e-05, "loss": 0.53, "step": 22816 }, { "epoch": 0.7009185021349799, "grad_norm": 0.3406163454055786, "learning_rate": 1.4555442858049879e-05, "loss": 0.5111, "step": 22817 }, { "epoch": 0.7009492212699291, "grad_norm": 0.3340211510658264, "learning_rate": 1.4555012632903465e-05, "loss": 0.5744, "step": 22818 }, { "epoch": 0.7009799404048782, "grad_norm": 0.34135255217552185, "learning_rate": 1.4554582397118499e-05, "loss": 0.4669, "step": 22819 }, { "epoch": 0.7010106595398273, "grad_norm": 0.3994549810886383, "learning_rate": 1.455415215069599e-05, "loss": 0.5663, "step": 22820 }, { "epoch": 0.7010413786747766, "grad_norm": 0.37170448899269104, "learning_rate": 1.4553721893636939e-05, "loss": 0.602, "step": 22821 }, { "epoch": 0.7010720978097257, "grad_norm": 0.3449903428554535, "learning_rate": 1.4553291625942354e-05, "loss": 0.471, "step": 22822 }, { "epoch": 0.7011028169446748, "grad_norm": 0.39861395955085754, "learning_rate": 1.4552861347613237e-05, "loss": 0.6054, "step": 22823 }, { "epoch": 0.701133536079624, "grad_norm": 0.35048165917396545, "learning_rate": 1.4552431058650595e-05, "loss": 0.5259, "step": 22824 }, { "epoch": 0.7011642552145732, "grad_norm": 0.362206369638443, "learning_rate": 1.4552000759055432e-05, "loss": 0.5865, "step": 22825 }, { "epoch": 0.7011949743495223, "grad_norm": 0.4684704542160034, "learning_rate": 1.4551570448828753e-05, "loss": 0.5496, "step": 22826 }, { "epoch": 0.7012256934844715, "grad_norm": 0.4203329086303711, "learning_rate": 1.4551140127971565e-05, "loss": 0.5822, "step": 22827 }, { "epoch": 0.7012564126194206, "grad_norm": 0.33440643548965454, "learning_rate": 1.455070979648487e-05, "loss": 0.588, "step": 22828 }, { "epoch": 0.7012871317543699, "grad_norm": 0.3653399348258972, "learning_rate": 1.4550279454369674e-05, "loss": 0.5762, "step": 22829 }, { "epoch": 0.701317850889319, "grad_norm": 0.34714943170547485, "learning_rate": 1.4549849101626982e-05, "loss": 0.5624, "step": 22830 }, { "epoch": 0.7013485700242681, "grad_norm": 0.3877015709877014, "learning_rate": 1.4549418738257804e-05, "loss": 0.5676, "step": 22831 }, { "epoch": 0.7013792891592173, "grad_norm": 0.46675246953964233, "learning_rate": 1.4548988364263132e-05, "loss": 0.6039, "step": 22832 }, { "epoch": 0.7014100082941664, "grad_norm": 0.42267102003097534, "learning_rate": 1.4548557979643987e-05, "loss": 0.6298, "step": 22833 }, { "epoch": 0.7014407274291156, "grad_norm": 0.3524516820907593, "learning_rate": 1.4548127584401369e-05, "loss": 0.4845, "step": 22834 }, { "epoch": 0.7014714465640648, "grad_norm": 0.40826621651649475, "learning_rate": 1.4547697178536275e-05, "loss": 0.5527, "step": 22835 }, { "epoch": 0.7015021656990139, "grad_norm": 0.34316056966781616, "learning_rate": 1.454726676204972e-05, "loss": 0.5728, "step": 22836 }, { "epoch": 0.701532884833963, "grad_norm": 0.3711056411266327, "learning_rate": 1.4546836334942707e-05, "loss": 0.6091, "step": 22837 }, { "epoch": 0.7015636039689123, "grad_norm": 0.36259299516677856, "learning_rate": 1.4546405897216238e-05, "loss": 0.519, "step": 22838 }, { "epoch": 0.7015943231038614, "grad_norm": 0.42987483739852905, "learning_rate": 1.4545975448871322e-05, "loss": 0.5318, "step": 22839 }, { "epoch": 0.7016250422388105, "grad_norm": 0.3637450337409973, "learning_rate": 1.4545544989908962e-05, "loss": 0.5022, "step": 22840 }, { "epoch": 0.7016557613737597, "grad_norm": 0.3284754157066345, "learning_rate": 1.4545114520330163e-05, "loss": 0.5285, "step": 22841 }, { "epoch": 0.7016864805087089, "grad_norm": 0.34325510263442993, "learning_rate": 1.4544684040135934e-05, "loss": 0.6378, "step": 22842 }, { "epoch": 0.7017171996436581, "grad_norm": 0.3790234923362732, "learning_rate": 1.4544253549327276e-05, "loss": 0.5986, "step": 22843 }, { "epoch": 0.7017479187786072, "grad_norm": 0.35839441418647766, "learning_rate": 1.45438230479052e-05, "loss": 0.5774, "step": 22844 }, { "epoch": 0.7017786379135563, "grad_norm": 0.35159096121788025, "learning_rate": 1.4543392535870707e-05, "loss": 0.6064, "step": 22845 }, { "epoch": 0.7018093570485056, "grad_norm": 0.38188284635543823, "learning_rate": 1.4542962013224799e-05, "loss": 0.6005, "step": 22846 }, { "epoch": 0.7018400761834547, "grad_norm": 0.3862486481666565, "learning_rate": 1.4542531479968492e-05, "loss": 0.6091, "step": 22847 }, { "epoch": 0.7018707953184038, "grad_norm": 0.35040998458862305, "learning_rate": 1.4542100936102779e-05, "loss": 0.5351, "step": 22848 }, { "epoch": 0.701901514453353, "grad_norm": 0.3530709147453308, "learning_rate": 1.4541670381628676e-05, "loss": 0.5549, "step": 22849 }, { "epoch": 0.7019322335883021, "grad_norm": 0.34579047560691833, "learning_rate": 1.4541239816547182e-05, "loss": 0.569, "step": 22850 }, { "epoch": 0.7019629527232513, "grad_norm": 0.3199346363544464, "learning_rate": 1.4540809240859308e-05, "loss": 0.5376, "step": 22851 }, { "epoch": 0.7019936718582005, "grad_norm": 0.3645905554294586, "learning_rate": 1.4540378654566056e-05, "loss": 0.6191, "step": 22852 }, { "epoch": 0.7020243909931496, "grad_norm": 0.3461923897266388, "learning_rate": 1.4539948057668433e-05, "loss": 0.5687, "step": 22853 }, { "epoch": 0.7020551101280988, "grad_norm": 0.367147833108902, "learning_rate": 1.4539517450167444e-05, "loss": 0.5374, "step": 22854 }, { "epoch": 0.702085829263048, "grad_norm": 0.36126935482025146, "learning_rate": 1.4539086832064096e-05, "loss": 0.5573, "step": 22855 }, { "epoch": 0.7021165483979971, "grad_norm": 0.4113994538784027, "learning_rate": 1.4538656203359392e-05, "loss": 0.6018, "step": 22856 }, { "epoch": 0.7021472675329463, "grad_norm": 0.37532272934913635, "learning_rate": 1.453822556405434e-05, "loss": 0.5511, "step": 22857 }, { "epoch": 0.7021779866678954, "grad_norm": 0.3709590435028076, "learning_rate": 1.4537794914149946e-05, "loss": 0.5519, "step": 22858 }, { "epoch": 0.7022087058028446, "grad_norm": 0.34410595893859863, "learning_rate": 1.4537364253647213e-05, "loss": 0.5382, "step": 22859 }, { "epoch": 0.7022394249377938, "grad_norm": 0.4175748825073242, "learning_rate": 1.4536933582547152e-05, "loss": 0.6161, "step": 22860 }, { "epoch": 0.7022701440727429, "grad_norm": 0.3591126799583435, "learning_rate": 1.4536502900850762e-05, "loss": 0.4591, "step": 22861 }, { "epoch": 0.702300863207692, "grad_norm": 0.362669438123703, "learning_rate": 1.4536072208559056e-05, "loss": 0.5418, "step": 22862 }, { "epoch": 0.7023315823426413, "grad_norm": 0.35969310998916626, "learning_rate": 1.4535641505673037e-05, "loss": 0.4883, "step": 22863 }, { "epoch": 0.7023623014775904, "grad_norm": 0.36475974321365356, "learning_rate": 1.4535210792193705e-05, "loss": 0.5679, "step": 22864 }, { "epoch": 0.7023930206125395, "grad_norm": 0.5045778155326843, "learning_rate": 1.4534780068122078e-05, "loss": 0.628, "step": 22865 }, { "epoch": 0.7024237397474887, "grad_norm": 0.3466109335422516, "learning_rate": 1.4534349333459153e-05, "loss": 0.5564, "step": 22866 }, { "epoch": 0.7024544588824378, "grad_norm": 0.36826038360595703, "learning_rate": 1.4533918588205937e-05, "loss": 0.5724, "step": 22867 }, { "epoch": 0.7024851780173871, "grad_norm": 0.3973080813884735, "learning_rate": 1.4533487832363438e-05, "loss": 0.5572, "step": 22868 }, { "epoch": 0.7025158971523362, "grad_norm": 0.4098432660102844, "learning_rate": 1.453305706593266e-05, "loss": 0.4874, "step": 22869 }, { "epoch": 0.7025466162872853, "grad_norm": 0.3438859283924103, "learning_rate": 1.4532626288914615e-05, "loss": 0.5352, "step": 22870 }, { "epoch": 0.7025773354222346, "grad_norm": 0.7508162260055542, "learning_rate": 1.4532195501310303e-05, "loss": 0.5436, "step": 22871 }, { "epoch": 0.7026080545571837, "grad_norm": 0.38775455951690674, "learning_rate": 1.453176470312073e-05, "loss": 0.5897, "step": 22872 }, { "epoch": 0.7026387736921328, "grad_norm": 0.3522389233112335, "learning_rate": 1.4531333894346906e-05, "loss": 0.6297, "step": 22873 }, { "epoch": 0.702669492827082, "grad_norm": 0.3602699041366577, "learning_rate": 1.4530903074989834e-05, "loss": 0.5262, "step": 22874 }, { "epoch": 0.7027002119620311, "grad_norm": 0.3567761480808258, "learning_rate": 1.4530472245050522e-05, "loss": 0.5896, "step": 22875 }, { "epoch": 0.7027309310969803, "grad_norm": 0.33409062027931213, "learning_rate": 1.4530041404529978e-05, "loss": 0.5944, "step": 22876 }, { "epoch": 0.7027616502319295, "grad_norm": 0.3584613502025604, "learning_rate": 1.4529610553429201e-05, "loss": 0.5264, "step": 22877 }, { "epoch": 0.7027923693668786, "grad_norm": 0.376620888710022, "learning_rate": 1.4529179691749206e-05, "loss": 0.6141, "step": 22878 }, { "epoch": 0.7028230885018278, "grad_norm": 0.37028026580810547, "learning_rate": 1.4528748819490995e-05, "loss": 0.5862, "step": 22879 }, { "epoch": 0.702853807636777, "grad_norm": 0.35160723328590393, "learning_rate": 1.4528317936655573e-05, "loss": 0.5425, "step": 22880 }, { "epoch": 0.7028845267717261, "grad_norm": 0.3856756389141083, "learning_rate": 1.452788704324395e-05, "loss": 0.532, "step": 22881 }, { "epoch": 0.7029152459066753, "grad_norm": 0.3472043573856354, "learning_rate": 1.452745613925713e-05, "loss": 0.5192, "step": 22882 }, { "epoch": 0.7029459650416244, "grad_norm": 0.35238853096961975, "learning_rate": 1.4527025224696121e-05, "loss": 0.5844, "step": 22883 }, { "epoch": 0.7029766841765736, "grad_norm": 0.33087438344955444, "learning_rate": 1.4526594299561927e-05, "loss": 0.5084, "step": 22884 }, { "epoch": 0.7030074033115228, "grad_norm": 0.5001827478408813, "learning_rate": 1.4526163363855556e-05, "loss": 0.6223, "step": 22885 }, { "epoch": 0.7030381224464719, "grad_norm": 0.3765128254890442, "learning_rate": 1.4525732417578015e-05, "loss": 0.6107, "step": 22886 }, { "epoch": 0.703068841581421, "grad_norm": 0.6044362783432007, "learning_rate": 1.452530146073031e-05, "loss": 0.5021, "step": 22887 }, { "epoch": 0.7030995607163703, "grad_norm": 0.33248814940452576, "learning_rate": 1.4524870493313445e-05, "loss": 0.5462, "step": 22888 }, { "epoch": 0.7031302798513194, "grad_norm": 0.3634447753429413, "learning_rate": 1.4524439515328432e-05, "loss": 0.6231, "step": 22889 }, { "epoch": 0.7031609989862685, "grad_norm": 0.374244749546051, "learning_rate": 1.4524008526776274e-05, "loss": 0.594, "step": 22890 }, { "epoch": 0.7031917181212177, "grad_norm": 0.33760565519332886, "learning_rate": 1.4523577527657977e-05, "loss": 0.493, "step": 22891 }, { "epoch": 0.7032224372561668, "grad_norm": 0.3910742998123169, "learning_rate": 1.4523146517974551e-05, "loss": 0.5394, "step": 22892 }, { "epoch": 0.7032531563911161, "grad_norm": 0.4323113262653351, "learning_rate": 1.4522715497726995e-05, "loss": 0.6098, "step": 22893 }, { "epoch": 0.7032838755260652, "grad_norm": 0.39593246579170227, "learning_rate": 1.4522284466916327e-05, "loss": 0.5296, "step": 22894 }, { "epoch": 0.7033145946610143, "grad_norm": 0.40132763981819153, "learning_rate": 1.4521853425543544e-05, "loss": 0.5909, "step": 22895 }, { "epoch": 0.7033453137959635, "grad_norm": 0.36575061082839966, "learning_rate": 1.452142237360966e-05, "loss": 0.5082, "step": 22896 }, { "epoch": 0.7033760329309127, "grad_norm": 0.3814242482185364, "learning_rate": 1.4520991311115678e-05, "loss": 0.5063, "step": 22897 }, { "epoch": 0.7034067520658618, "grad_norm": 0.3461247682571411, "learning_rate": 1.4520560238062603e-05, "loss": 0.4621, "step": 22898 }, { "epoch": 0.703437471200811, "grad_norm": 0.7017604112625122, "learning_rate": 1.4520129154451446e-05, "loss": 0.5633, "step": 22899 }, { "epoch": 0.7034681903357601, "grad_norm": 0.34799906611442566, "learning_rate": 1.4519698060283207e-05, "loss": 0.567, "step": 22900 }, { "epoch": 0.7034989094707093, "grad_norm": 0.3295420706272125, "learning_rate": 1.4519266955558902e-05, "loss": 0.4975, "step": 22901 }, { "epoch": 0.7035296286056585, "grad_norm": 0.34889882802963257, "learning_rate": 1.4518835840279531e-05, "loss": 0.5304, "step": 22902 }, { "epoch": 0.7035603477406076, "grad_norm": 0.35680997371673584, "learning_rate": 1.4518404714446107e-05, "loss": 0.5517, "step": 22903 }, { "epoch": 0.7035910668755568, "grad_norm": 0.48517128825187683, "learning_rate": 1.4517973578059627e-05, "loss": 0.7004, "step": 22904 }, { "epoch": 0.703621786010506, "grad_norm": 0.38378679752349854, "learning_rate": 1.4517542431121108e-05, "loss": 0.6202, "step": 22905 }, { "epoch": 0.7036525051454551, "grad_norm": 0.3723198175430298, "learning_rate": 1.4517111273631553e-05, "loss": 0.6375, "step": 22906 }, { "epoch": 0.7036832242804043, "grad_norm": 0.33538419008255005, "learning_rate": 1.4516680105591969e-05, "loss": 0.5534, "step": 22907 }, { "epoch": 0.7037139434153534, "grad_norm": 0.3618636429309845, "learning_rate": 1.4516248927003362e-05, "loss": 0.5257, "step": 22908 }, { "epoch": 0.7037446625503025, "grad_norm": 0.3583250343799591, "learning_rate": 1.4515817737866742e-05, "loss": 0.4774, "step": 22909 }, { "epoch": 0.7037753816852518, "grad_norm": 0.3656419813632965, "learning_rate": 1.4515386538183113e-05, "loss": 0.5058, "step": 22910 }, { "epoch": 0.7038061008202009, "grad_norm": 0.4133569300174713, "learning_rate": 1.4514955327953484e-05, "loss": 0.6443, "step": 22911 }, { "epoch": 0.70383681995515, "grad_norm": 0.37801286578178406, "learning_rate": 1.4514524107178862e-05, "loss": 0.5583, "step": 22912 }, { "epoch": 0.7038675390900992, "grad_norm": 0.4066169559955597, "learning_rate": 1.4514092875860254e-05, "loss": 0.6102, "step": 22913 }, { "epoch": 0.7038982582250484, "grad_norm": 0.3291459381580353, "learning_rate": 1.4513661633998665e-05, "loss": 0.5656, "step": 22914 }, { "epoch": 0.7039289773599975, "grad_norm": 0.37486839294433594, "learning_rate": 1.4513230381595108e-05, "loss": 0.4848, "step": 22915 }, { "epoch": 0.7039596964949467, "grad_norm": 0.40638071298599243, "learning_rate": 1.4512799118650583e-05, "loss": 0.4745, "step": 22916 }, { "epoch": 0.7039904156298958, "grad_norm": 0.3672785460948944, "learning_rate": 1.45123678451661e-05, "loss": 0.5006, "step": 22917 }, { "epoch": 0.7040211347648451, "grad_norm": 0.36186274886131287, "learning_rate": 1.4511936561142666e-05, "loss": 0.6069, "step": 22918 }, { "epoch": 0.7040518538997942, "grad_norm": 0.3022129535675049, "learning_rate": 1.4511505266581291e-05, "loss": 0.5065, "step": 22919 }, { "epoch": 0.7040825730347433, "grad_norm": 0.3470118045806885, "learning_rate": 1.451107396148298e-05, "loss": 0.5365, "step": 22920 }, { "epoch": 0.7041132921696925, "grad_norm": 0.3656608760356903, "learning_rate": 1.4510642645848742e-05, "loss": 0.5703, "step": 22921 }, { "epoch": 0.7041440113046417, "grad_norm": 0.39742743968963623, "learning_rate": 1.4510211319679581e-05, "loss": 0.523, "step": 22922 }, { "epoch": 0.7041747304395908, "grad_norm": 0.36146119236946106, "learning_rate": 1.4509779982976506e-05, "loss": 0.5925, "step": 22923 }, { "epoch": 0.70420544957454, "grad_norm": 0.3478795289993286, "learning_rate": 1.4509348635740526e-05, "loss": 0.4523, "step": 22924 }, { "epoch": 0.7042361687094891, "grad_norm": 0.38213062286376953, "learning_rate": 1.4508917277972648e-05, "loss": 0.5897, "step": 22925 }, { "epoch": 0.7042668878444382, "grad_norm": 0.3770413398742676, "learning_rate": 1.4508485909673877e-05, "loss": 0.5582, "step": 22926 }, { "epoch": 0.7042976069793875, "grad_norm": 0.35741207003593445, "learning_rate": 1.4508054530845223e-05, "loss": 0.6169, "step": 22927 }, { "epoch": 0.7043283261143366, "grad_norm": 0.3622036874294281, "learning_rate": 1.4507623141487695e-05, "loss": 0.5258, "step": 22928 }, { "epoch": 0.7043590452492858, "grad_norm": 0.36808347702026367, "learning_rate": 1.4507191741602295e-05, "loss": 0.548, "step": 22929 }, { "epoch": 0.704389764384235, "grad_norm": 0.4170840382575989, "learning_rate": 1.4506760331190037e-05, "loss": 0.5714, "step": 22930 }, { "epoch": 0.7044204835191841, "grad_norm": 0.3949587345123291, "learning_rate": 1.4506328910251924e-05, "loss": 0.5574, "step": 22931 }, { "epoch": 0.7044512026541333, "grad_norm": 0.38205835223197937, "learning_rate": 1.4505897478788966e-05, "loss": 0.5369, "step": 22932 }, { "epoch": 0.7044819217890824, "grad_norm": 0.4511113166809082, "learning_rate": 1.450546603680217e-05, "loss": 0.5814, "step": 22933 }, { "epoch": 0.7045126409240315, "grad_norm": 0.33401328325271606, "learning_rate": 1.4505034584292542e-05, "loss": 0.5537, "step": 22934 }, { "epoch": 0.7045433600589808, "grad_norm": 0.3885745406150818, "learning_rate": 1.4504603121261091e-05, "loss": 0.577, "step": 22935 }, { "epoch": 0.7045740791939299, "grad_norm": 0.47954118251800537, "learning_rate": 1.4504171647708825e-05, "loss": 0.5082, "step": 22936 }, { "epoch": 0.704604798328879, "grad_norm": 0.35205787420272827, "learning_rate": 1.4503740163636753e-05, "loss": 0.5606, "step": 22937 }, { "epoch": 0.7046355174638282, "grad_norm": 0.4279021918773651, "learning_rate": 1.450330866904588e-05, "loss": 0.4761, "step": 22938 }, { "epoch": 0.7046662365987774, "grad_norm": 0.3461022973060608, "learning_rate": 1.4502877163937215e-05, "loss": 0.5838, "step": 22939 }, { "epoch": 0.7046969557337266, "grad_norm": 0.356644868850708, "learning_rate": 1.4502445648311766e-05, "loss": 0.5312, "step": 22940 }, { "epoch": 0.7047276748686757, "grad_norm": 0.4335714876651764, "learning_rate": 1.4502014122170538e-05, "loss": 0.5502, "step": 22941 }, { "epoch": 0.7047583940036248, "grad_norm": 0.3494728207588196, "learning_rate": 1.4501582585514546e-05, "loss": 0.5425, "step": 22942 }, { "epoch": 0.7047891131385741, "grad_norm": 0.3605012893676758, "learning_rate": 1.4501151038344792e-05, "loss": 0.5999, "step": 22943 }, { "epoch": 0.7048198322735232, "grad_norm": 0.37797683477401733, "learning_rate": 1.4500719480662287e-05, "loss": 0.6105, "step": 22944 }, { "epoch": 0.7048505514084723, "grad_norm": 0.36745724081993103, "learning_rate": 1.4500287912468033e-05, "loss": 0.599, "step": 22945 }, { "epoch": 0.7048812705434215, "grad_norm": 0.45699360966682434, "learning_rate": 1.4499856333763048e-05, "loss": 0.5068, "step": 22946 }, { "epoch": 0.7049119896783707, "grad_norm": 0.4207497239112854, "learning_rate": 1.449942474454833e-05, "loss": 0.5546, "step": 22947 }, { "epoch": 0.7049427088133198, "grad_norm": 0.3816334903240204, "learning_rate": 1.4498993144824892e-05, "loss": 0.5659, "step": 22948 }, { "epoch": 0.704973427948269, "grad_norm": 0.35515984892845154, "learning_rate": 1.4498561534593742e-05, "loss": 0.5113, "step": 22949 }, { "epoch": 0.7050041470832181, "grad_norm": 0.3735414445400238, "learning_rate": 1.4498129913855887e-05, "loss": 0.5629, "step": 22950 }, { "epoch": 0.7050348662181672, "grad_norm": 0.3675881028175354, "learning_rate": 1.4497698282612338e-05, "loss": 0.5413, "step": 22951 }, { "epoch": 0.7050655853531165, "grad_norm": 0.36824753880500793, "learning_rate": 1.4497266640864096e-05, "loss": 0.5926, "step": 22952 }, { "epoch": 0.7050963044880656, "grad_norm": 0.36849790811538696, "learning_rate": 1.4496834988612176e-05, "loss": 0.5333, "step": 22953 }, { "epoch": 0.7051270236230148, "grad_norm": 0.3632769286632538, "learning_rate": 1.4496403325857584e-05, "loss": 0.5991, "step": 22954 }, { "epoch": 0.705157742757964, "grad_norm": 0.3138526380062103, "learning_rate": 1.4495971652601329e-05, "loss": 0.4977, "step": 22955 }, { "epoch": 0.7051884618929131, "grad_norm": 0.3490069806575775, "learning_rate": 1.4495539968844414e-05, "loss": 0.509, "step": 22956 }, { "epoch": 0.7052191810278623, "grad_norm": 0.3515849709510803, "learning_rate": 1.4495108274587857e-05, "loss": 0.558, "step": 22957 }, { "epoch": 0.7052499001628114, "grad_norm": 0.3448316156864166, "learning_rate": 1.4494676569832655e-05, "loss": 0.586, "step": 22958 }, { "epoch": 0.7052806192977605, "grad_norm": 0.3816393315792084, "learning_rate": 1.4494244854579824e-05, "loss": 0.6575, "step": 22959 }, { "epoch": 0.7053113384327098, "grad_norm": 0.39507728815078735, "learning_rate": 1.4493813128830375e-05, "loss": 0.466, "step": 22960 }, { "epoch": 0.7053420575676589, "grad_norm": 0.3625277876853943, "learning_rate": 1.4493381392585306e-05, "loss": 0.5433, "step": 22961 }, { "epoch": 0.705372776702608, "grad_norm": 0.7628723978996277, "learning_rate": 1.4492949645845633e-05, "loss": 0.5923, "step": 22962 }, { "epoch": 0.7054034958375572, "grad_norm": 0.40936988592147827, "learning_rate": 1.449251788861236e-05, "loss": 0.5445, "step": 22963 }, { "epoch": 0.7054342149725064, "grad_norm": 0.3470347821712494, "learning_rate": 1.4492086120886502e-05, "loss": 0.5657, "step": 22964 }, { "epoch": 0.7054649341074556, "grad_norm": 0.3826867640018463, "learning_rate": 1.4491654342669059e-05, "loss": 0.5645, "step": 22965 }, { "epoch": 0.7054956532424047, "grad_norm": 0.37732645869255066, "learning_rate": 1.4491222553961048e-05, "loss": 0.5472, "step": 22966 }, { "epoch": 0.7055263723773538, "grad_norm": 0.4038812518119812, "learning_rate": 1.4490790754763471e-05, "loss": 0.5691, "step": 22967 }, { "epoch": 0.7055570915123031, "grad_norm": 0.4108286201953888, "learning_rate": 1.4490358945077336e-05, "loss": 0.5304, "step": 22968 }, { "epoch": 0.7055878106472522, "grad_norm": 0.3603500425815582, "learning_rate": 1.4489927124903658e-05, "loss": 0.5431, "step": 22969 }, { "epoch": 0.7056185297822013, "grad_norm": 0.352876752614975, "learning_rate": 1.4489495294243439e-05, "loss": 0.5293, "step": 22970 }, { "epoch": 0.7056492489171505, "grad_norm": 0.5635542869567871, "learning_rate": 1.4489063453097694e-05, "loss": 0.6368, "step": 22971 }, { "epoch": 0.7056799680520996, "grad_norm": 0.45358970761299133, "learning_rate": 1.4488631601467422e-05, "loss": 0.5735, "step": 22972 }, { "epoch": 0.7057106871870488, "grad_norm": 0.34158214926719666, "learning_rate": 1.4488199739353642e-05, "loss": 0.5782, "step": 22973 }, { "epoch": 0.705741406321998, "grad_norm": 0.3507739305496216, "learning_rate": 1.4487767866757355e-05, "loss": 0.4917, "step": 22974 }, { "epoch": 0.7057721254569471, "grad_norm": 0.32589250802993774, "learning_rate": 1.4487335983679575e-05, "loss": 0.5702, "step": 22975 }, { "epoch": 0.7058028445918962, "grad_norm": 0.4101223349571228, "learning_rate": 1.4486904090121305e-05, "loss": 0.5611, "step": 22976 }, { "epoch": 0.7058335637268455, "grad_norm": 0.3555884063243866, "learning_rate": 1.4486472186083563e-05, "loss": 0.4975, "step": 22977 }, { "epoch": 0.7058642828617946, "grad_norm": 0.47983866930007935, "learning_rate": 1.4486040271567348e-05, "loss": 0.5309, "step": 22978 }, { "epoch": 0.7058950019967438, "grad_norm": 0.35189497470855713, "learning_rate": 1.448560834657367e-05, "loss": 0.6225, "step": 22979 }, { "epoch": 0.7059257211316929, "grad_norm": 0.3749738335609436, "learning_rate": 1.4485176411103545e-05, "loss": 0.48, "step": 22980 }, { "epoch": 0.7059564402666421, "grad_norm": 0.3425863981246948, "learning_rate": 1.4484744465157974e-05, "loss": 0.5836, "step": 22981 }, { "epoch": 0.7059871594015913, "grad_norm": 0.3295155167579651, "learning_rate": 1.4484312508737972e-05, "loss": 0.5395, "step": 22982 }, { "epoch": 0.7060178785365404, "grad_norm": 0.4169027507305145, "learning_rate": 1.4483880541844544e-05, "loss": 0.5578, "step": 22983 }, { "epoch": 0.7060485976714895, "grad_norm": 0.37750154733657837, "learning_rate": 1.4483448564478701e-05, "loss": 0.5738, "step": 22984 }, { "epoch": 0.7060793168064388, "grad_norm": 0.3774126172065735, "learning_rate": 1.4483016576641449e-05, "loss": 0.5118, "step": 22985 }, { "epoch": 0.7061100359413879, "grad_norm": 0.33417850732803345, "learning_rate": 1.4482584578333799e-05, "loss": 0.526, "step": 22986 }, { "epoch": 0.706140755076337, "grad_norm": 0.3588663637638092, "learning_rate": 1.4482152569556762e-05, "loss": 0.5721, "step": 22987 }, { "epoch": 0.7061714742112862, "grad_norm": 0.3736327290534973, "learning_rate": 1.448172055031134e-05, "loss": 0.5525, "step": 22988 }, { "epoch": 0.7062021933462354, "grad_norm": 0.3529767692089081, "learning_rate": 1.448128852059855e-05, "loss": 0.5801, "step": 22989 }, { "epoch": 0.7062329124811846, "grad_norm": 1.4871511459350586, "learning_rate": 1.4480856480419397e-05, "loss": 0.5723, "step": 22990 }, { "epoch": 0.7062636316161337, "grad_norm": 0.48468926548957825, "learning_rate": 1.448042442977489e-05, "loss": 0.4697, "step": 22991 }, { "epoch": 0.7062943507510828, "grad_norm": 0.3315890431404114, "learning_rate": 1.447999236866604e-05, "loss": 0.4556, "step": 22992 }, { "epoch": 0.706325069886032, "grad_norm": 0.3729872405529022, "learning_rate": 1.4479560297093853e-05, "loss": 0.523, "step": 22993 }, { "epoch": 0.7063557890209812, "grad_norm": 0.3324905335903168, "learning_rate": 1.4479128215059341e-05, "loss": 0.5137, "step": 22994 }, { "epoch": 0.7063865081559303, "grad_norm": 0.42850205302238464, "learning_rate": 1.4478696122563514e-05, "loss": 0.5165, "step": 22995 }, { "epoch": 0.7064172272908795, "grad_norm": 0.3482384979724884, "learning_rate": 1.4478264019607378e-05, "loss": 0.5408, "step": 22996 }, { "epoch": 0.7064479464258286, "grad_norm": 0.3566868305206299, "learning_rate": 1.4477831906191941e-05, "loss": 0.6102, "step": 22997 }, { "epoch": 0.7064786655607778, "grad_norm": 0.39712342619895935, "learning_rate": 1.447739978231822e-05, "loss": 0.4705, "step": 22998 }, { "epoch": 0.706509384695727, "grad_norm": 0.4323369860649109, "learning_rate": 1.4476967647987215e-05, "loss": 0.566, "step": 22999 }, { "epoch": 0.7065401038306761, "grad_norm": 0.4160631597042084, "learning_rate": 1.4476535503199943e-05, "loss": 0.5477, "step": 23000 }, { "epoch": 0.7065708229656252, "grad_norm": 0.4066062569618225, "learning_rate": 1.4476103347957408e-05, "loss": 0.5424, "step": 23001 }, { "epoch": 0.7066015421005745, "grad_norm": 0.5372565984725952, "learning_rate": 1.447567118226062e-05, "loss": 0.48, "step": 23002 }, { "epoch": 0.7066322612355236, "grad_norm": 0.3195563554763794, "learning_rate": 1.447523900611059e-05, "loss": 0.5714, "step": 23003 }, { "epoch": 0.7066629803704728, "grad_norm": 0.385177344083786, "learning_rate": 1.4474806819508328e-05, "loss": 0.6046, "step": 23004 }, { "epoch": 0.7066936995054219, "grad_norm": 0.7284210324287415, "learning_rate": 1.447437462245484e-05, "loss": 0.5183, "step": 23005 }, { "epoch": 0.706724418640371, "grad_norm": 0.4066028594970703, "learning_rate": 1.4473942414951138e-05, "loss": 0.5204, "step": 23006 }, { "epoch": 0.7067551377753203, "grad_norm": 0.37680259346961975, "learning_rate": 1.4473510196998232e-05, "loss": 0.6599, "step": 23007 }, { "epoch": 0.7067858569102694, "grad_norm": 0.3801862299442291, "learning_rate": 1.4473077968597131e-05, "loss": 0.5724, "step": 23008 }, { "epoch": 0.7068165760452185, "grad_norm": 0.511515736579895, "learning_rate": 1.4472645729748844e-05, "loss": 0.5988, "step": 23009 }, { "epoch": 0.7068472951801678, "grad_norm": 0.35925593972206116, "learning_rate": 1.4472213480454378e-05, "loss": 0.5337, "step": 23010 }, { "epoch": 0.7068780143151169, "grad_norm": 0.3943916857242584, "learning_rate": 1.4471781220714749e-05, "loss": 0.6478, "step": 23011 }, { "epoch": 0.706908733450066, "grad_norm": 0.4322599172592163, "learning_rate": 1.447134895053096e-05, "loss": 0.5424, "step": 23012 }, { "epoch": 0.7069394525850152, "grad_norm": 0.4154728651046753, "learning_rate": 1.4470916669904022e-05, "loss": 0.5452, "step": 23013 }, { "epoch": 0.7069701717199643, "grad_norm": 0.588262140750885, "learning_rate": 1.4470484378834948e-05, "loss": 0.4878, "step": 23014 }, { "epoch": 0.7070008908549136, "grad_norm": 0.3484846353530884, "learning_rate": 1.4470052077324743e-05, "loss": 0.5419, "step": 23015 }, { "epoch": 0.7070316099898627, "grad_norm": 0.39448282122612, "learning_rate": 1.4469619765374422e-05, "loss": 0.5385, "step": 23016 }, { "epoch": 0.7070623291248118, "grad_norm": 0.3779955208301544, "learning_rate": 1.446918744298499e-05, "loss": 0.5498, "step": 23017 }, { "epoch": 0.707093048259761, "grad_norm": 0.3844943642616272, "learning_rate": 1.4468755110157461e-05, "loss": 0.614, "step": 23018 }, { "epoch": 0.7071237673947102, "grad_norm": 0.3402717113494873, "learning_rate": 1.4468322766892845e-05, "loss": 0.59, "step": 23019 }, { "epoch": 0.7071544865296593, "grad_norm": 0.43103745579719543, "learning_rate": 1.4467890413192143e-05, "loss": 0.5966, "step": 23020 }, { "epoch": 0.7071852056646085, "grad_norm": 0.3776431977748871, "learning_rate": 1.4467458049056374e-05, "loss": 0.5401, "step": 23021 }, { "epoch": 0.7072159247995576, "grad_norm": 0.37983769178390503, "learning_rate": 1.4467025674486541e-05, "loss": 0.6152, "step": 23022 }, { "epoch": 0.7072466439345068, "grad_norm": 0.3602312505245209, "learning_rate": 1.4466593289483663e-05, "loss": 0.5822, "step": 23023 }, { "epoch": 0.707277363069456, "grad_norm": 0.3617551028728485, "learning_rate": 1.446616089404874e-05, "loss": 0.6186, "step": 23024 }, { "epoch": 0.7073080822044051, "grad_norm": 0.34657007455825806, "learning_rate": 1.4465728488182792e-05, "loss": 0.5291, "step": 23025 }, { "epoch": 0.7073388013393542, "grad_norm": 0.35015618801116943, "learning_rate": 1.4465296071886816e-05, "loss": 0.6574, "step": 23026 }, { "epoch": 0.7073695204743035, "grad_norm": 0.37855157256126404, "learning_rate": 1.4464863645161833e-05, "loss": 0.5433, "step": 23027 }, { "epoch": 0.7074002396092526, "grad_norm": 0.40738242864608765, "learning_rate": 1.4464431208008849e-05, "loss": 0.5633, "step": 23028 }, { "epoch": 0.7074309587442018, "grad_norm": 0.33447393774986267, "learning_rate": 1.4463998760428875e-05, "loss": 0.5109, "step": 23029 }, { "epoch": 0.7074616778791509, "grad_norm": 0.6916822791099548, "learning_rate": 1.446356630242292e-05, "loss": 0.5482, "step": 23030 }, { "epoch": 0.7074923970141, "grad_norm": 0.3667564392089844, "learning_rate": 1.4463133833991991e-05, "loss": 0.5876, "step": 23031 }, { "epoch": 0.7075231161490493, "grad_norm": 0.39976921677589417, "learning_rate": 1.4462701355137104e-05, "loss": 0.5807, "step": 23032 }, { "epoch": 0.7075538352839984, "grad_norm": 0.36124712228775024, "learning_rate": 1.4462268865859264e-05, "loss": 0.5624, "step": 23033 }, { "epoch": 0.7075845544189475, "grad_norm": 0.36961832642555237, "learning_rate": 1.4461836366159485e-05, "loss": 0.5676, "step": 23034 }, { "epoch": 0.7076152735538968, "grad_norm": 0.3493576943874359, "learning_rate": 1.4461403856038776e-05, "loss": 0.5501, "step": 23035 }, { "epoch": 0.7076459926888459, "grad_norm": 0.33467769622802734, "learning_rate": 1.4460971335498146e-05, "loss": 0.5462, "step": 23036 }, { "epoch": 0.707676711823795, "grad_norm": 0.3930888772010803, "learning_rate": 1.4460538804538606e-05, "loss": 0.5743, "step": 23037 }, { "epoch": 0.7077074309587442, "grad_norm": 0.3731018900871277, "learning_rate": 1.4460106263161167e-05, "loss": 0.5403, "step": 23038 }, { "epoch": 0.7077381500936933, "grad_norm": 0.8386621475219727, "learning_rate": 1.4459673711366837e-05, "loss": 0.4738, "step": 23039 }, { "epoch": 0.7077688692286426, "grad_norm": 0.35183101892471313, "learning_rate": 1.4459241149156628e-05, "loss": 0.5392, "step": 23040 }, { "epoch": 0.7077995883635917, "grad_norm": 0.40955692529678345, "learning_rate": 1.445880857653155e-05, "loss": 0.5261, "step": 23041 }, { "epoch": 0.7078303074985408, "grad_norm": 0.3801997900009155, "learning_rate": 1.4458375993492615e-05, "loss": 0.5549, "step": 23042 }, { "epoch": 0.70786102663349, "grad_norm": 0.3660202920436859, "learning_rate": 1.445794340004083e-05, "loss": 0.5269, "step": 23043 }, { "epoch": 0.7078917457684392, "grad_norm": 0.36117592453956604, "learning_rate": 1.4457510796177205e-05, "loss": 0.5596, "step": 23044 }, { "epoch": 0.7079224649033883, "grad_norm": 0.3861066401004791, "learning_rate": 1.4457078181902755e-05, "loss": 0.4393, "step": 23045 }, { "epoch": 0.7079531840383375, "grad_norm": 0.4105859100818634, "learning_rate": 1.4456645557218484e-05, "loss": 0.5627, "step": 23046 }, { "epoch": 0.7079839031732866, "grad_norm": 0.3678833544254303, "learning_rate": 1.4456212922125412e-05, "loss": 0.5278, "step": 23047 }, { "epoch": 0.7080146223082358, "grad_norm": 0.33302041888237, "learning_rate": 1.445578027662454e-05, "loss": 0.5385, "step": 23048 }, { "epoch": 0.708045341443185, "grad_norm": 0.4505271911621094, "learning_rate": 1.4455347620716881e-05, "loss": 0.5007, "step": 23049 }, { "epoch": 0.7080760605781341, "grad_norm": 0.4627983868122101, "learning_rate": 1.445491495440345e-05, "loss": 0.6179, "step": 23050 }, { "epoch": 0.7081067797130833, "grad_norm": 0.46214690804481506, "learning_rate": 1.4454482277685248e-05, "loss": 0.5513, "step": 23051 }, { "epoch": 0.7081374988480325, "grad_norm": 0.3707757890224457, "learning_rate": 1.4454049590563295e-05, "loss": 0.592, "step": 23052 }, { "epoch": 0.7081682179829816, "grad_norm": 0.42383474111557007, "learning_rate": 1.44536168930386e-05, "loss": 0.5093, "step": 23053 }, { "epoch": 0.7081989371179308, "grad_norm": 0.4326099753379822, "learning_rate": 1.4453184185112168e-05, "loss": 0.5773, "step": 23054 }, { "epoch": 0.7082296562528799, "grad_norm": 0.4413365125656128, "learning_rate": 1.4452751466785018e-05, "loss": 0.6201, "step": 23055 }, { "epoch": 0.708260375387829, "grad_norm": 0.32307571172714233, "learning_rate": 1.445231873805815e-05, "loss": 0.507, "step": 23056 }, { "epoch": 0.7082910945227783, "grad_norm": 0.3526722192764282, "learning_rate": 1.4451885998932587e-05, "loss": 0.5219, "step": 23057 }, { "epoch": 0.7083218136577274, "grad_norm": 0.333835244178772, "learning_rate": 1.445145324940933e-05, "loss": 0.5549, "step": 23058 }, { "epoch": 0.7083525327926765, "grad_norm": 0.3434488773345947, "learning_rate": 1.445102048948939e-05, "loss": 0.5314, "step": 23059 }, { "epoch": 0.7083832519276257, "grad_norm": 0.34878432750701904, "learning_rate": 1.4450587719173786e-05, "loss": 0.5195, "step": 23060 }, { "epoch": 0.7084139710625749, "grad_norm": 0.39832648634910583, "learning_rate": 1.445015493846352e-05, "loss": 0.5439, "step": 23061 }, { "epoch": 0.708444690197524, "grad_norm": 0.36931538581848145, "learning_rate": 1.4449722147359608e-05, "loss": 0.4577, "step": 23062 }, { "epoch": 0.7084754093324732, "grad_norm": 0.35550153255462646, "learning_rate": 1.4449289345863062e-05, "loss": 0.5684, "step": 23063 }, { "epoch": 0.7085061284674223, "grad_norm": 0.4660094976425171, "learning_rate": 1.4448856533974887e-05, "loss": 0.5414, "step": 23064 }, { "epoch": 0.7085368476023716, "grad_norm": 0.3434155285358429, "learning_rate": 1.4448423711696096e-05, "loss": 0.5671, "step": 23065 }, { "epoch": 0.7085675667373207, "grad_norm": 0.39096999168395996, "learning_rate": 1.4447990879027703e-05, "loss": 0.5399, "step": 23066 }, { "epoch": 0.7085982858722698, "grad_norm": 0.3465670943260193, "learning_rate": 1.4447558035970715e-05, "loss": 0.5601, "step": 23067 }, { "epoch": 0.708629005007219, "grad_norm": 0.41028523445129395, "learning_rate": 1.4447125182526145e-05, "loss": 0.5139, "step": 23068 }, { "epoch": 0.7086597241421682, "grad_norm": 0.3677568733692169, "learning_rate": 1.4446692318695e-05, "loss": 0.6041, "step": 23069 }, { "epoch": 0.7086904432771173, "grad_norm": 0.40022456645965576, "learning_rate": 1.4446259444478298e-05, "loss": 0.6552, "step": 23070 }, { "epoch": 0.7087211624120665, "grad_norm": 0.372785359621048, "learning_rate": 1.4445826559877048e-05, "loss": 0.5191, "step": 23071 }, { "epoch": 0.7087518815470156, "grad_norm": 0.4220815896987915, "learning_rate": 1.4445393664892257e-05, "loss": 0.5863, "step": 23072 }, { "epoch": 0.7087826006819647, "grad_norm": 0.3445606231689453, "learning_rate": 1.4444960759524942e-05, "loss": 0.4955, "step": 23073 }, { "epoch": 0.708813319816914, "grad_norm": 0.36066901683807373, "learning_rate": 1.4444527843776106e-05, "loss": 0.5742, "step": 23074 }, { "epoch": 0.7088440389518631, "grad_norm": 0.4167182743549347, "learning_rate": 1.4444094917646769e-05, "loss": 0.5154, "step": 23075 }, { "epoch": 0.7088747580868123, "grad_norm": 0.3348700702190399, "learning_rate": 1.4443661981137935e-05, "loss": 0.5178, "step": 23076 }, { "epoch": 0.7089054772217614, "grad_norm": 0.35245281457901, "learning_rate": 1.4443229034250621e-05, "loss": 0.5752, "step": 23077 }, { "epoch": 0.7089361963567106, "grad_norm": 0.3634800612926483, "learning_rate": 1.4442796076985833e-05, "loss": 0.5037, "step": 23078 }, { "epoch": 0.7089669154916598, "grad_norm": 0.3458399474620819, "learning_rate": 1.4442363109344586e-05, "loss": 0.5571, "step": 23079 }, { "epoch": 0.7089976346266089, "grad_norm": 0.3611626625061035, "learning_rate": 1.4441930131327888e-05, "loss": 0.6063, "step": 23080 }, { "epoch": 0.709028353761558, "grad_norm": 0.41884487867355347, "learning_rate": 1.4441497142936753e-05, "loss": 0.4847, "step": 23081 }, { "epoch": 0.7090590728965073, "grad_norm": 0.48781508207321167, "learning_rate": 1.4441064144172192e-05, "loss": 0.5834, "step": 23082 }, { "epoch": 0.7090897920314564, "grad_norm": 0.3499770760536194, "learning_rate": 1.4440631135035212e-05, "loss": 0.5017, "step": 23083 }, { "epoch": 0.7091205111664055, "grad_norm": 0.3855915367603302, "learning_rate": 1.4440198115526833e-05, "loss": 0.6076, "step": 23084 }, { "epoch": 0.7091512303013547, "grad_norm": 0.34242692589759827, "learning_rate": 1.4439765085648056e-05, "loss": 0.5193, "step": 23085 }, { "epoch": 0.7091819494363039, "grad_norm": 0.36987176537513733, "learning_rate": 1.4439332045399904e-05, "loss": 0.632, "step": 23086 }, { "epoch": 0.709212668571253, "grad_norm": 0.3636292517185211, "learning_rate": 1.4438898994783375e-05, "loss": 0.5667, "step": 23087 }, { "epoch": 0.7092433877062022, "grad_norm": 0.39753931760787964, "learning_rate": 1.443846593379949e-05, "loss": 0.6015, "step": 23088 }, { "epoch": 0.7092741068411513, "grad_norm": 0.3583000600337982, "learning_rate": 1.443803286244926e-05, "loss": 0.5412, "step": 23089 }, { "epoch": 0.7093048259761006, "grad_norm": 0.33592551946640015, "learning_rate": 1.443759978073369e-05, "loss": 0.5773, "step": 23090 }, { "epoch": 0.7093355451110497, "grad_norm": 0.3573514521121979, "learning_rate": 1.44371666886538e-05, "loss": 0.5359, "step": 23091 }, { "epoch": 0.7093662642459988, "grad_norm": 0.8026015162467957, "learning_rate": 1.4436733586210597e-05, "loss": 0.6404, "step": 23092 }, { "epoch": 0.709396983380948, "grad_norm": 0.3329825699329376, "learning_rate": 1.4436300473405092e-05, "loss": 0.5301, "step": 23093 }, { "epoch": 0.7094277025158972, "grad_norm": 0.38807255029678345, "learning_rate": 1.4435867350238295e-05, "loss": 0.625, "step": 23094 }, { "epoch": 0.7094584216508463, "grad_norm": 0.34749889373779297, "learning_rate": 1.4435434216711223e-05, "loss": 0.4915, "step": 23095 }, { "epoch": 0.7094891407857955, "grad_norm": 0.36177024245262146, "learning_rate": 1.4435001072824883e-05, "loss": 0.6207, "step": 23096 }, { "epoch": 0.7095198599207446, "grad_norm": 0.3792213797569275, "learning_rate": 1.443456791858029e-05, "loss": 0.5247, "step": 23097 }, { "epoch": 0.7095505790556937, "grad_norm": 0.3884359896183014, "learning_rate": 1.4434134753978452e-05, "loss": 0.4529, "step": 23098 }, { "epoch": 0.709581298190643, "grad_norm": 0.35379791259765625, "learning_rate": 1.4433701579020384e-05, "loss": 0.5183, "step": 23099 }, { "epoch": 0.7096120173255921, "grad_norm": 0.3638116717338562, "learning_rate": 1.4433268393707096e-05, "loss": 0.5418, "step": 23100 }, { "epoch": 0.7096427364605413, "grad_norm": 0.3687261641025543, "learning_rate": 1.44328351980396e-05, "loss": 0.5195, "step": 23101 }, { "epoch": 0.7096734555954904, "grad_norm": 0.37641438841819763, "learning_rate": 1.4432401992018908e-05, "loss": 0.5005, "step": 23102 }, { "epoch": 0.7097041747304396, "grad_norm": 0.3433673083782196, "learning_rate": 1.443196877564603e-05, "loss": 0.5577, "step": 23103 }, { "epoch": 0.7097348938653888, "grad_norm": 0.4470759928226471, "learning_rate": 1.4431535548921981e-05, "loss": 0.5269, "step": 23104 }, { "epoch": 0.7097656130003379, "grad_norm": 0.3392722010612488, "learning_rate": 1.4431102311847772e-05, "loss": 0.5687, "step": 23105 }, { "epoch": 0.709796332135287, "grad_norm": 0.42907336354255676, "learning_rate": 1.4430669064424412e-05, "loss": 0.5816, "step": 23106 }, { "epoch": 0.7098270512702363, "grad_norm": 0.3783896863460541, "learning_rate": 1.4430235806652915e-05, "loss": 0.6268, "step": 23107 }, { "epoch": 0.7098577704051854, "grad_norm": 0.38889607787132263, "learning_rate": 1.4429802538534294e-05, "loss": 0.4631, "step": 23108 }, { "epoch": 0.7098884895401345, "grad_norm": 0.36080121994018555, "learning_rate": 1.4429369260069559e-05, "loss": 0.5442, "step": 23109 }, { "epoch": 0.7099192086750837, "grad_norm": 0.31648266315460205, "learning_rate": 1.4428935971259723e-05, "loss": 0.5558, "step": 23110 }, { "epoch": 0.7099499278100329, "grad_norm": 0.3757586181163788, "learning_rate": 1.44285026721058e-05, "loss": 0.512, "step": 23111 }, { "epoch": 0.709980646944982, "grad_norm": 0.3717178404331207, "learning_rate": 1.4428069362608796e-05, "loss": 0.5651, "step": 23112 }, { "epoch": 0.7100113660799312, "grad_norm": 0.3475629687309265, "learning_rate": 1.4427636042769728e-05, "loss": 0.5819, "step": 23113 }, { "epoch": 0.7100420852148803, "grad_norm": 0.3877166211605072, "learning_rate": 1.4427202712589608e-05, "loss": 0.5226, "step": 23114 }, { "epoch": 0.7100728043498296, "grad_norm": 0.3571532368659973, "learning_rate": 1.4426769372069446e-05, "loss": 0.6254, "step": 23115 }, { "epoch": 0.7101035234847787, "grad_norm": 0.3352164626121521, "learning_rate": 1.4426336021210257e-05, "loss": 0.5367, "step": 23116 }, { "epoch": 0.7101342426197278, "grad_norm": 0.389855295419693, "learning_rate": 1.4425902660013047e-05, "loss": 0.5336, "step": 23117 }, { "epoch": 0.710164961754677, "grad_norm": 0.48637062311172485, "learning_rate": 1.4425469288478837e-05, "loss": 0.6014, "step": 23118 }, { "epoch": 0.7101956808896261, "grad_norm": 0.3846763074398041, "learning_rate": 1.442503590660863e-05, "loss": 0.5201, "step": 23119 }, { "epoch": 0.7102264000245753, "grad_norm": 0.3414325714111328, "learning_rate": 1.4424602514403446e-05, "loss": 0.5722, "step": 23120 }, { "epoch": 0.7102571191595245, "grad_norm": 0.4340962767601013, "learning_rate": 1.4424169111864292e-05, "loss": 0.6557, "step": 23121 }, { "epoch": 0.7102878382944736, "grad_norm": 0.35195252299308777, "learning_rate": 1.4423735698992182e-05, "loss": 0.5233, "step": 23122 }, { "epoch": 0.7103185574294227, "grad_norm": 0.3571989834308624, "learning_rate": 1.4423302275788129e-05, "loss": 0.6249, "step": 23123 }, { "epoch": 0.710349276564372, "grad_norm": 0.37197771668434143, "learning_rate": 1.4422868842253142e-05, "loss": 0.5459, "step": 23124 }, { "epoch": 0.7103799956993211, "grad_norm": 0.37214046716690063, "learning_rate": 1.4422435398388238e-05, "loss": 0.6276, "step": 23125 }, { "epoch": 0.7104107148342703, "grad_norm": 0.3478812277317047, "learning_rate": 1.4422001944194428e-05, "loss": 0.5993, "step": 23126 }, { "epoch": 0.7104414339692194, "grad_norm": 0.3654778301715851, "learning_rate": 1.4421568479672724e-05, "loss": 0.5434, "step": 23127 }, { "epoch": 0.7104721531041686, "grad_norm": 0.3626367449760437, "learning_rate": 1.4421135004824136e-05, "loss": 0.6272, "step": 23128 }, { "epoch": 0.7105028722391178, "grad_norm": 0.34407490491867065, "learning_rate": 1.442070151964968e-05, "loss": 0.5054, "step": 23129 }, { "epoch": 0.7105335913740669, "grad_norm": 0.3517288565635681, "learning_rate": 1.4420268024150365e-05, "loss": 0.5591, "step": 23130 }, { "epoch": 0.710564310509016, "grad_norm": 0.3527558445930481, "learning_rate": 1.441983451832721e-05, "loss": 0.6294, "step": 23131 }, { "epoch": 0.7105950296439653, "grad_norm": 0.34687817096710205, "learning_rate": 1.4419401002181217e-05, "loss": 0.5132, "step": 23132 }, { "epoch": 0.7106257487789144, "grad_norm": 0.3705577254295349, "learning_rate": 1.4418967475713409e-05, "loss": 0.5784, "step": 23133 }, { "epoch": 0.7106564679138635, "grad_norm": 0.3985271751880646, "learning_rate": 1.4418533938924793e-05, "loss": 0.6006, "step": 23134 }, { "epoch": 0.7106871870488127, "grad_norm": 0.3586808145046234, "learning_rate": 1.4418100391816379e-05, "loss": 0.5196, "step": 23135 }, { "epoch": 0.7107179061837618, "grad_norm": 0.355976402759552, "learning_rate": 1.4417666834389186e-05, "loss": 0.5504, "step": 23136 }, { "epoch": 0.7107486253187111, "grad_norm": 0.36334526538848877, "learning_rate": 1.4417233266644222e-05, "loss": 0.6067, "step": 23137 }, { "epoch": 0.7107793444536602, "grad_norm": 0.4030219316482544, "learning_rate": 1.4416799688582503e-05, "loss": 0.5995, "step": 23138 }, { "epoch": 0.7108100635886093, "grad_norm": 0.3105413615703583, "learning_rate": 1.4416366100205036e-05, "loss": 0.5032, "step": 23139 }, { "epoch": 0.7108407827235586, "grad_norm": 0.43707555532455444, "learning_rate": 1.4415932501512844e-05, "loss": 0.6419, "step": 23140 }, { "epoch": 0.7108715018585077, "grad_norm": 0.34092557430267334, "learning_rate": 1.4415498892506926e-05, "loss": 0.5514, "step": 23141 }, { "epoch": 0.7109022209934568, "grad_norm": 0.42173606157302856, "learning_rate": 1.4415065273188305e-05, "loss": 0.5854, "step": 23142 }, { "epoch": 0.710932940128406, "grad_norm": 0.366253525018692, "learning_rate": 1.4414631643557992e-05, "loss": 0.5963, "step": 23143 }, { "epoch": 0.7109636592633551, "grad_norm": 0.3504307270050049, "learning_rate": 1.4414198003616996e-05, "loss": 0.5117, "step": 23144 }, { "epoch": 0.7109943783983043, "grad_norm": 0.6091676950454712, "learning_rate": 1.4413764353366336e-05, "loss": 0.5936, "step": 23145 }, { "epoch": 0.7110250975332535, "grad_norm": 0.45024269819259644, "learning_rate": 1.441333069280702e-05, "loss": 0.6019, "step": 23146 }, { "epoch": 0.7110558166682026, "grad_norm": 0.4137520492076874, "learning_rate": 1.441289702194006e-05, "loss": 0.5727, "step": 23147 }, { "epoch": 0.7110865358031517, "grad_norm": 0.36061373353004456, "learning_rate": 1.4412463340766471e-05, "loss": 0.5656, "step": 23148 }, { "epoch": 0.711117254938101, "grad_norm": 0.4389161765575409, "learning_rate": 1.441202964928727e-05, "loss": 0.5492, "step": 23149 }, { "epoch": 0.7111479740730501, "grad_norm": 0.37058350443840027, "learning_rate": 1.4411595947503458e-05, "loss": 0.5525, "step": 23150 }, { "epoch": 0.7111786932079993, "grad_norm": 0.41106116771698, "learning_rate": 1.4411162235416061e-05, "loss": 0.5747, "step": 23151 }, { "epoch": 0.7112094123429484, "grad_norm": 0.4149491786956787, "learning_rate": 1.4410728513026086e-05, "loss": 0.5949, "step": 23152 }, { "epoch": 0.7112401314778976, "grad_norm": 0.3750985264778137, "learning_rate": 1.4410294780334546e-05, "loss": 0.5843, "step": 23153 }, { "epoch": 0.7112708506128468, "grad_norm": 0.35462892055511475, "learning_rate": 1.4409861037342454e-05, "loss": 0.6018, "step": 23154 }, { "epoch": 0.7113015697477959, "grad_norm": 0.3271157443523407, "learning_rate": 1.4409427284050824e-05, "loss": 0.574, "step": 23155 }, { "epoch": 0.711332288882745, "grad_norm": 0.40661928057670593, "learning_rate": 1.440899352046067e-05, "loss": 0.6022, "step": 23156 }, { "epoch": 0.7113630080176943, "grad_norm": 0.38703617453575134, "learning_rate": 1.4408559746572999e-05, "loss": 0.6287, "step": 23157 }, { "epoch": 0.7113937271526434, "grad_norm": 0.5234213471412659, "learning_rate": 1.4408125962388835e-05, "loss": 0.5602, "step": 23158 }, { "epoch": 0.7114244462875925, "grad_norm": 0.3974968194961548, "learning_rate": 1.440769216790918e-05, "loss": 0.5392, "step": 23159 }, { "epoch": 0.7114551654225417, "grad_norm": 0.4091323912143707, "learning_rate": 1.4407258363135053e-05, "loss": 0.5359, "step": 23160 }, { "epoch": 0.7114858845574908, "grad_norm": 0.3596690595149994, "learning_rate": 1.4406824548067469e-05, "loss": 0.5659, "step": 23161 }, { "epoch": 0.7115166036924401, "grad_norm": 0.36298155784606934, "learning_rate": 1.4406390722707436e-05, "loss": 0.5164, "step": 23162 }, { "epoch": 0.7115473228273892, "grad_norm": 0.34600916504859924, "learning_rate": 1.4405956887055973e-05, "loss": 0.5324, "step": 23163 }, { "epoch": 0.7115780419623383, "grad_norm": 0.39074718952178955, "learning_rate": 1.4405523041114084e-05, "loss": 0.5494, "step": 23164 }, { "epoch": 0.7116087610972875, "grad_norm": 0.38014692068099976, "learning_rate": 1.4405089184882793e-05, "loss": 0.5607, "step": 23165 }, { "epoch": 0.7116394802322367, "grad_norm": 0.37148168683052063, "learning_rate": 1.4404655318363108e-05, "loss": 0.585, "step": 23166 }, { "epoch": 0.7116701993671858, "grad_norm": 0.36409929394721985, "learning_rate": 1.4404221441556042e-05, "loss": 0.4892, "step": 23167 }, { "epoch": 0.711700918502135, "grad_norm": 0.35483771562576294, "learning_rate": 1.4403787554462607e-05, "loss": 0.5718, "step": 23168 }, { "epoch": 0.7117316376370841, "grad_norm": 0.330191969871521, "learning_rate": 1.4403353657083823e-05, "loss": 0.436, "step": 23169 }, { "epoch": 0.7117623567720333, "grad_norm": 0.3583383858203888, "learning_rate": 1.4402919749420699e-05, "loss": 0.5047, "step": 23170 }, { "epoch": 0.7117930759069825, "grad_norm": 0.5058189630508423, "learning_rate": 1.4402485831474245e-05, "loss": 0.58, "step": 23171 }, { "epoch": 0.7118237950419316, "grad_norm": 0.43713584542274475, "learning_rate": 1.440205190324548e-05, "loss": 0.5538, "step": 23172 }, { "epoch": 0.7118545141768807, "grad_norm": 0.33514082431793213, "learning_rate": 1.4401617964735415e-05, "loss": 0.5523, "step": 23173 }, { "epoch": 0.71188523331183, "grad_norm": 0.34641215205192566, "learning_rate": 1.4401184015945065e-05, "loss": 0.5597, "step": 23174 }, { "epoch": 0.7119159524467791, "grad_norm": 0.4287494421005249, "learning_rate": 1.440075005687544e-05, "loss": 0.5919, "step": 23175 }, { "epoch": 0.7119466715817283, "grad_norm": 0.5401845574378967, "learning_rate": 1.4400316087527557e-05, "loss": 0.506, "step": 23176 }, { "epoch": 0.7119773907166774, "grad_norm": 0.44411858916282654, "learning_rate": 1.4399882107902428e-05, "loss": 0.5616, "step": 23177 }, { "epoch": 0.7120081098516265, "grad_norm": 0.4344513416290283, "learning_rate": 1.4399448118001066e-05, "loss": 0.5393, "step": 23178 }, { "epoch": 0.7120388289865758, "grad_norm": 0.32687899470329285, "learning_rate": 1.4399014117824486e-05, "loss": 0.4815, "step": 23179 }, { "epoch": 0.7120695481215249, "grad_norm": 0.3641681969165802, "learning_rate": 1.4398580107373703e-05, "loss": 0.5535, "step": 23180 }, { "epoch": 0.712100267256474, "grad_norm": 0.3446493148803711, "learning_rate": 1.4398146086649728e-05, "loss": 0.5697, "step": 23181 }, { "epoch": 0.7121309863914232, "grad_norm": 0.4210122525691986, "learning_rate": 1.4397712055653577e-05, "loss": 0.4598, "step": 23182 }, { "epoch": 0.7121617055263724, "grad_norm": 0.3595261871814728, "learning_rate": 1.4397278014386262e-05, "loss": 0.5743, "step": 23183 }, { "epoch": 0.7121924246613215, "grad_norm": 0.34554776549339294, "learning_rate": 1.4396843962848794e-05, "loss": 0.5786, "step": 23184 }, { "epoch": 0.7122231437962707, "grad_norm": 0.42187047004699707, "learning_rate": 1.4396409901042195e-05, "loss": 0.5312, "step": 23185 }, { "epoch": 0.7122538629312198, "grad_norm": 0.3821777105331421, "learning_rate": 1.439597582896747e-05, "loss": 0.6443, "step": 23186 }, { "epoch": 0.7122845820661691, "grad_norm": 0.33297234773635864, "learning_rate": 1.4395541746625636e-05, "loss": 0.4766, "step": 23187 }, { "epoch": 0.7123153012011182, "grad_norm": 0.32210713624954224, "learning_rate": 1.4395107654017711e-05, "loss": 0.5172, "step": 23188 }, { "epoch": 0.7123460203360673, "grad_norm": 0.3457854390144348, "learning_rate": 1.43946735511447e-05, "loss": 0.5581, "step": 23189 }, { "epoch": 0.7123767394710165, "grad_norm": 0.336777001619339, "learning_rate": 1.4394239438007627e-05, "loss": 0.4611, "step": 23190 }, { "epoch": 0.7124074586059657, "grad_norm": 0.4799322187900543, "learning_rate": 1.4393805314607496e-05, "loss": 0.4805, "step": 23191 }, { "epoch": 0.7124381777409148, "grad_norm": 0.34266161918640137, "learning_rate": 1.439337118094533e-05, "loss": 0.5733, "step": 23192 }, { "epoch": 0.712468896875864, "grad_norm": 0.3782579004764557, "learning_rate": 1.4392937037022137e-05, "loss": 0.6275, "step": 23193 }, { "epoch": 0.7124996160108131, "grad_norm": 0.35909515619277954, "learning_rate": 1.4392502882838934e-05, "loss": 0.5783, "step": 23194 }, { "epoch": 0.7125303351457623, "grad_norm": 0.4945495128631592, "learning_rate": 1.4392068718396732e-05, "loss": 0.5301, "step": 23195 }, { "epoch": 0.7125610542807115, "grad_norm": 0.3391670882701874, "learning_rate": 1.4391634543696548e-05, "loss": 0.4759, "step": 23196 }, { "epoch": 0.7125917734156606, "grad_norm": 0.3458058536052704, "learning_rate": 1.4391200358739394e-05, "loss": 0.5754, "step": 23197 }, { "epoch": 0.7126224925506097, "grad_norm": 0.3640713095664978, "learning_rate": 1.4390766163526283e-05, "loss": 0.5461, "step": 23198 }, { "epoch": 0.712653211685559, "grad_norm": 0.4494645297527313, "learning_rate": 1.4390331958058235e-05, "loss": 0.4752, "step": 23199 }, { "epoch": 0.7126839308205081, "grad_norm": 0.3539104759693146, "learning_rate": 1.4389897742336256e-05, "loss": 0.6296, "step": 23200 }, { "epoch": 0.7127146499554573, "grad_norm": 0.35763445496559143, "learning_rate": 1.4389463516361368e-05, "loss": 0.5965, "step": 23201 }, { "epoch": 0.7127453690904064, "grad_norm": 0.42334264516830444, "learning_rate": 1.4389029280134578e-05, "loss": 0.5439, "step": 23202 }, { "epoch": 0.7127760882253555, "grad_norm": 0.38551226258277893, "learning_rate": 1.438859503365691e-05, "loss": 0.6234, "step": 23203 }, { "epoch": 0.7128068073603048, "grad_norm": 0.3605939447879791, "learning_rate": 1.4388160776929368e-05, "loss": 0.5752, "step": 23204 }, { "epoch": 0.7128375264952539, "grad_norm": 0.32525408267974854, "learning_rate": 1.4387726509952966e-05, "loss": 0.555, "step": 23205 }, { "epoch": 0.712868245630203, "grad_norm": 0.3363845944404602, "learning_rate": 1.4387292232728726e-05, "loss": 0.4951, "step": 23206 }, { "epoch": 0.7128989647651522, "grad_norm": 0.3857913911342621, "learning_rate": 1.438685794525766e-05, "loss": 0.5427, "step": 23207 }, { "epoch": 0.7129296839001014, "grad_norm": 0.4063234031200409, "learning_rate": 1.4386423647540779e-05, "loss": 0.6571, "step": 23208 }, { "epoch": 0.7129604030350505, "grad_norm": 0.3856711685657501, "learning_rate": 1.4385989339579099e-05, "loss": 0.591, "step": 23209 }, { "epoch": 0.7129911221699997, "grad_norm": 0.37258753180503845, "learning_rate": 1.4385555021373633e-05, "loss": 0.5816, "step": 23210 }, { "epoch": 0.7130218413049488, "grad_norm": 0.3642735779285431, "learning_rate": 1.4385120692925401e-05, "loss": 0.615, "step": 23211 }, { "epoch": 0.7130525604398981, "grad_norm": 0.3527680039405823, "learning_rate": 1.438468635423541e-05, "loss": 0.6164, "step": 23212 }, { "epoch": 0.7130832795748472, "grad_norm": 0.42647191882133484, "learning_rate": 1.438425200530468e-05, "loss": 0.6269, "step": 23213 }, { "epoch": 0.7131139987097963, "grad_norm": 0.37884822487831116, "learning_rate": 1.4383817646134221e-05, "loss": 0.499, "step": 23214 }, { "epoch": 0.7131447178447455, "grad_norm": 0.3612182140350342, "learning_rate": 1.4383383276725051e-05, "loss": 0.5414, "step": 23215 }, { "epoch": 0.7131754369796947, "grad_norm": 0.3987963795661926, "learning_rate": 1.4382948897078182e-05, "loss": 0.4966, "step": 23216 }, { "epoch": 0.7132061561146438, "grad_norm": 0.35355040431022644, "learning_rate": 1.4382514507194633e-05, "loss": 0.5231, "step": 23217 }, { "epoch": 0.713236875249593, "grad_norm": 0.35451841354370117, "learning_rate": 1.4382080107075411e-05, "loss": 0.5475, "step": 23218 }, { "epoch": 0.7132675943845421, "grad_norm": 0.4079259932041168, "learning_rate": 1.438164569672154e-05, "loss": 0.6281, "step": 23219 }, { "epoch": 0.7132983135194912, "grad_norm": 0.3593311607837677, "learning_rate": 1.4381211276134024e-05, "loss": 0.5751, "step": 23220 }, { "epoch": 0.7133290326544405, "grad_norm": 0.3853421211242676, "learning_rate": 1.4380776845313888e-05, "loss": 0.6224, "step": 23221 }, { "epoch": 0.7133597517893896, "grad_norm": 0.3302721083164215, "learning_rate": 1.438034240426214e-05, "loss": 0.6251, "step": 23222 }, { "epoch": 0.7133904709243387, "grad_norm": 0.3675985634326935, "learning_rate": 1.4379907952979795e-05, "loss": 0.6025, "step": 23223 }, { "epoch": 0.713421190059288, "grad_norm": 0.39555293321609497, "learning_rate": 1.437947349146787e-05, "loss": 0.5786, "step": 23224 }, { "epoch": 0.7134519091942371, "grad_norm": 0.3521851599216461, "learning_rate": 1.437903901972738e-05, "loss": 0.5954, "step": 23225 }, { "epoch": 0.7134826283291863, "grad_norm": 0.37242019176483154, "learning_rate": 1.4378604537759337e-05, "loss": 0.5482, "step": 23226 }, { "epoch": 0.7135133474641354, "grad_norm": 0.36565521359443665, "learning_rate": 1.4378170045564757e-05, "loss": 0.5812, "step": 23227 }, { "epoch": 0.7135440665990845, "grad_norm": 0.38653984665870667, "learning_rate": 1.4377735543144655e-05, "loss": 0.5488, "step": 23228 }, { "epoch": 0.7135747857340338, "grad_norm": 0.3806653916835785, "learning_rate": 1.4377301030500049e-05, "loss": 0.5551, "step": 23229 }, { "epoch": 0.7136055048689829, "grad_norm": 0.3746584355831146, "learning_rate": 1.4376866507631948e-05, "loss": 0.5095, "step": 23230 }, { "epoch": 0.713636224003932, "grad_norm": 0.39651650190353394, "learning_rate": 1.4376431974541367e-05, "loss": 0.5855, "step": 23231 }, { "epoch": 0.7136669431388812, "grad_norm": 0.3361566662788391, "learning_rate": 1.4375997431229326e-05, "loss": 0.4693, "step": 23232 }, { "epoch": 0.7136976622738304, "grad_norm": 0.39500734210014343, "learning_rate": 1.437556287769684e-05, "loss": 0.5515, "step": 23233 }, { "epoch": 0.7137283814087795, "grad_norm": 0.35733091831207275, "learning_rate": 1.4375128313944917e-05, "loss": 0.62, "step": 23234 }, { "epoch": 0.7137591005437287, "grad_norm": 0.40594691038131714, "learning_rate": 1.4374693739974578e-05, "loss": 0.6148, "step": 23235 }, { "epoch": 0.7137898196786778, "grad_norm": 0.40191906690597534, "learning_rate": 1.4374259155786838e-05, "loss": 0.5802, "step": 23236 }, { "epoch": 0.7138205388136271, "grad_norm": 0.37886014580726624, "learning_rate": 1.437382456138271e-05, "loss": 0.5868, "step": 23237 }, { "epoch": 0.7138512579485762, "grad_norm": 0.37020668387413025, "learning_rate": 1.4373389956763208e-05, "loss": 0.6479, "step": 23238 }, { "epoch": 0.7138819770835253, "grad_norm": 0.4108998477458954, "learning_rate": 1.4372955341929347e-05, "loss": 0.5781, "step": 23239 }, { "epoch": 0.7139126962184745, "grad_norm": 0.3749347925186157, "learning_rate": 1.4372520716882145e-05, "loss": 0.6201, "step": 23240 }, { "epoch": 0.7139434153534236, "grad_norm": 0.3828153908252716, "learning_rate": 1.4372086081622616e-05, "loss": 0.579, "step": 23241 }, { "epoch": 0.7139741344883728, "grad_norm": 0.3745262920856476, "learning_rate": 1.4371651436151775e-05, "loss": 0.505, "step": 23242 }, { "epoch": 0.714004853623322, "grad_norm": 0.36548444628715515, "learning_rate": 1.4371216780470634e-05, "loss": 0.5541, "step": 23243 }, { "epoch": 0.7140355727582711, "grad_norm": 0.34726086258888245, "learning_rate": 1.4370782114580216e-05, "loss": 0.5419, "step": 23244 }, { "epoch": 0.7140662918932202, "grad_norm": 0.3648160696029663, "learning_rate": 1.4370347438481525e-05, "loss": 0.506, "step": 23245 }, { "epoch": 0.7140970110281695, "grad_norm": 0.3389286398887634, "learning_rate": 1.4369912752175585e-05, "loss": 0.4732, "step": 23246 }, { "epoch": 0.7141277301631186, "grad_norm": 0.44821658730506897, "learning_rate": 1.4369478055663409e-05, "loss": 0.63, "step": 23247 }, { "epoch": 0.7141584492980678, "grad_norm": 0.37231093645095825, "learning_rate": 1.4369043348946012e-05, "loss": 0.6053, "step": 23248 }, { "epoch": 0.7141891684330169, "grad_norm": 0.3906601667404175, "learning_rate": 1.436860863202441e-05, "loss": 0.5956, "step": 23249 }, { "epoch": 0.7142198875679661, "grad_norm": 0.3578781187534332, "learning_rate": 1.4368173904899614e-05, "loss": 0.501, "step": 23250 }, { "epoch": 0.7142506067029153, "grad_norm": 0.35175591707229614, "learning_rate": 1.4367739167572646e-05, "loss": 0.5421, "step": 23251 }, { "epoch": 0.7142813258378644, "grad_norm": 0.4064997434616089, "learning_rate": 1.4367304420044514e-05, "loss": 0.5789, "step": 23252 }, { "epoch": 0.7143120449728135, "grad_norm": 0.37112247943878174, "learning_rate": 1.436686966231624e-05, "loss": 0.6515, "step": 23253 }, { "epoch": 0.7143427641077628, "grad_norm": 0.3833886682987213, "learning_rate": 1.4366434894388838e-05, "loss": 0.6044, "step": 23254 }, { "epoch": 0.7143734832427119, "grad_norm": 0.359438955783844, "learning_rate": 1.4366000116263323e-05, "loss": 0.5239, "step": 23255 }, { "epoch": 0.714404202377661, "grad_norm": 0.3801514506340027, "learning_rate": 1.4365565327940709e-05, "loss": 0.6288, "step": 23256 }, { "epoch": 0.7144349215126102, "grad_norm": 0.36502525210380554, "learning_rate": 1.436513052942201e-05, "loss": 0.559, "step": 23257 }, { "epoch": 0.7144656406475594, "grad_norm": 0.410566121339798, "learning_rate": 1.4364695720708249e-05, "loss": 0.6161, "step": 23258 }, { "epoch": 0.7144963597825085, "grad_norm": 0.36486223340034485, "learning_rate": 1.436426090180043e-05, "loss": 0.576, "step": 23259 }, { "epoch": 0.7145270789174577, "grad_norm": 0.4041081666946411, "learning_rate": 1.436382607269958e-05, "loss": 0.5171, "step": 23260 }, { "epoch": 0.7145577980524068, "grad_norm": 0.4316808879375458, "learning_rate": 1.4363391233406706e-05, "loss": 0.5615, "step": 23261 }, { "epoch": 0.714588517187356, "grad_norm": 0.4820363223552704, "learning_rate": 1.436295638392283e-05, "loss": 0.4624, "step": 23262 }, { "epoch": 0.7146192363223052, "grad_norm": 0.3624843657016754, "learning_rate": 1.436252152424896e-05, "loss": 0.6026, "step": 23263 }, { "epoch": 0.7146499554572543, "grad_norm": 0.426296591758728, "learning_rate": 1.4362086654386123e-05, "loss": 0.5209, "step": 23264 }, { "epoch": 0.7146806745922035, "grad_norm": 0.36864766478538513, "learning_rate": 1.4361651774335324e-05, "loss": 0.6411, "step": 23265 }, { "epoch": 0.7147113937271526, "grad_norm": 0.3212739825248718, "learning_rate": 1.4361216884097582e-05, "loss": 0.4897, "step": 23266 }, { "epoch": 0.7147421128621018, "grad_norm": 0.3411737382411957, "learning_rate": 1.4360781983673915e-05, "loss": 0.5444, "step": 23267 }, { "epoch": 0.714772831997051, "grad_norm": 0.37816518545150757, "learning_rate": 1.4360347073065335e-05, "loss": 0.5405, "step": 23268 }, { "epoch": 0.7148035511320001, "grad_norm": 0.405592679977417, "learning_rate": 1.4359912152272864e-05, "loss": 0.5881, "step": 23269 }, { "epoch": 0.7148342702669492, "grad_norm": 0.3976184129714966, "learning_rate": 1.4359477221297506e-05, "loss": 0.5742, "step": 23270 }, { "epoch": 0.7148649894018985, "grad_norm": 0.36280977725982666, "learning_rate": 1.4359042280140292e-05, "loss": 0.6012, "step": 23271 }, { "epoch": 0.7148957085368476, "grad_norm": 0.3753979802131653, "learning_rate": 1.4358607328802228e-05, "loss": 0.5738, "step": 23272 }, { "epoch": 0.7149264276717968, "grad_norm": 0.38848915696144104, "learning_rate": 1.435817236728433e-05, "loss": 0.5328, "step": 23273 }, { "epoch": 0.7149571468067459, "grad_norm": 0.38083934783935547, "learning_rate": 1.4357737395587621e-05, "loss": 0.5627, "step": 23274 }, { "epoch": 0.714987865941695, "grad_norm": 0.36678746342658997, "learning_rate": 1.4357302413713108e-05, "loss": 0.5414, "step": 23275 }, { "epoch": 0.7150185850766443, "grad_norm": 0.4170690178871155, "learning_rate": 1.4356867421661811e-05, "loss": 0.5087, "step": 23276 }, { "epoch": 0.7150493042115934, "grad_norm": 0.36380818486213684, "learning_rate": 1.4356432419434747e-05, "loss": 0.5841, "step": 23277 }, { "epoch": 0.7150800233465425, "grad_norm": 0.3793545663356781, "learning_rate": 1.435599740703293e-05, "loss": 0.5511, "step": 23278 }, { "epoch": 0.7151107424814918, "grad_norm": 0.3677300810813904, "learning_rate": 1.4355562384457378e-05, "loss": 0.6988, "step": 23279 }, { "epoch": 0.7151414616164409, "grad_norm": 0.34163081645965576, "learning_rate": 1.4355127351709106e-05, "loss": 0.5273, "step": 23280 }, { "epoch": 0.71517218075139, "grad_norm": 0.3674616515636444, "learning_rate": 1.4354692308789129e-05, "loss": 0.5459, "step": 23281 }, { "epoch": 0.7152028998863392, "grad_norm": 0.3884761333465576, "learning_rate": 1.4354257255698465e-05, "loss": 0.5058, "step": 23282 }, { "epoch": 0.7152336190212883, "grad_norm": 0.36278995871543884, "learning_rate": 1.4353822192438127e-05, "loss": 0.5749, "step": 23283 }, { "epoch": 0.7152643381562375, "grad_norm": 0.4294101893901825, "learning_rate": 1.4353387119009137e-05, "loss": 0.5724, "step": 23284 }, { "epoch": 0.7152950572911867, "grad_norm": 0.485844224691391, "learning_rate": 1.4352952035412504e-05, "loss": 0.7418, "step": 23285 }, { "epoch": 0.7153257764261358, "grad_norm": 0.3414039611816406, "learning_rate": 1.4352516941649249e-05, "loss": 0.5897, "step": 23286 }, { "epoch": 0.715356495561085, "grad_norm": 0.3572513163089752, "learning_rate": 1.4352081837720387e-05, "loss": 0.5226, "step": 23287 }, { "epoch": 0.7153872146960342, "grad_norm": 0.4040201008319855, "learning_rate": 1.4351646723626929e-05, "loss": 0.592, "step": 23288 }, { "epoch": 0.7154179338309833, "grad_norm": 0.34446364641189575, "learning_rate": 1.4351211599369901e-05, "loss": 0.516, "step": 23289 }, { "epoch": 0.7154486529659325, "grad_norm": 0.3523973822593689, "learning_rate": 1.4350776464950316e-05, "loss": 0.528, "step": 23290 }, { "epoch": 0.7154793721008816, "grad_norm": 0.3502325117588043, "learning_rate": 1.4350341320369183e-05, "loss": 0.6264, "step": 23291 }, { "epoch": 0.7155100912358308, "grad_norm": 0.3523746430873871, "learning_rate": 1.434990616562753e-05, "loss": 0.4959, "step": 23292 }, { "epoch": 0.71554081037078, "grad_norm": 0.40855416655540466, "learning_rate": 1.4349471000726362e-05, "loss": 0.507, "step": 23293 }, { "epoch": 0.7155715295057291, "grad_norm": 0.3590714633464813, "learning_rate": 1.4349035825666704e-05, "loss": 0.556, "step": 23294 }, { "epoch": 0.7156022486406782, "grad_norm": 0.3938080668449402, "learning_rate": 1.4348600640449569e-05, "loss": 0.5377, "step": 23295 }, { "epoch": 0.7156329677756275, "grad_norm": 0.4099295139312744, "learning_rate": 1.4348165445075975e-05, "loss": 0.5557, "step": 23296 }, { "epoch": 0.7156636869105766, "grad_norm": 0.484404981136322, "learning_rate": 1.4347730239546933e-05, "loss": 0.6096, "step": 23297 }, { "epoch": 0.7156944060455258, "grad_norm": 0.34550541639328003, "learning_rate": 1.4347295023863466e-05, "loss": 0.4908, "step": 23298 }, { "epoch": 0.7157251251804749, "grad_norm": 0.3521379828453064, "learning_rate": 1.4346859798026587e-05, "loss": 0.5452, "step": 23299 }, { "epoch": 0.715755844315424, "grad_norm": 0.32496801018714905, "learning_rate": 1.4346424562037314e-05, "loss": 0.5762, "step": 23300 }, { "epoch": 0.7157865634503733, "grad_norm": 0.3432060182094574, "learning_rate": 1.4345989315896663e-05, "loss": 0.5723, "step": 23301 }, { "epoch": 0.7158172825853224, "grad_norm": 0.36461469531059265, "learning_rate": 1.4345554059605649e-05, "loss": 0.5322, "step": 23302 }, { "epoch": 0.7158480017202715, "grad_norm": 0.3311673104763031, "learning_rate": 1.434511879316529e-05, "loss": 0.5613, "step": 23303 }, { "epoch": 0.7158787208552208, "grad_norm": 0.4263823926448822, "learning_rate": 1.4344683516576602e-05, "loss": 0.5887, "step": 23304 }, { "epoch": 0.7159094399901699, "grad_norm": 0.35805633664131165, "learning_rate": 1.4344248229840607e-05, "loss": 0.5551, "step": 23305 }, { "epoch": 0.715940159125119, "grad_norm": 0.36383625864982605, "learning_rate": 1.434381293295831e-05, "loss": 0.5287, "step": 23306 }, { "epoch": 0.7159708782600682, "grad_norm": 0.37530261278152466, "learning_rate": 1.4343377625930739e-05, "loss": 0.5837, "step": 23307 }, { "epoch": 0.7160015973950173, "grad_norm": 0.3517078459262848, "learning_rate": 1.4342942308758908e-05, "loss": 0.6055, "step": 23308 }, { "epoch": 0.7160323165299665, "grad_norm": 0.42102184891700745, "learning_rate": 1.4342506981443827e-05, "loss": 0.5974, "step": 23309 }, { "epoch": 0.7160630356649157, "grad_norm": 0.43585890531539917, "learning_rate": 1.4342071643986519e-05, "loss": 0.6014, "step": 23310 }, { "epoch": 0.7160937547998648, "grad_norm": 0.40566739439964294, "learning_rate": 1.4341636296388002e-05, "loss": 0.5566, "step": 23311 }, { "epoch": 0.716124473934814, "grad_norm": 0.35119661688804626, "learning_rate": 1.4341200938649289e-05, "loss": 0.6099, "step": 23312 }, { "epoch": 0.7161551930697632, "grad_norm": 0.30401116609573364, "learning_rate": 1.4340765570771396e-05, "loss": 0.4999, "step": 23313 }, { "epoch": 0.7161859122047123, "grad_norm": 0.3688519597053528, "learning_rate": 1.4340330192755345e-05, "loss": 0.5247, "step": 23314 }, { "epoch": 0.7162166313396615, "grad_norm": 0.4636021852493286, "learning_rate": 1.4339894804602146e-05, "loss": 0.5469, "step": 23315 }, { "epoch": 0.7162473504746106, "grad_norm": 0.3334212303161621, "learning_rate": 1.4339459406312823e-05, "loss": 0.5627, "step": 23316 }, { "epoch": 0.7162780696095598, "grad_norm": 0.3991237282752991, "learning_rate": 1.4339023997888388e-05, "loss": 0.5742, "step": 23317 }, { "epoch": 0.716308788744509, "grad_norm": 0.36510488390922546, "learning_rate": 1.4338588579329859e-05, "loss": 0.6266, "step": 23318 }, { "epoch": 0.7163395078794581, "grad_norm": 0.39876389503479004, "learning_rate": 1.4338153150638254e-05, "loss": 0.5302, "step": 23319 }, { "epoch": 0.7163702270144072, "grad_norm": 0.37326377630233765, "learning_rate": 1.433771771181459e-05, "loss": 0.5936, "step": 23320 }, { "epoch": 0.7164009461493565, "grad_norm": 0.3308236300945282, "learning_rate": 1.4337282262859883e-05, "loss": 0.4885, "step": 23321 }, { "epoch": 0.7164316652843056, "grad_norm": 0.3381790220737457, "learning_rate": 1.4336846803775148e-05, "loss": 0.5686, "step": 23322 }, { "epoch": 0.7164623844192548, "grad_norm": 0.4075927436351776, "learning_rate": 1.4336411334561408e-05, "loss": 0.6076, "step": 23323 }, { "epoch": 0.7164931035542039, "grad_norm": 0.34470346570014954, "learning_rate": 1.4335975855219673e-05, "loss": 0.565, "step": 23324 }, { "epoch": 0.716523822689153, "grad_norm": 0.40160301327705383, "learning_rate": 1.4335540365750966e-05, "loss": 0.6228, "step": 23325 }, { "epoch": 0.7165545418241023, "grad_norm": 0.38266289234161377, "learning_rate": 1.4335104866156302e-05, "loss": 0.4936, "step": 23326 }, { "epoch": 0.7165852609590514, "grad_norm": 0.33567723631858826, "learning_rate": 1.4334669356436698e-05, "loss": 0.5414, "step": 23327 }, { "epoch": 0.7166159800940005, "grad_norm": 0.409091979265213, "learning_rate": 1.433423383659317e-05, "loss": 0.6088, "step": 23328 }, { "epoch": 0.7166466992289497, "grad_norm": 0.346138060092926, "learning_rate": 1.4333798306626734e-05, "loss": 0.5689, "step": 23329 }, { "epoch": 0.7166774183638989, "grad_norm": 0.33970165252685547, "learning_rate": 1.4333362766538412e-05, "loss": 0.5053, "step": 23330 }, { "epoch": 0.716708137498848, "grad_norm": 0.3871152102947235, "learning_rate": 1.4332927216329218e-05, "loss": 0.5891, "step": 23331 }, { "epoch": 0.7167388566337972, "grad_norm": 0.33691972494125366, "learning_rate": 1.4332491656000171e-05, "loss": 0.5108, "step": 23332 }, { "epoch": 0.7167695757687463, "grad_norm": 0.47202879190444946, "learning_rate": 1.4332056085552287e-05, "loss": 0.5482, "step": 23333 }, { "epoch": 0.7168002949036956, "grad_norm": 0.3604145050048828, "learning_rate": 1.433162050498658e-05, "loss": 0.6573, "step": 23334 }, { "epoch": 0.7168310140386447, "grad_norm": 0.3350997567176819, "learning_rate": 1.4331184914304074e-05, "loss": 0.5688, "step": 23335 }, { "epoch": 0.7168617331735938, "grad_norm": 0.33414512872695923, "learning_rate": 1.4330749313505783e-05, "loss": 0.6111, "step": 23336 }, { "epoch": 0.716892452308543, "grad_norm": 0.3736283779144287, "learning_rate": 1.4330313702592726e-05, "loss": 0.5654, "step": 23337 }, { "epoch": 0.7169231714434922, "grad_norm": 0.35170868039131165, "learning_rate": 1.4329878081565915e-05, "loss": 0.5259, "step": 23338 }, { "epoch": 0.7169538905784413, "grad_norm": 0.35884276032447815, "learning_rate": 1.4329442450426374e-05, "loss": 0.5616, "step": 23339 }, { "epoch": 0.7169846097133905, "grad_norm": 0.38542717695236206, "learning_rate": 1.4329006809175117e-05, "loss": 0.5469, "step": 23340 }, { "epoch": 0.7170153288483396, "grad_norm": 0.3689219057559967, "learning_rate": 1.4328571157813164e-05, "loss": 0.5323, "step": 23341 }, { "epoch": 0.7170460479832887, "grad_norm": 0.4429589807987213, "learning_rate": 1.4328135496341529e-05, "loss": 0.5336, "step": 23342 }, { "epoch": 0.717076767118238, "grad_norm": 0.33750975131988525, "learning_rate": 1.432769982476123e-05, "loss": 0.4908, "step": 23343 }, { "epoch": 0.7171074862531871, "grad_norm": 0.34846019744873047, "learning_rate": 1.4327264143073288e-05, "loss": 0.5081, "step": 23344 }, { "epoch": 0.7171382053881362, "grad_norm": 0.4282776117324829, "learning_rate": 1.4326828451278715e-05, "loss": 0.6628, "step": 23345 }, { "epoch": 0.7171689245230854, "grad_norm": 0.4773401916027069, "learning_rate": 1.4326392749378538e-05, "loss": 0.5444, "step": 23346 }, { "epoch": 0.7171996436580346, "grad_norm": 0.37992265820503235, "learning_rate": 1.4325957037373764e-05, "loss": 0.5113, "step": 23347 }, { "epoch": 0.7172303627929838, "grad_norm": 0.3755682706832886, "learning_rate": 1.4325521315265417e-05, "loss": 0.5111, "step": 23348 }, { "epoch": 0.7172610819279329, "grad_norm": 0.34996992349624634, "learning_rate": 1.432508558305451e-05, "loss": 0.5154, "step": 23349 }, { "epoch": 0.717291801062882, "grad_norm": 0.3797302544116974, "learning_rate": 1.4324649840742066e-05, "loss": 0.5343, "step": 23350 }, { "epoch": 0.7173225201978313, "grad_norm": 0.35790523886680603, "learning_rate": 1.43242140883291e-05, "loss": 0.5546, "step": 23351 }, { "epoch": 0.7173532393327804, "grad_norm": 0.38124406337738037, "learning_rate": 1.432377832581663e-05, "loss": 0.5581, "step": 23352 }, { "epoch": 0.7173839584677295, "grad_norm": 0.3550511598587036, "learning_rate": 1.4323342553205677e-05, "loss": 0.5392, "step": 23353 }, { "epoch": 0.7174146776026787, "grad_norm": 0.36780500411987305, "learning_rate": 1.4322906770497252e-05, "loss": 0.5042, "step": 23354 }, { "epoch": 0.7174453967376279, "grad_norm": 0.3599548637866974, "learning_rate": 1.4322470977692375e-05, "loss": 0.6016, "step": 23355 }, { "epoch": 0.717476115872577, "grad_norm": 0.9069557189941406, "learning_rate": 1.4322035174792067e-05, "loss": 0.5127, "step": 23356 }, { "epoch": 0.7175068350075262, "grad_norm": 0.3885195553302765, "learning_rate": 1.4321599361797344e-05, "loss": 0.4883, "step": 23357 }, { "epoch": 0.7175375541424753, "grad_norm": 0.42352351546287537, "learning_rate": 1.4321163538709224e-05, "loss": 0.605, "step": 23358 }, { "epoch": 0.7175682732774246, "grad_norm": 0.3540290594100952, "learning_rate": 1.4320727705528725e-05, "loss": 0.6522, "step": 23359 }, { "epoch": 0.7175989924123737, "grad_norm": 0.332116037607193, "learning_rate": 1.4320291862256863e-05, "loss": 0.6044, "step": 23360 }, { "epoch": 0.7176297115473228, "grad_norm": 0.35039860010147095, "learning_rate": 1.4319856008894659e-05, "loss": 0.6117, "step": 23361 }, { "epoch": 0.717660430682272, "grad_norm": 0.353455513715744, "learning_rate": 1.431942014544313e-05, "loss": 0.533, "step": 23362 }, { "epoch": 0.7176911498172212, "grad_norm": 0.39966461062431335, "learning_rate": 1.4318984271903293e-05, "loss": 0.617, "step": 23363 }, { "epoch": 0.7177218689521703, "grad_norm": 0.3545120060443878, "learning_rate": 1.4318548388276167e-05, "loss": 0.5887, "step": 23364 }, { "epoch": 0.7177525880871195, "grad_norm": 0.37002456188201904, "learning_rate": 1.4318112494562769e-05, "loss": 0.5387, "step": 23365 }, { "epoch": 0.7177833072220686, "grad_norm": 0.38272687792778015, "learning_rate": 1.4317676590764118e-05, "loss": 0.558, "step": 23366 }, { "epoch": 0.7178140263570177, "grad_norm": 0.3638666272163391, "learning_rate": 1.431724067688123e-05, "loss": 0.564, "step": 23367 }, { "epoch": 0.717844745491967, "grad_norm": 0.35474076867103577, "learning_rate": 1.4316804752915128e-05, "loss": 0.5706, "step": 23368 }, { "epoch": 0.7178754646269161, "grad_norm": 0.33820199966430664, "learning_rate": 1.4316368818866826e-05, "loss": 0.5847, "step": 23369 }, { "epoch": 0.7179061837618652, "grad_norm": 0.38841402530670166, "learning_rate": 1.4315932874737342e-05, "loss": 0.5951, "step": 23370 }, { "epoch": 0.7179369028968144, "grad_norm": 0.37294360995292664, "learning_rate": 1.4315496920527698e-05, "loss": 0.58, "step": 23371 }, { "epoch": 0.7179676220317636, "grad_norm": 0.34825658798217773, "learning_rate": 1.4315060956238906e-05, "loss": 0.5927, "step": 23372 }, { "epoch": 0.7179983411667128, "grad_norm": 0.38324832916259766, "learning_rate": 1.4314624981871992e-05, "loss": 0.5841, "step": 23373 }, { "epoch": 0.7180290603016619, "grad_norm": 0.3947225511074066, "learning_rate": 1.4314188997427967e-05, "loss": 0.5404, "step": 23374 }, { "epoch": 0.718059779436611, "grad_norm": 0.3624431788921356, "learning_rate": 1.4313753002907854e-05, "loss": 0.565, "step": 23375 }, { "epoch": 0.7180904985715603, "grad_norm": 0.3630715310573578, "learning_rate": 1.4313316998312665e-05, "loss": 0.4658, "step": 23376 }, { "epoch": 0.7181212177065094, "grad_norm": 0.34741833806037903, "learning_rate": 1.4312880983643429e-05, "loss": 0.4836, "step": 23377 }, { "epoch": 0.7181519368414585, "grad_norm": 0.4227137267589569, "learning_rate": 1.4312444958901153e-05, "loss": 0.641, "step": 23378 }, { "epoch": 0.7181826559764077, "grad_norm": 0.3884705603122711, "learning_rate": 1.4312008924086862e-05, "loss": 0.572, "step": 23379 }, { "epoch": 0.7182133751113569, "grad_norm": 0.39625272154808044, "learning_rate": 1.4311572879201577e-05, "loss": 0.5974, "step": 23380 }, { "epoch": 0.718244094246306, "grad_norm": 0.398294597864151, "learning_rate": 1.4311136824246309e-05, "loss": 0.5638, "step": 23381 }, { "epoch": 0.7182748133812552, "grad_norm": 0.3662996292114258, "learning_rate": 1.431070075922208e-05, "loss": 0.6213, "step": 23382 }, { "epoch": 0.7183055325162043, "grad_norm": 0.3543523848056793, "learning_rate": 1.4310264684129908e-05, "loss": 0.4681, "step": 23383 }, { "epoch": 0.7183362516511536, "grad_norm": 0.34324678778648376, "learning_rate": 1.4309828598970814e-05, "loss": 0.5637, "step": 23384 }, { "epoch": 0.7183669707861027, "grad_norm": 0.34019735455513, "learning_rate": 1.4309392503745812e-05, "loss": 0.5376, "step": 23385 }, { "epoch": 0.7183976899210518, "grad_norm": 0.3611259460449219, "learning_rate": 1.4308956398455924e-05, "loss": 0.5509, "step": 23386 }, { "epoch": 0.718428409056001, "grad_norm": 0.36567965149879456, "learning_rate": 1.4308520283102167e-05, "loss": 0.5546, "step": 23387 }, { "epoch": 0.7184591281909501, "grad_norm": 0.39032405614852905, "learning_rate": 1.430808415768556e-05, "loss": 0.5141, "step": 23388 }, { "epoch": 0.7184898473258993, "grad_norm": 0.36387690901756287, "learning_rate": 1.4307648022207123e-05, "loss": 0.5576, "step": 23389 }, { "epoch": 0.7185205664608485, "grad_norm": 0.3832300007343292, "learning_rate": 1.430721187666787e-05, "loss": 0.6273, "step": 23390 }, { "epoch": 0.7185512855957976, "grad_norm": 0.7635495066642761, "learning_rate": 1.4306775721068826e-05, "loss": 0.5395, "step": 23391 }, { "epoch": 0.7185820047307467, "grad_norm": 0.37648671865463257, "learning_rate": 1.4306339555411003e-05, "loss": 0.4993, "step": 23392 }, { "epoch": 0.718612723865696, "grad_norm": 0.37910258769989014, "learning_rate": 1.4305903379695427e-05, "loss": 0.5715, "step": 23393 }, { "epoch": 0.7186434430006451, "grad_norm": 0.3670291602611542, "learning_rate": 1.4305467193923113e-05, "loss": 0.5825, "step": 23394 }, { "epoch": 0.7186741621355942, "grad_norm": 0.4225667715072632, "learning_rate": 1.4305030998095078e-05, "loss": 0.5188, "step": 23395 }, { "epoch": 0.7187048812705434, "grad_norm": 0.3438062369823456, "learning_rate": 1.4304594792212344e-05, "loss": 0.5301, "step": 23396 }, { "epoch": 0.7187356004054926, "grad_norm": 0.40781277418136597, "learning_rate": 1.4304158576275924e-05, "loss": 0.5693, "step": 23397 }, { "epoch": 0.7187663195404418, "grad_norm": 0.3350687325000763, "learning_rate": 1.4303722350286843e-05, "loss": 0.5378, "step": 23398 }, { "epoch": 0.7187970386753909, "grad_norm": 0.35388505458831787, "learning_rate": 1.4303286114246117e-05, "loss": 0.5841, "step": 23399 }, { "epoch": 0.71882775781034, "grad_norm": 0.3882730007171631, "learning_rate": 1.4302849868154771e-05, "loss": 0.5466, "step": 23400 }, { "epoch": 0.7188584769452893, "grad_norm": 0.37193191051483154, "learning_rate": 1.4302413612013815e-05, "loss": 0.5358, "step": 23401 }, { "epoch": 0.7188891960802384, "grad_norm": 0.35610097646713257, "learning_rate": 1.4301977345824272e-05, "loss": 0.5378, "step": 23402 }, { "epoch": 0.7189199152151875, "grad_norm": 0.3542301058769226, "learning_rate": 1.4301541069587157e-05, "loss": 0.4777, "step": 23403 }, { "epoch": 0.7189506343501367, "grad_norm": 0.39965328574180603, "learning_rate": 1.4301104783303496e-05, "loss": 0.5612, "step": 23404 }, { "epoch": 0.7189813534850859, "grad_norm": 0.355535089969635, "learning_rate": 1.4300668486974308e-05, "loss": 0.4996, "step": 23405 }, { "epoch": 0.719012072620035, "grad_norm": 0.37397462129592896, "learning_rate": 1.4300232180600602e-05, "loss": 0.5765, "step": 23406 }, { "epoch": 0.7190427917549842, "grad_norm": 0.36390209197998047, "learning_rate": 1.4299795864183406e-05, "loss": 0.567, "step": 23407 }, { "epoch": 0.7190735108899333, "grad_norm": 0.37729743123054504, "learning_rate": 1.4299359537723735e-05, "loss": 0.587, "step": 23408 }, { "epoch": 0.7191042300248826, "grad_norm": 0.40184929966926575, "learning_rate": 1.4298923201222611e-05, "loss": 0.6126, "step": 23409 }, { "epoch": 0.7191349491598317, "grad_norm": 0.40769609808921814, "learning_rate": 1.4298486854681052e-05, "loss": 0.4589, "step": 23410 }, { "epoch": 0.7191656682947808, "grad_norm": 0.5933443307876587, "learning_rate": 1.4298050498100077e-05, "loss": 0.5148, "step": 23411 }, { "epoch": 0.71919638742973, "grad_norm": 0.3954637944698334, "learning_rate": 1.4297614131480701e-05, "loss": 0.5271, "step": 23412 }, { "epoch": 0.7192271065646791, "grad_norm": 0.4042503237724304, "learning_rate": 1.4297177754823953e-05, "loss": 0.5499, "step": 23413 }, { "epoch": 0.7192578256996283, "grad_norm": 0.5043869018554688, "learning_rate": 1.4296741368130843e-05, "loss": 0.4963, "step": 23414 }, { "epoch": 0.7192885448345775, "grad_norm": 0.3813895285129547, "learning_rate": 1.4296304971402393e-05, "loss": 0.6043, "step": 23415 }, { "epoch": 0.7193192639695266, "grad_norm": 0.37226787209510803, "learning_rate": 1.4295868564639624e-05, "loss": 0.5243, "step": 23416 }, { "epoch": 0.7193499831044757, "grad_norm": 0.3344322741031647, "learning_rate": 1.4295432147843551e-05, "loss": 0.5806, "step": 23417 }, { "epoch": 0.719380702239425, "grad_norm": 0.4774976372718811, "learning_rate": 1.42949957210152e-05, "loss": 0.5425, "step": 23418 }, { "epoch": 0.7194114213743741, "grad_norm": 0.4311380088329315, "learning_rate": 1.4294559284155584e-05, "loss": 0.4993, "step": 23419 }, { "epoch": 0.7194421405093232, "grad_norm": 0.4222111999988556, "learning_rate": 1.4294122837265727e-05, "loss": 0.5399, "step": 23420 }, { "epoch": 0.7194728596442724, "grad_norm": 0.3721184730529785, "learning_rate": 1.4293686380346645e-05, "loss": 0.5528, "step": 23421 }, { "epoch": 0.7195035787792216, "grad_norm": 0.4661678075790405, "learning_rate": 1.4293249913399359e-05, "loss": 0.5689, "step": 23422 }, { "epoch": 0.7195342979141708, "grad_norm": 0.41172197461128235, "learning_rate": 1.4292813436424888e-05, "loss": 0.5738, "step": 23423 }, { "epoch": 0.7195650170491199, "grad_norm": 0.40150320529937744, "learning_rate": 1.4292376949424252e-05, "loss": 0.5771, "step": 23424 }, { "epoch": 0.719595736184069, "grad_norm": 0.396605908870697, "learning_rate": 1.429194045239847e-05, "loss": 0.5546, "step": 23425 }, { "epoch": 0.7196264553190183, "grad_norm": 0.3357087969779968, "learning_rate": 1.429150394534856e-05, "loss": 0.5701, "step": 23426 }, { "epoch": 0.7196571744539674, "grad_norm": 0.3965671956539154, "learning_rate": 1.4291067428275543e-05, "loss": 0.5357, "step": 23427 }, { "epoch": 0.7196878935889165, "grad_norm": 0.4435134828090668, "learning_rate": 1.4290630901180439e-05, "loss": 0.5683, "step": 23428 }, { "epoch": 0.7197186127238657, "grad_norm": 0.3520336151123047, "learning_rate": 1.4290194364064266e-05, "loss": 0.5516, "step": 23429 }, { "epoch": 0.7197493318588148, "grad_norm": 0.4225528836250305, "learning_rate": 1.4289757816928043e-05, "loss": 0.5117, "step": 23430 }, { "epoch": 0.719780050993764, "grad_norm": 0.3795910179615021, "learning_rate": 1.4289321259772795e-05, "loss": 0.573, "step": 23431 }, { "epoch": 0.7198107701287132, "grad_norm": 0.4273488521575928, "learning_rate": 1.4288884692599535e-05, "loss": 0.6004, "step": 23432 }, { "epoch": 0.7198414892636623, "grad_norm": 0.36052200198173523, "learning_rate": 1.4288448115409286e-05, "loss": 0.5627, "step": 23433 }, { "epoch": 0.7198722083986115, "grad_norm": 0.43812426924705505, "learning_rate": 1.4288011528203066e-05, "loss": 0.558, "step": 23434 }, { "epoch": 0.7199029275335607, "grad_norm": 0.4579241871833801, "learning_rate": 1.4287574930981896e-05, "loss": 0.6813, "step": 23435 }, { "epoch": 0.7199336466685098, "grad_norm": 0.31883493065834045, "learning_rate": 1.4287138323746797e-05, "loss": 0.5291, "step": 23436 }, { "epoch": 0.719964365803459, "grad_norm": 0.3641261160373688, "learning_rate": 1.4286701706498783e-05, "loss": 0.4937, "step": 23437 }, { "epoch": 0.7199950849384081, "grad_norm": 0.37596240639686584, "learning_rate": 1.4286265079238882e-05, "loss": 0.5416, "step": 23438 }, { "epoch": 0.7200258040733573, "grad_norm": 0.37289512157440186, "learning_rate": 1.4285828441968105e-05, "loss": 0.5633, "step": 23439 }, { "epoch": 0.7200565232083065, "grad_norm": 0.3479554057121277, "learning_rate": 1.4285391794687483e-05, "loss": 0.4653, "step": 23440 }, { "epoch": 0.7200872423432556, "grad_norm": 0.3610537350177765, "learning_rate": 1.4284955137398024e-05, "loss": 0.5408, "step": 23441 }, { "epoch": 0.7201179614782047, "grad_norm": 0.3511520326137543, "learning_rate": 1.4284518470100754e-05, "loss": 0.5596, "step": 23442 }, { "epoch": 0.720148680613154, "grad_norm": 0.3815266489982605, "learning_rate": 1.4284081792796693e-05, "loss": 0.5558, "step": 23443 }, { "epoch": 0.7201793997481031, "grad_norm": 0.36893096566200256, "learning_rate": 1.4283645105486859e-05, "loss": 0.5613, "step": 23444 }, { "epoch": 0.7202101188830523, "grad_norm": 0.3474266529083252, "learning_rate": 1.4283208408172273e-05, "loss": 0.527, "step": 23445 }, { "epoch": 0.7202408380180014, "grad_norm": 0.371135413646698, "learning_rate": 1.4282771700853955e-05, "loss": 0.6193, "step": 23446 }, { "epoch": 0.7202715571529505, "grad_norm": 0.32462865114212036, "learning_rate": 1.4282334983532927e-05, "loss": 0.4584, "step": 23447 }, { "epoch": 0.7203022762878998, "grad_norm": 0.37046581506729126, "learning_rate": 1.42818982562102e-05, "loss": 0.5058, "step": 23448 }, { "epoch": 0.7203329954228489, "grad_norm": 0.3731372356414795, "learning_rate": 1.4281461518886805e-05, "loss": 0.4411, "step": 23449 }, { "epoch": 0.720363714557798, "grad_norm": 0.359437495470047, "learning_rate": 1.4281024771563757e-05, "loss": 0.5051, "step": 23450 }, { "epoch": 0.7203944336927472, "grad_norm": 0.5205446481704712, "learning_rate": 1.4280588014242075e-05, "loss": 0.5746, "step": 23451 }, { "epoch": 0.7204251528276964, "grad_norm": 0.34987807273864746, "learning_rate": 1.4280151246922783e-05, "loss": 0.5467, "step": 23452 }, { "epoch": 0.7204558719626455, "grad_norm": 0.38661012053489685, "learning_rate": 1.4279714469606897e-05, "loss": 0.5524, "step": 23453 }, { "epoch": 0.7204865910975947, "grad_norm": 0.3387630581855774, "learning_rate": 1.427927768229544e-05, "loss": 0.5239, "step": 23454 }, { "epoch": 0.7205173102325438, "grad_norm": 0.3855663537979126, "learning_rate": 1.4278840884989431e-05, "loss": 0.5273, "step": 23455 }, { "epoch": 0.720548029367493, "grad_norm": 0.5183831453323364, "learning_rate": 1.4278404077689892e-05, "loss": 0.4896, "step": 23456 }, { "epoch": 0.7205787485024422, "grad_norm": 0.36710596084594727, "learning_rate": 1.4277967260397838e-05, "loss": 0.5133, "step": 23457 }, { "epoch": 0.7206094676373913, "grad_norm": 0.312406063079834, "learning_rate": 1.4277530433114298e-05, "loss": 0.4594, "step": 23458 }, { "epoch": 0.7206401867723405, "grad_norm": 0.450519859790802, "learning_rate": 1.4277093595840284e-05, "loss": 0.5608, "step": 23459 }, { "epoch": 0.7206709059072897, "grad_norm": 0.3597572445869446, "learning_rate": 1.4276656748576815e-05, "loss": 0.577, "step": 23460 }, { "epoch": 0.7207016250422388, "grad_norm": 0.49385905265808105, "learning_rate": 1.4276219891324923e-05, "loss": 0.6354, "step": 23461 }, { "epoch": 0.720732344177188, "grad_norm": 0.3317689001560211, "learning_rate": 1.4275783024085614e-05, "loss": 0.6569, "step": 23462 }, { "epoch": 0.7207630633121371, "grad_norm": 0.3661884367465973, "learning_rate": 1.427534614685992e-05, "loss": 0.5805, "step": 23463 }, { "epoch": 0.7207937824470863, "grad_norm": 0.3370664119720459, "learning_rate": 1.4274909259648855e-05, "loss": 0.5653, "step": 23464 }, { "epoch": 0.7208245015820355, "grad_norm": 0.33649757504463196, "learning_rate": 1.4274472362453442e-05, "loss": 0.6166, "step": 23465 }, { "epoch": 0.7208552207169846, "grad_norm": 0.39650124311447144, "learning_rate": 1.4274035455274695e-05, "loss": 0.5767, "step": 23466 }, { "epoch": 0.7208859398519337, "grad_norm": 0.3307555913925171, "learning_rate": 1.4273598538113646e-05, "loss": 0.4849, "step": 23467 }, { "epoch": 0.720916658986883, "grad_norm": 0.33985888957977295, "learning_rate": 1.4273161610971307e-05, "loss": 0.5805, "step": 23468 }, { "epoch": 0.7209473781218321, "grad_norm": 0.3778727948665619, "learning_rate": 1.42727246738487e-05, "loss": 0.4693, "step": 23469 }, { "epoch": 0.7209780972567813, "grad_norm": 0.5259585976600647, "learning_rate": 1.4272287726746845e-05, "loss": 0.6184, "step": 23470 }, { "epoch": 0.7210088163917304, "grad_norm": 0.3810993432998657, "learning_rate": 1.4271850769666763e-05, "loss": 0.5616, "step": 23471 }, { "epoch": 0.7210395355266795, "grad_norm": 0.3548995554447174, "learning_rate": 1.427141380260948e-05, "loss": 0.5633, "step": 23472 }, { "epoch": 0.7210702546616288, "grad_norm": 0.32920271158218384, "learning_rate": 1.4270976825576006e-05, "loss": 0.5484, "step": 23473 }, { "epoch": 0.7211009737965779, "grad_norm": 0.33987072110176086, "learning_rate": 1.4270539838567371e-05, "loss": 0.5073, "step": 23474 }, { "epoch": 0.721131692931527, "grad_norm": 0.37485525012016296, "learning_rate": 1.4270102841584594e-05, "loss": 0.5564, "step": 23475 }, { "epoch": 0.7211624120664762, "grad_norm": 0.32292401790618896, "learning_rate": 1.426966583462869e-05, "loss": 0.5713, "step": 23476 }, { "epoch": 0.7211931312014254, "grad_norm": 0.42183274030685425, "learning_rate": 1.4269228817700685e-05, "loss": 0.5866, "step": 23477 }, { "epoch": 0.7212238503363745, "grad_norm": 0.35733240842819214, "learning_rate": 1.4268791790801598e-05, "loss": 0.6096, "step": 23478 }, { "epoch": 0.7212545694713237, "grad_norm": 0.3717252314090729, "learning_rate": 1.4268354753932447e-05, "loss": 0.5921, "step": 23479 }, { "epoch": 0.7212852886062728, "grad_norm": 0.3736947178840637, "learning_rate": 1.4267917707094257e-05, "loss": 0.4554, "step": 23480 }, { "epoch": 0.721316007741222, "grad_norm": 0.3663298189640045, "learning_rate": 1.4267480650288048e-05, "loss": 0.5867, "step": 23481 }, { "epoch": 0.7213467268761712, "grad_norm": 0.45398327708244324, "learning_rate": 1.4267043583514836e-05, "loss": 0.5095, "step": 23482 }, { "epoch": 0.7213774460111203, "grad_norm": 0.36694446206092834, "learning_rate": 1.4266606506775653e-05, "loss": 0.5956, "step": 23483 }, { "epoch": 0.7214081651460695, "grad_norm": 0.4851472079753876, "learning_rate": 1.4266169420071507e-05, "loss": 0.5639, "step": 23484 }, { "epoch": 0.7214388842810187, "grad_norm": 0.39363670349121094, "learning_rate": 1.4265732323403426e-05, "loss": 0.5258, "step": 23485 }, { "epoch": 0.7214696034159678, "grad_norm": 0.3572028875350952, "learning_rate": 1.426529521677243e-05, "loss": 0.5246, "step": 23486 }, { "epoch": 0.721500322550917, "grad_norm": 0.3445357084274292, "learning_rate": 1.4264858100179537e-05, "loss": 0.5678, "step": 23487 }, { "epoch": 0.7215310416858661, "grad_norm": 0.4097510874271393, "learning_rate": 1.4264420973625772e-05, "loss": 0.5672, "step": 23488 }, { "epoch": 0.7215617608208152, "grad_norm": 0.3336658477783203, "learning_rate": 1.426398383711215e-05, "loss": 0.5434, "step": 23489 }, { "epoch": 0.7215924799557645, "grad_norm": 0.40842708945274353, "learning_rate": 1.42635466906397e-05, "loss": 0.5809, "step": 23490 }, { "epoch": 0.7216231990907136, "grad_norm": 0.3394746780395508, "learning_rate": 1.4263109534209436e-05, "loss": 0.5382, "step": 23491 }, { "epoch": 0.7216539182256627, "grad_norm": 0.34217455983161926, "learning_rate": 1.4262672367822385e-05, "loss": 0.5194, "step": 23492 }, { "epoch": 0.721684637360612, "grad_norm": 0.37281423807144165, "learning_rate": 1.4262235191479566e-05, "loss": 0.4995, "step": 23493 }, { "epoch": 0.7217153564955611, "grad_norm": 0.38173940777778625, "learning_rate": 1.4261798005181998e-05, "loss": 0.5987, "step": 23494 }, { "epoch": 0.7217460756305103, "grad_norm": 0.3785091042518616, "learning_rate": 1.4261360808930703e-05, "loss": 0.5775, "step": 23495 }, { "epoch": 0.7217767947654594, "grad_norm": 0.39345741271972656, "learning_rate": 1.42609236027267e-05, "loss": 0.6366, "step": 23496 }, { "epoch": 0.7218075139004085, "grad_norm": 0.34291011095046997, "learning_rate": 1.4260486386571012e-05, "loss": 0.6027, "step": 23497 }, { "epoch": 0.7218382330353578, "grad_norm": 0.411468505859375, "learning_rate": 1.4260049160464662e-05, "loss": 0.6068, "step": 23498 }, { "epoch": 0.7218689521703069, "grad_norm": 0.3584659695625305, "learning_rate": 1.4259611924408672e-05, "loss": 0.58, "step": 23499 }, { "epoch": 0.721899671305256, "grad_norm": 0.3552321493625641, "learning_rate": 1.4259174678404057e-05, "loss": 0.5471, "step": 23500 }, { "epoch": 0.7219303904402052, "grad_norm": 0.34494662284851074, "learning_rate": 1.4258737422451845e-05, "loss": 0.5457, "step": 23501 }, { "epoch": 0.7219611095751544, "grad_norm": 0.3720614016056061, "learning_rate": 1.4258300156553051e-05, "loss": 0.5179, "step": 23502 }, { "epoch": 0.7219918287101035, "grad_norm": 0.34958207607269287, "learning_rate": 1.4257862880708704e-05, "loss": 0.5272, "step": 23503 }, { "epoch": 0.7220225478450527, "grad_norm": 0.4568202495574951, "learning_rate": 1.4257425594919818e-05, "loss": 0.5586, "step": 23504 }, { "epoch": 0.7220532669800018, "grad_norm": 0.3843866288661957, "learning_rate": 1.4256988299187417e-05, "loss": 0.5801, "step": 23505 }, { "epoch": 0.722083986114951, "grad_norm": 0.36378565430641174, "learning_rate": 1.4256550993512527e-05, "loss": 0.607, "step": 23506 }, { "epoch": 0.7221147052499002, "grad_norm": 0.36671867966651917, "learning_rate": 1.4256113677896158e-05, "loss": 0.6193, "step": 23507 }, { "epoch": 0.7221454243848493, "grad_norm": 0.4297789931297302, "learning_rate": 1.425567635233934e-05, "loss": 0.5734, "step": 23508 }, { "epoch": 0.7221761435197985, "grad_norm": 0.728316605091095, "learning_rate": 1.4255239016843095e-05, "loss": 0.5575, "step": 23509 }, { "epoch": 0.7222068626547476, "grad_norm": 0.3559725284576416, "learning_rate": 1.4254801671408442e-05, "loss": 0.5936, "step": 23510 }, { "epoch": 0.7222375817896968, "grad_norm": 0.4130822718143463, "learning_rate": 1.4254364316036402e-05, "loss": 0.5243, "step": 23511 }, { "epoch": 0.722268300924646, "grad_norm": 0.3829379081726074, "learning_rate": 1.4253926950727997e-05, "loss": 0.4824, "step": 23512 }, { "epoch": 0.7222990200595951, "grad_norm": 0.3305726945400238, "learning_rate": 1.4253489575484247e-05, "loss": 0.5554, "step": 23513 }, { "epoch": 0.7223297391945442, "grad_norm": 0.3740295469760895, "learning_rate": 1.4253052190306177e-05, "loss": 0.5811, "step": 23514 }, { "epoch": 0.7223604583294935, "grad_norm": 0.3594711720943451, "learning_rate": 1.4252614795194805e-05, "loss": 0.603, "step": 23515 }, { "epoch": 0.7223911774644426, "grad_norm": 0.35480058193206787, "learning_rate": 1.4252177390151156e-05, "loss": 0.4766, "step": 23516 }, { "epoch": 0.7224218965993917, "grad_norm": 0.3549823462963104, "learning_rate": 1.4251739975176248e-05, "loss": 0.5985, "step": 23517 }, { "epoch": 0.7224526157343409, "grad_norm": 0.35526639223098755, "learning_rate": 1.4251302550271104e-05, "loss": 0.6272, "step": 23518 }, { "epoch": 0.7224833348692901, "grad_norm": 0.34360334277153015, "learning_rate": 1.4250865115436749e-05, "loss": 0.5596, "step": 23519 }, { "epoch": 0.7225140540042393, "grad_norm": 0.37993302941322327, "learning_rate": 1.4250427670674198e-05, "loss": 0.503, "step": 23520 }, { "epoch": 0.7225447731391884, "grad_norm": 0.3571467101573944, "learning_rate": 1.4249990215984482e-05, "loss": 0.578, "step": 23521 }, { "epoch": 0.7225754922741375, "grad_norm": 0.3444359600543976, "learning_rate": 1.4249552751368611e-05, "loss": 0.5547, "step": 23522 }, { "epoch": 0.7226062114090868, "grad_norm": 0.34911638498306274, "learning_rate": 1.4249115276827614e-05, "loss": 0.5207, "step": 23523 }, { "epoch": 0.7226369305440359, "grad_norm": 0.3860121965408325, "learning_rate": 1.4248677792362515e-05, "loss": 0.5549, "step": 23524 }, { "epoch": 0.722667649678985, "grad_norm": 0.3352706730365753, "learning_rate": 1.4248240297974327e-05, "loss": 0.5056, "step": 23525 }, { "epoch": 0.7226983688139342, "grad_norm": 0.3512820899486542, "learning_rate": 1.4247802793664081e-05, "loss": 0.5402, "step": 23526 }, { "epoch": 0.7227290879488834, "grad_norm": 0.43235495686531067, "learning_rate": 1.4247365279432794e-05, "loss": 0.5529, "step": 23527 }, { "epoch": 0.7227598070838325, "grad_norm": 0.37741196155548096, "learning_rate": 1.4246927755281488e-05, "loss": 0.5438, "step": 23528 }, { "epoch": 0.7227905262187817, "grad_norm": 0.3736162483692169, "learning_rate": 1.4246490221211186e-05, "loss": 0.5415, "step": 23529 }, { "epoch": 0.7228212453537308, "grad_norm": 0.4622918963432312, "learning_rate": 1.4246052677222909e-05, "loss": 0.4973, "step": 23530 }, { "epoch": 0.7228519644886799, "grad_norm": 0.3693772554397583, "learning_rate": 1.424561512331768e-05, "loss": 0.5761, "step": 23531 }, { "epoch": 0.7228826836236292, "grad_norm": 0.3438240587711334, "learning_rate": 1.4245177559496521e-05, "loss": 0.5499, "step": 23532 }, { "epoch": 0.7229134027585783, "grad_norm": 0.3542213439941406, "learning_rate": 1.4244739985760453e-05, "loss": 0.4583, "step": 23533 }, { "epoch": 0.7229441218935275, "grad_norm": 0.3750077486038208, "learning_rate": 1.4244302402110496e-05, "loss": 0.5647, "step": 23534 }, { "epoch": 0.7229748410284766, "grad_norm": 0.41831180453300476, "learning_rate": 1.4243864808547677e-05, "loss": 0.6675, "step": 23535 }, { "epoch": 0.7230055601634258, "grad_norm": 0.5244983434677124, "learning_rate": 1.4243427205073014e-05, "loss": 0.6826, "step": 23536 }, { "epoch": 0.723036279298375, "grad_norm": 0.4066429138183594, "learning_rate": 1.4242989591687532e-05, "loss": 0.5748, "step": 23537 }, { "epoch": 0.7230669984333241, "grad_norm": 0.616328775882721, "learning_rate": 1.4242551968392253e-05, "loss": 0.547, "step": 23538 }, { "epoch": 0.7230977175682732, "grad_norm": 0.3794616162776947, "learning_rate": 1.4242114335188193e-05, "loss": 0.4982, "step": 23539 }, { "epoch": 0.7231284367032225, "grad_norm": 0.34607994556427, "learning_rate": 1.4241676692076383e-05, "loss": 0.5203, "step": 23540 }, { "epoch": 0.7231591558381716, "grad_norm": 0.36104822158813477, "learning_rate": 1.4241239039057836e-05, "loss": 0.5054, "step": 23541 }, { "epoch": 0.7231898749731207, "grad_norm": 0.3541184961795807, "learning_rate": 1.4240801376133585e-05, "loss": 0.6594, "step": 23542 }, { "epoch": 0.7232205941080699, "grad_norm": 0.3412359952926636, "learning_rate": 1.4240363703304638e-05, "loss": 0.6325, "step": 23543 }, { "epoch": 0.7232513132430191, "grad_norm": 0.3251633942127228, "learning_rate": 1.4239926020572032e-05, "loss": 0.5314, "step": 23544 }, { "epoch": 0.7232820323779683, "grad_norm": 0.3489767611026764, "learning_rate": 1.4239488327936783e-05, "loss": 0.4876, "step": 23545 }, { "epoch": 0.7233127515129174, "grad_norm": 0.3867242932319641, "learning_rate": 1.4239050625399911e-05, "loss": 0.5427, "step": 23546 }, { "epoch": 0.7233434706478665, "grad_norm": 0.35458317399024963, "learning_rate": 1.423861291296244e-05, "loss": 0.5871, "step": 23547 }, { "epoch": 0.7233741897828158, "grad_norm": 0.34922313690185547, "learning_rate": 1.4238175190625392e-05, "loss": 0.5489, "step": 23548 }, { "epoch": 0.7234049089177649, "grad_norm": 0.3462423086166382, "learning_rate": 1.4237737458389791e-05, "loss": 0.5782, "step": 23549 }, { "epoch": 0.723435628052714, "grad_norm": 0.434574156999588, "learning_rate": 1.4237299716256656e-05, "loss": 0.518, "step": 23550 }, { "epoch": 0.7234663471876632, "grad_norm": 0.43729275465011597, "learning_rate": 1.4236861964227016e-05, "loss": 0.5057, "step": 23551 }, { "epoch": 0.7234970663226123, "grad_norm": 0.37877216935157776, "learning_rate": 1.4236424202301883e-05, "loss": 0.5618, "step": 23552 }, { "epoch": 0.7235277854575615, "grad_norm": 0.37984994053840637, "learning_rate": 1.4235986430482292e-05, "loss": 0.5368, "step": 23553 }, { "epoch": 0.7235585045925107, "grad_norm": 0.3621096611022949, "learning_rate": 1.4235548648769256e-05, "loss": 0.5725, "step": 23554 }, { "epoch": 0.7235892237274598, "grad_norm": 0.3596028685569763, "learning_rate": 1.42351108571638e-05, "loss": 0.6169, "step": 23555 }, { "epoch": 0.723619942862409, "grad_norm": 0.3701498210430145, "learning_rate": 1.4234673055666948e-05, "loss": 0.552, "step": 23556 }, { "epoch": 0.7236506619973582, "grad_norm": 0.3714126646518707, "learning_rate": 1.4234235244279717e-05, "loss": 0.5682, "step": 23557 }, { "epoch": 0.7236813811323073, "grad_norm": 0.40173104405403137, "learning_rate": 1.4233797423003139e-05, "loss": 0.7051, "step": 23558 }, { "epoch": 0.7237121002672565, "grad_norm": 0.3745660185813904, "learning_rate": 1.4233359591838227e-05, "loss": 0.5995, "step": 23559 }, { "epoch": 0.7237428194022056, "grad_norm": 0.38664981722831726, "learning_rate": 1.4232921750786013e-05, "loss": 0.5587, "step": 23560 }, { "epoch": 0.7237735385371548, "grad_norm": 0.4140790104866028, "learning_rate": 1.4232483899847511e-05, "loss": 0.5288, "step": 23561 }, { "epoch": 0.723804257672104, "grad_norm": 0.3839832544326782, "learning_rate": 1.4232046039023746e-05, "loss": 0.5741, "step": 23562 }, { "epoch": 0.7238349768070531, "grad_norm": 0.4153613746166229, "learning_rate": 1.4231608168315746e-05, "loss": 0.5012, "step": 23563 }, { "epoch": 0.7238656959420022, "grad_norm": 0.38559553027153015, "learning_rate": 1.4231170287724527e-05, "loss": 0.5815, "step": 23564 }, { "epoch": 0.7238964150769515, "grad_norm": 0.41478753089904785, "learning_rate": 1.4230732397251115e-05, "loss": 0.6385, "step": 23565 }, { "epoch": 0.7239271342119006, "grad_norm": 0.347736656665802, "learning_rate": 1.4230294496896532e-05, "loss": 0.507, "step": 23566 }, { "epoch": 0.7239578533468497, "grad_norm": 0.33042705059051514, "learning_rate": 1.4229856586661801e-05, "loss": 0.5066, "step": 23567 }, { "epoch": 0.7239885724817989, "grad_norm": 0.3666737377643585, "learning_rate": 1.4229418666547945e-05, "loss": 0.6081, "step": 23568 }, { "epoch": 0.724019291616748, "grad_norm": 0.3478720784187317, "learning_rate": 1.4228980736555986e-05, "loss": 0.5656, "step": 23569 }, { "epoch": 0.7240500107516973, "grad_norm": 0.36524301767349243, "learning_rate": 1.4228542796686946e-05, "loss": 0.533, "step": 23570 }, { "epoch": 0.7240807298866464, "grad_norm": 0.3652389645576477, "learning_rate": 1.422810484694185e-05, "loss": 0.6263, "step": 23571 }, { "epoch": 0.7241114490215955, "grad_norm": 0.32612359523773193, "learning_rate": 1.422766688732172e-05, "loss": 0.4685, "step": 23572 }, { "epoch": 0.7241421681565448, "grad_norm": 0.4568779170513153, "learning_rate": 1.4227228917827578e-05, "loss": 0.5696, "step": 23573 }, { "epoch": 0.7241728872914939, "grad_norm": 0.3929382860660553, "learning_rate": 1.4226790938460449e-05, "loss": 0.6065, "step": 23574 }, { "epoch": 0.724203606426443, "grad_norm": 0.34109339118003845, "learning_rate": 1.4226352949221354e-05, "loss": 0.5296, "step": 23575 }, { "epoch": 0.7242343255613922, "grad_norm": 0.35941293835639954, "learning_rate": 1.4225914950111318e-05, "loss": 0.5471, "step": 23576 }, { "epoch": 0.7242650446963413, "grad_norm": 0.36524489521980286, "learning_rate": 1.4225476941131358e-05, "loss": 0.5515, "step": 23577 }, { "epoch": 0.7242957638312905, "grad_norm": 0.32702356576919556, "learning_rate": 1.4225038922282506e-05, "loss": 0.5481, "step": 23578 }, { "epoch": 0.7243264829662397, "grad_norm": 0.37777161598205566, "learning_rate": 1.4224600893565777e-05, "loss": 0.6303, "step": 23579 }, { "epoch": 0.7243572021011888, "grad_norm": 0.365879625082016, "learning_rate": 1.4224162854982197e-05, "loss": 0.5312, "step": 23580 }, { "epoch": 0.724387921236138, "grad_norm": 0.34263065457344055, "learning_rate": 1.4223724806532793e-05, "loss": 0.5218, "step": 23581 }, { "epoch": 0.7244186403710872, "grad_norm": 0.3555160164833069, "learning_rate": 1.4223286748218583e-05, "loss": 0.5147, "step": 23582 }, { "epoch": 0.7244493595060363, "grad_norm": 0.34509575366973877, "learning_rate": 1.4222848680040593e-05, "loss": 0.6227, "step": 23583 }, { "epoch": 0.7244800786409855, "grad_norm": 0.3607139587402344, "learning_rate": 1.4222410601999843e-05, "loss": 0.5479, "step": 23584 }, { "epoch": 0.7245107977759346, "grad_norm": 0.43900054693222046, "learning_rate": 1.4221972514097363e-05, "loss": 0.6165, "step": 23585 }, { "epoch": 0.7245415169108838, "grad_norm": 0.34456953406333923, "learning_rate": 1.4221534416334167e-05, "loss": 0.5993, "step": 23586 }, { "epoch": 0.724572236045833, "grad_norm": 0.3278615474700928, "learning_rate": 1.4221096308711284e-05, "loss": 0.5482, "step": 23587 }, { "epoch": 0.7246029551807821, "grad_norm": 0.3542517423629761, "learning_rate": 1.4220658191229735e-05, "loss": 0.5655, "step": 23588 }, { "epoch": 0.7246336743157312, "grad_norm": 0.6193568110466003, "learning_rate": 1.4220220063890544e-05, "loss": 0.5425, "step": 23589 }, { "epoch": 0.7246643934506805, "grad_norm": 0.3758081793785095, "learning_rate": 1.4219781926694736e-05, "loss": 0.5533, "step": 23590 }, { "epoch": 0.7246951125856296, "grad_norm": 0.36420997977256775, "learning_rate": 1.4219343779643328e-05, "loss": 0.5604, "step": 23591 }, { "epoch": 0.7247258317205787, "grad_norm": 0.3886128067970276, "learning_rate": 1.4218905622737353e-05, "loss": 0.6149, "step": 23592 }, { "epoch": 0.7247565508555279, "grad_norm": 0.4585995674133301, "learning_rate": 1.4218467455977826e-05, "loss": 0.557, "step": 23593 }, { "epoch": 0.724787269990477, "grad_norm": 0.35859495401382446, "learning_rate": 1.4218029279365777e-05, "loss": 0.5058, "step": 23594 }, { "epoch": 0.7248179891254263, "grad_norm": 0.3854186236858368, "learning_rate": 1.4217591092902223e-05, "loss": 0.5406, "step": 23595 }, { "epoch": 0.7248487082603754, "grad_norm": 1.0835661888122559, "learning_rate": 1.4217152896588193e-05, "loss": 0.5466, "step": 23596 }, { "epoch": 0.7248794273953245, "grad_norm": 0.3648626208305359, "learning_rate": 1.4216714690424704e-05, "loss": 0.5928, "step": 23597 }, { "epoch": 0.7249101465302737, "grad_norm": 0.38134458661079407, "learning_rate": 1.4216276474412785e-05, "loss": 0.5591, "step": 23598 }, { "epoch": 0.7249408656652229, "grad_norm": 0.48309874534606934, "learning_rate": 1.4215838248553459e-05, "loss": 0.5667, "step": 23599 }, { "epoch": 0.724971584800172, "grad_norm": 0.3313147723674774, "learning_rate": 1.421540001284775e-05, "loss": 0.5457, "step": 23600 }, { "epoch": 0.7250023039351212, "grad_norm": 0.3960816264152527, "learning_rate": 1.4214961767296678e-05, "loss": 0.5322, "step": 23601 }, { "epoch": 0.7250330230700703, "grad_norm": 0.38588660955429077, "learning_rate": 1.4214523511901265e-05, "loss": 0.5452, "step": 23602 }, { "epoch": 0.7250637422050195, "grad_norm": 0.3263353109359741, "learning_rate": 1.4214085246662544e-05, "loss": 0.5888, "step": 23603 }, { "epoch": 0.7250944613399687, "grad_norm": 0.36650851368904114, "learning_rate": 1.4213646971581527e-05, "loss": 0.5392, "step": 23604 }, { "epoch": 0.7251251804749178, "grad_norm": 0.4747573137283325, "learning_rate": 1.4213208686659248e-05, "loss": 0.5754, "step": 23605 }, { "epoch": 0.725155899609867, "grad_norm": 0.3595588207244873, "learning_rate": 1.4212770391896724e-05, "loss": 0.5737, "step": 23606 }, { "epoch": 0.7251866187448162, "grad_norm": 0.35866689682006836, "learning_rate": 1.421233208729498e-05, "loss": 0.5038, "step": 23607 }, { "epoch": 0.7252173378797653, "grad_norm": 0.430494099855423, "learning_rate": 1.4211893772855043e-05, "loss": 0.619, "step": 23608 }, { "epoch": 0.7252480570147145, "grad_norm": 0.3667164146900177, "learning_rate": 1.4211455448577928e-05, "loss": 0.491, "step": 23609 }, { "epoch": 0.7252787761496636, "grad_norm": 0.3604293465614319, "learning_rate": 1.4211017114464671e-05, "loss": 0.5741, "step": 23610 }, { "epoch": 0.7253094952846127, "grad_norm": 0.3586040139198303, "learning_rate": 1.4210578770516286e-05, "loss": 0.4825, "step": 23611 }, { "epoch": 0.725340214419562, "grad_norm": 0.43813616037368774, "learning_rate": 1.4210140416733802e-05, "loss": 0.5524, "step": 23612 }, { "epoch": 0.7253709335545111, "grad_norm": 0.39980700612068176, "learning_rate": 1.4209702053118237e-05, "loss": 0.6294, "step": 23613 }, { "epoch": 0.7254016526894602, "grad_norm": 0.352367639541626, "learning_rate": 1.4209263679670624e-05, "loss": 0.5896, "step": 23614 }, { "epoch": 0.7254323718244094, "grad_norm": 0.3426440954208374, "learning_rate": 1.4208825296391979e-05, "loss": 0.6318, "step": 23615 }, { "epoch": 0.7254630909593586, "grad_norm": 0.3985994756221771, "learning_rate": 1.4208386903283327e-05, "loss": 0.5942, "step": 23616 }, { "epoch": 0.7254938100943077, "grad_norm": 0.417350172996521, "learning_rate": 1.4207948500345695e-05, "loss": 0.4961, "step": 23617 }, { "epoch": 0.7255245292292569, "grad_norm": 0.4160880148410797, "learning_rate": 1.4207510087580108e-05, "loss": 0.4906, "step": 23618 }, { "epoch": 0.725555248364206, "grad_norm": 0.3859294354915619, "learning_rate": 1.4207071664987587e-05, "loss": 0.5419, "step": 23619 }, { "epoch": 0.7255859674991553, "grad_norm": 0.37368664145469666, "learning_rate": 1.4206633232569151e-05, "loss": 0.5098, "step": 23620 }, { "epoch": 0.7256166866341044, "grad_norm": 0.808610200881958, "learning_rate": 1.4206194790325833e-05, "loss": 0.5449, "step": 23621 }, { "epoch": 0.7256474057690535, "grad_norm": 0.37795358896255493, "learning_rate": 1.4205756338258654e-05, "loss": 0.4909, "step": 23622 }, { "epoch": 0.7256781249040027, "grad_norm": 0.30645954608917236, "learning_rate": 1.4205317876368638e-05, "loss": 0.5003, "step": 23623 }, { "epoch": 0.7257088440389519, "grad_norm": 0.3907066881656647, "learning_rate": 1.4204879404656807e-05, "loss": 0.5422, "step": 23624 }, { "epoch": 0.725739563173901, "grad_norm": 0.38744497299194336, "learning_rate": 1.4204440923124185e-05, "loss": 0.566, "step": 23625 }, { "epoch": 0.7257702823088502, "grad_norm": 0.38452228903770447, "learning_rate": 1.4204002431771801e-05, "loss": 0.5625, "step": 23626 }, { "epoch": 0.7258010014437993, "grad_norm": 0.37231042981147766, "learning_rate": 1.4203563930600672e-05, "loss": 0.5877, "step": 23627 }, { "epoch": 0.7258317205787485, "grad_norm": 0.3269977569580078, "learning_rate": 1.420312541961183e-05, "loss": 0.6083, "step": 23628 }, { "epoch": 0.7258624397136977, "grad_norm": 0.397399365901947, "learning_rate": 1.420268689880629e-05, "loss": 0.5514, "step": 23629 }, { "epoch": 0.7258931588486468, "grad_norm": 0.35324257612228394, "learning_rate": 1.4202248368185085e-05, "loss": 0.5605, "step": 23630 }, { "epoch": 0.725923877983596, "grad_norm": 0.35701411962509155, "learning_rate": 1.4201809827749234e-05, "loss": 0.5274, "step": 23631 }, { "epoch": 0.7259545971185452, "grad_norm": 0.3501030504703522, "learning_rate": 1.4201371277499766e-05, "loss": 0.6404, "step": 23632 }, { "epoch": 0.7259853162534943, "grad_norm": 0.3604285717010498, "learning_rate": 1.4200932717437696e-05, "loss": 0.5357, "step": 23633 }, { "epoch": 0.7260160353884435, "grad_norm": 0.3467474579811096, "learning_rate": 1.4200494147564059e-05, "loss": 0.5708, "step": 23634 }, { "epoch": 0.7260467545233926, "grad_norm": 0.5039874911308289, "learning_rate": 1.4200055567879872e-05, "loss": 0.5587, "step": 23635 }, { "epoch": 0.7260774736583417, "grad_norm": 0.32333534955978394, "learning_rate": 1.4199616978386162e-05, "loss": 0.4742, "step": 23636 }, { "epoch": 0.726108192793291, "grad_norm": 0.3479653298854828, "learning_rate": 1.4199178379083954e-05, "loss": 0.6476, "step": 23637 }, { "epoch": 0.7261389119282401, "grad_norm": 0.445355087518692, "learning_rate": 1.419873976997427e-05, "loss": 0.5719, "step": 23638 }, { "epoch": 0.7261696310631892, "grad_norm": 0.37784457206726074, "learning_rate": 1.4198301151058138e-05, "loss": 0.4871, "step": 23639 }, { "epoch": 0.7262003501981384, "grad_norm": 0.34361544251441956, "learning_rate": 1.4197862522336578e-05, "loss": 0.507, "step": 23640 }, { "epoch": 0.7262310693330876, "grad_norm": 0.33159273862838745, "learning_rate": 1.4197423883810622e-05, "loss": 0.5632, "step": 23641 }, { "epoch": 0.7262617884680368, "grad_norm": 0.3924744725227356, "learning_rate": 1.4196985235481287e-05, "loss": 0.5602, "step": 23642 }, { "epoch": 0.7262925076029859, "grad_norm": 0.3940472900867462, "learning_rate": 1.41965465773496e-05, "loss": 0.4785, "step": 23643 }, { "epoch": 0.726323226737935, "grad_norm": 0.3590486943721771, "learning_rate": 1.4196107909416584e-05, "loss": 0.5561, "step": 23644 }, { "epoch": 0.7263539458728843, "grad_norm": 0.384631872177124, "learning_rate": 1.4195669231683264e-05, "loss": 0.6053, "step": 23645 }, { "epoch": 0.7263846650078334, "grad_norm": 0.6582173705101013, "learning_rate": 1.4195230544150668e-05, "loss": 0.5167, "step": 23646 }, { "epoch": 0.7264153841427825, "grad_norm": 0.49320319294929504, "learning_rate": 1.4194791846819817e-05, "loss": 0.5639, "step": 23647 }, { "epoch": 0.7264461032777317, "grad_norm": 0.36309540271759033, "learning_rate": 1.4194353139691737e-05, "loss": 0.6017, "step": 23648 }, { "epoch": 0.7264768224126809, "grad_norm": 0.3810455799102783, "learning_rate": 1.4193914422767452e-05, "loss": 0.5704, "step": 23649 }, { "epoch": 0.72650754154763, "grad_norm": 0.37978923320770264, "learning_rate": 1.4193475696047987e-05, "loss": 0.6431, "step": 23650 }, { "epoch": 0.7265382606825792, "grad_norm": 0.4737134277820587, "learning_rate": 1.4193036959534365e-05, "loss": 0.6235, "step": 23651 }, { "epoch": 0.7265689798175283, "grad_norm": 0.36666056513786316, "learning_rate": 1.4192598213227615e-05, "loss": 0.5328, "step": 23652 }, { "epoch": 0.7265996989524774, "grad_norm": 0.3435770571231842, "learning_rate": 1.4192159457128758e-05, "loss": 0.5205, "step": 23653 }, { "epoch": 0.7266304180874267, "grad_norm": 0.35588446259498596, "learning_rate": 1.4191720691238819e-05, "loss": 0.5005, "step": 23654 }, { "epoch": 0.7266611372223758, "grad_norm": 0.4103385806083679, "learning_rate": 1.4191281915558826e-05, "loss": 0.5791, "step": 23655 }, { "epoch": 0.726691856357325, "grad_norm": 0.33726218342781067, "learning_rate": 1.4190843130089797e-05, "loss": 0.6239, "step": 23656 }, { "epoch": 0.7267225754922741, "grad_norm": 0.33079278469085693, "learning_rate": 1.4190404334832766e-05, "loss": 0.5108, "step": 23657 }, { "epoch": 0.7267532946272233, "grad_norm": 0.3695538341999054, "learning_rate": 1.418996552978875e-05, "loss": 0.4978, "step": 23658 }, { "epoch": 0.7267840137621725, "grad_norm": 0.3764713406562805, "learning_rate": 1.4189526714958779e-05, "loss": 0.5155, "step": 23659 }, { "epoch": 0.7268147328971216, "grad_norm": 0.39029690623283386, "learning_rate": 1.4189087890343875e-05, "loss": 0.5318, "step": 23660 }, { "epoch": 0.7268454520320707, "grad_norm": 0.35460689663887024, "learning_rate": 1.4188649055945063e-05, "loss": 0.5321, "step": 23661 }, { "epoch": 0.72687617116702, "grad_norm": 0.36746084690093994, "learning_rate": 1.4188210211763371e-05, "loss": 0.5647, "step": 23662 }, { "epoch": 0.7269068903019691, "grad_norm": 0.3940039575099945, "learning_rate": 1.4187771357799818e-05, "loss": 0.5764, "step": 23663 }, { "epoch": 0.7269376094369182, "grad_norm": 0.3698848485946655, "learning_rate": 1.4187332494055434e-05, "loss": 0.5009, "step": 23664 }, { "epoch": 0.7269683285718674, "grad_norm": 0.3521776795387268, "learning_rate": 1.4186893620531242e-05, "loss": 0.559, "step": 23665 }, { "epoch": 0.7269990477068166, "grad_norm": 0.32246285676956177, "learning_rate": 1.418645473722827e-05, "loss": 0.5357, "step": 23666 }, { "epoch": 0.7270297668417658, "grad_norm": 0.36324405670166016, "learning_rate": 1.4186015844147538e-05, "loss": 0.5306, "step": 23667 }, { "epoch": 0.7270604859767149, "grad_norm": 0.3495122194290161, "learning_rate": 1.4185576941290073e-05, "loss": 0.5361, "step": 23668 }, { "epoch": 0.727091205111664, "grad_norm": 0.3654204308986664, "learning_rate": 1.4185138028656901e-05, "loss": 0.5168, "step": 23669 }, { "epoch": 0.7271219242466133, "grad_norm": 0.41240623593330383, "learning_rate": 1.4184699106249049e-05, "loss": 0.5746, "step": 23670 }, { "epoch": 0.7271526433815624, "grad_norm": 0.35371023416519165, "learning_rate": 1.4184260174067539e-05, "loss": 0.5814, "step": 23671 }, { "epoch": 0.7271833625165115, "grad_norm": 0.46820759773254395, "learning_rate": 1.4183821232113398e-05, "loss": 0.5673, "step": 23672 }, { "epoch": 0.7272140816514607, "grad_norm": 0.6816707849502563, "learning_rate": 1.4183382280387648e-05, "loss": 0.5718, "step": 23673 }, { "epoch": 0.7272448007864099, "grad_norm": 0.34966328740119934, "learning_rate": 1.4182943318891318e-05, "loss": 0.521, "step": 23674 }, { "epoch": 0.727275519921359, "grad_norm": 0.38064253330230713, "learning_rate": 1.4182504347625431e-05, "loss": 0.5502, "step": 23675 }, { "epoch": 0.7273062390563082, "grad_norm": 0.3547210097312927, "learning_rate": 1.4182065366591013e-05, "loss": 0.5587, "step": 23676 }, { "epoch": 0.7273369581912573, "grad_norm": 0.35320207476615906, "learning_rate": 1.418162637578909e-05, "loss": 0.4789, "step": 23677 }, { "epoch": 0.7273676773262064, "grad_norm": 0.4392543435096741, "learning_rate": 1.4181187375220688e-05, "loss": 0.5687, "step": 23678 }, { "epoch": 0.7273983964611557, "grad_norm": 0.35102733969688416, "learning_rate": 1.418074836488683e-05, "loss": 0.6056, "step": 23679 }, { "epoch": 0.7274291155961048, "grad_norm": 0.31673920154571533, "learning_rate": 1.418030934478854e-05, "loss": 0.4979, "step": 23680 }, { "epoch": 0.727459834731054, "grad_norm": 0.33973270654678345, "learning_rate": 1.4179870314926847e-05, "loss": 0.5974, "step": 23681 }, { "epoch": 0.7274905538660031, "grad_norm": 0.36820733547210693, "learning_rate": 1.4179431275302777e-05, "loss": 0.5429, "step": 23682 }, { "epoch": 0.7275212730009523, "grad_norm": 0.3375203311443329, "learning_rate": 1.417899222591735e-05, "loss": 0.4856, "step": 23683 }, { "epoch": 0.7275519921359015, "grad_norm": 0.4372187852859497, "learning_rate": 1.4178553166771599e-05, "loss": 0.6219, "step": 23684 }, { "epoch": 0.7275827112708506, "grad_norm": 0.36342763900756836, "learning_rate": 1.417811409786654e-05, "loss": 0.5706, "step": 23685 }, { "epoch": 0.7276134304057997, "grad_norm": 0.41063809394836426, "learning_rate": 1.4177675019203207e-05, "loss": 0.5834, "step": 23686 }, { "epoch": 0.727644149540749, "grad_norm": 0.35983720421791077, "learning_rate": 1.417723593078262e-05, "loss": 0.5377, "step": 23687 }, { "epoch": 0.7276748686756981, "grad_norm": 0.7070322632789612, "learning_rate": 1.417679683260581e-05, "loss": 0.6076, "step": 23688 }, { "epoch": 0.7277055878106472, "grad_norm": 0.3942178189754486, "learning_rate": 1.4176357724673798e-05, "loss": 0.5189, "step": 23689 }, { "epoch": 0.7277363069455964, "grad_norm": 0.406179815530777, "learning_rate": 1.4175918606987607e-05, "loss": 0.5003, "step": 23690 }, { "epoch": 0.7277670260805456, "grad_norm": 0.3940722644329071, "learning_rate": 1.4175479479548272e-05, "loss": 0.5898, "step": 23691 }, { "epoch": 0.7277977452154948, "grad_norm": 0.43896061182022095, "learning_rate": 1.4175040342356809e-05, "loss": 0.5015, "step": 23692 }, { "epoch": 0.7278284643504439, "grad_norm": 0.32563382387161255, "learning_rate": 1.417460119541425e-05, "loss": 0.5137, "step": 23693 }, { "epoch": 0.727859183485393, "grad_norm": 0.4146062135696411, "learning_rate": 1.4174162038721616e-05, "loss": 0.5592, "step": 23694 }, { "epoch": 0.7278899026203423, "grad_norm": 0.37616169452667236, "learning_rate": 1.4173722872279938e-05, "loss": 0.5174, "step": 23695 }, { "epoch": 0.7279206217552914, "grad_norm": 0.38926035165786743, "learning_rate": 1.4173283696090238e-05, "loss": 0.5809, "step": 23696 }, { "epoch": 0.7279513408902405, "grad_norm": 0.4060308635234833, "learning_rate": 1.4172844510153542e-05, "loss": 0.581, "step": 23697 }, { "epoch": 0.7279820600251897, "grad_norm": 0.3433569073677063, "learning_rate": 1.4172405314470879e-05, "loss": 0.6551, "step": 23698 }, { "epoch": 0.7280127791601388, "grad_norm": 0.38237112760543823, "learning_rate": 1.4171966109043267e-05, "loss": 0.5956, "step": 23699 }, { "epoch": 0.728043498295088, "grad_norm": 0.35107576847076416, "learning_rate": 1.417152689387174e-05, "loss": 0.5651, "step": 23700 }, { "epoch": 0.7280742174300372, "grad_norm": 0.39413052797317505, "learning_rate": 1.417108766895732e-05, "loss": 0.6055, "step": 23701 }, { "epoch": 0.7281049365649863, "grad_norm": 0.4344220757484436, "learning_rate": 1.4170648434301033e-05, "loss": 0.5153, "step": 23702 }, { "epoch": 0.7281356556999354, "grad_norm": 0.3744897246360779, "learning_rate": 1.4170209189903906e-05, "loss": 0.5292, "step": 23703 }, { "epoch": 0.7281663748348847, "grad_norm": 0.5169966220855713, "learning_rate": 1.4169769935766963e-05, "loss": 0.5295, "step": 23704 }, { "epoch": 0.7281970939698338, "grad_norm": 0.3597906529903412, "learning_rate": 1.416933067189123e-05, "loss": 0.577, "step": 23705 }, { "epoch": 0.728227813104783, "grad_norm": 0.376465380191803, "learning_rate": 1.4168891398277737e-05, "loss": 0.5306, "step": 23706 }, { "epoch": 0.7282585322397321, "grad_norm": 0.3684774935245514, "learning_rate": 1.416845211492751e-05, "loss": 0.5796, "step": 23707 }, { "epoch": 0.7282892513746813, "grad_norm": 0.32659581303596497, "learning_rate": 1.4168012821841564e-05, "loss": 0.4845, "step": 23708 }, { "epoch": 0.7283199705096305, "grad_norm": 0.3570389151573181, "learning_rate": 1.4167573519020937e-05, "loss": 0.5604, "step": 23709 }, { "epoch": 0.7283506896445796, "grad_norm": 0.386349081993103, "learning_rate": 1.4167134206466649e-05, "loss": 0.4984, "step": 23710 }, { "epoch": 0.7283814087795287, "grad_norm": 0.3805271089076996, "learning_rate": 1.4166694884179731e-05, "loss": 0.5491, "step": 23711 }, { "epoch": 0.728412127914478, "grad_norm": 0.3947352468967438, "learning_rate": 1.4166255552161206e-05, "loss": 0.5344, "step": 23712 }, { "epoch": 0.7284428470494271, "grad_norm": 0.3343518078327179, "learning_rate": 1.4165816210412098e-05, "loss": 0.5227, "step": 23713 }, { "epoch": 0.7284735661843762, "grad_norm": 0.3659738302230835, "learning_rate": 1.4165376858933438e-05, "loss": 0.6126, "step": 23714 }, { "epoch": 0.7285042853193254, "grad_norm": 0.33404213190078735, "learning_rate": 1.4164937497726245e-05, "loss": 0.5147, "step": 23715 }, { "epoch": 0.7285350044542745, "grad_norm": 0.3803084194660187, "learning_rate": 1.4164498126791555e-05, "loss": 0.6356, "step": 23716 }, { "epoch": 0.7285657235892238, "grad_norm": 0.3894367814064026, "learning_rate": 1.4164058746130385e-05, "loss": 0.5556, "step": 23717 }, { "epoch": 0.7285964427241729, "grad_norm": 0.37393128871917725, "learning_rate": 1.4163619355743768e-05, "loss": 0.56, "step": 23718 }, { "epoch": 0.728627161859122, "grad_norm": 0.3443280756473541, "learning_rate": 1.4163179955632721e-05, "loss": 0.5109, "step": 23719 }, { "epoch": 0.7286578809940712, "grad_norm": 0.3509398102760315, "learning_rate": 1.4162740545798284e-05, "loss": 0.6025, "step": 23720 }, { "epoch": 0.7286886001290204, "grad_norm": 0.38881832361221313, "learning_rate": 1.4162301126241471e-05, "loss": 0.5501, "step": 23721 }, { "epoch": 0.7287193192639695, "grad_norm": 0.37509265542030334, "learning_rate": 1.4161861696963318e-05, "loss": 0.5371, "step": 23722 }, { "epoch": 0.7287500383989187, "grad_norm": 0.5452874898910522, "learning_rate": 1.4161422257964843e-05, "loss": 0.5234, "step": 23723 }, { "epoch": 0.7287807575338678, "grad_norm": 0.4019443988800049, "learning_rate": 1.4160982809247075e-05, "loss": 0.5706, "step": 23724 }, { "epoch": 0.728811476668817, "grad_norm": 0.3421544134616852, "learning_rate": 1.4160543350811042e-05, "loss": 0.54, "step": 23725 }, { "epoch": 0.7288421958037662, "grad_norm": 0.36849454045295715, "learning_rate": 1.4160103882657766e-05, "loss": 0.5909, "step": 23726 }, { "epoch": 0.7288729149387153, "grad_norm": 0.38589173555374146, "learning_rate": 1.415966440478828e-05, "loss": 0.5259, "step": 23727 }, { "epoch": 0.7289036340736644, "grad_norm": 0.41061854362487793, "learning_rate": 1.4159224917203607e-05, "loss": 0.5833, "step": 23728 }, { "epoch": 0.7289343532086137, "grad_norm": 0.36486682295799255, "learning_rate": 1.4158785419904774e-05, "loss": 0.5775, "step": 23729 }, { "epoch": 0.7289650723435628, "grad_norm": 0.39157333970069885, "learning_rate": 1.4158345912892808e-05, "loss": 0.6332, "step": 23730 }, { "epoch": 0.728995791478512, "grad_norm": 0.36302247643470764, "learning_rate": 1.4157906396168733e-05, "loss": 0.4568, "step": 23731 }, { "epoch": 0.7290265106134611, "grad_norm": 0.3445998430252075, "learning_rate": 1.4157466869733581e-05, "loss": 0.5348, "step": 23732 }, { "epoch": 0.7290572297484103, "grad_norm": 0.4787598252296448, "learning_rate": 1.4157027333588369e-05, "loss": 0.5893, "step": 23733 }, { "epoch": 0.7290879488833595, "grad_norm": 0.38292160630226135, "learning_rate": 1.4156587787734134e-05, "loss": 0.5237, "step": 23734 }, { "epoch": 0.7291186680183086, "grad_norm": 0.36222484707832336, "learning_rate": 1.4156148232171893e-05, "loss": 0.6583, "step": 23735 }, { "epoch": 0.7291493871532577, "grad_norm": 0.3638589680194855, "learning_rate": 1.4155708666902683e-05, "loss": 0.5407, "step": 23736 }, { "epoch": 0.729180106288207, "grad_norm": 0.34387287497520447, "learning_rate": 1.4155269091927522e-05, "loss": 0.5783, "step": 23737 }, { "epoch": 0.7292108254231561, "grad_norm": 0.36789408326148987, "learning_rate": 1.4154829507247444e-05, "loss": 0.6014, "step": 23738 }, { "epoch": 0.7292415445581052, "grad_norm": 0.37597379088401794, "learning_rate": 1.4154389912863465e-05, "loss": 0.5534, "step": 23739 }, { "epoch": 0.7292722636930544, "grad_norm": 0.35946986079216003, "learning_rate": 1.4153950308776623e-05, "loss": 0.632, "step": 23740 }, { "epoch": 0.7293029828280035, "grad_norm": 0.44843506813049316, "learning_rate": 1.415351069498794e-05, "loss": 0.5819, "step": 23741 }, { "epoch": 0.7293337019629528, "grad_norm": 0.38117727637290955, "learning_rate": 1.4153071071498438e-05, "loss": 0.5955, "step": 23742 }, { "epoch": 0.7293644210979019, "grad_norm": 0.35568153858184814, "learning_rate": 1.4152631438309152e-05, "loss": 0.5855, "step": 23743 }, { "epoch": 0.729395140232851, "grad_norm": 2.803114891052246, "learning_rate": 1.4152191795421104e-05, "loss": 0.6971, "step": 23744 }, { "epoch": 0.7294258593678002, "grad_norm": 0.3489205539226532, "learning_rate": 1.4151752142835324e-05, "loss": 0.6009, "step": 23745 }, { "epoch": 0.7294565785027494, "grad_norm": 0.39385589957237244, "learning_rate": 1.4151312480552834e-05, "loss": 0.5883, "step": 23746 }, { "epoch": 0.7294872976376985, "grad_norm": 0.35110464692115784, "learning_rate": 1.4150872808574667e-05, "loss": 0.5261, "step": 23747 }, { "epoch": 0.7295180167726477, "grad_norm": 0.4131391942501068, "learning_rate": 1.4150433126901845e-05, "loss": 0.5845, "step": 23748 }, { "epoch": 0.7295487359075968, "grad_norm": 0.35200539231300354, "learning_rate": 1.4149993435535396e-05, "loss": 0.5523, "step": 23749 }, { "epoch": 0.729579455042546, "grad_norm": 0.3685833513736725, "learning_rate": 1.4149553734476349e-05, "loss": 0.502, "step": 23750 }, { "epoch": 0.7296101741774952, "grad_norm": 0.34631842374801636, "learning_rate": 1.4149114023725728e-05, "loss": 0.581, "step": 23751 }, { "epoch": 0.7296408933124443, "grad_norm": 0.35019397735595703, "learning_rate": 1.4148674303284564e-05, "loss": 0.5088, "step": 23752 }, { "epoch": 0.7296716124473935, "grad_norm": 0.36026641726493835, "learning_rate": 1.4148234573153879e-05, "loss": 0.6171, "step": 23753 }, { "epoch": 0.7297023315823427, "grad_norm": 0.4330006539821625, "learning_rate": 1.4147794833334702e-05, "loss": 0.5155, "step": 23754 }, { "epoch": 0.7297330507172918, "grad_norm": 0.3418286144733429, "learning_rate": 1.414735508382806e-05, "loss": 0.5793, "step": 23755 }, { "epoch": 0.729763769852241, "grad_norm": 0.6904200315475464, "learning_rate": 1.4146915324634985e-05, "loss": 0.562, "step": 23756 }, { "epoch": 0.7297944889871901, "grad_norm": 0.36489981412887573, "learning_rate": 1.4146475555756497e-05, "loss": 0.5959, "step": 23757 }, { "epoch": 0.7298252081221392, "grad_norm": 0.4145813584327698, "learning_rate": 1.4146035777193624e-05, "loss": 0.6023, "step": 23758 }, { "epoch": 0.7298559272570885, "grad_norm": 0.3624298870563507, "learning_rate": 1.4145595988947399e-05, "loss": 0.5944, "step": 23759 }, { "epoch": 0.7298866463920376, "grad_norm": 0.5102236270904541, "learning_rate": 1.4145156191018841e-05, "loss": 0.5354, "step": 23760 }, { "epoch": 0.7299173655269867, "grad_norm": 0.388832688331604, "learning_rate": 1.4144716383408983e-05, "loss": 0.5566, "step": 23761 }, { "epoch": 0.729948084661936, "grad_norm": 0.3607177436351776, "learning_rate": 1.4144276566118847e-05, "loss": 0.561, "step": 23762 }, { "epoch": 0.7299788037968851, "grad_norm": 0.37044408917427063, "learning_rate": 1.4143836739149468e-05, "loss": 0.5612, "step": 23763 }, { "epoch": 0.7300095229318342, "grad_norm": 0.3716779053211212, "learning_rate": 1.4143396902501869e-05, "loss": 0.5654, "step": 23764 }, { "epoch": 0.7300402420667834, "grad_norm": 0.3244674503803253, "learning_rate": 1.4142957056177074e-05, "loss": 0.6282, "step": 23765 }, { "epoch": 0.7300709612017325, "grad_norm": 0.34889617562294006, "learning_rate": 1.4142517200176115e-05, "loss": 0.4345, "step": 23766 }, { "epoch": 0.7301016803366818, "grad_norm": 0.3510979115962982, "learning_rate": 1.414207733450002e-05, "loss": 0.5173, "step": 23767 }, { "epoch": 0.7301323994716309, "grad_norm": 0.38287153840065, "learning_rate": 1.4141637459149811e-05, "loss": 0.5539, "step": 23768 }, { "epoch": 0.73016311860658, "grad_norm": 0.406887412071228, "learning_rate": 1.4141197574126519e-05, "loss": 0.5519, "step": 23769 }, { "epoch": 0.7301938377415292, "grad_norm": 0.3683392405509949, "learning_rate": 1.4140757679431172e-05, "loss": 0.5023, "step": 23770 }, { "epoch": 0.7302245568764784, "grad_norm": 0.38493525981903076, "learning_rate": 1.4140317775064794e-05, "loss": 0.6653, "step": 23771 }, { "epoch": 0.7302552760114275, "grad_norm": 0.47716188430786133, "learning_rate": 1.4139877861028417e-05, "loss": 0.5262, "step": 23772 }, { "epoch": 0.7302859951463767, "grad_norm": 0.3358722925186157, "learning_rate": 1.4139437937323066e-05, "loss": 0.5286, "step": 23773 }, { "epoch": 0.7303167142813258, "grad_norm": 0.3670349717140198, "learning_rate": 1.4138998003949768e-05, "loss": 0.5919, "step": 23774 }, { "epoch": 0.730347433416275, "grad_norm": 0.3686022162437439, "learning_rate": 1.4138558060909554e-05, "loss": 0.5198, "step": 23775 }, { "epoch": 0.7303781525512242, "grad_norm": 0.37169981002807617, "learning_rate": 1.4138118108203443e-05, "loss": 0.633, "step": 23776 }, { "epoch": 0.7304088716861733, "grad_norm": 0.35692423582077026, "learning_rate": 1.4137678145832472e-05, "loss": 0.6198, "step": 23777 }, { "epoch": 0.7304395908211225, "grad_norm": 0.38708075881004333, "learning_rate": 1.4137238173797662e-05, "loss": 0.469, "step": 23778 }, { "epoch": 0.7304703099560717, "grad_norm": 0.366332471370697, "learning_rate": 1.4136798192100048e-05, "loss": 0.5832, "step": 23779 }, { "epoch": 0.7305010290910208, "grad_norm": 0.38620463013648987, "learning_rate": 1.4136358200740648e-05, "loss": 0.5234, "step": 23780 }, { "epoch": 0.73053174822597, "grad_norm": 0.39283668994903564, "learning_rate": 1.4135918199720495e-05, "loss": 0.5622, "step": 23781 }, { "epoch": 0.7305624673609191, "grad_norm": 0.41996192932128906, "learning_rate": 1.4135478189040619e-05, "loss": 0.5483, "step": 23782 }, { "epoch": 0.7305931864958682, "grad_norm": 0.48027142882347107, "learning_rate": 1.4135038168702043e-05, "loss": 0.5421, "step": 23783 }, { "epoch": 0.7306239056308175, "grad_norm": 0.30207884311676025, "learning_rate": 1.4134598138705797e-05, "loss": 0.4644, "step": 23784 }, { "epoch": 0.7306546247657666, "grad_norm": 0.355209082365036, "learning_rate": 1.4134158099052908e-05, "loss": 0.4625, "step": 23785 }, { "epoch": 0.7306853439007157, "grad_norm": 0.37796348333358765, "learning_rate": 1.4133718049744404e-05, "loss": 0.5679, "step": 23786 }, { "epoch": 0.7307160630356649, "grad_norm": 0.37691113352775574, "learning_rate": 1.4133277990781314e-05, "loss": 0.641, "step": 23787 }, { "epoch": 0.7307467821706141, "grad_norm": 0.364925354719162, "learning_rate": 1.4132837922164664e-05, "loss": 0.5836, "step": 23788 }, { "epoch": 0.7307775013055632, "grad_norm": 0.35037028789520264, "learning_rate": 1.413239784389548e-05, "loss": 0.5516, "step": 23789 }, { "epoch": 0.7308082204405124, "grad_norm": 0.35052090883255005, "learning_rate": 1.4131957755974797e-05, "loss": 0.6165, "step": 23790 }, { "epoch": 0.7308389395754615, "grad_norm": 0.3854401409626007, "learning_rate": 1.4131517658403633e-05, "loss": 0.5946, "step": 23791 }, { "epoch": 0.7308696587104108, "grad_norm": 0.35541802644729614, "learning_rate": 1.4131077551183026e-05, "loss": 0.5778, "step": 23792 }, { "epoch": 0.7309003778453599, "grad_norm": 0.32179227471351624, "learning_rate": 1.4130637434313997e-05, "loss": 0.5899, "step": 23793 }, { "epoch": 0.730931096980309, "grad_norm": 0.36376190185546875, "learning_rate": 1.4130197307797572e-05, "loss": 0.5315, "step": 23794 }, { "epoch": 0.7309618161152582, "grad_norm": 0.38832175731658936, "learning_rate": 1.4129757171634788e-05, "loss": 0.6767, "step": 23795 }, { "epoch": 0.7309925352502074, "grad_norm": 0.3294855058193207, "learning_rate": 1.4129317025826665e-05, "loss": 0.4725, "step": 23796 }, { "epoch": 0.7310232543851565, "grad_norm": 0.3974390923976898, "learning_rate": 1.4128876870374235e-05, "loss": 0.5249, "step": 23797 }, { "epoch": 0.7310539735201057, "grad_norm": 0.33673760294914246, "learning_rate": 1.4128436705278523e-05, "loss": 0.5093, "step": 23798 }, { "epoch": 0.7310846926550548, "grad_norm": 0.3875851333141327, "learning_rate": 1.412799653054056e-05, "loss": 0.5226, "step": 23799 }, { "epoch": 0.7311154117900039, "grad_norm": 0.3860240876674652, "learning_rate": 1.4127556346161372e-05, "loss": 0.5315, "step": 23800 }, { "epoch": 0.7311461309249532, "grad_norm": 0.3507285714149475, "learning_rate": 1.4127116152141987e-05, "loss": 0.5334, "step": 23801 }, { "epoch": 0.7311768500599023, "grad_norm": 0.32238373160362244, "learning_rate": 1.4126675948483437e-05, "loss": 0.4495, "step": 23802 }, { "epoch": 0.7312075691948515, "grad_norm": 1.1306601762771606, "learning_rate": 1.4126235735186744e-05, "loss": 0.6395, "step": 23803 }, { "epoch": 0.7312382883298006, "grad_norm": 0.37212640047073364, "learning_rate": 1.4125795512252942e-05, "loss": 0.4793, "step": 23804 }, { "epoch": 0.7312690074647498, "grad_norm": 0.35936444997787476, "learning_rate": 1.4125355279683053e-05, "loss": 0.4765, "step": 23805 }, { "epoch": 0.731299726599699, "grad_norm": 0.37972119450569153, "learning_rate": 1.4124915037478113e-05, "loss": 0.5499, "step": 23806 }, { "epoch": 0.7313304457346481, "grad_norm": 0.37695568799972534, "learning_rate": 1.4124474785639141e-05, "loss": 0.5386, "step": 23807 }, { "epoch": 0.7313611648695972, "grad_norm": 0.36910682916641235, "learning_rate": 1.4124034524167176e-05, "loss": 0.5308, "step": 23808 }, { "epoch": 0.7313918840045465, "grad_norm": 0.4456002414226532, "learning_rate": 1.4123594253063236e-05, "loss": 0.6161, "step": 23809 }, { "epoch": 0.7314226031394956, "grad_norm": 0.4325506091117859, "learning_rate": 1.4123153972328357e-05, "loss": 0.5403, "step": 23810 }, { "epoch": 0.7314533222744447, "grad_norm": 0.3282666504383087, "learning_rate": 1.412271368196356e-05, "loss": 0.5588, "step": 23811 }, { "epoch": 0.7314840414093939, "grad_norm": 0.34438541531562805, "learning_rate": 1.412227338196988e-05, "loss": 0.5014, "step": 23812 }, { "epoch": 0.7315147605443431, "grad_norm": 0.42259544134140015, "learning_rate": 1.4121833072348341e-05, "loss": 0.5308, "step": 23813 }, { "epoch": 0.7315454796792922, "grad_norm": 0.3705318868160248, "learning_rate": 1.4121392753099975e-05, "loss": 0.4601, "step": 23814 }, { "epoch": 0.7315761988142414, "grad_norm": 0.4051368534564972, "learning_rate": 1.4120952424225807e-05, "loss": 0.5993, "step": 23815 }, { "epoch": 0.7316069179491905, "grad_norm": 0.3893367648124695, "learning_rate": 1.4120512085726866e-05, "loss": 0.571, "step": 23816 }, { "epoch": 0.7316376370841398, "grad_norm": 0.6370880007743835, "learning_rate": 1.412007173760418e-05, "loss": 0.5077, "step": 23817 }, { "epoch": 0.7316683562190889, "grad_norm": 0.3745144307613373, "learning_rate": 1.411963137985878e-05, "loss": 0.5648, "step": 23818 }, { "epoch": 0.731699075354038, "grad_norm": 0.44569480419158936, "learning_rate": 1.4119191012491694e-05, "loss": 0.6108, "step": 23819 }, { "epoch": 0.7317297944889872, "grad_norm": 0.3420185148715973, "learning_rate": 1.411875063550395e-05, "loss": 0.579, "step": 23820 }, { "epoch": 0.7317605136239363, "grad_norm": 0.3672969341278076, "learning_rate": 1.4118310248896577e-05, "loss": 0.6462, "step": 23821 }, { "epoch": 0.7317912327588855, "grad_norm": 0.3594294786453247, "learning_rate": 1.4117869852670603e-05, "loss": 0.4922, "step": 23822 }, { "epoch": 0.7318219518938347, "grad_norm": 0.3354698717594147, "learning_rate": 1.4117429446827053e-05, "loss": 0.4826, "step": 23823 }, { "epoch": 0.7318526710287838, "grad_norm": 0.38058674335479736, "learning_rate": 1.411698903136696e-05, "loss": 0.6047, "step": 23824 }, { "epoch": 0.7318833901637329, "grad_norm": 0.3777005076408386, "learning_rate": 1.4116548606291355e-05, "loss": 0.5904, "step": 23825 }, { "epoch": 0.7319141092986822, "grad_norm": 0.36339858174324036, "learning_rate": 1.4116108171601259e-05, "loss": 0.5605, "step": 23826 }, { "epoch": 0.7319448284336313, "grad_norm": 0.3897477686405182, "learning_rate": 1.4115667727297709e-05, "loss": 0.4936, "step": 23827 }, { "epoch": 0.7319755475685805, "grad_norm": 0.38832777738571167, "learning_rate": 1.4115227273381726e-05, "loss": 0.5864, "step": 23828 }, { "epoch": 0.7320062667035296, "grad_norm": 1.050102949142456, "learning_rate": 1.4114786809854346e-05, "loss": 0.5752, "step": 23829 }, { "epoch": 0.7320369858384788, "grad_norm": 0.36115455627441406, "learning_rate": 1.4114346336716588e-05, "loss": 0.5987, "step": 23830 }, { "epoch": 0.732067704973428, "grad_norm": 0.4233711361885071, "learning_rate": 1.4113905853969492e-05, "loss": 0.4848, "step": 23831 }, { "epoch": 0.7320984241083771, "grad_norm": 0.530774712562561, "learning_rate": 1.411346536161408e-05, "loss": 0.5446, "step": 23832 }, { "epoch": 0.7321291432433262, "grad_norm": 0.4054495692253113, "learning_rate": 1.4113024859651383e-05, "loss": 0.5227, "step": 23833 }, { "epoch": 0.7321598623782755, "grad_norm": 0.3408663272857666, "learning_rate": 1.4112584348082427e-05, "loss": 0.4912, "step": 23834 }, { "epoch": 0.7321905815132246, "grad_norm": 0.40786978602409363, "learning_rate": 1.4112143826908245e-05, "loss": 0.5043, "step": 23835 }, { "epoch": 0.7322213006481737, "grad_norm": 0.3754103481769562, "learning_rate": 1.4111703296129865e-05, "loss": 0.6351, "step": 23836 }, { "epoch": 0.7322520197831229, "grad_norm": 0.3712635934352875, "learning_rate": 1.4111262755748313e-05, "loss": 0.5846, "step": 23837 }, { "epoch": 0.732282738918072, "grad_norm": 0.38400259613990784, "learning_rate": 1.411082220576462e-05, "loss": 0.501, "step": 23838 }, { "epoch": 0.7323134580530213, "grad_norm": 0.39701202511787415, "learning_rate": 1.4110381646179812e-05, "loss": 0.5717, "step": 23839 }, { "epoch": 0.7323441771879704, "grad_norm": 0.4084271192550659, "learning_rate": 1.4109941076994926e-05, "loss": 0.5388, "step": 23840 }, { "epoch": 0.7323748963229195, "grad_norm": 0.4035400152206421, "learning_rate": 1.4109500498210982e-05, "loss": 0.5991, "step": 23841 }, { "epoch": 0.7324056154578688, "grad_norm": 0.31270331144332886, "learning_rate": 1.4109059909829013e-05, "loss": 0.4851, "step": 23842 }, { "epoch": 0.7324363345928179, "grad_norm": 0.4322281777858734, "learning_rate": 1.4108619311850048e-05, "loss": 0.5145, "step": 23843 }, { "epoch": 0.732467053727767, "grad_norm": 0.3824089765548706, "learning_rate": 1.4108178704275117e-05, "loss": 0.5024, "step": 23844 }, { "epoch": 0.7324977728627162, "grad_norm": 0.34385570883750916, "learning_rate": 1.4107738087105247e-05, "loss": 0.5376, "step": 23845 }, { "epoch": 0.7325284919976653, "grad_norm": 0.36713412404060364, "learning_rate": 1.4107297460341465e-05, "loss": 0.5681, "step": 23846 }, { "epoch": 0.7325592111326145, "grad_norm": 0.35734403133392334, "learning_rate": 1.4106856823984806e-05, "loss": 0.5191, "step": 23847 }, { "epoch": 0.7325899302675637, "grad_norm": 0.35544246435165405, "learning_rate": 1.4106416178036295e-05, "loss": 0.5044, "step": 23848 }, { "epoch": 0.7326206494025128, "grad_norm": 0.3531512916088104, "learning_rate": 1.4105975522496964e-05, "loss": 0.6401, "step": 23849 }, { "epoch": 0.7326513685374619, "grad_norm": 0.4275946617126465, "learning_rate": 1.4105534857367837e-05, "loss": 0.5802, "step": 23850 }, { "epoch": 0.7326820876724112, "grad_norm": 0.3868359327316284, "learning_rate": 1.410509418264995e-05, "loss": 0.605, "step": 23851 }, { "epoch": 0.7327128068073603, "grad_norm": 0.4179460108280182, "learning_rate": 1.4104653498344325e-05, "loss": 0.487, "step": 23852 }, { "epoch": 0.7327435259423095, "grad_norm": 0.398408442735672, "learning_rate": 1.4104212804452e-05, "loss": 0.5813, "step": 23853 }, { "epoch": 0.7327742450772586, "grad_norm": 0.3546433448791504, "learning_rate": 1.4103772100973995e-05, "loss": 0.5445, "step": 23854 }, { "epoch": 0.7328049642122078, "grad_norm": 0.5970355272293091, "learning_rate": 1.4103331387911343e-05, "loss": 0.4531, "step": 23855 }, { "epoch": 0.732835683347157, "grad_norm": 0.3688204288482666, "learning_rate": 1.4102890665265079e-05, "loss": 0.594, "step": 23856 }, { "epoch": 0.7328664024821061, "grad_norm": 0.3299977481365204, "learning_rate": 1.4102449933036224e-05, "loss": 0.5054, "step": 23857 }, { "epoch": 0.7328971216170552, "grad_norm": 0.35637158155441284, "learning_rate": 1.4102009191225812e-05, "loss": 0.5579, "step": 23858 }, { "epoch": 0.7329278407520045, "grad_norm": 1.057495355606079, "learning_rate": 1.410156843983487e-05, "loss": 0.5133, "step": 23859 }, { "epoch": 0.7329585598869536, "grad_norm": 0.33738529682159424, "learning_rate": 1.4101127678864429e-05, "loss": 0.5926, "step": 23860 }, { "epoch": 0.7329892790219027, "grad_norm": 0.3989870250225067, "learning_rate": 1.4100686908315517e-05, "loss": 0.5976, "step": 23861 }, { "epoch": 0.7330199981568519, "grad_norm": 0.46590688824653625, "learning_rate": 1.4100246128189165e-05, "loss": 0.6201, "step": 23862 }, { "epoch": 0.733050717291801, "grad_norm": 0.37158089876174927, "learning_rate": 1.4099805338486402e-05, "loss": 0.6296, "step": 23863 }, { "epoch": 0.7330814364267503, "grad_norm": 0.3727044463157654, "learning_rate": 1.4099364539208258e-05, "loss": 0.5229, "step": 23864 }, { "epoch": 0.7331121555616994, "grad_norm": 1.0757319927215576, "learning_rate": 1.4098923730355762e-05, "loss": 0.6049, "step": 23865 }, { "epoch": 0.7331428746966485, "grad_norm": 0.3643920421600342, "learning_rate": 1.409848291192994e-05, "loss": 0.6029, "step": 23866 }, { "epoch": 0.7331735938315977, "grad_norm": 0.4575008749961853, "learning_rate": 1.4098042083931832e-05, "loss": 0.6498, "step": 23867 }, { "epoch": 0.7332043129665469, "grad_norm": 0.31003108620643616, "learning_rate": 1.4097601246362453e-05, "loss": 0.6027, "step": 23868 }, { "epoch": 0.733235032101496, "grad_norm": 0.38965171575546265, "learning_rate": 1.4097160399222842e-05, "loss": 0.6407, "step": 23869 }, { "epoch": 0.7332657512364452, "grad_norm": 0.3760623335838318, "learning_rate": 1.4096719542514029e-05, "loss": 0.6064, "step": 23870 }, { "epoch": 0.7332964703713943, "grad_norm": 0.39543598890304565, "learning_rate": 1.4096278676237042e-05, "loss": 0.5293, "step": 23871 }, { "epoch": 0.7333271895063435, "grad_norm": 0.35998085141181946, "learning_rate": 1.4095837800392907e-05, "loss": 0.6313, "step": 23872 }, { "epoch": 0.7333579086412927, "grad_norm": 0.41826364398002625, "learning_rate": 1.4095396914982655e-05, "loss": 0.5654, "step": 23873 }, { "epoch": 0.7333886277762418, "grad_norm": 0.3970765173435211, "learning_rate": 1.4094956020007322e-05, "loss": 0.615, "step": 23874 }, { "epoch": 0.7334193469111909, "grad_norm": 0.40530863404273987, "learning_rate": 1.4094515115467931e-05, "loss": 0.6496, "step": 23875 }, { "epoch": 0.7334500660461402, "grad_norm": 0.35092347860336304, "learning_rate": 1.4094074201365515e-05, "loss": 0.5972, "step": 23876 }, { "epoch": 0.7334807851810893, "grad_norm": 0.3528945744037628, "learning_rate": 1.4093633277701102e-05, "loss": 0.4479, "step": 23877 }, { "epoch": 0.7335115043160385, "grad_norm": 0.349943608045578, "learning_rate": 1.4093192344475724e-05, "loss": 0.484, "step": 23878 }, { "epoch": 0.7335422234509876, "grad_norm": 0.38208290934562683, "learning_rate": 1.4092751401690409e-05, "loss": 0.5285, "step": 23879 }, { "epoch": 0.7335729425859367, "grad_norm": 0.3732461929321289, "learning_rate": 1.4092310449346186e-05, "loss": 0.519, "step": 23880 }, { "epoch": 0.733603661720886, "grad_norm": 0.4067455232143402, "learning_rate": 1.4091869487444088e-05, "loss": 0.5913, "step": 23881 }, { "epoch": 0.7336343808558351, "grad_norm": 0.3742539882659912, "learning_rate": 1.409142851598514e-05, "loss": 0.5907, "step": 23882 }, { "epoch": 0.7336650999907842, "grad_norm": 0.3537377715110779, "learning_rate": 1.4090987534970377e-05, "loss": 0.4751, "step": 23883 }, { "epoch": 0.7336958191257335, "grad_norm": 0.37998679280281067, "learning_rate": 1.4090546544400827e-05, "loss": 0.6438, "step": 23884 }, { "epoch": 0.7337265382606826, "grad_norm": 0.4697338044643402, "learning_rate": 1.4090105544277522e-05, "loss": 0.6412, "step": 23885 }, { "epoch": 0.7337572573956317, "grad_norm": 0.3450888693332672, "learning_rate": 1.4089664534601483e-05, "loss": 0.5243, "step": 23886 }, { "epoch": 0.7337879765305809, "grad_norm": 0.34048396348953247, "learning_rate": 1.4089223515373753e-05, "loss": 0.5299, "step": 23887 }, { "epoch": 0.73381869566553, "grad_norm": 0.3681848347187042, "learning_rate": 1.4088782486595353e-05, "loss": 0.6237, "step": 23888 }, { "epoch": 0.7338494148004793, "grad_norm": 0.3530117869377136, "learning_rate": 1.408834144826732e-05, "loss": 0.5713, "step": 23889 }, { "epoch": 0.7338801339354284, "grad_norm": 0.33086907863616943, "learning_rate": 1.4087900400390674e-05, "loss": 0.5516, "step": 23890 }, { "epoch": 0.7339108530703775, "grad_norm": 0.3624996542930603, "learning_rate": 1.4087459342966454e-05, "loss": 0.5364, "step": 23891 }, { "epoch": 0.7339415722053267, "grad_norm": 0.46207231283187866, "learning_rate": 1.408701827599569e-05, "loss": 0.5629, "step": 23892 }, { "epoch": 0.7339722913402759, "grad_norm": 0.34908565878868103, "learning_rate": 1.4086577199479405e-05, "loss": 0.5607, "step": 23893 }, { "epoch": 0.734003010475225, "grad_norm": 0.344638854265213, "learning_rate": 1.4086136113418636e-05, "loss": 0.5451, "step": 23894 }, { "epoch": 0.7340337296101742, "grad_norm": 0.37586358189582825, "learning_rate": 1.408569501781441e-05, "loss": 0.5665, "step": 23895 }, { "epoch": 0.7340644487451233, "grad_norm": 0.3607143759727478, "learning_rate": 1.4085253912667759e-05, "loss": 0.5365, "step": 23896 }, { "epoch": 0.7340951678800725, "grad_norm": 0.34121307730674744, "learning_rate": 1.4084812797979713e-05, "loss": 0.5422, "step": 23897 }, { "epoch": 0.7341258870150217, "grad_norm": 0.37564563751220703, "learning_rate": 1.4084371673751298e-05, "loss": 0.5317, "step": 23898 }, { "epoch": 0.7341566061499708, "grad_norm": 0.36113718152046204, "learning_rate": 1.408393053998355e-05, "loss": 0.5679, "step": 23899 }, { "epoch": 0.7341873252849199, "grad_norm": 0.39385986328125, "learning_rate": 1.4083489396677496e-05, "loss": 0.5464, "step": 23900 }, { "epoch": 0.7342180444198692, "grad_norm": 0.41663163900375366, "learning_rate": 1.4083048243834167e-05, "loss": 0.5627, "step": 23901 }, { "epoch": 0.7342487635548183, "grad_norm": 0.38099610805511475, "learning_rate": 1.4082607081454595e-05, "loss": 0.5574, "step": 23902 }, { "epoch": 0.7342794826897675, "grad_norm": 0.35265451669692993, "learning_rate": 1.408216590953981e-05, "loss": 0.5171, "step": 23903 }, { "epoch": 0.7343102018247166, "grad_norm": 0.4928490221500397, "learning_rate": 1.4081724728090838e-05, "loss": 0.5027, "step": 23904 }, { "epoch": 0.7343409209596657, "grad_norm": 0.3724992573261261, "learning_rate": 1.4081283537108713e-05, "loss": 0.5024, "step": 23905 }, { "epoch": 0.734371640094615, "grad_norm": 0.3969669044017792, "learning_rate": 1.4080842336594468e-05, "loss": 0.5283, "step": 23906 }, { "epoch": 0.7344023592295641, "grad_norm": 0.3377130925655365, "learning_rate": 1.4080401126549132e-05, "loss": 0.5453, "step": 23907 }, { "epoch": 0.7344330783645132, "grad_norm": 0.35825300216674805, "learning_rate": 1.407995990697373e-05, "loss": 0.5629, "step": 23908 }, { "epoch": 0.7344637974994624, "grad_norm": 0.34303900599479675, "learning_rate": 1.4079518677869298e-05, "loss": 0.5566, "step": 23909 }, { "epoch": 0.7344945166344116, "grad_norm": 0.6520171165466309, "learning_rate": 1.4079077439236866e-05, "loss": 0.6145, "step": 23910 }, { "epoch": 0.7345252357693607, "grad_norm": 0.349319726228714, "learning_rate": 1.4078636191077464e-05, "loss": 0.5857, "step": 23911 }, { "epoch": 0.7345559549043099, "grad_norm": 0.36469724774360657, "learning_rate": 1.4078194933392122e-05, "loss": 0.5454, "step": 23912 }, { "epoch": 0.734586674039259, "grad_norm": 0.3676665127277374, "learning_rate": 1.4077753666181871e-05, "loss": 0.6046, "step": 23913 }, { "epoch": 0.7346173931742083, "grad_norm": 0.33843523263931274, "learning_rate": 1.4077312389447742e-05, "loss": 0.5275, "step": 23914 }, { "epoch": 0.7346481123091574, "grad_norm": 0.4022136330604553, "learning_rate": 1.4076871103190764e-05, "loss": 0.5517, "step": 23915 }, { "epoch": 0.7346788314441065, "grad_norm": 0.3610767722129822, "learning_rate": 1.4076429807411969e-05, "loss": 0.5871, "step": 23916 }, { "epoch": 0.7347095505790557, "grad_norm": 0.36382797360420227, "learning_rate": 1.407598850211239e-05, "loss": 0.534, "step": 23917 }, { "epoch": 0.7347402697140049, "grad_norm": 0.343461275100708, "learning_rate": 1.407554718729305e-05, "loss": 0.5621, "step": 23918 }, { "epoch": 0.734770988848954, "grad_norm": 0.34922873973846436, "learning_rate": 1.407510586295499e-05, "loss": 0.5966, "step": 23919 }, { "epoch": 0.7348017079839032, "grad_norm": 0.34182265400886536, "learning_rate": 1.4074664529099233e-05, "loss": 0.5078, "step": 23920 }, { "epoch": 0.7348324271188523, "grad_norm": 0.3548901379108429, "learning_rate": 1.4074223185726811e-05, "loss": 0.6044, "step": 23921 }, { "epoch": 0.7348631462538014, "grad_norm": 0.35314759612083435, "learning_rate": 1.4073781832838759e-05, "loss": 0.529, "step": 23922 }, { "epoch": 0.7348938653887507, "grad_norm": 0.35011303424835205, "learning_rate": 1.4073340470436106e-05, "loss": 0.5359, "step": 23923 }, { "epoch": 0.7349245845236998, "grad_norm": 0.36161237955093384, "learning_rate": 1.4072899098519879e-05, "loss": 0.5631, "step": 23924 }, { "epoch": 0.7349553036586489, "grad_norm": 0.3432294428348541, "learning_rate": 1.4072457717091112e-05, "loss": 0.5836, "step": 23925 }, { "epoch": 0.7349860227935981, "grad_norm": 0.38183504343032837, "learning_rate": 1.407201632615084e-05, "loss": 0.5457, "step": 23926 }, { "epoch": 0.7350167419285473, "grad_norm": 0.4374266564846039, "learning_rate": 1.4071574925700082e-05, "loss": 0.6513, "step": 23927 }, { "epoch": 0.7350474610634965, "grad_norm": 0.4254762828350067, "learning_rate": 1.4071133515739881e-05, "loss": 0.4994, "step": 23928 }, { "epoch": 0.7350781801984456, "grad_norm": 0.365461528301239, "learning_rate": 1.4070692096271261e-05, "loss": 0.6058, "step": 23929 }, { "epoch": 0.7351088993333947, "grad_norm": 0.3637332618236542, "learning_rate": 1.4070250667295258e-05, "loss": 0.4784, "step": 23930 }, { "epoch": 0.735139618468344, "grad_norm": 0.426251083612442, "learning_rate": 1.4069809228812901e-05, "loss": 0.5811, "step": 23931 }, { "epoch": 0.7351703376032931, "grad_norm": 0.3704899847507477, "learning_rate": 1.4069367780825216e-05, "loss": 0.4917, "step": 23932 }, { "epoch": 0.7352010567382422, "grad_norm": 0.3922511041164398, "learning_rate": 1.4068926323333243e-05, "loss": 0.514, "step": 23933 }, { "epoch": 0.7352317758731914, "grad_norm": 0.32643023133277893, "learning_rate": 1.4068484856338004e-05, "loss": 0.4517, "step": 23934 }, { "epoch": 0.7352624950081406, "grad_norm": 0.33544814586639404, "learning_rate": 1.4068043379840538e-05, "loss": 0.5871, "step": 23935 }, { "epoch": 0.7352932141430897, "grad_norm": 0.3680316209793091, "learning_rate": 1.4067601893841872e-05, "loss": 0.4923, "step": 23936 }, { "epoch": 0.7353239332780389, "grad_norm": 0.3586023449897766, "learning_rate": 1.4067160398343038e-05, "loss": 0.4944, "step": 23937 }, { "epoch": 0.735354652412988, "grad_norm": 0.366202175617218, "learning_rate": 1.4066718893345064e-05, "loss": 0.5954, "step": 23938 }, { "epoch": 0.7353853715479373, "grad_norm": 0.36065879464149475, "learning_rate": 1.4066277378848987e-05, "loss": 0.5607, "step": 23939 }, { "epoch": 0.7354160906828864, "grad_norm": 0.3939714729785919, "learning_rate": 1.4065835854855831e-05, "loss": 0.5717, "step": 23940 }, { "epoch": 0.7354468098178355, "grad_norm": 0.3498212397098541, "learning_rate": 1.4065394321366637e-05, "loss": 0.6001, "step": 23941 }, { "epoch": 0.7354775289527847, "grad_norm": 0.3661903142929077, "learning_rate": 1.4064952778382428e-05, "loss": 0.5333, "step": 23942 }, { "epoch": 0.7355082480877339, "grad_norm": 0.3715902268886566, "learning_rate": 1.4064511225904238e-05, "loss": 0.6142, "step": 23943 }, { "epoch": 0.735538967222683, "grad_norm": 0.3283309042453766, "learning_rate": 1.40640696639331e-05, "loss": 0.5596, "step": 23944 }, { "epoch": 0.7355696863576322, "grad_norm": 0.3458085358142853, "learning_rate": 1.4063628092470038e-05, "loss": 0.5763, "step": 23945 }, { "epoch": 0.7356004054925813, "grad_norm": 0.401968777179718, "learning_rate": 1.4063186511516093e-05, "loss": 0.5635, "step": 23946 }, { "epoch": 0.7356311246275304, "grad_norm": 0.3870897591114044, "learning_rate": 1.406274492107229e-05, "loss": 0.5997, "step": 23947 }, { "epoch": 0.7356618437624797, "grad_norm": 0.34694695472717285, "learning_rate": 1.4062303321139666e-05, "loss": 0.5776, "step": 23948 }, { "epoch": 0.7356925628974288, "grad_norm": 0.3902072310447693, "learning_rate": 1.4061861711719248e-05, "loss": 0.557, "step": 23949 }, { "epoch": 0.735723282032378, "grad_norm": 0.45290282368659973, "learning_rate": 1.4061420092812065e-05, "loss": 0.586, "step": 23950 }, { "epoch": 0.7357540011673271, "grad_norm": 0.349902868270874, "learning_rate": 1.4060978464419157e-05, "loss": 0.4904, "step": 23951 }, { "epoch": 0.7357847203022763, "grad_norm": 0.4062516689300537, "learning_rate": 1.4060536826541547e-05, "loss": 0.5088, "step": 23952 }, { "epoch": 0.7358154394372255, "grad_norm": 0.374416321516037, "learning_rate": 1.4060095179180268e-05, "loss": 0.5091, "step": 23953 }, { "epoch": 0.7358461585721746, "grad_norm": 0.36285632848739624, "learning_rate": 1.4059653522336353e-05, "loss": 0.537, "step": 23954 }, { "epoch": 0.7358768777071237, "grad_norm": 0.3294793963432312, "learning_rate": 1.4059211856010838e-05, "loss": 0.528, "step": 23955 }, { "epoch": 0.735907596842073, "grad_norm": 0.36854037642478943, "learning_rate": 1.4058770180204748e-05, "loss": 0.5984, "step": 23956 }, { "epoch": 0.7359383159770221, "grad_norm": 0.3725568652153015, "learning_rate": 1.4058328494919117e-05, "loss": 0.6137, "step": 23957 }, { "epoch": 0.7359690351119712, "grad_norm": 0.3573515713214874, "learning_rate": 1.4057886800154976e-05, "loss": 0.5776, "step": 23958 }, { "epoch": 0.7359997542469204, "grad_norm": 0.371776282787323, "learning_rate": 1.4057445095913357e-05, "loss": 0.5518, "step": 23959 }, { "epoch": 0.7360304733818696, "grad_norm": 0.34047186374664307, "learning_rate": 1.4057003382195294e-05, "loss": 0.6296, "step": 23960 }, { "epoch": 0.7360611925168187, "grad_norm": 0.37181127071380615, "learning_rate": 1.4056561659001813e-05, "loss": 0.5008, "step": 23961 }, { "epoch": 0.7360919116517679, "grad_norm": 0.3385287821292877, "learning_rate": 1.4056119926333952e-05, "loss": 0.5483, "step": 23962 }, { "epoch": 0.736122630786717, "grad_norm": 0.3728054463863373, "learning_rate": 1.4055678184192732e-05, "loss": 0.5809, "step": 23963 }, { "epoch": 0.7361533499216663, "grad_norm": 0.3628765046596527, "learning_rate": 1.40552364325792e-05, "loss": 0.5829, "step": 23964 }, { "epoch": 0.7361840690566154, "grad_norm": 0.38654211163520813, "learning_rate": 1.4054794671494376e-05, "loss": 0.5268, "step": 23965 }, { "epoch": 0.7362147881915645, "grad_norm": 0.41645652055740356, "learning_rate": 1.4054352900939299e-05, "loss": 0.5419, "step": 23966 }, { "epoch": 0.7362455073265137, "grad_norm": 0.35983800888061523, "learning_rate": 1.4053911120914996e-05, "loss": 0.5183, "step": 23967 }, { "epoch": 0.7362762264614628, "grad_norm": 0.39624685049057007, "learning_rate": 1.40534693314225e-05, "loss": 0.6474, "step": 23968 }, { "epoch": 0.736306945596412, "grad_norm": 0.3621513545513153, "learning_rate": 1.4053027532462847e-05, "loss": 0.55, "step": 23969 }, { "epoch": 0.7363376647313612, "grad_norm": 0.34866905212402344, "learning_rate": 1.405258572403706e-05, "loss": 0.5725, "step": 23970 }, { "epoch": 0.7363683838663103, "grad_norm": 0.35034066438674927, "learning_rate": 1.405214390614618e-05, "loss": 0.5722, "step": 23971 }, { "epoch": 0.7363991030012594, "grad_norm": 0.4435269832611084, "learning_rate": 1.4051702078791232e-05, "loss": 0.5311, "step": 23972 }, { "epoch": 0.7364298221362087, "grad_norm": 1.0760148763656616, "learning_rate": 1.4051260241973253e-05, "loss": 0.6257, "step": 23973 }, { "epoch": 0.7364605412711578, "grad_norm": 0.37539005279541016, "learning_rate": 1.4050818395693271e-05, "loss": 0.5828, "step": 23974 }, { "epoch": 0.736491260406107, "grad_norm": 0.3581584095954895, "learning_rate": 1.4050376539952322e-05, "loss": 0.5046, "step": 23975 }, { "epoch": 0.7365219795410561, "grad_norm": 0.37505874037742615, "learning_rate": 1.4049934674751434e-05, "loss": 0.6339, "step": 23976 }, { "epoch": 0.7365526986760053, "grad_norm": 0.34565988183021545, "learning_rate": 1.404949280009164e-05, "loss": 0.5184, "step": 23977 }, { "epoch": 0.7365834178109545, "grad_norm": 0.3221534788608551, "learning_rate": 1.4049050915973974e-05, "loss": 0.4942, "step": 23978 }, { "epoch": 0.7366141369459036, "grad_norm": 0.38721615076065063, "learning_rate": 1.4048609022399467e-05, "loss": 0.6111, "step": 23979 }, { "epoch": 0.7366448560808527, "grad_norm": 0.3706909120082855, "learning_rate": 1.4048167119369152e-05, "loss": 0.5197, "step": 23980 }, { "epoch": 0.736675575215802, "grad_norm": 0.3615691363811493, "learning_rate": 1.4047725206884053e-05, "loss": 0.6521, "step": 23981 }, { "epoch": 0.7367062943507511, "grad_norm": 0.37460437417030334, "learning_rate": 1.4047283284945216e-05, "loss": 0.5817, "step": 23982 }, { "epoch": 0.7367370134857002, "grad_norm": 0.3444964289665222, "learning_rate": 1.4046841353553664e-05, "loss": 0.5133, "step": 23983 }, { "epoch": 0.7367677326206494, "grad_norm": 0.35620808601379395, "learning_rate": 1.4046399412710434e-05, "loss": 0.5859, "step": 23984 }, { "epoch": 0.7367984517555985, "grad_norm": 0.3288174867630005, "learning_rate": 1.4045957462416553e-05, "loss": 0.4673, "step": 23985 }, { "epoch": 0.7368291708905477, "grad_norm": 0.38143235445022583, "learning_rate": 1.4045515502673057e-05, "loss": 0.5743, "step": 23986 }, { "epoch": 0.7368598900254969, "grad_norm": 0.3475135266780853, "learning_rate": 1.4045073533480978e-05, "loss": 0.489, "step": 23987 }, { "epoch": 0.736890609160446, "grad_norm": 0.3271581530570984, "learning_rate": 1.4044631554841344e-05, "loss": 0.5285, "step": 23988 }, { "epoch": 0.7369213282953953, "grad_norm": 0.47731634974479675, "learning_rate": 1.4044189566755193e-05, "loss": 0.5748, "step": 23989 }, { "epoch": 0.7369520474303444, "grad_norm": 0.35412389039993286, "learning_rate": 1.4043747569223553e-05, "loss": 0.5991, "step": 23990 }, { "epoch": 0.7369827665652935, "grad_norm": 0.3622449040412903, "learning_rate": 1.404330556224746e-05, "loss": 0.5917, "step": 23991 }, { "epoch": 0.7370134857002427, "grad_norm": 0.34551748633384705, "learning_rate": 1.4042863545827943e-05, "loss": 0.5689, "step": 23992 }, { "epoch": 0.7370442048351918, "grad_norm": 0.3848418593406677, "learning_rate": 1.4042421519966039e-05, "loss": 0.5802, "step": 23993 }, { "epoch": 0.737074923970141, "grad_norm": 0.36744365096092224, "learning_rate": 1.404197948466277e-05, "loss": 0.5424, "step": 23994 }, { "epoch": 0.7371056431050902, "grad_norm": 0.36542680859565735, "learning_rate": 1.4041537439919184e-05, "loss": 0.6048, "step": 23995 }, { "epoch": 0.7371363622400393, "grad_norm": 0.38360899686813354, "learning_rate": 1.40410953857363e-05, "loss": 0.5903, "step": 23996 }, { "epoch": 0.7371670813749884, "grad_norm": 0.4056641459465027, "learning_rate": 1.4040653322115158e-05, "loss": 0.591, "step": 23997 }, { "epoch": 0.7371978005099377, "grad_norm": 0.3301253914833069, "learning_rate": 1.4040211249056787e-05, "loss": 0.5179, "step": 23998 }, { "epoch": 0.7372285196448868, "grad_norm": 0.32430121302604675, "learning_rate": 1.403976916656222e-05, "loss": 0.4936, "step": 23999 }, { "epoch": 0.737259238779836, "grad_norm": 0.35328444838523865, "learning_rate": 1.403932707463249e-05, "loss": 0.5415, "step": 24000 }, { "epoch": 0.7372899579147851, "grad_norm": 0.35685500502586365, "learning_rate": 1.4038884973268629e-05, "loss": 0.5547, "step": 24001 }, { "epoch": 0.7373206770497343, "grad_norm": 0.3723700940608978, "learning_rate": 1.403844286247167e-05, "loss": 0.6508, "step": 24002 }, { "epoch": 0.7373513961846835, "grad_norm": 0.37581154704093933, "learning_rate": 1.4038000742242647e-05, "loss": 0.5528, "step": 24003 }, { "epoch": 0.7373821153196326, "grad_norm": 0.45270076394081116, "learning_rate": 1.4037558612582592e-05, "loss": 0.4869, "step": 24004 }, { "epoch": 0.7374128344545817, "grad_norm": 0.34453266859054565, "learning_rate": 1.4037116473492536e-05, "loss": 0.6106, "step": 24005 }, { "epoch": 0.737443553589531, "grad_norm": 0.37359076738357544, "learning_rate": 1.4036674324973512e-05, "loss": 0.5018, "step": 24006 }, { "epoch": 0.7374742727244801, "grad_norm": 0.3359360992908478, "learning_rate": 1.4036232167026553e-05, "loss": 0.5359, "step": 24007 }, { "epoch": 0.7375049918594292, "grad_norm": 0.34190985560417175, "learning_rate": 1.403578999965269e-05, "loss": 0.5881, "step": 24008 }, { "epoch": 0.7375357109943784, "grad_norm": 0.362771213054657, "learning_rate": 1.4035347822852963e-05, "loss": 0.4929, "step": 24009 }, { "epoch": 0.7375664301293275, "grad_norm": 0.360774964094162, "learning_rate": 1.4034905636628395e-05, "loss": 0.5571, "step": 24010 }, { "epoch": 0.7375971492642767, "grad_norm": 0.3520717918872833, "learning_rate": 1.4034463440980024e-05, "loss": 0.4846, "step": 24011 }, { "epoch": 0.7376278683992259, "grad_norm": 0.4446204900741577, "learning_rate": 1.4034021235908885e-05, "loss": 0.5188, "step": 24012 }, { "epoch": 0.737658587534175, "grad_norm": 0.4092522859573364, "learning_rate": 1.4033579021416003e-05, "loss": 0.5168, "step": 24013 }, { "epoch": 0.7376893066691242, "grad_norm": 0.3499012589454651, "learning_rate": 1.4033136797502418e-05, "loss": 0.5616, "step": 24014 }, { "epoch": 0.7377200258040734, "grad_norm": 0.4429846405982971, "learning_rate": 1.4032694564169158e-05, "loss": 0.5637, "step": 24015 }, { "epoch": 0.7377507449390225, "grad_norm": 0.39011919498443604, "learning_rate": 1.4032252321417263e-05, "loss": 0.5589, "step": 24016 }, { "epoch": 0.7377814640739717, "grad_norm": 0.4570232033729553, "learning_rate": 1.4031810069247755e-05, "loss": 0.5826, "step": 24017 }, { "epoch": 0.7378121832089208, "grad_norm": 0.3443679213523865, "learning_rate": 1.4031367807661676e-05, "loss": 0.5308, "step": 24018 }, { "epoch": 0.73784290234387, "grad_norm": 0.3689989447593689, "learning_rate": 1.4030925536660056e-05, "loss": 0.5158, "step": 24019 }, { "epoch": 0.7378736214788192, "grad_norm": 0.4074837863445282, "learning_rate": 1.4030483256243927e-05, "loss": 0.599, "step": 24020 }, { "epoch": 0.7379043406137683, "grad_norm": 0.4306962788105011, "learning_rate": 1.4030040966414324e-05, "loss": 0.5188, "step": 24021 }, { "epoch": 0.7379350597487174, "grad_norm": 0.34936586022377014, "learning_rate": 1.4029598667172279e-05, "loss": 0.4699, "step": 24022 }, { "epoch": 0.7379657788836667, "grad_norm": 0.3359787166118622, "learning_rate": 1.4029156358518825e-05, "loss": 0.4988, "step": 24023 }, { "epoch": 0.7379964980186158, "grad_norm": 1.208280086517334, "learning_rate": 1.4028714040454993e-05, "loss": 0.5794, "step": 24024 }, { "epoch": 0.738027217153565, "grad_norm": 0.3483094573020935, "learning_rate": 1.4028271712981823e-05, "loss": 0.4516, "step": 24025 }, { "epoch": 0.7380579362885141, "grad_norm": 0.3598763942718506, "learning_rate": 1.4027829376100337e-05, "loss": 0.6699, "step": 24026 }, { "epoch": 0.7380886554234632, "grad_norm": 0.3392447531223297, "learning_rate": 1.4027387029811577e-05, "loss": 0.5702, "step": 24027 }, { "epoch": 0.7381193745584125, "grad_norm": 0.3769208788871765, "learning_rate": 1.4026944674116572e-05, "loss": 0.598, "step": 24028 }, { "epoch": 0.7381500936933616, "grad_norm": 0.45454928278923035, "learning_rate": 1.4026502309016358e-05, "loss": 0.5883, "step": 24029 }, { "epoch": 0.7381808128283107, "grad_norm": 0.3662364184856415, "learning_rate": 1.4026059934511968e-05, "loss": 0.5438, "step": 24030 }, { "epoch": 0.73821153196326, "grad_norm": 0.42357373237609863, "learning_rate": 1.4025617550604431e-05, "loss": 0.5645, "step": 24031 }, { "epoch": 0.7382422510982091, "grad_norm": 0.3918980360031128, "learning_rate": 1.4025175157294783e-05, "loss": 0.5833, "step": 24032 }, { "epoch": 0.7382729702331582, "grad_norm": 0.36832988262176514, "learning_rate": 1.402473275458406e-05, "loss": 0.5309, "step": 24033 }, { "epoch": 0.7383036893681074, "grad_norm": 0.41875535249710083, "learning_rate": 1.4024290342473293e-05, "loss": 0.5509, "step": 24034 }, { "epoch": 0.7383344085030565, "grad_norm": 0.35417771339416504, "learning_rate": 1.4023847920963509e-05, "loss": 0.5626, "step": 24035 }, { "epoch": 0.7383651276380057, "grad_norm": 0.3810676336288452, "learning_rate": 1.402340549005575e-05, "loss": 0.6445, "step": 24036 }, { "epoch": 0.7383958467729549, "grad_norm": 0.3971172869205475, "learning_rate": 1.4022963049751051e-05, "loss": 0.6079, "step": 24037 }, { "epoch": 0.738426565907904, "grad_norm": 0.35918736457824707, "learning_rate": 1.4022520600050435e-05, "loss": 0.5339, "step": 24038 }, { "epoch": 0.7384572850428532, "grad_norm": 0.3702136278152466, "learning_rate": 1.4022078140954945e-05, "loss": 0.6225, "step": 24039 }, { "epoch": 0.7384880041778024, "grad_norm": 0.3666830062866211, "learning_rate": 1.4021635672465608e-05, "loss": 0.5318, "step": 24040 }, { "epoch": 0.7385187233127515, "grad_norm": 0.4105644226074219, "learning_rate": 1.402119319458346e-05, "loss": 0.5735, "step": 24041 }, { "epoch": 0.7385494424477007, "grad_norm": 0.3340572714805603, "learning_rate": 1.4020750707309536e-05, "loss": 0.5327, "step": 24042 }, { "epoch": 0.7385801615826498, "grad_norm": 0.3777811527252197, "learning_rate": 1.4020308210644868e-05, "loss": 0.6017, "step": 24043 }, { "epoch": 0.738610880717599, "grad_norm": 0.43184736371040344, "learning_rate": 1.4019865704590488e-05, "loss": 0.5725, "step": 24044 }, { "epoch": 0.7386415998525482, "grad_norm": 0.4380151033401489, "learning_rate": 1.4019423189147432e-05, "loss": 0.5524, "step": 24045 }, { "epoch": 0.7386723189874973, "grad_norm": 0.3569099009037018, "learning_rate": 1.4018980664316732e-05, "loss": 0.529, "step": 24046 }, { "epoch": 0.7387030381224464, "grad_norm": 0.36972251534461975, "learning_rate": 1.4018538130099421e-05, "loss": 0.6117, "step": 24047 }, { "epoch": 0.7387337572573957, "grad_norm": 0.349970281124115, "learning_rate": 1.4018095586496538e-05, "loss": 0.5758, "step": 24048 }, { "epoch": 0.7387644763923448, "grad_norm": 0.37260302901268005, "learning_rate": 1.4017653033509107e-05, "loss": 0.5355, "step": 24049 }, { "epoch": 0.738795195527294, "grad_norm": 0.37481576204299927, "learning_rate": 1.401721047113817e-05, "loss": 0.5195, "step": 24050 }, { "epoch": 0.7388259146622431, "grad_norm": 0.33485570549964905, "learning_rate": 1.4016767899384753e-05, "loss": 0.4633, "step": 24051 }, { "epoch": 0.7388566337971922, "grad_norm": 0.368348628282547, "learning_rate": 1.4016325318249899e-05, "loss": 0.4829, "step": 24052 }, { "epoch": 0.7388873529321415, "grad_norm": 0.3314364552497864, "learning_rate": 1.4015882727734633e-05, "loss": 0.5465, "step": 24053 }, { "epoch": 0.7389180720670906, "grad_norm": 0.33065229654312134, "learning_rate": 1.4015440127839995e-05, "loss": 0.5199, "step": 24054 }, { "epoch": 0.7389487912020397, "grad_norm": 0.3633914589881897, "learning_rate": 1.4014997518567015e-05, "loss": 0.651, "step": 24055 }, { "epoch": 0.7389795103369889, "grad_norm": 0.37289178371429443, "learning_rate": 1.4014554899916728e-05, "loss": 0.5667, "step": 24056 }, { "epoch": 0.7390102294719381, "grad_norm": 0.36510995030403137, "learning_rate": 1.401411227189017e-05, "loss": 0.5635, "step": 24057 }, { "epoch": 0.7390409486068872, "grad_norm": 0.4184882640838623, "learning_rate": 1.4013669634488367e-05, "loss": 0.5594, "step": 24058 }, { "epoch": 0.7390716677418364, "grad_norm": 0.3926422595977783, "learning_rate": 1.4013226987712363e-05, "loss": 0.5119, "step": 24059 }, { "epoch": 0.7391023868767855, "grad_norm": 0.3785075545310974, "learning_rate": 1.4012784331563185e-05, "loss": 0.5328, "step": 24060 }, { "epoch": 0.7391331060117348, "grad_norm": 0.3395192325115204, "learning_rate": 1.4012341666041871e-05, "loss": 0.5307, "step": 24061 }, { "epoch": 0.7391638251466839, "grad_norm": 0.44648870825767517, "learning_rate": 1.401189899114945e-05, "loss": 0.6016, "step": 24062 }, { "epoch": 0.739194544281633, "grad_norm": 0.37489473819732666, "learning_rate": 1.4011456306886961e-05, "loss": 0.6054, "step": 24063 }, { "epoch": 0.7392252634165822, "grad_norm": 0.33771848678588867, "learning_rate": 1.4011013613255436e-05, "loss": 0.5683, "step": 24064 }, { "epoch": 0.7392559825515314, "grad_norm": 0.3283257484436035, "learning_rate": 1.4010570910255905e-05, "loss": 0.542, "step": 24065 }, { "epoch": 0.7392867016864805, "grad_norm": 0.334966242313385, "learning_rate": 1.401012819788941e-05, "loss": 0.6069, "step": 24066 }, { "epoch": 0.7393174208214297, "grad_norm": 0.35373446345329285, "learning_rate": 1.4009685476156977e-05, "loss": 0.5764, "step": 24067 }, { "epoch": 0.7393481399563788, "grad_norm": 0.5883303284645081, "learning_rate": 1.4009242745059646e-05, "loss": 0.6232, "step": 24068 }, { "epoch": 0.739378859091328, "grad_norm": 0.349556565284729, "learning_rate": 1.4008800004598446e-05, "loss": 0.5882, "step": 24069 }, { "epoch": 0.7394095782262772, "grad_norm": 0.33294105529785156, "learning_rate": 1.4008357254774416e-05, "loss": 0.4963, "step": 24070 }, { "epoch": 0.7394402973612263, "grad_norm": 0.3759443461894989, "learning_rate": 1.4007914495588586e-05, "loss": 0.5511, "step": 24071 }, { "epoch": 0.7394710164961754, "grad_norm": 0.36933282017707825, "learning_rate": 1.4007471727041994e-05, "loss": 0.5878, "step": 24072 }, { "epoch": 0.7395017356311246, "grad_norm": 0.48361724615097046, "learning_rate": 1.400702894913567e-05, "loss": 0.6043, "step": 24073 }, { "epoch": 0.7395324547660738, "grad_norm": 0.38287582993507385, "learning_rate": 1.400658616187065e-05, "loss": 0.6802, "step": 24074 }, { "epoch": 0.739563173901023, "grad_norm": 0.3953946530818939, "learning_rate": 1.400614336524797e-05, "loss": 0.5728, "step": 24075 }, { "epoch": 0.7395938930359721, "grad_norm": 0.3434072434902191, "learning_rate": 1.400570055926866e-05, "loss": 0.4521, "step": 24076 }, { "epoch": 0.7396246121709212, "grad_norm": 0.3852611482143402, "learning_rate": 1.4005257743933757e-05, "loss": 0.5015, "step": 24077 }, { "epoch": 0.7396553313058705, "grad_norm": 0.33754751086235046, "learning_rate": 1.4004814919244297e-05, "loss": 0.5, "step": 24078 }, { "epoch": 0.7396860504408196, "grad_norm": 0.384284108877182, "learning_rate": 1.400437208520131e-05, "loss": 0.5611, "step": 24079 }, { "epoch": 0.7397167695757687, "grad_norm": 0.34580808877944946, "learning_rate": 1.4003929241805834e-05, "loss": 0.5991, "step": 24080 }, { "epoch": 0.7397474887107179, "grad_norm": 0.3477906286716461, "learning_rate": 1.40034863890589e-05, "loss": 0.4796, "step": 24081 }, { "epoch": 0.7397782078456671, "grad_norm": 0.38307985663414, "learning_rate": 1.4003043526961547e-05, "loss": 0.4827, "step": 24082 }, { "epoch": 0.7398089269806162, "grad_norm": 0.3861304819583893, "learning_rate": 1.4002600655514802e-05, "loss": 0.6125, "step": 24083 }, { "epoch": 0.7398396461155654, "grad_norm": 0.38379088044166565, "learning_rate": 1.4002157774719707e-05, "loss": 0.5724, "step": 24084 }, { "epoch": 0.7398703652505145, "grad_norm": 0.3431858420372009, "learning_rate": 1.4001714884577288e-05, "loss": 0.4991, "step": 24085 }, { "epoch": 0.7399010843854638, "grad_norm": 0.7129083871841431, "learning_rate": 1.4001271985088589e-05, "loss": 0.6142, "step": 24086 }, { "epoch": 0.7399318035204129, "grad_norm": 0.40613263845443726, "learning_rate": 1.4000829076254638e-05, "loss": 0.6413, "step": 24087 }, { "epoch": 0.739962522655362, "grad_norm": 0.4554364085197449, "learning_rate": 1.4000386158076472e-05, "loss": 0.6276, "step": 24088 }, { "epoch": 0.7399932417903112, "grad_norm": 0.3417763411998749, "learning_rate": 1.3999943230555127e-05, "loss": 0.5591, "step": 24089 }, { "epoch": 0.7400239609252603, "grad_norm": 0.378839373588562, "learning_rate": 1.3999500293691633e-05, "loss": 0.5175, "step": 24090 }, { "epoch": 0.7400546800602095, "grad_norm": 0.3299786448478699, "learning_rate": 1.3999057347487024e-05, "loss": 0.5187, "step": 24091 }, { "epoch": 0.7400853991951587, "grad_norm": 0.35547545552253723, "learning_rate": 1.399861439194234e-05, "loss": 0.5167, "step": 24092 }, { "epoch": 0.7401161183301078, "grad_norm": 0.3292669355869293, "learning_rate": 1.3998171427058613e-05, "loss": 0.5668, "step": 24093 }, { "epoch": 0.7401468374650569, "grad_norm": 0.37461093068122864, "learning_rate": 1.3997728452836876e-05, "loss": 0.4791, "step": 24094 }, { "epoch": 0.7401775566000062, "grad_norm": 0.39083757996559143, "learning_rate": 1.3997285469278169e-05, "loss": 0.5454, "step": 24095 }, { "epoch": 0.7402082757349553, "grad_norm": 0.3868541419506073, "learning_rate": 1.399684247638352e-05, "loss": 0.544, "step": 24096 }, { "epoch": 0.7402389948699044, "grad_norm": 0.33153676986694336, "learning_rate": 1.3996399474153966e-05, "loss": 0.5815, "step": 24097 }, { "epoch": 0.7402697140048536, "grad_norm": 0.3644466698169708, "learning_rate": 1.3995956462590538e-05, "loss": 0.6358, "step": 24098 }, { "epoch": 0.7403004331398028, "grad_norm": 0.3573976755142212, "learning_rate": 1.3995513441694279e-05, "loss": 0.502, "step": 24099 }, { "epoch": 0.740331152274752, "grad_norm": 0.36074206233024597, "learning_rate": 1.3995070411466221e-05, "loss": 0.5414, "step": 24100 }, { "epoch": 0.7403618714097011, "grad_norm": 0.3397543132305145, "learning_rate": 1.3994627371907395e-05, "loss": 0.5651, "step": 24101 }, { "epoch": 0.7403925905446502, "grad_norm": 0.33978626132011414, "learning_rate": 1.3994184323018839e-05, "loss": 0.5611, "step": 24102 }, { "epoch": 0.7404233096795995, "grad_norm": 0.35022249817848206, "learning_rate": 1.3993741264801583e-05, "loss": 0.5617, "step": 24103 }, { "epoch": 0.7404540288145486, "grad_norm": 0.44543251395225525, "learning_rate": 1.3993298197256668e-05, "loss": 0.6228, "step": 24104 }, { "epoch": 0.7404847479494977, "grad_norm": 0.335196852684021, "learning_rate": 1.3992855120385125e-05, "loss": 0.5024, "step": 24105 }, { "epoch": 0.7405154670844469, "grad_norm": 0.3999618589878082, "learning_rate": 1.3992412034187992e-05, "loss": 0.6588, "step": 24106 }, { "epoch": 0.740546186219396, "grad_norm": 0.4161812961101532, "learning_rate": 1.39919689386663e-05, "loss": 0.5347, "step": 24107 }, { "epoch": 0.7405769053543452, "grad_norm": 0.36717838048934937, "learning_rate": 1.399152583382109e-05, "loss": 0.6098, "step": 24108 }, { "epoch": 0.7406076244892944, "grad_norm": 0.36016014218330383, "learning_rate": 1.3991082719653386e-05, "loss": 0.5633, "step": 24109 }, { "epoch": 0.7406383436242435, "grad_norm": 0.3586862087249756, "learning_rate": 1.3990639596164232e-05, "loss": 0.5873, "step": 24110 }, { "epoch": 0.7406690627591928, "grad_norm": 0.3714008927345276, "learning_rate": 1.3990196463354662e-05, "loss": 0.5933, "step": 24111 }, { "epoch": 0.7406997818941419, "grad_norm": 0.5032541751861572, "learning_rate": 1.3989753321225708e-05, "loss": 0.5932, "step": 24112 }, { "epoch": 0.740730501029091, "grad_norm": 0.33220216631889343, "learning_rate": 1.3989310169778407e-05, "loss": 0.5306, "step": 24113 }, { "epoch": 0.7407612201640402, "grad_norm": 0.6696385145187378, "learning_rate": 1.3988867009013793e-05, "loss": 0.6023, "step": 24114 }, { "epoch": 0.7407919392989893, "grad_norm": 0.34617888927459717, "learning_rate": 1.3988423838932903e-05, "loss": 0.6015, "step": 24115 }, { "epoch": 0.7408226584339385, "grad_norm": 0.40457382798194885, "learning_rate": 1.398798065953677e-05, "loss": 0.6164, "step": 24116 }, { "epoch": 0.7408533775688877, "grad_norm": 0.3597176671028137, "learning_rate": 1.3987537470826429e-05, "loss": 0.4988, "step": 24117 }, { "epoch": 0.7408840967038368, "grad_norm": 0.3635714054107666, "learning_rate": 1.3987094272802918e-05, "loss": 0.5936, "step": 24118 }, { "epoch": 0.7409148158387859, "grad_norm": 0.3780451714992523, "learning_rate": 1.3986651065467267e-05, "loss": 0.5899, "step": 24119 }, { "epoch": 0.7409455349737352, "grad_norm": 0.34673336148262024, "learning_rate": 1.3986207848820516e-05, "loss": 0.5033, "step": 24120 }, { "epoch": 0.7409762541086843, "grad_norm": 0.364650696516037, "learning_rate": 1.3985764622863696e-05, "loss": 0.569, "step": 24121 }, { "epoch": 0.7410069732436334, "grad_norm": 0.38662976026535034, "learning_rate": 1.3985321387597846e-05, "loss": 0.54, "step": 24122 }, { "epoch": 0.7410376923785826, "grad_norm": 0.3676775097846985, "learning_rate": 1.3984878143023999e-05, "loss": 0.5261, "step": 24123 }, { "epoch": 0.7410684115135318, "grad_norm": 0.33855563402175903, "learning_rate": 1.3984434889143193e-05, "loss": 0.5239, "step": 24124 }, { "epoch": 0.741099130648481, "grad_norm": 0.43141382932662964, "learning_rate": 1.3983991625956456e-05, "loss": 0.493, "step": 24125 }, { "epoch": 0.7411298497834301, "grad_norm": 0.36873507499694824, "learning_rate": 1.3983548353464833e-05, "loss": 0.5468, "step": 24126 }, { "epoch": 0.7411605689183792, "grad_norm": 0.3368791341781616, "learning_rate": 1.3983105071669352e-05, "loss": 0.5477, "step": 24127 }, { "epoch": 0.7411912880533285, "grad_norm": 0.3805893659591675, "learning_rate": 1.398266178057105e-05, "loss": 0.59, "step": 24128 }, { "epoch": 0.7412220071882776, "grad_norm": 0.3429812490940094, "learning_rate": 1.3982218480170966e-05, "loss": 0.5602, "step": 24129 }, { "epoch": 0.7412527263232267, "grad_norm": 0.37428224086761475, "learning_rate": 1.3981775170470131e-05, "loss": 0.5554, "step": 24130 }, { "epoch": 0.7412834454581759, "grad_norm": 0.3665332496166229, "learning_rate": 1.3981331851469583e-05, "loss": 0.6112, "step": 24131 }, { "epoch": 0.741314164593125, "grad_norm": 0.3315355181694031, "learning_rate": 1.3980888523170355e-05, "loss": 0.4565, "step": 24132 }, { "epoch": 0.7413448837280742, "grad_norm": 0.34935128688812256, "learning_rate": 1.3980445185573488e-05, "loss": 0.5004, "step": 24133 }, { "epoch": 0.7413756028630234, "grad_norm": 0.45483022928237915, "learning_rate": 1.398000183868001e-05, "loss": 0.5943, "step": 24134 }, { "epoch": 0.7414063219979725, "grad_norm": 0.43498826026916504, "learning_rate": 1.397955848249096e-05, "loss": 0.5712, "step": 24135 }, { "epoch": 0.7414370411329217, "grad_norm": 0.3472997844219208, "learning_rate": 1.3979115117007374e-05, "loss": 0.5205, "step": 24136 }, { "epoch": 0.7414677602678709, "grad_norm": 0.36821937561035156, "learning_rate": 1.3978671742230284e-05, "loss": 0.5408, "step": 24137 }, { "epoch": 0.74149847940282, "grad_norm": 0.3703446686267853, "learning_rate": 1.397822835816073e-05, "loss": 0.5392, "step": 24138 }, { "epoch": 0.7415291985377692, "grad_norm": 0.3232649862766266, "learning_rate": 1.3977784964799748e-05, "loss": 0.5277, "step": 24139 }, { "epoch": 0.7415599176727183, "grad_norm": 0.3419930934906006, "learning_rate": 1.397734156214837e-05, "loss": 0.5394, "step": 24140 }, { "epoch": 0.7415906368076675, "grad_norm": 0.3381323218345642, "learning_rate": 1.3976898150207632e-05, "loss": 0.5147, "step": 24141 }, { "epoch": 0.7416213559426167, "grad_norm": 0.8751561641693115, "learning_rate": 1.397645472897857e-05, "loss": 0.539, "step": 24142 }, { "epoch": 0.7416520750775658, "grad_norm": 0.38520804047584534, "learning_rate": 1.3976011298462221e-05, "loss": 0.5649, "step": 24143 }, { "epoch": 0.7416827942125149, "grad_norm": 0.37663424015045166, "learning_rate": 1.397556785865962e-05, "loss": 0.5401, "step": 24144 }, { "epoch": 0.7417135133474642, "grad_norm": 0.3466649353504181, "learning_rate": 1.3975124409571806e-05, "loss": 0.5567, "step": 24145 }, { "epoch": 0.7417442324824133, "grad_norm": 0.3477979302406311, "learning_rate": 1.3974680951199804e-05, "loss": 0.5731, "step": 24146 }, { "epoch": 0.7417749516173625, "grad_norm": 0.41759437322616577, "learning_rate": 1.3974237483544664e-05, "loss": 0.5803, "step": 24147 }, { "epoch": 0.7418056707523116, "grad_norm": 0.49904847145080566, "learning_rate": 1.3973794006607411e-05, "loss": 0.6013, "step": 24148 }, { "epoch": 0.7418363898872607, "grad_norm": 0.3744172155857086, "learning_rate": 1.3973350520389085e-05, "loss": 0.5419, "step": 24149 }, { "epoch": 0.74186710902221, "grad_norm": 0.352936714887619, "learning_rate": 1.3972907024890721e-05, "loss": 0.5541, "step": 24150 }, { "epoch": 0.7418978281571591, "grad_norm": 0.32561057806015015, "learning_rate": 1.3972463520113357e-05, "loss": 0.5031, "step": 24151 }, { "epoch": 0.7419285472921082, "grad_norm": 0.34309548139572144, "learning_rate": 1.3972020006058026e-05, "loss": 0.5613, "step": 24152 }, { "epoch": 0.7419592664270575, "grad_norm": 0.33721300959587097, "learning_rate": 1.3971576482725767e-05, "loss": 0.5338, "step": 24153 }, { "epoch": 0.7419899855620066, "grad_norm": 0.38324347138404846, "learning_rate": 1.3971132950117612e-05, "loss": 0.4771, "step": 24154 }, { "epoch": 0.7420207046969557, "grad_norm": 0.3637491464614868, "learning_rate": 1.3970689408234596e-05, "loss": 0.5308, "step": 24155 }, { "epoch": 0.7420514238319049, "grad_norm": 0.35825636982917786, "learning_rate": 1.3970245857077762e-05, "loss": 0.5437, "step": 24156 }, { "epoch": 0.742082142966854, "grad_norm": 0.3569939434528351, "learning_rate": 1.3969802296648138e-05, "loss": 0.5125, "step": 24157 }, { "epoch": 0.7421128621018032, "grad_norm": 0.39946073293685913, "learning_rate": 1.3969358726946766e-05, "loss": 0.4932, "step": 24158 }, { "epoch": 0.7421435812367524, "grad_norm": 0.37380123138427734, "learning_rate": 1.396891514797468e-05, "loss": 0.5723, "step": 24159 }, { "epoch": 0.7421743003717015, "grad_norm": 0.38735419511795044, "learning_rate": 1.3968471559732915e-05, "loss": 0.582, "step": 24160 }, { "epoch": 0.7422050195066507, "grad_norm": 0.4017593562602997, "learning_rate": 1.3968027962222504e-05, "loss": 0.555, "step": 24161 }, { "epoch": 0.7422357386415999, "grad_norm": 0.35706010460853577, "learning_rate": 1.396758435544449e-05, "loss": 0.4782, "step": 24162 }, { "epoch": 0.742266457776549, "grad_norm": 0.3563995659351349, "learning_rate": 1.3967140739399905e-05, "loss": 0.5514, "step": 24163 }, { "epoch": 0.7422971769114982, "grad_norm": 0.3625752329826355, "learning_rate": 1.3966697114089783e-05, "loss": 0.593, "step": 24164 }, { "epoch": 0.7423278960464473, "grad_norm": 0.37264594435691833, "learning_rate": 1.3966253479515169e-05, "loss": 0.5212, "step": 24165 }, { "epoch": 0.7423586151813965, "grad_norm": 0.45435431599617004, "learning_rate": 1.3965809835677087e-05, "loss": 0.5697, "step": 24166 }, { "epoch": 0.7423893343163457, "grad_norm": 0.36366111040115356, "learning_rate": 1.3965366182576583e-05, "loss": 0.5561, "step": 24167 }, { "epoch": 0.7424200534512948, "grad_norm": 0.3568592965602875, "learning_rate": 1.3964922520214688e-05, "loss": 0.6135, "step": 24168 }, { "epoch": 0.7424507725862439, "grad_norm": 0.3697623908519745, "learning_rate": 1.3964478848592438e-05, "loss": 0.4722, "step": 24169 }, { "epoch": 0.7424814917211932, "grad_norm": 0.37901216745376587, "learning_rate": 1.3964035167710875e-05, "loss": 0.5737, "step": 24170 }, { "epoch": 0.7425122108561423, "grad_norm": 0.431497722864151, "learning_rate": 1.3963591477571026e-05, "loss": 0.6095, "step": 24171 }, { "epoch": 0.7425429299910915, "grad_norm": 0.343195378780365, "learning_rate": 1.3963147778173936e-05, "loss": 0.5457, "step": 24172 }, { "epoch": 0.7425736491260406, "grad_norm": 0.36160191893577576, "learning_rate": 1.3962704069520634e-05, "loss": 0.58, "step": 24173 }, { "epoch": 0.7426043682609897, "grad_norm": 0.36049699783325195, "learning_rate": 1.3962260351612163e-05, "loss": 0.5628, "step": 24174 }, { "epoch": 0.742635087395939, "grad_norm": 0.3350850045681, "learning_rate": 1.3961816624449553e-05, "loss": 0.5289, "step": 24175 }, { "epoch": 0.7426658065308881, "grad_norm": 0.37808895111083984, "learning_rate": 1.3961372888033848e-05, "loss": 0.4449, "step": 24176 }, { "epoch": 0.7426965256658372, "grad_norm": 0.35625264048576355, "learning_rate": 1.3960929142366077e-05, "loss": 0.5552, "step": 24177 }, { "epoch": 0.7427272448007864, "grad_norm": 0.3743714392185211, "learning_rate": 1.396048538744728e-05, "loss": 0.5136, "step": 24178 }, { "epoch": 0.7427579639357356, "grad_norm": 0.3520742952823639, "learning_rate": 1.3960041623278495e-05, "loss": 0.5248, "step": 24179 }, { "epoch": 0.7427886830706847, "grad_norm": 0.3508898913860321, "learning_rate": 1.3959597849860753e-05, "loss": 0.5446, "step": 24180 }, { "epoch": 0.7428194022056339, "grad_norm": 0.3457241952419281, "learning_rate": 1.3959154067195096e-05, "loss": 0.6045, "step": 24181 }, { "epoch": 0.742850121340583, "grad_norm": 0.4005032777786255, "learning_rate": 1.3958710275282555e-05, "loss": 0.5824, "step": 24182 }, { "epoch": 0.7428808404755322, "grad_norm": 0.3411861062049866, "learning_rate": 1.3958266474124172e-05, "loss": 0.5473, "step": 24183 }, { "epoch": 0.7429115596104814, "grad_norm": 0.35423213243484497, "learning_rate": 1.395782266372098e-05, "loss": 0.5712, "step": 24184 }, { "epoch": 0.7429422787454305, "grad_norm": 0.37443649768829346, "learning_rate": 1.3957378844074018e-05, "loss": 0.593, "step": 24185 }, { "epoch": 0.7429729978803797, "grad_norm": 0.35715997219085693, "learning_rate": 1.3956935015184323e-05, "loss": 0.583, "step": 24186 }, { "epoch": 0.7430037170153289, "grad_norm": 0.3274696469306946, "learning_rate": 1.3956491177052927e-05, "loss": 0.5114, "step": 24187 }, { "epoch": 0.743034436150278, "grad_norm": 0.3526763319969177, "learning_rate": 1.3956047329680871e-05, "loss": 0.5456, "step": 24188 }, { "epoch": 0.7430651552852272, "grad_norm": 0.335777223110199, "learning_rate": 1.3955603473069187e-05, "loss": 0.5481, "step": 24189 }, { "epoch": 0.7430958744201763, "grad_norm": 0.4098687469959259, "learning_rate": 1.395515960721892e-05, "loss": 0.6215, "step": 24190 }, { "epoch": 0.7431265935551254, "grad_norm": 0.3662053644657135, "learning_rate": 1.3954715732131096e-05, "loss": 0.5338, "step": 24191 }, { "epoch": 0.7431573126900747, "grad_norm": 0.33739206194877625, "learning_rate": 1.3954271847806761e-05, "loss": 0.5522, "step": 24192 }, { "epoch": 0.7431880318250238, "grad_norm": 0.3790123760700226, "learning_rate": 1.3953827954246948e-05, "loss": 0.5653, "step": 24193 }, { "epoch": 0.7432187509599729, "grad_norm": 0.36215999722480774, "learning_rate": 1.3953384051452693e-05, "loss": 0.5382, "step": 24194 }, { "epoch": 0.7432494700949221, "grad_norm": 0.34811559319496155, "learning_rate": 1.3952940139425033e-05, "loss": 0.48, "step": 24195 }, { "epoch": 0.7432801892298713, "grad_norm": 0.35505208373069763, "learning_rate": 1.3952496218165006e-05, "loss": 0.5498, "step": 24196 }, { "epoch": 0.7433109083648205, "grad_norm": 0.36433714628219604, "learning_rate": 1.395205228767365e-05, "loss": 0.5031, "step": 24197 }, { "epoch": 0.7433416274997696, "grad_norm": 0.40672969818115234, "learning_rate": 1.3951608347951994e-05, "loss": 0.5347, "step": 24198 }, { "epoch": 0.7433723466347187, "grad_norm": 0.37118929624557495, "learning_rate": 1.3951164399001086e-05, "loss": 0.5213, "step": 24199 }, { "epoch": 0.743403065769668, "grad_norm": 0.3587082028388977, "learning_rate": 1.3950720440821953e-05, "loss": 0.5929, "step": 24200 }, { "epoch": 0.7434337849046171, "grad_norm": 0.40817737579345703, "learning_rate": 1.395027647341564e-05, "loss": 0.6345, "step": 24201 }, { "epoch": 0.7434645040395662, "grad_norm": 0.3997225761413574, "learning_rate": 1.3949832496783178e-05, "loss": 0.6826, "step": 24202 }, { "epoch": 0.7434952231745154, "grad_norm": 0.3848213255405426, "learning_rate": 1.394938851092561e-05, "loss": 0.6197, "step": 24203 }, { "epoch": 0.7435259423094646, "grad_norm": 0.3878210186958313, "learning_rate": 1.3948944515843968e-05, "loss": 0.5571, "step": 24204 }, { "epoch": 0.7435566614444137, "grad_norm": 0.3788864314556122, "learning_rate": 1.3948500511539287e-05, "loss": 0.5704, "step": 24205 }, { "epoch": 0.7435873805793629, "grad_norm": 0.3421024978160858, "learning_rate": 1.3948056498012612e-05, "loss": 0.4863, "step": 24206 }, { "epoch": 0.743618099714312, "grad_norm": 0.3601420819759369, "learning_rate": 1.394761247526497e-05, "loss": 0.5257, "step": 24207 }, { "epoch": 0.7436488188492612, "grad_norm": 0.38115647435188293, "learning_rate": 1.394716844329741e-05, "loss": 0.4525, "step": 24208 }, { "epoch": 0.7436795379842104, "grad_norm": 0.3315134346485138, "learning_rate": 1.3946724402110959e-05, "loss": 0.5085, "step": 24209 }, { "epoch": 0.7437102571191595, "grad_norm": 0.464437872171402, "learning_rate": 1.3946280351706658e-05, "loss": 0.5286, "step": 24210 }, { "epoch": 0.7437409762541087, "grad_norm": 0.3634023070335388, "learning_rate": 1.3945836292085542e-05, "loss": 0.5436, "step": 24211 }, { "epoch": 0.7437716953890579, "grad_norm": 0.3658812940120697, "learning_rate": 1.394539222324865e-05, "loss": 0.5387, "step": 24212 }, { "epoch": 0.743802414524007, "grad_norm": 0.381190687417984, "learning_rate": 1.394494814519702e-05, "loss": 0.5789, "step": 24213 }, { "epoch": 0.7438331336589562, "grad_norm": 0.3398866355419159, "learning_rate": 1.394450405793169e-05, "loss": 0.5593, "step": 24214 }, { "epoch": 0.7438638527939053, "grad_norm": 0.33352094888687134, "learning_rate": 1.3944059961453694e-05, "loss": 0.6154, "step": 24215 }, { "epoch": 0.7438945719288544, "grad_norm": 0.4397144615650177, "learning_rate": 1.3943615855764067e-05, "loss": 0.6067, "step": 24216 }, { "epoch": 0.7439252910638037, "grad_norm": 0.33332502841949463, "learning_rate": 1.3943171740863855e-05, "loss": 0.4809, "step": 24217 }, { "epoch": 0.7439560101987528, "grad_norm": 0.34704092144966125, "learning_rate": 1.3942727616754086e-05, "loss": 0.4933, "step": 24218 }, { "epoch": 0.7439867293337019, "grad_norm": 0.41253045201301575, "learning_rate": 1.3942283483435802e-05, "loss": 0.5945, "step": 24219 }, { "epoch": 0.7440174484686511, "grad_norm": 0.37822744250297546, "learning_rate": 1.3941839340910042e-05, "loss": 0.5283, "step": 24220 }, { "epoch": 0.7440481676036003, "grad_norm": 0.3508889079093933, "learning_rate": 1.3941395189177837e-05, "loss": 0.5787, "step": 24221 }, { "epoch": 0.7440788867385495, "grad_norm": 0.3932173550128937, "learning_rate": 1.3940951028240233e-05, "loss": 0.4605, "step": 24222 }, { "epoch": 0.7441096058734986, "grad_norm": 0.35058051347732544, "learning_rate": 1.3940506858098257e-05, "loss": 0.5043, "step": 24223 }, { "epoch": 0.7441403250084477, "grad_norm": 0.36043480038642883, "learning_rate": 1.3940062678752957e-05, "loss": 0.518, "step": 24224 }, { "epoch": 0.744171044143397, "grad_norm": 0.3765770196914673, "learning_rate": 1.3939618490205362e-05, "loss": 0.5418, "step": 24225 }, { "epoch": 0.7442017632783461, "grad_norm": 0.3744790256023407, "learning_rate": 1.3939174292456517e-05, "loss": 0.4772, "step": 24226 }, { "epoch": 0.7442324824132952, "grad_norm": 0.40797320008277893, "learning_rate": 1.3938730085507452e-05, "loss": 0.5584, "step": 24227 }, { "epoch": 0.7442632015482444, "grad_norm": 0.3712348937988281, "learning_rate": 1.393828586935921e-05, "loss": 0.5575, "step": 24228 }, { "epoch": 0.7442939206831936, "grad_norm": 0.4280013144016266, "learning_rate": 1.3937841644012822e-05, "loss": 0.5762, "step": 24229 }, { "epoch": 0.7443246398181427, "grad_norm": 0.3402077853679657, "learning_rate": 1.3937397409469335e-05, "loss": 0.5366, "step": 24230 }, { "epoch": 0.7443553589530919, "grad_norm": 0.36697009205818176, "learning_rate": 1.3936953165729779e-05, "loss": 0.6196, "step": 24231 }, { "epoch": 0.744386078088041, "grad_norm": 0.37117883563041687, "learning_rate": 1.3936508912795194e-05, "loss": 0.6196, "step": 24232 }, { "epoch": 0.7444167972229901, "grad_norm": 0.3615424931049347, "learning_rate": 1.3936064650666619e-05, "loss": 0.612, "step": 24233 }, { "epoch": 0.7444475163579394, "grad_norm": 0.3568446636199951, "learning_rate": 1.3935620379345087e-05, "loss": 0.5407, "step": 24234 }, { "epoch": 0.7444782354928885, "grad_norm": 0.4212568998336792, "learning_rate": 1.3935176098831643e-05, "loss": 0.5212, "step": 24235 }, { "epoch": 0.7445089546278377, "grad_norm": 0.37608468532562256, "learning_rate": 1.3934731809127315e-05, "loss": 0.5639, "step": 24236 }, { "epoch": 0.7445396737627868, "grad_norm": 0.32548534870147705, "learning_rate": 1.393428751023315e-05, "loss": 0.5272, "step": 24237 }, { "epoch": 0.744570392897736, "grad_norm": 0.35783159732818604, "learning_rate": 1.3933843202150179e-05, "loss": 0.4888, "step": 24238 }, { "epoch": 0.7446011120326852, "grad_norm": 0.34735554456710815, "learning_rate": 1.3933398884879444e-05, "loss": 0.5941, "step": 24239 }, { "epoch": 0.7446318311676343, "grad_norm": 0.37582260370254517, "learning_rate": 1.3932954558421982e-05, "loss": 0.5419, "step": 24240 }, { "epoch": 0.7446625503025834, "grad_norm": 0.3675706386566162, "learning_rate": 1.393251022277883e-05, "loss": 0.5425, "step": 24241 }, { "epoch": 0.7446932694375327, "grad_norm": 0.32339781522750854, "learning_rate": 1.3932065877951026e-05, "loss": 0.5887, "step": 24242 }, { "epoch": 0.7447239885724818, "grad_norm": 0.39473894238471985, "learning_rate": 1.3931621523939605e-05, "loss": 0.6433, "step": 24243 }, { "epoch": 0.7447547077074309, "grad_norm": 0.32943812012672424, "learning_rate": 1.393117716074561e-05, "loss": 0.5064, "step": 24244 }, { "epoch": 0.7447854268423801, "grad_norm": 0.35648566484451294, "learning_rate": 1.3930732788370074e-05, "loss": 0.5591, "step": 24245 }, { "epoch": 0.7448161459773293, "grad_norm": 0.3811604678630829, "learning_rate": 1.393028840681404e-05, "loss": 0.5827, "step": 24246 }, { "epoch": 0.7448468651122785, "grad_norm": 0.3401550352573395, "learning_rate": 1.3929844016078539e-05, "loss": 0.5883, "step": 24247 }, { "epoch": 0.7448775842472276, "grad_norm": 0.3234596848487854, "learning_rate": 1.3929399616164617e-05, "loss": 0.4891, "step": 24248 }, { "epoch": 0.7449083033821767, "grad_norm": 0.3502811789512634, "learning_rate": 1.3928955207073305e-05, "loss": 0.5406, "step": 24249 }, { "epoch": 0.744939022517126, "grad_norm": 0.3503537178039551, "learning_rate": 1.3928510788805646e-05, "loss": 0.5376, "step": 24250 }, { "epoch": 0.7449697416520751, "grad_norm": 0.38781267404556274, "learning_rate": 1.3928066361362673e-05, "loss": 0.557, "step": 24251 }, { "epoch": 0.7450004607870242, "grad_norm": 0.33628225326538086, "learning_rate": 1.3927621924745428e-05, "loss": 0.5234, "step": 24252 }, { "epoch": 0.7450311799219734, "grad_norm": 0.39167675375938416, "learning_rate": 1.3927177478954948e-05, "loss": 0.6172, "step": 24253 }, { "epoch": 0.7450618990569225, "grad_norm": 0.40443193912506104, "learning_rate": 1.392673302399227e-05, "loss": 0.618, "step": 24254 }, { "epoch": 0.7450926181918717, "grad_norm": 0.3616211712360382, "learning_rate": 1.392628855985843e-05, "loss": 0.5368, "step": 24255 }, { "epoch": 0.7451233373268209, "grad_norm": 0.3491271138191223, "learning_rate": 1.3925844086554473e-05, "loss": 0.5558, "step": 24256 }, { "epoch": 0.74515405646177, "grad_norm": 0.3251248002052307, "learning_rate": 1.392539960408143e-05, "loss": 0.476, "step": 24257 }, { "epoch": 0.7451847755967193, "grad_norm": 0.349777489900589, "learning_rate": 1.3924955112440346e-05, "loss": 0.5395, "step": 24258 }, { "epoch": 0.7452154947316684, "grad_norm": 0.3441862463951111, "learning_rate": 1.3924510611632252e-05, "loss": 0.5401, "step": 24259 }, { "epoch": 0.7452462138666175, "grad_norm": 0.3673233389854431, "learning_rate": 1.3924066101658192e-05, "loss": 0.5557, "step": 24260 }, { "epoch": 0.7452769330015667, "grad_norm": 0.36130037903785706, "learning_rate": 1.39236215825192e-05, "loss": 0.5552, "step": 24261 }, { "epoch": 0.7453076521365158, "grad_norm": 0.43323206901550293, "learning_rate": 1.3923177054216317e-05, "loss": 0.5466, "step": 24262 }, { "epoch": 0.745338371271465, "grad_norm": 0.37830227613449097, "learning_rate": 1.3922732516750577e-05, "loss": 0.5945, "step": 24263 }, { "epoch": 0.7453690904064142, "grad_norm": 0.37073755264282227, "learning_rate": 1.3922287970123023e-05, "loss": 0.5649, "step": 24264 }, { "epoch": 0.7453998095413633, "grad_norm": 0.4911746680736542, "learning_rate": 1.3921843414334691e-05, "loss": 0.5842, "step": 24265 }, { "epoch": 0.7454305286763124, "grad_norm": 0.33177611231803894, "learning_rate": 1.3921398849386623e-05, "loss": 0.5183, "step": 24266 }, { "epoch": 0.7454612478112617, "grad_norm": 0.357553094625473, "learning_rate": 1.3920954275279852e-05, "loss": 0.5382, "step": 24267 }, { "epoch": 0.7454919669462108, "grad_norm": 0.3585798442363739, "learning_rate": 1.3920509692015416e-05, "loss": 0.5473, "step": 24268 }, { "epoch": 0.7455226860811599, "grad_norm": 0.4668923318386078, "learning_rate": 1.392006509959436e-05, "loss": 0.5414, "step": 24269 }, { "epoch": 0.7455534052161091, "grad_norm": 0.3808416426181793, "learning_rate": 1.3919620498017713e-05, "loss": 0.5299, "step": 24270 }, { "epoch": 0.7455841243510583, "grad_norm": 0.5385532379150391, "learning_rate": 1.3919175887286525e-05, "loss": 0.5502, "step": 24271 }, { "epoch": 0.7456148434860075, "grad_norm": 0.3772508203983307, "learning_rate": 1.391873126740182e-05, "loss": 0.5179, "step": 24272 }, { "epoch": 0.7456455626209566, "grad_norm": 0.3820464313030243, "learning_rate": 1.3918286638364651e-05, "loss": 0.5321, "step": 24273 }, { "epoch": 0.7456762817559057, "grad_norm": 0.3347627818584442, "learning_rate": 1.3917842000176048e-05, "loss": 0.5624, "step": 24274 }, { "epoch": 0.745707000890855, "grad_norm": 0.3389339745044708, "learning_rate": 1.391739735283705e-05, "loss": 0.5808, "step": 24275 }, { "epoch": 0.7457377200258041, "grad_norm": 0.4047050476074219, "learning_rate": 1.3916952696348701e-05, "loss": 0.5554, "step": 24276 }, { "epoch": 0.7457684391607532, "grad_norm": 0.37262505292892456, "learning_rate": 1.391650803071203e-05, "loss": 0.5367, "step": 24277 }, { "epoch": 0.7457991582957024, "grad_norm": 0.37236863374710083, "learning_rate": 1.3916063355928084e-05, "loss": 0.631, "step": 24278 }, { "epoch": 0.7458298774306515, "grad_norm": 0.3316901922225952, "learning_rate": 1.3915618671997897e-05, "loss": 0.5593, "step": 24279 }, { "epoch": 0.7458605965656007, "grad_norm": 0.3525792360305786, "learning_rate": 1.391517397892251e-05, "loss": 0.5785, "step": 24280 }, { "epoch": 0.7458913157005499, "grad_norm": 0.3827996253967285, "learning_rate": 1.391472927670296e-05, "loss": 0.6164, "step": 24281 }, { "epoch": 0.745922034835499, "grad_norm": 0.40191739797592163, "learning_rate": 1.391428456534029e-05, "loss": 0.5249, "step": 24282 }, { "epoch": 0.7459527539704482, "grad_norm": 0.406488835811615, "learning_rate": 1.3913839844835533e-05, "loss": 0.5658, "step": 24283 }, { "epoch": 0.7459834731053974, "grad_norm": 0.37402811646461487, "learning_rate": 1.391339511518973e-05, "loss": 0.4913, "step": 24284 }, { "epoch": 0.7460141922403465, "grad_norm": 0.4367762804031372, "learning_rate": 1.3912950376403917e-05, "loss": 0.6588, "step": 24285 }, { "epoch": 0.7460449113752957, "grad_norm": 0.3823002576828003, "learning_rate": 1.3912505628479136e-05, "loss": 0.6149, "step": 24286 }, { "epoch": 0.7460756305102448, "grad_norm": 0.45210710167884827, "learning_rate": 1.3912060871416428e-05, "loss": 0.5594, "step": 24287 }, { "epoch": 0.746106349645194, "grad_norm": 0.3894357681274414, "learning_rate": 1.3911616105216825e-05, "loss": 0.5815, "step": 24288 }, { "epoch": 0.7461370687801432, "grad_norm": 0.38125842809677124, "learning_rate": 1.391117132988137e-05, "loss": 0.5951, "step": 24289 }, { "epoch": 0.7461677879150923, "grad_norm": 0.33436718583106995, "learning_rate": 1.3910726545411101e-05, "loss": 0.5686, "step": 24290 }, { "epoch": 0.7461985070500414, "grad_norm": 0.38050565123558044, "learning_rate": 1.391028175180706e-05, "loss": 0.5491, "step": 24291 }, { "epoch": 0.7462292261849907, "grad_norm": 0.33639514446258545, "learning_rate": 1.3909836949070279e-05, "loss": 0.519, "step": 24292 }, { "epoch": 0.7462599453199398, "grad_norm": 0.36144766211509705, "learning_rate": 1.3909392137201803e-05, "loss": 0.4874, "step": 24293 }, { "epoch": 0.7462906644548889, "grad_norm": 0.36026549339294434, "learning_rate": 1.3908947316202668e-05, "loss": 0.5368, "step": 24294 }, { "epoch": 0.7463213835898381, "grad_norm": 0.37216711044311523, "learning_rate": 1.3908502486073912e-05, "loss": 0.6844, "step": 24295 }, { "epoch": 0.7463521027247872, "grad_norm": 0.3357163369655609, "learning_rate": 1.3908057646816577e-05, "loss": 0.541, "step": 24296 }, { "epoch": 0.7463828218597365, "grad_norm": 0.4882644712924957, "learning_rate": 1.39076127984317e-05, "loss": 0.5612, "step": 24297 }, { "epoch": 0.7464135409946856, "grad_norm": 0.36877331137657166, "learning_rate": 1.3907167940920321e-05, "loss": 0.6252, "step": 24298 }, { "epoch": 0.7464442601296347, "grad_norm": 0.48167529702186584, "learning_rate": 1.390672307428348e-05, "loss": 0.6573, "step": 24299 }, { "epoch": 0.746474979264584, "grad_norm": 0.9233798980712891, "learning_rate": 1.3906278198522211e-05, "loss": 0.5583, "step": 24300 }, { "epoch": 0.7465056983995331, "grad_norm": 0.3836228549480438, "learning_rate": 1.390583331363756e-05, "loss": 0.5604, "step": 24301 }, { "epoch": 0.7465364175344822, "grad_norm": 0.35402312874794006, "learning_rate": 1.390538841963056e-05, "loss": 0.5477, "step": 24302 }, { "epoch": 0.7465671366694314, "grad_norm": 0.3448857367038727, "learning_rate": 1.3904943516502254e-05, "loss": 0.5579, "step": 24303 }, { "epoch": 0.7465978558043805, "grad_norm": 0.45526695251464844, "learning_rate": 1.3904498604253677e-05, "loss": 0.5211, "step": 24304 }, { "epoch": 0.7466285749393297, "grad_norm": 0.38319921493530273, "learning_rate": 1.3904053682885874e-05, "loss": 0.5395, "step": 24305 }, { "epoch": 0.7466592940742789, "grad_norm": 0.3827822804450989, "learning_rate": 1.3903608752399879e-05, "loss": 0.5105, "step": 24306 }, { "epoch": 0.746690013209228, "grad_norm": 0.35253193974494934, "learning_rate": 1.3903163812796736e-05, "loss": 0.5889, "step": 24307 }, { "epoch": 0.7467207323441772, "grad_norm": 0.4066231846809387, "learning_rate": 1.3902718864077479e-05, "loss": 0.5719, "step": 24308 }, { "epoch": 0.7467514514791264, "grad_norm": 0.47554945945739746, "learning_rate": 1.3902273906243149e-05, "loss": 0.6278, "step": 24309 }, { "epoch": 0.7467821706140755, "grad_norm": 0.3531419336795807, "learning_rate": 1.3901828939294786e-05, "loss": 0.4772, "step": 24310 }, { "epoch": 0.7468128897490247, "grad_norm": 0.34892112016677856, "learning_rate": 1.3901383963233429e-05, "loss": 0.5204, "step": 24311 }, { "epoch": 0.7468436088839738, "grad_norm": 0.3475990295410156, "learning_rate": 1.3900938978060116e-05, "loss": 0.4722, "step": 24312 }, { "epoch": 0.746874328018923, "grad_norm": 0.353998064994812, "learning_rate": 1.390049398377589e-05, "loss": 0.5836, "step": 24313 }, { "epoch": 0.7469050471538722, "grad_norm": 0.41844791173934937, "learning_rate": 1.3900048980381788e-05, "loss": 0.568, "step": 24314 }, { "epoch": 0.7469357662888213, "grad_norm": 0.3583321273326874, "learning_rate": 1.3899603967878848e-05, "loss": 0.6561, "step": 24315 }, { "epoch": 0.7469664854237704, "grad_norm": 0.6042196750640869, "learning_rate": 1.3899158946268113e-05, "loss": 0.526, "step": 24316 }, { "epoch": 0.7469972045587197, "grad_norm": 0.32835644483566284, "learning_rate": 1.3898713915550617e-05, "loss": 0.5674, "step": 24317 }, { "epoch": 0.7470279236936688, "grad_norm": 0.3638719320297241, "learning_rate": 1.3898268875727403e-05, "loss": 0.5959, "step": 24318 }, { "epoch": 0.7470586428286179, "grad_norm": 0.4006003737449646, "learning_rate": 1.3897823826799513e-05, "loss": 0.5194, "step": 24319 }, { "epoch": 0.7470893619635671, "grad_norm": 0.39564627408981323, "learning_rate": 1.3897378768767977e-05, "loss": 0.5683, "step": 24320 }, { "epoch": 0.7471200810985162, "grad_norm": 0.40016332268714905, "learning_rate": 1.3896933701633846e-05, "loss": 0.6083, "step": 24321 }, { "epoch": 0.7471508002334655, "grad_norm": 0.40976276993751526, "learning_rate": 1.389648862539815e-05, "loss": 0.5616, "step": 24322 }, { "epoch": 0.7471815193684146, "grad_norm": 0.5104149580001831, "learning_rate": 1.3896043540061937e-05, "loss": 0.4882, "step": 24323 }, { "epoch": 0.7472122385033637, "grad_norm": 0.39010149240493774, "learning_rate": 1.389559844562624e-05, "loss": 0.5934, "step": 24324 }, { "epoch": 0.7472429576383129, "grad_norm": 0.33001795411109924, "learning_rate": 1.38951533420921e-05, "loss": 0.5194, "step": 24325 }, { "epoch": 0.7472736767732621, "grad_norm": 0.3631680905818939, "learning_rate": 1.389470822946056e-05, "loss": 0.5336, "step": 24326 }, { "epoch": 0.7473043959082112, "grad_norm": 0.33931586146354675, "learning_rate": 1.3894263107732656e-05, "loss": 0.5071, "step": 24327 }, { "epoch": 0.7473351150431604, "grad_norm": 0.46645474433898926, "learning_rate": 1.3893817976909424e-05, "loss": 0.5395, "step": 24328 }, { "epoch": 0.7473658341781095, "grad_norm": 0.37330734729766846, "learning_rate": 1.3893372836991912e-05, "loss": 0.5235, "step": 24329 }, { "epoch": 0.7473965533130587, "grad_norm": 0.38862746953964233, "learning_rate": 1.3892927687981157e-05, "loss": 0.5463, "step": 24330 }, { "epoch": 0.7474272724480079, "grad_norm": 0.3302268087863922, "learning_rate": 1.3892482529878194e-05, "loss": 0.5733, "step": 24331 }, { "epoch": 0.747457991582957, "grad_norm": 0.36956873536109924, "learning_rate": 1.389203736268407e-05, "loss": 0.5537, "step": 24332 }, { "epoch": 0.7474887107179062, "grad_norm": 0.39870357513427734, "learning_rate": 1.3891592186399821e-05, "loss": 0.5189, "step": 24333 }, { "epoch": 0.7475194298528554, "grad_norm": 0.38837099075317383, "learning_rate": 1.3891147001026484e-05, "loss": 0.5318, "step": 24334 }, { "epoch": 0.7475501489878045, "grad_norm": 0.3461623191833496, "learning_rate": 1.3890701806565101e-05, "loss": 0.5458, "step": 24335 }, { "epoch": 0.7475808681227537, "grad_norm": 0.34969550371170044, "learning_rate": 1.3890256603016717e-05, "loss": 0.5539, "step": 24336 }, { "epoch": 0.7476115872577028, "grad_norm": 0.342507541179657, "learning_rate": 1.3889811390382365e-05, "loss": 0.4535, "step": 24337 }, { "epoch": 0.747642306392652, "grad_norm": 0.3902715742588043, "learning_rate": 1.3889366168663082e-05, "loss": 0.5198, "step": 24338 }, { "epoch": 0.7476730255276012, "grad_norm": 0.3636608421802521, "learning_rate": 1.3888920937859917e-05, "loss": 0.6116, "step": 24339 }, { "epoch": 0.7477037446625503, "grad_norm": 0.3768347501754761, "learning_rate": 1.3888475697973904e-05, "loss": 0.5204, "step": 24340 }, { "epoch": 0.7477344637974994, "grad_norm": 0.32288044691085815, "learning_rate": 1.3888030449006087e-05, "loss": 0.5015, "step": 24341 }, { "epoch": 0.7477651829324486, "grad_norm": 0.38497260212898254, "learning_rate": 1.38875851909575e-05, "loss": 0.5629, "step": 24342 }, { "epoch": 0.7477959020673978, "grad_norm": 0.38514018058776855, "learning_rate": 1.3887139923829189e-05, "loss": 0.5661, "step": 24343 }, { "epoch": 0.747826621202347, "grad_norm": 0.3485569953918457, "learning_rate": 1.388669464762219e-05, "loss": 0.5973, "step": 24344 }, { "epoch": 0.7478573403372961, "grad_norm": 0.46702176332473755, "learning_rate": 1.3886249362337545e-05, "loss": 0.5317, "step": 24345 }, { "epoch": 0.7478880594722452, "grad_norm": 0.3652004897594452, "learning_rate": 1.3885804067976292e-05, "loss": 0.5208, "step": 24346 }, { "epoch": 0.7479187786071945, "grad_norm": 0.3389112949371338, "learning_rate": 1.3885358764539471e-05, "loss": 0.45, "step": 24347 }, { "epoch": 0.7479494977421436, "grad_norm": 0.3560903072357178, "learning_rate": 1.3884913452028126e-05, "loss": 0.5556, "step": 24348 }, { "epoch": 0.7479802168770927, "grad_norm": 0.3473384976387024, "learning_rate": 1.3884468130443292e-05, "loss": 0.5761, "step": 24349 }, { "epoch": 0.7480109360120419, "grad_norm": 0.37553873658180237, "learning_rate": 1.3884022799786015e-05, "loss": 0.4544, "step": 24350 }, { "epoch": 0.7480416551469911, "grad_norm": 0.3603776693344116, "learning_rate": 1.3883577460057327e-05, "loss": 0.6008, "step": 24351 }, { "epoch": 0.7480723742819402, "grad_norm": 0.36258625984191895, "learning_rate": 1.3883132111258276e-05, "loss": 0.5771, "step": 24352 }, { "epoch": 0.7481030934168894, "grad_norm": 0.3458406627178192, "learning_rate": 1.3882686753389898e-05, "loss": 0.5584, "step": 24353 }, { "epoch": 0.7481338125518385, "grad_norm": 0.3684186041355133, "learning_rate": 1.3882241386453232e-05, "loss": 0.6252, "step": 24354 }, { "epoch": 0.7481645316867876, "grad_norm": 0.41318148374557495, "learning_rate": 1.3881796010449323e-05, "loss": 0.5749, "step": 24355 }, { "epoch": 0.7481952508217369, "grad_norm": 0.37578481435775757, "learning_rate": 1.3881350625379206e-05, "loss": 0.56, "step": 24356 }, { "epoch": 0.748225969956686, "grad_norm": 0.546559751033783, "learning_rate": 1.3880905231243925e-05, "loss": 0.5834, "step": 24357 }, { "epoch": 0.7482566890916352, "grad_norm": 0.38837581872940063, "learning_rate": 1.3880459828044517e-05, "loss": 0.5899, "step": 24358 }, { "epoch": 0.7482874082265843, "grad_norm": 0.36365386843681335, "learning_rate": 1.3880014415782025e-05, "loss": 0.5607, "step": 24359 }, { "epoch": 0.7483181273615335, "grad_norm": 0.38652753829956055, "learning_rate": 1.3879568994457491e-05, "loss": 0.5775, "step": 24360 }, { "epoch": 0.7483488464964827, "grad_norm": 0.3725283145904541, "learning_rate": 1.387912356407195e-05, "loss": 0.5322, "step": 24361 }, { "epoch": 0.7483795656314318, "grad_norm": 0.3718215227127075, "learning_rate": 1.3878678124626444e-05, "loss": 0.5383, "step": 24362 }, { "epoch": 0.7484102847663809, "grad_norm": 0.3383180499076843, "learning_rate": 1.3878232676122017e-05, "loss": 0.5336, "step": 24363 }, { "epoch": 0.7484410039013302, "grad_norm": 0.40451622009277344, "learning_rate": 1.3877787218559707e-05, "loss": 0.5902, "step": 24364 }, { "epoch": 0.7484717230362793, "grad_norm": 0.37394216656684875, "learning_rate": 1.387734175194055e-05, "loss": 0.5041, "step": 24365 }, { "epoch": 0.7485024421712284, "grad_norm": 0.3246656358242035, "learning_rate": 1.3876896276265593e-05, "loss": 0.5897, "step": 24366 }, { "epoch": 0.7485331613061776, "grad_norm": 0.34329280257225037, "learning_rate": 1.3876450791535874e-05, "loss": 0.544, "step": 24367 }, { "epoch": 0.7485638804411268, "grad_norm": 0.3985111117362976, "learning_rate": 1.3876005297752433e-05, "loss": 0.5377, "step": 24368 }, { "epoch": 0.748594599576076, "grad_norm": 0.36527955532073975, "learning_rate": 1.3875559794916312e-05, "loss": 0.5708, "step": 24369 }, { "epoch": 0.7486253187110251, "grad_norm": 0.3905005156993866, "learning_rate": 1.387511428302855e-05, "loss": 0.5287, "step": 24370 }, { "epoch": 0.7486560378459742, "grad_norm": 0.38877350091934204, "learning_rate": 1.3874668762090188e-05, "loss": 0.5733, "step": 24371 }, { "epoch": 0.7486867569809235, "grad_norm": 0.36978715658187866, "learning_rate": 1.3874223232102267e-05, "loss": 0.5758, "step": 24372 }, { "epoch": 0.7487174761158726, "grad_norm": 0.33545827865600586, "learning_rate": 1.3873777693065827e-05, "loss": 0.5403, "step": 24373 }, { "epoch": 0.7487481952508217, "grad_norm": 0.9636921286582947, "learning_rate": 1.3873332144981906e-05, "loss": 0.5805, "step": 24374 }, { "epoch": 0.7487789143857709, "grad_norm": 0.41313403844833374, "learning_rate": 1.387288658785155e-05, "loss": 0.5198, "step": 24375 }, { "epoch": 0.74880963352072, "grad_norm": 0.3611474633216858, "learning_rate": 1.3872441021675795e-05, "loss": 0.5021, "step": 24376 }, { "epoch": 0.7488403526556692, "grad_norm": 0.40582749247550964, "learning_rate": 1.3871995446455686e-05, "loss": 0.6318, "step": 24377 }, { "epoch": 0.7488710717906184, "grad_norm": 0.36131855845451355, "learning_rate": 1.387154986219226e-05, "loss": 0.5924, "step": 24378 }, { "epoch": 0.7489017909255675, "grad_norm": 0.3411530554294586, "learning_rate": 1.387110426888656e-05, "loss": 0.4545, "step": 24379 }, { "epoch": 0.7489325100605166, "grad_norm": 0.3383624851703644, "learning_rate": 1.3870658666539622e-05, "loss": 0.5916, "step": 24380 }, { "epoch": 0.7489632291954659, "grad_norm": 0.40634238719940186, "learning_rate": 1.3870213055152494e-05, "loss": 0.6255, "step": 24381 }, { "epoch": 0.748993948330415, "grad_norm": 0.4075835645198822, "learning_rate": 1.3869767434726213e-05, "loss": 0.5317, "step": 24382 }, { "epoch": 0.7490246674653642, "grad_norm": 0.3464733362197876, "learning_rate": 1.3869321805261814e-05, "loss": 0.5199, "step": 24383 }, { "epoch": 0.7490553866003133, "grad_norm": 0.37293216586112976, "learning_rate": 1.3868876166760351e-05, "loss": 0.536, "step": 24384 }, { "epoch": 0.7490861057352625, "grad_norm": 0.36385083198547363, "learning_rate": 1.3868430519222855e-05, "loss": 0.5444, "step": 24385 }, { "epoch": 0.7491168248702117, "grad_norm": 0.365121990442276, "learning_rate": 1.386798486265037e-05, "loss": 0.527, "step": 24386 }, { "epoch": 0.7491475440051608, "grad_norm": 0.41637900471687317, "learning_rate": 1.3867539197043935e-05, "loss": 0.5266, "step": 24387 }, { "epoch": 0.7491782631401099, "grad_norm": 0.3379952907562256, "learning_rate": 1.3867093522404593e-05, "loss": 0.5258, "step": 24388 }, { "epoch": 0.7492089822750592, "grad_norm": 0.42640504240989685, "learning_rate": 1.3866647838733385e-05, "loss": 0.5838, "step": 24389 }, { "epoch": 0.7492397014100083, "grad_norm": 0.35216599702835083, "learning_rate": 1.3866202146031348e-05, "loss": 0.537, "step": 24390 }, { "epoch": 0.7492704205449574, "grad_norm": 0.37318122386932373, "learning_rate": 1.386575644429953e-05, "loss": 0.5294, "step": 24391 }, { "epoch": 0.7493011396799066, "grad_norm": 0.35161611437797546, "learning_rate": 1.3865310733538964e-05, "loss": 0.572, "step": 24392 }, { "epoch": 0.7493318588148558, "grad_norm": 0.3731966018676758, "learning_rate": 1.3864865013750697e-05, "loss": 0.5878, "step": 24393 }, { "epoch": 0.749362577949805, "grad_norm": 0.3706686496734619, "learning_rate": 1.3864419284935765e-05, "loss": 0.56, "step": 24394 }, { "epoch": 0.7493932970847541, "grad_norm": 0.3443260192871094, "learning_rate": 1.3863973547095215e-05, "loss": 0.5087, "step": 24395 }, { "epoch": 0.7494240162197032, "grad_norm": 0.3402037024497986, "learning_rate": 1.3863527800230085e-05, "loss": 0.5129, "step": 24396 }, { "epoch": 0.7494547353546525, "grad_norm": 0.3844793140888214, "learning_rate": 1.3863082044341416e-05, "loss": 0.5278, "step": 24397 }, { "epoch": 0.7494854544896016, "grad_norm": 0.3740577697753906, "learning_rate": 1.3862636279430245e-05, "loss": 0.5471, "step": 24398 }, { "epoch": 0.7495161736245507, "grad_norm": 0.4002687633037567, "learning_rate": 1.3862190505497623e-05, "loss": 0.5716, "step": 24399 }, { "epoch": 0.7495468927594999, "grad_norm": 0.3604086935520172, "learning_rate": 1.3861744722544583e-05, "loss": 0.5864, "step": 24400 }, { "epoch": 0.749577611894449, "grad_norm": 0.4549020528793335, "learning_rate": 1.3861298930572165e-05, "loss": 0.5397, "step": 24401 }, { "epoch": 0.7496083310293982, "grad_norm": 0.40628865361213684, "learning_rate": 1.3860853129581417e-05, "loss": 0.5421, "step": 24402 }, { "epoch": 0.7496390501643474, "grad_norm": 0.34933483600616455, "learning_rate": 1.3860407319573377e-05, "loss": 0.4911, "step": 24403 }, { "epoch": 0.7496697692992965, "grad_norm": 0.39511898159980774, "learning_rate": 1.3859961500549087e-05, "loss": 0.6335, "step": 24404 }, { "epoch": 0.7497004884342456, "grad_norm": 0.39820635318756104, "learning_rate": 1.3859515672509587e-05, "loss": 0.6179, "step": 24405 }, { "epoch": 0.7497312075691949, "grad_norm": 0.3481858968734741, "learning_rate": 1.3859069835455915e-05, "loss": 0.5489, "step": 24406 }, { "epoch": 0.749761926704144, "grad_norm": 0.4438067674636841, "learning_rate": 1.3858623989389119e-05, "loss": 0.5729, "step": 24407 }, { "epoch": 0.7497926458390932, "grad_norm": 0.3442665934562683, "learning_rate": 1.3858178134310235e-05, "loss": 0.526, "step": 24408 }, { "epoch": 0.7498233649740423, "grad_norm": 0.3528478443622589, "learning_rate": 1.385773227022031e-05, "loss": 0.5409, "step": 24409 }, { "epoch": 0.7498540841089915, "grad_norm": 0.37678536772727966, "learning_rate": 1.385728639712038e-05, "loss": 0.5374, "step": 24410 }, { "epoch": 0.7498848032439407, "grad_norm": 0.3674841523170471, "learning_rate": 1.3856840515011489e-05, "loss": 0.5634, "step": 24411 }, { "epoch": 0.7499155223788898, "grad_norm": 0.4009157121181488, "learning_rate": 1.3856394623894676e-05, "loss": 0.5289, "step": 24412 }, { "epoch": 0.7499462415138389, "grad_norm": 0.3945879638195038, "learning_rate": 1.3855948723770987e-05, "loss": 0.6403, "step": 24413 }, { "epoch": 0.7499769606487882, "grad_norm": 0.36867228150367737, "learning_rate": 1.3855502814641457e-05, "loss": 0.6203, "step": 24414 }, { "epoch": 0.7500076797837373, "grad_norm": 0.37660765647888184, "learning_rate": 1.3855056896507132e-05, "loss": 0.5586, "step": 24415 }, { "epoch": 0.7500383989186864, "grad_norm": 0.3432517647743225, "learning_rate": 1.3854610969369055e-05, "loss": 0.5546, "step": 24416 }, { "epoch": 0.7500691180536356, "grad_norm": 0.3637527823448181, "learning_rate": 1.385416503322826e-05, "loss": 0.5536, "step": 24417 }, { "epoch": 0.7500998371885848, "grad_norm": 0.3998635411262512, "learning_rate": 1.3853719088085797e-05, "loss": 0.5822, "step": 24418 }, { "epoch": 0.750130556323534, "grad_norm": 0.37222814559936523, "learning_rate": 1.38532731339427e-05, "loss": 0.5312, "step": 24419 }, { "epoch": 0.7501612754584831, "grad_norm": 0.35037025809288025, "learning_rate": 1.3852827170800019e-05, "loss": 0.6126, "step": 24420 }, { "epoch": 0.7501919945934322, "grad_norm": 0.4001917541027069, "learning_rate": 1.3852381198658789e-05, "loss": 0.5514, "step": 24421 }, { "epoch": 0.7502227137283815, "grad_norm": 0.47321292757987976, "learning_rate": 1.3851935217520054e-05, "loss": 0.6186, "step": 24422 }, { "epoch": 0.7502534328633306, "grad_norm": 0.3437284827232361, "learning_rate": 1.3851489227384857e-05, "loss": 0.5936, "step": 24423 }, { "epoch": 0.7502841519982797, "grad_norm": 0.3314666450023651, "learning_rate": 1.3851043228254235e-05, "loss": 0.542, "step": 24424 }, { "epoch": 0.7503148711332289, "grad_norm": 0.33777546882629395, "learning_rate": 1.3850597220129234e-05, "loss": 0.494, "step": 24425 }, { "epoch": 0.750345590268178, "grad_norm": 0.3433629870414734, "learning_rate": 1.3850151203010893e-05, "loss": 0.526, "step": 24426 }, { "epoch": 0.7503763094031272, "grad_norm": 0.37354645133018494, "learning_rate": 1.3849705176900259e-05, "loss": 0.5112, "step": 24427 }, { "epoch": 0.7504070285380764, "grad_norm": 0.39258939027786255, "learning_rate": 1.3849259141798366e-05, "loss": 0.5191, "step": 24428 }, { "epoch": 0.7504377476730255, "grad_norm": 0.4133131206035614, "learning_rate": 1.3848813097706261e-05, "loss": 0.5875, "step": 24429 }, { "epoch": 0.7504684668079746, "grad_norm": 0.4157048463821411, "learning_rate": 1.3848367044624981e-05, "loss": 0.5749, "step": 24430 }, { "epoch": 0.7504991859429239, "grad_norm": 0.4071829617023468, "learning_rate": 1.3847920982555576e-05, "loss": 0.5361, "step": 24431 }, { "epoch": 0.750529905077873, "grad_norm": 0.3859230577945709, "learning_rate": 1.384747491149908e-05, "loss": 0.5289, "step": 24432 }, { "epoch": 0.7505606242128222, "grad_norm": 0.37909626960754395, "learning_rate": 1.3847028831456538e-05, "loss": 0.6059, "step": 24433 }, { "epoch": 0.7505913433477713, "grad_norm": 0.3416121304035187, "learning_rate": 1.384658274242899e-05, "loss": 0.466, "step": 24434 }, { "epoch": 0.7506220624827205, "grad_norm": 0.3269718885421753, "learning_rate": 1.3846136644417482e-05, "loss": 0.5806, "step": 24435 }, { "epoch": 0.7506527816176697, "grad_norm": 0.3470291495323181, "learning_rate": 1.3845690537423053e-05, "loss": 0.4903, "step": 24436 }, { "epoch": 0.7506835007526188, "grad_norm": 0.33253639936447144, "learning_rate": 1.3845244421446745e-05, "loss": 0.5293, "step": 24437 }, { "epoch": 0.7507142198875679, "grad_norm": 0.39228153228759766, "learning_rate": 1.3844798296489597e-05, "loss": 0.63, "step": 24438 }, { "epoch": 0.7507449390225172, "grad_norm": 0.35503003001213074, "learning_rate": 1.3844352162552654e-05, "loss": 0.5931, "step": 24439 }, { "epoch": 0.7507756581574663, "grad_norm": 0.33779534697532654, "learning_rate": 1.384390601963696e-05, "loss": 0.492, "step": 24440 }, { "epoch": 0.7508063772924154, "grad_norm": 0.33748894929885864, "learning_rate": 1.3843459867743558e-05, "loss": 0.5609, "step": 24441 }, { "epoch": 0.7508370964273646, "grad_norm": 0.385783314704895, "learning_rate": 1.3843013706873486e-05, "loss": 0.4792, "step": 24442 }, { "epoch": 0.7508678155623137, "grad_norm": 0.3793172538280487, "learning_rate": 1.3842567537027784e-05, "loss": 0.6186, "step": 24443 }, { "epoch": 0.750898534697263, "grad_norm": 0.3409092426300049, "learning_rate": 1.3842121358207499e-05, "loss": 0.5227, "step": 24444 }, { "epoch": 0.7509292538322121, "grad_norm": 0.3695496916770935, "learning_rate": 1.3841675170413673e-05, "loss": 0.6031, "step": 24445 }, { "epoch": 0.7509599729671612, "grad_norm": 0.37409430742263794, "learning_rate": 1.3841228973647343e-05, "loss": 0.5743, "step": 24446 }, { "epoch": 0.7509906921021104, "grad_norm": 0.3493788242340088, "learning_rate": 1.3840782767909555e-05, "loss": 0.5305, "step": 24447 }, { "epoch": 0.7510214112370596, "grad_norm": 0.3789497911930084, "learning_rate": 1.3840336553201352e-05, "loss": 0.5749, "step": 24448 }, { "epoch": 0.7510521303720087, "grad_norm": 0.373190701007843, "learning_rate": 1.3839890329523776e-05, "loss": 0.499, "step": 24449 }, { "epoch": 0.7510828495069579, "grad_norm": 0.3294575810432434, "learning_rate": 1.3839444096877864e-05, "loss": 0.4954, "step": 24450 }, { "epoch": 0.751113568641907, "grad_norm": 0.3312954306602478, "learning_rate": 1.3838997855264663e-05, "loss": 0.5499, "step": 24451 }, { "epoch": 0.7511442877768562, "grad_norm": 0.3626779317855835, "learning_rate": 1.3838551604685218e-05, "loss": 0.573, "step": 24452 }, { "epoch": 0.7511750069118054, "grad_norm": 0.3366241455078125, "learning_rate": 1.3838105345140565e-05, "loss": 0.5401, "step": 24453 }, { "epoch": 0.7512057260467545, "grad_norm": 0.3683604598045349, "learning_rate": 1.3837659076631751e-05, "loss": 0.573, "step": 24454 }, { "epoch": 0.7512364451817037, "grad_norm": 0.4167405068874359, "learning_rate": 1.3837212799159812e-05, "loss": 0.5266, "step": 24455 }, { "epoch": 0.7512671643166529, "grad_norm": 0.6818681359291077, "learning_rate": 1.3836766512725797e-05, "loss": 0.6535, "step": 24456 }, { "epoch": 0.751297883451602, "grad_norm": 0.43205317854881287, "learning_rate": 1.3836320217330746e-05, "loss": 0.5403, "step": 24457 }, { "epoch": 0.7513286025865512, "grad_norm": 0.3573950231075287, "learning_rate": 1.38358739129757e-05, "loss": 0.5571, "step": 24458 }, { "epoch": 0.7513593217215003, "grad_norm": 0.34494394063949585, "learning_rate": 1.3835427599661707e-05, "loss": 0.4658, "step": 24459 }, { "epoch": 0.7513900408564494, "grad_norm": 0.35267648100852966, "learning_rate": 1.3834981277389799e-05, "loss": 0.509, "step": 24460 }, { "epoch": 0.7514207599913987, "grad_norm": 0.33242514729499817, "learning_rate": 1.3834534946161028e-05, "loss": 0.5118, "step": 24461 }, { "epoch": 0.7514514791263478, "grad_norm": 0.3556055426597595, "learning_rate": 1.383408860597643e-05, "loss": 0.5656, "step": 24462 }, { "epoch": 0.7514821982612969, "grad_norm": 0.39833834767341614, "learning_rate": 1.3833642256837055e-05, "loss": 0.6123, "step": 24463 }, { "epoch": 0.7515129173962461, "grad_norm": 0.3810199201107025, "learning_rate": 1.3833195898743936e-05, "loss": 0.5987, "step": 24464 }, { "epoch": 0.7515436365311953, "grad_norm": 0.3774643838405609, "learning_rate": 1.3832749531698122e-05, "loss": 0.5641, "step": 24465 }, { "epoch": 0.7515743556661444, "grad_norm": 0.34536612033843994, "learning_rate": 1.3832303155700654e-05, "loss": 0.5235, "step": 24466 }, { "epoch": 0.7516050748010936, "grad_norm": 0.37235188484191895, "learning_rate": 1.3831856770752574e-05, "loss": 0.5406, "step": 24467 }, { "epoch": 0.7516357939360427, "grad_norm": 0.34628406167030334, "learning_rate": 1.3831410376854928e-05, "loss": 0.5772, "step": 24468 }, { "epoch": 0.751666513070992, "grad_norm": 0.35040482878685, "learning_rate": 1.3830963974008752e-05, "loss": 0.5354, "step": 24469 }, { "epoch": 0.7516972322059411, "grad_norm": 0.4151204228401184, "learning_rate": 1.3830517562215094e-05, "loss": 0.5631, "step": 24470 }, { "epoch": 0.7517279513408902, "grad_norm": 0.3208516240119934, "learning_rate": 1.3830071141474993e-05, "loss": 0.5509, "step": 24471 }, { "epoch": 0.7517586704758394, "grad_norm": 0.4101223945617676, "learning_rate": 1.3829624711789493e-05, "loss": 0.5433, "step": 24472 }, { "epoch": 0.7517893896107886, "grad_norm": 0.4279378354549408, "learning_rate": 1.3829178273159638e-05, "loss": 0.603, "step": 24473 }, { "epoch": 0.7518201087457377, "grad_norm": 0.3629553020000458, "learning_rate": 1.3828731825586468e-05, "loss": 0.6246, "step": 24474 }, { "epoch": 0.7518508278806869, "grad_norm": 0.34759655594825745, "learning_rate": 1.382828536907103e-05, "loss": 0.5519, "step": 24475 }, { "epoch": 0.751881547015636, "grad_norm": 0.34908056259155273, "learning_rate": 1.382783890361436e-05, "loss": 0.6737, "step": 24476 }, { "epoch": 0.7519122661505852, "grad_norm": 0.36967170238494873, "learning_rate": 1.3827392429217512e-05, "loss": 0.5436, "step": 24477 }, { "epoch": 0.7519429852855344, "grad_norm": 0.3748249411582947, "learning_rate": 1.3826945945881515e-05, "loss": 0.5705, "step": 24478 }, { "epoch": 0.7519737044204835, "grad_norm": 0.3832767903804779, "learning_rate": 1.3826499453607422e-05, "loss": 0.5568, "step": 24479 }, { "epoch": 0.7520044235554327, "grad_norm": 0.36699342727661133, "learning_rate": 1.3826052952396272e-05, "loss": 0.4963, "step": 24480 }, { "epoch": 0.7520351426903819, "grad_norm": 0.3853037655353546, "learning_rate": 1.3825606442249109e-05, "loss": 0.616, "step": 24481 }, { "epoch": 0.752065861825331, "grad_norm": 0.37182191014289856, "learning_rate": 1.3825159923166971e-05, "loss": 0.5073, "step": 24482 }, { "epoch": 0.7520965809602802, "grad_norm": 0.37940096855163574, "learning_rate": 1.3824713395150911e-05, "loss": 0.5432, "step": 24483 }, { "epoch": 0.7521273000952293, "grad_norm": 0.36670270562171936, "learning_rate": 1.382426685820196e-05, "loss": 0.5638, "step": 24484 }, { "epoch": 0.7521580192301784, "grad_norm": 0.3801240622997284, "learning_rate": 1.3823820312321171e-05, "loss": 0.5433, "step": 24485 }, { "epoch": 0.7521887383651277, "grad_norm": 0.36510324478149414, "learning_rate": 1.382337375750958e-05, "loss": 0.5526, "step": 24486 }, { "epoch": 0.7522194575000768, "grad_norm": 0.3597426116466522, "learning_rate": 1.3822927193768233e-05, "loss": 0.5513, "step": 24487 }, { "epoch": 0.7522501766350259, "grad_norm": 0.4063152074813843, "learning_rate": 1.3822480621098173e-05, "loss": 0.5463, "step": 24488 }, { "epoch": 0.7522808957699751, "grad_norm": 0.34734684228897095, "learning_rate": 1.382203403950044e-05, "loss": 0.6209, "step": 24489 }, { "epoch": 0.7523116149049243, "grad_norm": 0.3686735928058624, "learning_rate": 1.3821587448976084e-05, "loss": 0.4903, "step": 24490 }, { "epoch": 0.7523423340398734, "grad_norm": 0.37596747279167175, "learning_rate": 1.382114084952614e-05, "loss": 0.5626, "step": 24491 }, { "epoch": 0.7523730531748226, "grad_norm": 0.3711581230163574, "learning_rate": 1.3820694241151656e-05, "loss": 0.4962, "step": 24492 }, { "epoch": 0.7524037723097717, "grad_norm": 0.4185270369052887, "learning_rate": 1.3820247623853673e-05, "loss": 0.5966, "step": 24493 }, { "epoch": 0.752434491444721, "grad_norm": 0.369276225566864, "learning_rate": 1.3819800997633235e-05, "loss": 0.5251, "step": 24494 }, { "epoch": 0.7524652105796701, "grad_norm": 0.3428132236003876, "learning_rate": 1.3819354362491386e-05, "loss": 0.4765, "step": 24495 }, { "epoch": 0.7524959297146192, "grad_norm": 0.3991839587688446, "learning_rate": 1.3818907718429168e-05, "loss": 0.6353, "step": 24496 }, { "epoch": 0.7525266488495684, "grad_norm": 0.3577301800251007, "learning_rate": 1.3818461065447623e-05, "loss": 0.5528, "step": 24497 }, { "epoch": 0.7525573679845176, "grad_norm": 0.45897889137268066, "learning_rate": 1.3818014403547798e-05, "loss": 0.5312, "step": 24498 }, { "epoch": 0.7525880871194667, "grad_norm": 0.39265623688697815, "learning_rate": 1.3817567732730733e-05, "loss": 0.5316, "step": 24499 }, { "epoch": 0.7526188062544159, "grad_norm": 0.3536539077758789, "learning_rate": 1.381712105299747e-05, "loss": 0.5584, "step": 24500 }, { "epoch": 0.752649525389365, "grad_norm": 0.3660946190357208, "learning_rate": 1.3816674364349057e-05, "loss": 0.5149, "step": 24501 }, { "epoch": 0.7526802445243141, "grad_norm": 0.5963734984397888, "learning_rate": 1.3816227666786532e-05, "loss": 0.5684, "step": 24502 }, { "epoch": 0.7527109636592634, "grad_norm": 0.38786062598228455, "learning_rate": 1.3815780960310944e-05, "loss": 0.4734, "step": 24503 }, { "epoch": 0.7527416827942125, "grad_norm": 0.38837239146232605, "learning_rate": 1.381533424492333e-05, "loss": 0.5969, "step": 24504 }, { "epoch": 0.7527724019291617, "grad_norm": 0.40064579248428345, "learning_rate": 1.3814887520624735e-05, "loss": 0.5334, "step": 24505 }, { "epoch": 0.7528031210641108, "grad_norm": 0.3839111626148224, "learning_rate": 1.3814440787416208e-05, "loss": 0.5176, "step": 24506 }, { "epoch": 0.75283384019906, "grad_norm": 0.37009263038635254, "learning_rate": 1.3813994045298787e-05, "loss": 0.5247, "step": 24507 }, { "epoch": 0.7528645593340092, "grad_norm": 0.33917373418807983, "learning_rate": 1.3813547294273514e-05, "loss": 0.5768, "step": 24508 }, { "epoch": 0.7528952784689583, "grad_norm": 0.43069562315940857, "learning_rate": 1.3813100534341438e-05, "loss": 0.5575, "step": 24509 }, { "epoch": 0.7529259976039074, "grad_norm": 0.3448360860347748, "learning_rate": 1.3812653765503599e-05, "loss": 0.5152, "step": 24510 }, { "epoch": 0.7529567167388567, "grad_norm": 0.3584466874599457, "learning_rate": 1.3812206987761037e-05, "loss": 0.5398, "step": 24511 }, { "epoch": 0.7529874358738058, "grad_norm": 0.3485628366470337, "learning_rate": 1.3811760201114801e-05, "loss": 0.57, "step": 24512 }, { "epoch": 0.7530181550087549, "grad_norm": 0.3885654807090759, "learning_rate": 1.3811313405565936e-05, "loss": 0.5271, "step": 24513 }, { "epoch": 0.7530488741437041, "grad_norm": 0.3524733781814575, "learning_rate": 1.381086660111548e-05, "loss": 0.5264, "step": 24514 }, { "epoch": 0.7530795932786533, "grad_norm": 0.4210370182991028, "learning_rate": 1.381041978776448e-05, "loss": 0.5286, "step": 24515 }, { "epoch": 0.7531103124136024, "grad_norm": 0.39191532135009766, "learning_rate": 1.3809972965513976e-05, "loss": 0.5819, "step": 24516 }, { "epoch": 0.7531410315485516, "grad_norm": 0.3951699137687683, "learning_rate": 1.3809526134365016e-05, "loss": 0.5019, "step": 24517 }, { "epoch": 0.7531717506835007, "grad_norm": 0.42438504099845886, "learning_rate": 1.3809079294318642e-05, "loss": 0.4782, "step": 24518 }, { "epoch": 0.75320246981845, "grad_norm": 1.2248278856277466, "learning_rate": 1.3808632445375898e-05, "loss": 0.5372, "step": 24519 }, { "epoch": 0.7532331889533991, "grad_norm": 0.33487367630004883, "learning_rate": 1.3808185587537826e-05, "loss": 0.5579, "step": 24520 }, { "epoch": 0.7532639080883482, "grad_norm": 0.38232874870300293, "learning_rate": 1.380773872080547e-05, "loss": 0.6079, "step": 24521 }, { "epoch": 0.7532946272232974, "grad_norm": 0.5000663995742798, "learning_rate": 1.3807291845179876e-05, "loss": 0.5064, "step": 24522 }, { "epoch": 0.7533253463582466, "grad_norm": 0.33801260590553284, "learning_rate": 1.3806844960662082e-05, "loss": 0.5336, "step": 24523 }, { "epoch": 0.7533560654931957, "grad_norm": 0.4013778269290924, "learning_rate": 1.3806398067253139e-05, "loss": 0.5409, "step": 24524 }, { "epoch": 0.7533867846281449, "grad_norm": 0.3453293442726135, "learning_rate": 1.3805951164954089e-05, "loss": 0.6563, "step": 24525 }, { "epoch": 0.753417503763094, "grad_norm": 0.4392503798007965, "learning_rate": 1.3805504253765971e-05, "loss": 0.7089, "step": 24526 }, { "epoch": 0.7534482228980431, "grad_norm": 0.33924582600593567, "learning_rate": 1.3805057333689832e-05, "loss": 0.5807, "step": 24527 }, { "epoch": 0.7534789420329924, "grad_norm": 0.3394757807254791, "learning_rate": 1.3804610404726719e-05, "loss": 0.5976, "step": 24528 }, { "epoch": 0.7535096611679415, "grad_norm": 0.4053260087966919, "learning_rate": 1.3804163466877669e-05, "loss": 0.5679, "step": 24529 }, { "epoch": 0.7535403803028907, "grad_norm": 0.3554181456565857, "learning_rate": 1.380371652014373e-05, "loss": 0.5991, "step": 24530 }, { "epoch": 0.7535710994378398, "grad_norm": 0.3307189643383026, "learning_rate": 1.3803269564525947e-05, "loss": 0.4754, "step": 24531 }, { "epoch": 0.753601818572789, "grad_norm": 0.4120634198188782, "learning_rate": 1.3802822600025361e-05, "loss": 0.5392, "step": 24532 }, { "epoch": 0.7536325377077382, "grad_norm": 0.377346932888031, "learning_rate": 1.3802375626643021e-05, "loss": 0.6105, "step": 24533 }, { "epoch": 0.7536632568426873, "grad_norm": 0.368417352437973, "learning_rate": 1.3801928644379963e-05, "loss": 0.5645, "step": 24534 }, { "epoch": 0.7536939759776364, "grad_norm": 0.3645906150341034, "learning_rate": 1.3801481653237236e-05, "loss": 0.6171, "step": 24535 }, { "epoch": 0.7537246951125857, "grad_norm": 0.3928091824054718, "learning_rate": 1.3801034653215883e-05, "loss": 0.549, "step": 24536 }, { "epoch": 0.7537554142475348, "grad_norm": 0.38169705867767334, "learning_rate": 1.3800587644316949e-05, "loss": 0.6071, "step": 24537 }, { "epoch": 0.7537861333824839, "grad_norm": 0.3516536355018616, "learning_rate": 1.3800140626541479e-05, "loss": 0.5861, "step": 24538 }, { "epoch": 0.7538168525174331, "grad_norm": 0.3577292263507843, "learning_rate": 1.379969359989051e-05, "loss": 0.5693, "step": 24539 }, { "epoch": 0.7538475716523823, "grad_norm": 0.7510275840759277, "learning_rate": 1.3799246564365096e-05, "loss": 0.5293, "step": 24540 }, { "epoch": 0.7538782907873315, "grad_norm": 0.37442976236343384, "learning_rate": 1.3798799519966274e-05, "loss": 0.537, "step": 24541 }, { "epoch": 0.7539090099222806, "grad_norm": 0.3962199091911316, "learning_rate": 1.3798352466695093e-05, "loss": 0.4867, "step": 24542 }, { "epoch": 0.7539397290572297, "grad_norm": 0.3865078091621399, "learning_rate": 1.3797905404552591e-05, "loss": 0.5421, "step": 24543 }, { "epoch": 0.753970448192179, "grad_norm": 0.36846667528152466, "learning_rate": 1.3797458333539817e-05, "loss": 0.5088, "step": 24544 }, { "epoch": 0.7540011673271281, "grad_norm": 0.4232780337333679, "learning_rate": 1.3797011253657813e-05, "loss": 0.5075, "step": 24545 }, { "epoch": 0.7540318864620772, "grad_norm": 0.3436441123485565, "learning_rate": 1.3796564164907627e-05, "loss": 0.4556, "step": 24546 }, { "epoch": 0.7540626055970264, "grad_norm": 0.3638877272605896, "learning_rate": 1.3796117067290298e-05, "loss": 0.6205, "step": 24547 }, { "epoch": 0.7540933247319755, "grad_norm": 0.3091943562030792, "learning_rate": 1.379566996080687e-05, "loss": 0.4994, "step": 24548 }, { "epoch": 0.7541240438669247, "grad_norm": 0.37205827236175537, "learning_rate": 1.3795222845458394e-05, "loss": 0.548, "step": 24549 }, { "epoch": 0.7541547630018739, "grad_norm": 0.4075019657611847, "learning_rate": 1.3794775721245907e-05, "loss": 0.6115, "step": 24550 }, { "epoch": 0.754185482136823, "grad_norm": 0.3681865334510803, "learning_rate": 1.379432858817046e-05, "loss": 0.4895, "step": 24551 }, { "epoch": 0.7542162012717721, "grad_norm": 0.33194610476493835, "learning_rate": 1.379388144623309e-05, "loss": 0.5195, "step": 24552 }, { "epoch": 0.7542469204067214, "grad_norm": 0.35041293501853943, "learning_rate": 1.3793434295434847e-05, "loss": 0.5421, "step": 24553 }, { "epoch": 0.7542776395416705, "grad_norm": 0.38195714354515076, "learning_rate": 1.3792987135776772e-05, "loss": 0.5078, "step": 24554 }, { "epoch": 0.7543083586766197, "grad_norm": 0.36041247844696045, "learning_rate": 1.379253996725991e-05, "loss": 0.6083, "step": 24555 }, { "epoch": 0.7543390778115688, "grad_norm": 0.35830357670783997, "learning_rate": 1.3792092789885309e-05, "loss": 0.5436, "step": 24556 }, { "epoch": 0.754369796946518, "grad_norm": 0.32707107067108154, "learning_rate": 1.379164560365401e-05, "loss": 0.4813, "step": 24557 }, { "epoch": 0.7544005160814672, "grad_norm": 0.3882702887058258, "learning_rate": 1.3791198408567054e-05, "loss": 0.5926, "step": 24558 }, { "epoch": 0.7544312352164163, "grad_norm": 0.371085524559021, "learning_rate": 1.3790751204625492e-05, "loss": 0.552, "step": 24559 }, { "epoch": 0.7544619543513654, "grad_norm": 0.34712913632392883, "learning_rate": 1.3790303991830367e-05, "loss": 0.4992, "step": 24560 }, { "epoch": 0.7544926734863147, "grad_norm": 0.37336304783821106, "learning_rate": 1.3789856770182722e-05, "loss": 0.6019, "step": 24561 }, { "epoch": 0.7545233926212638, "grad_norm": 0.41874390840530396, "learning_rate": 1.37894095396836e-05, "loss": 0.5932, "step": 24562 }, { "epoch": 0.7545541117562129, "grad_norm": 0.3589402437210083, "learning_rate": 1.3788962300334048e-05, "loss": 0.5783, "step": 24563 }, { "epoch": 0.7545848308911621, "grad_norm": 0.3509446680545807, "learning_rate": 1.3788515052135111e-05, "loss": 0.5415, "step": 24564 }, { "epoch": 0.7546155500261112, "grad_norm": 0.35005611181259155, "learning_rate": 1.378806779508783e-05, "loss": 0.5655, "step": 24565 }, { "epoch": 0.7546462691610605, "grad_norm": 0.4016057550907135, "learning_rate": 1.3787620529193252e-05, "loss": 0.552, "step": 24566 }, { "epoch": 0.7546769882960096, "grad_norm": 0.3264881372451782, "learning_rate": 1.3787173254452426e-05, "loss": 0.5169, "step": 24567 }, { "epoch": 0.7547077074309587, "grad_norm": 0.4087226092815399, "learning_rate": 1.3786725970866387e-05, "loss": 0.6217, "step": 24568 }, { "epoch": 0.754738426565908, "grad_norm": 0.3359224796295166, "learning_rate": 1.3786278678436191e-05, "loss": 0.5357, "step": 24569 }, { "epoch": 0.7547691457008571, "grad_norm": 0.36813831329345703, "learning_rate": 1.3785831377162873e-05, "loss": 0.5224, "step": 24570 }, { "epoch": 0.7547998648358062, "grad_norm": 0.38564664125442505, "learning_rate": 1.3785384067047482e-05, "loss": 0.6343, "step": 24571 }, { "epoch": 0.7548305839707554, "grad_norm": 0.35081249475479126, "learning_rate": 1.3784936748091062e-05, "loss": 0.5367, "step": 24572 }, { "epoch": 0.7548613031057045, "grad_norm": 0.3622435927391052, "learning_rate": 1.3784489420294658e-05, "loss": 0.4684, "step": 24573 }, { "epoch": 0.7548920222406537, "grad_norm": 0.5146843194961548, "learning_rate": 1.3784042083659315e-05, "loss": 0.5152, "step": 24574 }, { "epoch": 0.7549227413756029, "grad_norm": 0.4173651933670044, "learning_rate": 1.3783594738186076e-05, "loss": 0.5601, "step": 24575 }, { "epoch": 0.754953460510552, "grad_norm": 0.39865541458129883, "learning_rate": 1.378314738387599e-05, "loss": 0.5553, "step": 24576 }, { "epoch": 0.7549841796455011, "grad_norm": 0.37845489382743835, "learning_rate": 1.3782700020730097e-05, "loss": 0.5413, "step": 24577 }, { "epoch": 0.7550148987804504, "grad_norm": 0.37389978766441345, "learning_rate": 1.3782252648749445e-05, "loss": 0.4463, "step": 24578 }, { "epoch": 0.7550456179153995, "grad_norm": 0.34584665298461914, "learning_rate": 1.3781805267935076e-05, "loss": 0.5713, "step": 24579 }, { "epoch": 0.7550763370503487, "grad_norm": 0.34691545367240906, "learning_rate": 1.378135787828804e-05, "loss": 0.5233, "step": 24580 }, { "epoch": 0.7551070561852978, "grad_norm": 0.43991658091545105, "learning_rate": 1.3780910479809375e-05, "loss": 0.5062, "step": 24581 }, { "epoch": 0.755137775320247, "grad_norm": 0.3282316327095032, "learning_rate": 1.3780463072500132e-05, "loss": 0.508, "step": 24582 }, { "epoch": 0.7551684944551962, "grad_norm": 0.3909160792827606, "learning_rate": 1.3780015656361351e-05, "loss": 0.6066, "step": 24583 }, { "epoch": 0.7551992135901453, "grad_norm": 0.3782077133655548, "learning_rate": 1.377956823139408e-05, "loss": 0.5299, "step": 24584 }, { "epoch": 0.7552299327250944, "grad_norm": 0.3630470931529999, "learning_rate": 1.3779120797599365e-05, "loss": 0.4693, "step": 24585 }, { "epoch": 0.7552606518600437, "grad_norm": 0.36964765191078186, "learning_rate": 1.3778673354978249e-05, "loss": 0.5311, "step": 24586 }, { "epoch": 0.7552913709949928, "grad_norm": 0.4086628556251526, "learning_rate": 1.3778225903531776e-05, "loss": 0.5382, "step": 24587 }, { "epoch": 0.7553220901299419, "grad_norm": 0.3861559331417084, "learning_rate": 1.3777778443260993e-05, "loss": 0.6142, "step": 24588 }, { "epoch": 0.7553528092648911, "grad_norm": 0.432924747467041, "learning_rate": 1.3777330974166948e-05, "loss": 0.6056, "step": 24589 }, { "epoch": 0.7553835283998402, "grad_norm": 0.35025712847709656, "learning_rate": 1.3776883496250682e-05, "loss": 0.4886, "step": 24590 }, { "epoch": 0.7554142475347895, "grad_norm": 0.3509158194065094, "learning_rate": 1.3776436009513236e-05, "loss": 0.4994, "step": 24591 }, { "epoch": 0.7554449666697386, "grad_norm": 0.36490491032600403, "learning_rate": 1.3775988513955664e-05, "loss": 0.5449, "step": 24592 }, { "epoch": 0.7554756858046877, "grad_norm": 0.4141114354133606, "learning_rate": 1.3775541009579005e-05, "loss": 0.4518, "step": 24593 }, { "epoch": 0.7555064049396369, "grad_norm": 0.35492292046546936, "learning_rate": 1.3775093496384308e-05, "loss": 0.4247, "step": 24594 }, { "epoch": 0.7555371240745861, "grad_norm": 0.42005231976509094, "learning_rate": 1.3774645974372616e-05, "loss": 0.5978, "step": 24595 }, { "epoch": 0.7555678432095352, "grad_norm": 0.39017075300216675, "learning_rate": 1.3774198443544975e-05, "loss": 0.5478, "step": 24596 }, { "epoch": 0.7555985623444844, "grad_norm": 0.41742679476737976, "learning_rate": 1.3773750903902428e-05, "loss": 0.5071, "step": 24597 }, { "epoch": 0.7556292814794335, "grad_norm": 0.40256229043006897, "learning_rate": 1.3773303355446027e-05, "loss": 0.5861, "step": 24598 }, { "epoch": 0.7556600006143827, "grad_norm": 0.3811611533164978, "learning_rate": 1.3772855798176807e-05, "loss": 0.6273, "step": 24599 }, { "epoch": 0.7556907197493319, "grad_norm": 0.372891902923584, "learning_rate": 1.3772408232095822e-05, "loss": 0.6017, "step": 24600 }, { "epoch": 0.755721438884281, "grad_norm": 0.3506641983985901, "learning_rate": 1.3771960657204112e-05, "loss": 0.5612, "step": 24601 }, { "epoch": 0.7557521580192301, "grad_norm": 0.34423959255218506, "learning_rate": 1.3771513073502725e-05, "loss": 0.5447, "step": 24602 }, { "epoch": 0.7557828771541794, "grad_norm": 0.3480944037437439, "learning_rate": 1.3771065480992704e-05, "loss": 0.5212, "step": 24603 }, { "epoch": 0.7558135962891285, "grad_norm": 0.42084208130836487, "learning_rate": 1.3770617879675098e-05, "loss": 0.5766, "step": 24604 }, { "epoch": 0.7558443154240777, "grad_norm": 0.4609842002391815, "learning_rate": 1.3770170269550953e-05, "loss": 0.5642, "step": 24605 }, { "epoch": 0.7558750345590268, "grad_norm": 0.36247891187667847, "learning_rate": 1.3769722650621309e-05, "loss": 0.5965, "step": 24606 }, { "epoch": 0.755905753693976, "grad_norm": 0.3919740915298462, "learning_rate": 1.3769275022887217e-05, "loss": 0.5593, "step": 24607 }, { "epoch": 0.7559364728289252, "grad_norm": 0.38612210750579834, "learning_rate": 1.376882738634972e-05, "loss": 0.6317, "step": 24608 }, { "epoch": 0.7559671919638743, "grad_norm": 0.3746379017829895, "learning_rate": 1.3768379741009861e-05, "loss": 0.5668, "step": 24609 }, { "epoch": 0.7559979110988234, "grad_norm": 0.3791041970252991, "learning_rate": 1.3767932086868689e-05, "loss": 0.5293, "step": 24610 }, { "epoch": 0.7560286302337726, "grad_norm": 0.3256434500217438, "learning_rate": 1.3767484423927248e-05, "loss": 0.4957, "step": 24611 }, { "epoch": 0.7560593493687218, "grad_norm": 0.41084566712379456, "learning_rate": 1.3767036752186584e-05, "loss": 0.5457, "step": 24612 }, { "epoch": 0.7560900685036709, "grad_norm": 0.41853004693984985, "learning_rate": 1.3766589071647745e-05, "loss": 0.5926, "step": 24613 }, { "epoch": 0.7561207876386201, "grad_norm": 0.370084673166275, "learning_rate": 1.376614138231177e-05, "loss": 0.5581, "step": 24614 }, { "epoch": 0.7561515067735692, "grad_norm": 0.5044322609901428, "learning_rate": 1.3765693684179711e-05, "loss": 0.5526, "step": 24615 }, { "epoch": 0.7561822259085185, "grad_norm": 0.4410322904586792, "learning_rate": 1.3765245977252613e-05, "loss": 0.5034, "step": 24616 }, { "epoch": 0.7562129450434676, "grad_norm": 0.343758225440979, "learning_rate": 1.3764798261531516e-05, "loss": 0.5869, "step": 24617 }, { "epoch": 0.7562436641784167, "grad_norm": 0.3803333044052124, "learning_rate": 1.3764350537017474e-05, "loss": 0.5332, "step": 24618 }, { "epoch": 0.7562743833133659, "grad_norm": 0.4048638939857483, "learning_rate": 1.3763902803711528e-05, "loss": 0.525, "step": 24619 }, { "epoch": 0.7563051024483151, "grad_norm": 0.33623582124710083, "learning_rate": 1.376345506161472e-05, "loss": 0.5731, "step": 24620 }, { "epoch": 0.7563358215832642, "grad_norm": 0.42063024640083313, "learning_rate": 1.3763007310728103e-05, "loss": 0.6427, "step": 24621 }, { "epoch": 0.7563665407182134, "grad_norm": 0.4743943214416504, "learning_rate": 1.3762559551052719e-05, "loss": 0.6396, "step": 24622 }, { "epoch": 0.7563972598531625, "grad_norm": 0.38188815116882324, "learning_rate": 1.3762111782589615e-05, "loss": 0.5735, "step": 24623 }, { "epoch": 0.7564279789881116, "grad_norm": 0.3637358248233795, "learning_rate": 1.3761664005339834e-05, "loss": 0.5664, "step": 24624 }, { "epoch": 0.7564586981230609, "grad_norm": 0.37863919138908386, "learning_rate": 1.3761216219304428e-05, "loss": 0.5357, "step": 24625 }, { "epoch": 0.75648941725801, "grad_norm": 0.3564393222332001, "learning_rate": 1.3760768424484437e-05, "loss": 0.5192, "step": 24626 }, { "epoch": 0.7565201363929591, "grad_norm": 0.33604729175567627, "learning_rate": 1.3760320620880908e-05, "loss": 0.4963, "step": 24627 }, { "epoch": 0.7565508555279084, "grad_norm": 0.35422414541244507, "learning_rate": 1.3759872808494888e-05, "loss": 0.6358, "step": 24628 }, { "epoch": 0.7565815746628575, "grad_norm": 0.4212462604045868, "learning_rate": 1.375942498732742e-05, "loss": 0.538, "step": 24629 }, { "epoch": 0.7566122937978067, "grad_norm": 0.34271398186683655, "learning_rate": 1.3758977157379555e-05, "loss": 0.5966, "step": 24630 }, { "epoch": 0.7566430129327558, "grad_norm": 0.3660988509654999, "learning_rate": 1.3758529318652336e-05, "loss": 0.5106, "step": 24631 }, { "epoch": 0.7566737320677049, "grad_norm": 0.37593725323677063, "learning_rate": 1.3758081471146811e-05, "loss": 0.5313, "step": 24632 }, { "epoch": 0.7567044512026542, "grad_norm": 0.3675180673599243, "learning_rate": 1.375763361486402e-05, "loss": 0.6493, "step": 24633 }, { "epoch": 0.7567351703376033, "grad_norm": 0.37050655484199524, "learning_rate": 1.3757185749805018e-05, "loss": 0.6017, "step": 24634 }, { "epoch": 0.7567658894725524, "grad_norm": 0.3495364785194397, "learning_rate": 1.3756737875970843e-05, "loss": 0.5189, "step": 24635 }, { "epoch": 0.7567966086075016, "grad_norm": 0.39740848541259766, "learning_rate": 1.3756289993362547e-05, "loss": 0.6151, "step": 24636 }, { "epoch": 0.7568273277424508, "grad_norm": 0.3728899359703064, "learning_rate": 1.3755842101981172e-05, "loss": 0.6079, "step": 24637 }, { "epoch": 0.7568580468773999, "grad_norm": 0.3700109124183655, "learning_rate": 1.3755394201827764e-05, "loss": 0.5851, "step": 24638 }, { "epoch": 0.7568887660123491, "grad_norm": 0.36370837688446045, "learning_rate": 1.375494629290337e-05, "loss": 0.5564, "step": 24639 }, { "epoch": 0.7569194851472982, "grad_norm": 0.35751664638519287, "learning_rate": 1.375449837520904e-05, "loss": 0.6008, "step": 24640 }, { "epoch": 0.7569502042822475, "grad_norm": 0.34068563580513, "learning_rate": 1.3754050448745815e-05, "loss": 0.5798, "step": 24641 }, { "epoch": 0.7569809234171966, "grad_norm": 0.3393838405609131, "learning_rate": 1.3753602513514746e-05, "loss": 0.5014, "step": 24642 }, { "epoch": 0.7570116425521457, "grad_norm": 0.34170013666152954, "learning_rate": 1.3753154569516871e-05, "loss": 0.5198, "step": 24643 }, { "epoch": 0.7570423616870949, "grad_norm": 0.4836895167827606, "learning_rate": 1.3752706616753243e-05, "loss": 0.5096, "step": 24644 }, { "epoch": 0.757073080822044, "grad_norm": 0.40865612030029297, "learning_rate": 1.3752258655224909e-05, "loss": 0.4615, "step": 24645 }, { "epoch": 0.7571037999569932, "grad_norm": 0.3650774359703064, "learning_rate": 1.3751810684932911e-05, "loss": 0.5343, "step": 24646 }, { "epoch": 0.7571345190919424, "grad_norm": 0.35175323486328125, "learning_rate": 1.3751362705878298e-05, "loss": 0.5165, "step": 24647 }, { "epoch": 0.7571652382268915, "grad_norm": 0.39050573110580444, "learning_rate": 1.3750914718062116e-05, "loss": 0.5653, "step": 24648 }, { "epoch": 0.7571959573618406, "grad_norm": 0.3575815260410309, "learning_rate": 1.3750466721485407e-05, "loss": 0.5093, "step": 24649 }, { "epoch": 0.7572266764967899, "grad_norm": 0.34421318769454956, "learning_rate": 1.3750018716149228e-05, "loss": 0.4284, "step": 24650 }, { "epoch": 0.757257395631739, "grad_norm": 0.3792278468608856, "learning_rate": 1.3749570702054611e-05, "loss": 0.6298, "step": 24651 }, { "epoch": 0.7572881147666882, "grad_norm": 0.37028202414512634, "learning_rate": 1.3749122679202614e-05, "loss": 0.5207, "step": 24652 }, { "epoch": 0.7573188339016373, "grad_norm": 0.38020607829093933, "learning_rate": 1.374867464759428e-05, "loss": 0.5495, "step": 24653 }, { "epoch": 0.7573495530365865, "grad_norm": 0.3975091576576233, "learning_rate": 1.374822660723065e-05, "loss": 0.608, "step": 24654 }, { "epoch": 0.7573802721715357, "grad_norm": 0.3693642318248749, "learning_rate": 1.374777855811278e-05, "loss": 0.555, "step": 24655 }, { "epoch": 0.7574109913064848, "grad_norm": 0.364785760641098, "learning_rate": 1.3747330500241707e-05, "loss": 0.5675, "step": 24656 }, { "epoch": 0.7574417104414339, "grad_norm": 0.3712325096130371, "learning_rate": 1.3746882433618483e-05, "loss": 0.5974, "step": 24657 }, { "epoch": 0.7574724295763832, "grad_norm": 0.40133458375930786, "learning_rate": 1.3746434358244151e-05, "loss": 0.5054, "step": 24658 }, { "epoch": 0.7575031487113323, "grad_norm": 0.37120792269706726, "learning_rate": 1.3745986274119767e-05, "loss": 0.5815, "step": 24659 }, { "epoch": 0.7575338678462814, "grad_norm": 0.4319974482059479, "learning_rate": 1.3745538181246367e-05, "loss": 0.6051, "step": 24660 }, { "epoch": 0.7575645869812306, "grad_norm": 0.3738507330417633, "learning_rate": 1.3745090079625e-05, "loss": 0.4718, "step": 24661 }, { "epoch": 0.7575953061161798, "grad_norm": 0.3453543186187744, "learning_rate": 1.3744641969256716e-05, "loss": 0.4777, "step": 24662 }, { "epoch": 0.7576260252511289, "grad_norm": 0.3540833592414856, "learning_rate": 1.3744193850142555e-05, "loss": 0.5492, "step": 24663 }, { "epoch": 0.7576567443860781, "grad_norm": 0.42123907804489136, "learning_rate": 1.374374572228357e-05, "loss": 0.561, "step": 24664 }, { "epoch": 0.7576874635210272, "grad_norm": 0.35331836342811584, "learning_rate": 1.3743297585680805e-05, "loss": 0.6215, "step": 24665 }, { "epoch": 0.7577181826559765, "grad_norm": 0.3585405945777893, "learning_rate": 1.3742849440335308e-05, "loss": 0.5207, "step": 24666 }, { "epoch": 0.7577489017909256, "grad_norm": 0.3646279573440552, "learning_rate": 1.3742401286248124e-05, "loss": 0.6292, "step": 24667 }, { "epoch": 0.7577796209258747, "grad_norm": 0.5906225442886353, "learning_rate": 1.3741953123420301e-05, "loss": 0.5105, "step": 24668 }, { "epoch": 0.7578103400608239, "grad_norm": 0.5079466104507446, "learning_rate": 1.3741504951852885e-05, "loss": 0.5545, "step": 24669 }, { "epoch": 0.757841059195773, "grad_norm": 0.3575819730758667, "learning_rate": 1.3741056771546924e-05, "loss": 0.5567, "step": 24670 }, { "epoch": 0.7578717783307222, "grad_norm": 0.32348138093948364, "learning_rate": 1.3740608582503462e-05, "loss": 0.5627, "step": 24671 }, { "epoch": 0.7579024974656714, "grad_norm": 0.32824409008026123, "learning_rate": 1.3740160384723549e-05, "loss": 0.5305, "step": 24672 }, { "epoch": 0.7579332166006205, "grad_norm": 0.3415224552154541, "learning_rate": 1.3739712178208228e-05, "loss": 0.5246, "step": 24673 }, { "epoch": 0.7579639357355696, "grad_norm": 0.34043633937835693, "learning_rate": 1.3739263962958548e-05, "loss": 0.5308, "step": 24674 }, { "epoch": 0.7579946548705189, "grad_norm": 0.34388554096221924, "learning_rate": 1.3738815738975557e-05, "loss": 0.5269, "step": 24675 }, { "epoch": 0.758025374005468, "grad_norm": 0.3561536967754364, "learning_rate": 1.37383675062603e-05, "loss": 0.5424, "step": 24676 }, { "epoch": 0.7580560931404172, "grad_norm": 0.35920000076293945, "learning_rate": 1.3737919264813826e-05, "loss": 0.4608, "step": 24677 }, { "epoch": 0.7580868122753663, "grad_norm": 4.399122714996338, "learning_rate": 1.373747101463718e-05, "loss": 0.5751, "step": 24678 }, { "epoch": 0.7581175314103155, "grad_norm": 0.3823566138744354, "learning_rate": 1.3737022755731409e-05, "loss": 0.4867, "step": 24679 }, { "epoch": 0.7581482505452647, "grad_norm": 0.37654030323028564, "learning_rate": 1.3736574488097561e-05, "loss": 0.5055, "step": 24680 }, { "epoch": 0.7581789696802138, "grad_norm": 0.746712327003479, "learning_rate": 1.3736126211736681e-05, "loss": 0.5592, "step": 24681 }, { "epoch": 0.7582096888151629, "grad_norm": 0.39567965269088745, "learning_rate": 1.373567792664982e-05, "loss": 0.5306, "step": 24682 }, { "epoch": 0.7582404079501122, "grad_norm": 0.32040926814079285, "learning_rate": 1.3735229632838018e-05, "loss": 0.5476, "step": 24683 }, { "epoch": 0.7582711270850613, "grad_norm": 0.3692101836204529, "learning_rate": 1.3734781330302331e-05, "loss": 0.5484, "step": 24684 }, { "epoch": 0.7583018462200104, "grad_norm": 0.35414811968803406, "learning_rate": 1.3734333019043798e-05, "loss": 0.5089, "step": 24685 }, { "epoch": 0.7583325653549596, "grad_norm": 0.37896838784217834, "learning_rate": 1.3733884699063471e-05, "loss": 0.6082, "step": 24686 }, { "epoch": 0.7583632844899088, "grad_norm": 0.3504708409309387, "learning_rate": 1.3733436370362395e-05, "loss": 0.5764, "step": 24687 }, { "epoch": 0.7583940036248579, "grad_norm": 0.3656189739704132, "learning_rate": 1.3732988032941619e-05, "loss": 0.5293, "step": 24688 }, { "epoch": 0.7584247227598071, "grad_norm": 0.3868030309677124, "learning_rate": 1.3732539686802189e-05, "loss": 0.5236, "step": 24689 }, { "epoch": 0.7584554418947562, "grad_norm": 0.4085100591182709, "learning_rate": 1.3732091331945148e-05, "loss": 0.6275, "step": 24690 }, { "epoch": 0.7584861610297055, "grad_norm": 0.39009878039360046, "learning_rate": 1.3731642968371551e-05, "loss": 0.49, "step": 24691 }, { "epoch": 0.7585168801646546, "grad_norm": 0.3782714009284973, "learning_rate": 1.3731194596082439e-05, "loss": 0.5745, "step": 24692 }, { "epoch": 0.7585475992996037, "grad_norm": 0.35443204641342163, "learning_rate": 1.3730746215078861e-05, "loss": 0.6018, "step": 24693 }, { "epoch": 0.7585783184345529, "grad_norm": 0.39768823981285095, "learning_rate": 1.3730297825361865e-05, "loss": 0.543, "step": 24694 }, { "epoch": 0.758609037569502, "grad_norm": 0.36183488368988037, "learning_rate": 1.3729849426932497e-05, "loss": 0.4596, "step": 24695 }, { "epoch": 0.7586397567044512, "grad_norm": 0.3509426414966583, "learning_rate": 1.372940101979181e-05, "loss": 0.5361, "step": 24696 }, { "epoch": 0.7586704758394004, "grad_norm": 0.37832698225975037, "learning_rate": 1.3728952603940841e-05, "loss": 0.5616, "step": 24697 }, { "epoch": 0.7587011949743495, "grad_norm": 0.34347474575042725, "learning_rate": 1.3728504179380646e-05, "loss": 0.565, "step": 24698 }, { "epoch": 0.7587319141092986, "grad_norm": 0.3384833037853241, "learning_rate": 1.3728055746112268e-05, "loss": 0.5555, "step": 24699 }, { "epoch": 0.7587626332442479, "grad_norm": 0.3410390019416809, "learning_rate": 1.3727607304136756e-05, "loss": 0.4831, "step": 24700 }, { "epoch": 0.758793352379197, "grad_norm": 0.35866865515708923, "learning_rate": 1.3727158853455156e-05, "loss": 0.4779, "step": 24701 }, { "epoch": 0.7588240715141462, "grad_norm": 0.36627915501594543, "learning_rate": 1.3726710394068517e-05, "loss": 0.6301, "step": 24702 }, { "epoch": 0.7588547906490953, "grad_norm": 0.4269801676273346, "learning_rate": 1.3726261925977884e-05, "loss": 0.6425, "step": 24703 }, { "epoch": 0.7588855097840445, "grad_norm": 0.3572266101837158, "learning_rate": 1.3725813449184308e-05, "loss": 0.5066, "step": 24704 }, { "epoch": 0.7589162289189937, "grad_norm": 0.3484805226325989, "learning_rate": 1.3725364963688836e-05, "loss": 0.4833, "step": 24705 }, { "epoch": 0.7589469480539428, "grad_norm": 0.3666360080242157, "learning_rate": 1.3724916469492509e-05, "loss": 0.551, "step": 24706 }, { "epoch": 0.7589776671888919, "grad_norm": 0.4152306616306305, "learning_rate": 1.3724467966596385e-05, "loss": 0.614, "step": 24707 }, { "epoch": 0.7590083863238412, "grad_norm": 0.37621551752090454, "learning_rate": 1.3724019455001501e-05, "loss": 0.5589, "step": 24708 }, { "epoch": 0.7590391054587903, "grad_norm": 0.3752250075340271, "learning_rate": 1.3723570934708912e-05, "loss": 0.451, "step": 24709 }, { "epoch": 0.7590698245937394, "grad_norm": 0.35436180233955383, "learning_rate": 1.3723122405719662e-05, "loss": 0.5781, "step": 24710 }, { "epoch": 0.7591005437286886, "grad_norm": 0.3437903821468353, "learning_rate": 1.37226738680348e-05, "loss": 0.501, "step": 24711 }, { "epoch": 0.7591312628636377, "grad_norm": 0.39030855894088745, "learning_rate": 1.3722225321655373e-05, "loss": 0.6084, "step": 24712 }, { "epoch": 0.7591619819985869, "grad_norm": 0.33232155442237854, "learning_rate": 1.372177676658243e-05, "loss": 0.5821, "step": 24713 }, { "epoch": 0.7591927011335361, "grad_norm": 0.3579506278038025, "learning_rate": 1.3721328202817016e-05, "loss": 0.5636, "step": 24714 }, { "epoch": 0.7592234202684852, "grad_norm": 0.36260464787483215, "learning_rate": 1.372087963036018e-05, "loss": 0.4621, "step": 24715 }, { "epoch": 0.7592541394034344, "grad_norm": 0.37215670943260193, "learning_rate": 1.3720431049212971e-05, "loss": 0.5276, "step": 24716 }, { "epoch": 0.7592848585383836, "grad_norm": 0.3720552921295166, "learning_rate": 1.3719982459376433e-05, "loss": 0.6117, "step": 24717 }, { "epoch": 0.7593155776733327, "grad_norm": 0.44346538186073303, "learning_rate": 1.3719533860851622e-05, "loss": 0.6193, "step": 24718 }, { "epoch": 0.7593462968082819, "grad_norm": 0.4135790765285492, "learning_rate": 1.3719085253639572e-05, "loss": 0.5602, "step": 24719 }, { "epoch": 0.759377015943231, "grad_norm": 0.3746446967124939, "learning_rate": 1.3718636637741344e-05, "loss": 0.4426, "step": 24720 }, { "epoch": 0.7594077350781802, "grad_norm": 0.37930503487586975, "learning_rate": 1.3718188013157978e-05, "loss": 0.4872, "step": 24721 }, { "epoch": 0.7594384542131294, "grad_norm": 0.3612819015979767, "learning_rate": 1.3717739379890527e-05, "loss": 0.5958, "step": 24722 }, { "epoch": 0.7594691733480785, "grad_norm": 0.39230942726135254, "learning_rate": 1.3717290737940036e-05, "loss": 0.5063, "step": 24723 }, { "epoch": 0.7594998924830276, "grad_norm": 0.3259080648422241, "learning_rate": 1.3716842087307549e-05, "loss": 0.4664, "step": 24724 }, { "epoch": 0.7595306116179769, "grad_norm": 0.37829339504241943, "learning_rate": 1.371639342799412e-05, "loss": 0.5894, "step": 24725 }, { "epoch": 0.759561330752926, "grad_norm": 0.3447209894657135, "learning_rate": 1.3715944760000794e-05, "loss": 0.6296, "step": 24726 }, { "epoch": 0.7595920498878752, "grad_norm": 0.3273679316043854, "learning_rate": 1.3715496083328621e-05, "loss": 0.5272, "step": 24727 }, { "epoch": 0.7596227690228243, "grad_norm": 0.4126949608325958, "learning_rate": 1.3715047397978647e-05, "loss": 0.6267, "step": 24728 }, { "epoch": 0.7596534881577734, "grad_norm": 0.4043728709220886, "learning_rate": 1.371459870395192e-05, "loss": 0.6271, "step": 24729 }, { "epoch": 0.7596842072927227, "grad_norm": 0.3891482949256897, "learning_rate": 1.3714150001249487e-05, "loss": 0.5395, "step": 24730 }, { "epoch": 0.7597149264276718, "grad_norm": 0.4043339788913727, "learning_rate": 1.3713701289872396e-05, "loss": 0.5621, "step": 24731 }, { "epoch": 0.7597456455626209, "grad_norm": 0.35335537791252136, "learning_rate": 1.37132525698217e-05, "loss": 0.534, "step": 24732 }, { "epoch": 0.7597763646975702, "grad_norm": 0.36199015378952026, "learning_rate": 1.3712803841098441e-05, "loss": 0.6048, "step": 24733 }, { "epoch": 0.7598070838325193, "grad_norm": 0.3994925916194916, "learning_rate": 1.3712355103703671e-05, "loss": 0.5172, "step": 24734 }, { "epoch": 0.7598378029674684, "grad_norm": 0.36801770329475403, "learning_rate": 1.3711906357638437e-05, "loss": 0.5255, "step": 24735 }, { "epoch": 0.7598685221024176, "grad_norm": 0.34837645292282104, "learning_rate": 1.3711457602903784e-05, "loss": 0.577, "step": 24736 }, { "epoch": 0.7598992412373667, "grad_norm": 0.40588074922561646, "learning_rate": 1.3711008839500764e-05, "loss": 0.5197, "step": 24737 }, { "epoch": 0.7599299603723159, "grad_norm": 0.35169658064842224, "learning_rate": 1.3710560067430422e-05, "loss": 0.627, "step": 24738 }, { "epoch": 0.7599606795072651, "grad_norm": 0.6078711152076721, "learning_rate": 1.3710111286693808e-05, "loss": 0.5969, "step": 24739 }, { "epoch": 0.7599913986422142, "grad_norm": 0.38796669244766235, "learning_rate": 1.3709662497291974e-05, "loss": 0.5197, "step": 24740 }, { "epoch": 0.7600221177771634, "grad_norm": 0.41113144159317017, "learning_rate": 1.3709213699225964e-05, "loss": 0.564, "step": 24741 }, { "epoch": 0.7600528369121126, "grad_norm": 0.3513335883617401, "learning_rate": 1.370876489249682e-05, "loss": 0.4951, "step": 24742 }, { "epoch": 0.7600835560470617, "grad_norm": 0.3519977033138275, "learning_rate": 1.3708316077105602e-05, "loss": 0.5334, "step": 24743 }, { "epoch": 0.7601142751820109, "grad_norm": 0.3926602303981781, "learning_rate": 1.3707867253053349e-05, "loss": 0.5671, "step": 24744 }, { "epoch": 0.76014499431696, "grad_norm": 0.360254168510437, "learning_rate": 1.3707418420341116e-05, "loss": 0.5284, "step": 24745 }, { "epoch": 0.7601757134519092, "grad_norm": 0.36145323514938354, "learning_rate": 1.3706969578969947e-05, "loss": 0.5962, "step": 24746 }, { "epoch": 0.7602064325868584, "grad_norm": 0.36920902132987976, "learning_rate": 1.3706520728940892e-05, "loss": 0.5315, "step": 24747 }, { "epoch": 0.7602371517218075, "grad_norm": 0.39002272486686707, "learning_rate": 1.3706071870255e-05, "loss": 0.5457, "step": 24748 }, { "epoch": 0.7602678708567566, "grad_norm": 0.3350180685520172, "learning_rate": 1.3705623002913315e-05, "loss": 0.5146, "step": 24749 }, { "epoch": 0.7602985899917059, "grad_norm": 0.3666127324104309, "learning_rate": 1.3705174126916894e-05, "loss": 0.5941, "step": 24750 }, { "epoch": 0.760329309126655, "grad_norm": 0.39968305826187134, "learning_rate": 1.3704725242266774e-05, "loss": 0.5704, "step": 24751 }, { "epoch": 0.7603600282616042, "grad_norm": 0.4519888162612915, "learning_rate": 1.3704276348964015e-05, "loss": 0.657, "step": 24752 }, { "epoch": 0.7603907473965533, "grad_norm": 0.41979265213012695, "learning_rate": 1.3703827447009657e-05, "loss": 0.4978, "step": 24753 }, { "epoch": 0.7604214665315024, "grad_norm": 0.3288288116455078, "learning_rate": 1.3703378536404752e-05, "loss": 0.5573, "step": 24754 }, { "epoch": 0.7604521856664517, "grad_norm": 0.3760867714881897, "learning_rate": 1.3702929617150347e-05, "loss": 0.5466, "step": 24755 }, { "epoch": 0.7604829048014008, "grad_norm": 0.3581031560897827, "learning_rate": 1.3702480689247492e-05, "loss": 0.5815, "step": 24756 }, { "epoch": 0.7605136239363499, "grad_norm": 0.3575216233730316, "learning_rate": 1.3702031752697233e-05, "loss": 0.5769, "step": 24757 }, { "epoch": 0.7605443430712991, "grad_norm": 0.38963866233825684, "learning_rate": 1.3701582807500624e-05, "loss": 0.5859, "step": 24758 }, { "epoch": 0.7605750622062483, "grad_norm": 0.374544233083725, "learning_rate": 1.3701133853658709e-05, "loss": 0.555, "step": 24759 }, { "epoch": 0.7606057813411974, "grad_norm": 0.3145492970943451, "learning_rate": 1.3700684891172532e-05, "loss": 0.5481, "step": 24760 }, { "epoch": 0.7606365004761466, "grad_norm": 0.3493175208568573, "learning_rate": 1.3700235920043154e-05, "loss": 0.5139, "step": 24761 }, { "epoch": 0.7606672196110957, "grad_norm": 0.36457204818725586, "learning_rate": 1.3699786940271614e-05, "loss": 0.6081, "step": 24762 }, { "epoch": 0.760697938746045, "grad_norm": 0.35624608397483826, "learning_rate": 1.3699337951858964e-05, "loss": 0.4972, "step": 24763 }, { "epoch": 0.7607286578809941, "grad_norm": 0.41096481680870056, "learning_rate": 1.3698888954806249e-05, "loss": 0.6115, "step": 24764 }, { "epoch": 0.7607593770159432, "grad_norm": 0.32993367314338684, "learning_rate": 1.3698439949114523e-05, "loss": 0.5418, "step": 24765 }, { "epoch": 0.7607900961508924, "grad_norm": 0.3507934510707855, "learning_rate": 1.369799093478483e-05, "loss": 0.5308, "step": 24766 }, { "epoch": 0.7608208152858416, "grad_norm": 0.38486191630363464, "learning_rate": 1.3697541911818223e-05, "loss": 0.5587, "step": 24767 }, { "epoch": 0.7608515344207907, "grad_norm": 0.3966338038444519, "learning_rate": 1.3697092880215747e-05, "loss": 0.526, "step": 24768 }, { "epoch": 0.7608822535557399, "grad_norm": 0.3396579325199127, "learning_rate": 1.3696643839978453e-05, "loss": 0.5575, "step": 24769 }, { "epoch": 0.760912972690689, "grad_norm": 0.333711713552475, "learning_rate": 1.3696194791107392e-05, "loss": 0.5646, "step": 24770 }, { "epoch": 0.7609436918256381, "grad_norm": 0.36222055554389954, "learning_rate": 1.3695745733603607e-05, "loss": 0.5307, "step": 24771 }, { "epoch": 0.7609744109605874, "grad_norm": 0.3493693172931671, "learning_rate": 1.3695296667468149e-05, "loss": 0.6633, "step": 24772 }, { "epoch": 0.7610051300955365, "grad_norm": 0.33814433217048645, "learning_rate": 1.3694847592702067e-05, "loss": 0.5782, "step": 24773 }, { "epoch": 0.7610358492304856, "grad_norm": 0.34516939520835876, "learning_rate": 1.3694398509306415e-05, "loss": 0.5199, "step": 24774 }, { "epoch": 0.7610665683654348, "grad_norm": 0.4049151539802551, "learning_rate": 1.3693949417282235e-05, "loss": 0.5365, "step": 24775 }, { "epoch": 0.761097287500384, "grad_norm": 0.3904387056827545, "learning_rate": 1.3693500316630577e-05, "loss": 0.5341, "step": 24776 }, { "epoch": 0.7611280066353332, "grad_norm": 0.4669255018234253, "learning_rate": 1.3693051207352492e-05, "loss": 0.6664, "step": 24777 }, { "epoch": 0.7611587257702823, "grad_norm": 0.36748820543289185, "learning_rate": 1.3692602089449028e-05, "loss": 0.531, "step": 24778 }, { "epoch": 0.7611894449052314, "grad_norm": 0.4135892987251282, "learning_rate": 1.3692152962921235e-05, "loss": 0.564, "step": 24779 }, { "epoch": 0.7612201640401807, "grad_norm": 0.37145698070526123, "learning_rate": 1.3691703827770157e-05, "loss": 0.5763, "step": 24780 }, { "epoch": 0.7612508831751298, "grad_norm": 0.3697250783443451, "learning_rate": 1.369125468399685e-05, "loss": 0.5885, "step": 24781 }, { "epoch": 0.7612816023100789, "grad_norm": 2.2890210151672363, "learning_rate": 1.3690805531602358e-05, "loss": 0.5688, "step": 24782 }, { "epoch": 0.7613123214450281, "grad_norm": 0.37775078415870667, "learning_rate": 1.3690356370587736e-05, "loss": 0.626, "step": 24783 }, { "epoch": 0.7613430405799773, "grad_norm": 0.3679448962211609, "learning_rate": 1.3689907200954025e-05, "loss": 0.5048, "step": 24784 }, { "epoch": 0.7613737597149264, "grad_norm": 0.4059910178184509, "learning_rate": 1.3689458022702277e-05, "loss": 0.5296, "step": 24785 }, { "epoch": 0.7614044788498756, "grad_norm": 0.3528602421283722, "learning_rate": 1.3689008835833546e-05, "loss": 0.548, "step": 24786 }, { "epoch": 0.7614351979848247, "grad_norm": 0.3877754509449005, "learning_rate": 1.3688559640348876e-05, "loss": 0.5175, "step": 24787 }, { "epoch": 0.761465917119774, "grad_norm": 0.35404568910598755, "learning_rate": 1.3688110436249316e-05, "loss": 0.5781, "step": 24788 }, { "epoch": 0.7614966362547231, "grad_norm": 0.35698819160461426, "learning_rate": 1.3687661223535918e-05, "loss": 0.5758, "step": 24789 }, { "epoch": 0.7615273553896722, "grad_norm": 0.36116865277290344, "learning_rate": 1.3687212002209729e-05, "loss": 0.5458, "step": 24790 }, { "epoch": 0.7615580745246214, "grad_norm": 0.3766316771507263, "learning_rate": 1.3686762772271799e-05, "loss": 0.5059, "step": 24791 }, { "epoch": 0.7615887936595706, "grad_norm": 0.3502940833568573, "learning_rate": 1.3686313533723177e-05, "loss": 0.5239, "step": 24792 }, { "epoch": 0.7616195127945197, "grad_norm": 0.3610153794288635, "learning_rate": 1.3685864286564914e-05, "loss": 0.5502, "step": 24793 }, { "epoch": 0.7616502319294689, "grad_norm": 0.41297203302383423, "learning_rate": 1.3685415030798054e-05, "loss": 0.5491, "step": 24794 }, { "epoch": 0.761680951064418, "grad_norm": 0.37176039814949036, "learning_rate": 1.3684965766423652e-05, "loss": 0.6189, "step": 24795 }, { "epoch": 0.7617116701993671, "grad_norm": 1.7753949165344238, "learning_rate": 1.3684516493442755e-05, "loss": 0.5959, "step": 24796 }, { "epoch": 0.7617423893343164, "grad_norm": 0.32361510396003723, "learning_rate": 1.3684067211856413e-05, "loss": 0.412, "step": 24797 }, { "epoch": 0.7617731084692655, "grad_norm": 0.48383447527885437, "learning_rate": 1.3683617921665673e-05, "loss": 0.7219, "step": 24798 }, { "epoch": 0.7618038276042146, "grad_norm": 0.6256800293922424, "learning_rate": 1.3683168622871587e-05, "loss": 0.6611, "step": 24799 }, { "epoch": 0.7618345467391638, "grad_norm": 0.3636181652545929, "learning_rate": 1.3682719315475201e-05, "loss": 0.5581, "step": 24800 }, { "epoch": 0.761865265874113, "grad_norm": 0.3410240709781647, "learning_rate": 1.3682269999477571e-05, "loss": 0.4824, "step": 24801 }, { "epoch": 0.7618959850090622, "grad_norm": 0.3477421998977661, "learning_rate": 1.368182067487974e-05, "loss": 0.5152, "step": 24802 }, { "epoch": 0.7619267041440113, "grad_norm": 0.34192943572998047, "learning_rate": 1.3681371341682759e-05, "loss": 0.489, "step": 24803 }, { "epoch": 0.7619574232789604, "grad_norm": 0.36760637164115906, "learning_rate": 1.368092199988768e-05, "loss": 0.5444, "step": 24804 }, { "epoch": 0.7619881424139097, "grad_norm": 0.35735073685646057, "learning_rate": 1.3680472649495549e-05, "loss": 0.5714, "step": 24805 }, { "epoch": 0.7620188615488588, "grad_norm": 0.4344329535961151, "learning_rate": 1.3680023290507421e-05, "loss": 0.512, "step": 24806 }, { "epoch": 0.7620495806838079, "grad_norm": 0.3560599982738495, "learning_rate": 1.3679573922924336e-05, "loss": 0.5711, "step": 24807 }, { "epoch": 0.7620802998187571, "grad_norm": 0.37296566367149353, "learning_rate": 1.3679124546747353e-05, "loss": 0.5767, "step": 24808 }, { "epoch": 0.7621110189537063, "grad_norm": 0.3992645740509033, "learning_rate": 1.3678675161977514e-05, "loss": 0.5629, "step": 24809 }, { "epoch": 0.7621417380886554, "grad_norm": 0.3465641736984253, "learning_rate": 1.3678225768615876e-05, "loss": 0.5548, "step": 24810 }, { "epoch": 0.7621724572236046, "grad_norm": 0.3217983543872833, "learning_rate": 1.3677776366663484e-05, "loss": 0.5239, "step": 24811 }, { "epoch": 0.7622031763585537, "grad_norm": 0.38258057832717896, "learning_rate": 1.3677326956121389e-05, "loss": 0.5918, "step": 24812 }, { "epoch": 0.762233895493503, "grad_norm": 0.341693252325058, "learning_rate": 1.3676877536990638e-05, "loss": 0.4783, "step": 24813 }, { "epoch": 0.7622646146284521, "grad_norm": 0.35746219754219055, "learning_rate": 1.3676428109272283e-05, "loss": 0.5763, "step": 24814 }, { "epoch": 0.7622953337634012, "grad_norm": 0.3892875909805298, "learning_rate": 1.3675978672967376e-05, "loss": 0.5594, "step": 24815 }, { "epoch": 0.7623260528983504, "grad_norm": 0.35998618602752686, "learning_rate": 1.3675529228076961e-05, "loss": 0.5283, "step": 24816 }, { "epoch": 0.7623567720332995, "grad_norm": 0.45389288663864136, "learning_rate": 1.367507977460209e-05, "loss": 0.5446, "step": 24817 }, { "epoch": 0.7623874911682487, "grad_norm": 0.4153731167316437, "learning_rate": 1.3674630312543817e-05, "loss": 0.5125, "step": 24818 }, { "epoch": 0.7624182103031979, "grad_norm": 0.37831416726112366, "learning_rate": 1.3674180841903186e-05, "loss": 0.6109, "step": 24819 }, { "epoch": 0.762448929438147, "grad_norm": 0.3374532163143158, "learning_rate": 1.367373136268125e-05, "loss": 0.5814, "step": 24820 }, { "epoch": 0.7624796485730961, "grad_norm": 0.35326147079467773, "learning_rate": 1.3673281874879057e-05, "loss": 0.5677, "step": 24821 }, { "epoch": 0.7625103677080454, "grad_norm": 0.3729391396045685, "learning_rate": 1.3672832378497658e-05, "loss": 0.6102, "step": 24822 }, { "epoch": 0.7625410868429945, "grad_norm": 0.36181408166885376, "learning_rate": 1.3672382873538101e-05, "loss": 0.438, "step": 24823 }, { "epoch": 0.7625718059779436, "grad_norm": 0.3394329249858856, "learning_rate": 1.3671933360001441e-05, "loss": 0.5502, "step": 24824 }, { "epoch": 0.7626025251128928, "grad_norm": 0.5988556742668152, "learning_rate": 1.3671483837888721e-05, "loss": 0.4903, "step": 24825 }, { "epoch": 0.762633244247842, "grad_norm": 0.4955488443374634, "learning_rate": 1.3671034307200996e-05, "loss": 0.6093, "step": 24826 }, { "epoch": 0.7626639633827912, "grad_norm": 0.3913930058479309, "learning_rate": 1.3670584767939317e-05, "loss": 0.519, "step": 24827 }, { "epoch": 0.7626946825177403, "grad_norm": 0.4424270689487457, "learning_rate": 1.3670135220104726e-05, "loss": 0.5157, "step": 24828 }, { "epoch": 0.7627254016526894, "grad_norm": 0.36992400884628296, "learning_rate": 1.366968566369828e-05, "loss": 0.5436, "step": 24829 }, { "epoch": 0.7627561207876387, "grad_norm": 0.3579747974872589, "learning_rate": 1.3669236098721024e-05, "loss": 0.5594, "step": 24830 }, { "epoch": 0.7627868399225878, "grad_norm": 0.34570643305778503, "learning_rate": 1.3668786525174014e-05, "loss": 0.4537, "step": 24831 }, { "epoch": 0.7628175590575369, "grad_norm": 0.37378138303756714, "learning_rate": 1.3668336943058294e-05, "loss": 0.6537, "step": 24832 }, { "epoch": 0.7628482781924861, "grad_norm": 0.3509792983531952, "learning_rate": 1.366788735237492e-05, "loss": 0.4995, "step": 24833 }, { "epoch": 0.7628789973274352, "grad_norm": 0.3767419457435608, "learning_rate": 1.3667437753124937e-05, "loss": 0.5964, "step": 24834 }, { "epoch": 0.7629097164623844, "grad_norm": 0.387366384267807, "learning_rate": 1.3666988145309397e-05, "loss": 0.5636, "step": 24835 }, { "epoch": 0.7629404355973336, "grad_norm": 0.385545551776886, "learning_rate": 1.3666538528929349e-05, "loss": 0.5805, "step": 24836 }, { "epoch": 0.7629711547322827, "grad_norm": 0.4078127145767212, "learning_rate": 1.3666088903985849e-05, "loss": 0.6336, "step": 24837 }, { "epoch": 0.763001873867232, "grad_norm": 0.3799315094947815, "learning_rate": 1.3665639270479938e-05, "loss": 0.5636, "step": 24838 }, { "epoch": 0.7630325930021811, "grad_norm": 0.3548796772956848, "learning_rate": 1.366518962841267e-05, "loss": 0.5724, "step": 24839 }, { "epoch": 0.7630633121371302, "grad_norm": 0.36335787177085876, "learning_rate": 1.3664739977785098e-05, "loss": 0.541, "step": 24840 }, { "epoch": 0.7630940312720794, "grad_norm": 0.3298206925392151, "learning_rate": 1.3664290318598268e-05, "loss": 0.4419, "step": 24841 }, { "epoch": 0.7631247504070285, "grad_norm": 0.3757633864879608, "learning_rate": 1.3663840650853233e-05, "loss": 0.5667, "step": 24842 }, { "epoch": 0.7631554695419777, "grad_norm": 0.3452656865119934, "learning_rate": 1.3663390974551042e-05, "loss": 0.6197, "step": 24843 }, { "epoch": 0.7631861886769269, "grad_norm": 0.39064252376556396, "learning_rate": 1.3662941289692749e-05, "loss": 0.5558, "step": 24844 }, { "epoch": 0.763216907811876, "grad_norm": 0.5123160481452942, "learning_rate": 1.3662491596279398e-05, "loss": 0.6113, "step": 24845 }, { "epoch": 0.7632476269468251, "grad_norm": 0.4105462431907654, "learning_rate": 1.3662041894312039e-05, "loss": 0.5907, "step": 24846 }, { "epoch": 0.7632783460817744, "grad_norm": 0.3727964460849762, "learning_rate": 1.366159218379173e-05, "loss": 0.637, "step": 24847 }, { "epoch": 0.7633090652167235, "grad_norm": 0.31496661901474, "learning_rate": 1.3661142464719514e-05, "loss": 0.4611, "step": 24848 }, { "epoch": 0.7633397843516727, "grad_norm": 0.35486555099487305, "learning_rate": 1.3660692737096445e-05, "loss": 0.4917, "step": 24849 }, { "epoch": 0.7633705034866218, "grad_norm": 0.4140135943889618, "learning_rate": 1.3660243000923572e-05, "loss": 0.5165, "step": 24850 }, { "epoch": 0.763401222621571, "grad_norm": 0.3506843149662018, "learning_rate": 1.3659793256201947e-05, "loss": 0.523, "step": 24851 }, { "epoch": 0.7634319417565202, "grad_norm": 0.38134410977363586, "learning_rate": 1.3659343502932618e-05, "loss": 0.5974, "step": 24852 }, { "epoch": 0.7634626608914693, "grad_norm": 0.35846415162086487, "learning_rate": 1.3658893741116637e-05, "loss": 0.5665, "step": 24853 }, { "epoch": 0.7634933800264184, "grad_norm": 0.33520522713661194, "learning_rate": 1.3658443970755056e-05, "loss": 0.4796, "step": 24854 }, { "epoch": 0.7635240991613677, "grad_norm": 0.39846882224082947, "learning_rate": 1.3657994191848923e-05, "loss": 0.6543, "step": 24855 }, { "epoch": 0.7635548182963168, "grad_norm": 0.34378230571746826, "learning_rate": 1.3657544404399287e-05, "loss": 0.538, "step": 24856 }, { "epoch": 0.7635855374312659, "grad_norm": 0.36865437030792236, "learning_rate": 1.3657094608407198e-05, "loss": 0.5997, "step": 24857 }, { "epoch": 0.7636162565662151, "grad_norm": 0.3704356849193573, "learning_rate": 1.3656644803873716e-05, "loss": 0.5987, "step": 24858 }, { "epoch": 0.7636469757011642, "grad_norm": 0.3306543529033661, "learning_rate": 1.3656194990799878e-05, "loss": 0.5635, "step": 24859 }, { "epoch": 0.7636776948361134, "grad_norm": 0.35839489102363586, "learning_rate": 1.3655745169186747e-05, "loss": 0.5379, "step": 24860 }, { "epoch": 0.7637084139710626, "grad_norm": 0.3894651532173157, "learning_rate": 1.3655295339035363e-05, "loss": 0.5694, "step": 24861 }, { "epoch": 0.7637391331060117, "grad_norm": 0.4283374845981598, "learning_rate": 1.3654845500346787e-05, "loss": 0.5562, "step": 24862 }, { "epoch": 0.763769852240961, "grad_norm": 0.420047402381897, "learning_rate": 1.3654395653122061e-05, "loss": 0.4956, "step": 24863 }, { "epoch": 0.7638005713759101, "grad_norm": 0.37703144550323486, "learning_rate": 1.3653945797362239e-05, "loss": 0.5692, "step": 24864 }, { "epoch": 0.7638312905108592, "grad_norm": 0.3990285396575928, "learning_rate": 1.3653495933068373e-05, "loss": 0.4592, "step": 24865 }, { "epoch": 0.7638620096458084, "grad_norm": 0.36058861017227173, "learning_rate": 1.3653046060241509e-05, "loss": 0.563, "step": 24866 }, { "epoch": 0.7638927287807575, "grad_norm": 0.3791651427745819, "learning_rate": 1.3652596178882702e-05, "loss": 0.4521, "step": 24867 }, { "epoch": 0.7639234479157067, "grad_norm": 0.5185900926589966, "learning_rate": 1.3652146288993e-05, "loss": 0.5165, "step": 24868 }, { "epoch": 0.7639541670506559, "grad_norm": 0.34065425395965576, "learning_rate": 1.3651696390573459e-05, "loss": 0.6062, "step": 24869 }, { "epoch": 0.763984886185605, "grad_norm": 0.45362338423728943, "learning_rate": 1.3651246483625123e-05, "loss": 0.5933, "step": 24870 }, { "epoch": 0.7640156053205541, "grad_norm": 0.34306851029396057, "learning_rate": 1.3650796568149047e-05, "loss": 0.5071, "step": 24871 }, { "epoch": 0.7640463244555034, "grad_norm": 0.39697372913360596, "learning_rate": 1.3650346644146279e-05, "loss": 0.5498, "step": 24872 }, { "epoch": 0.7640770435904525, "grad_norm": 0.3477513790130615, "learning_rate": 1.3649896711617873e-05, "loss": 0.6226, "step": 24873 }, { "epoch": 0.7641077627254017, "grad_norm": 0.3893751800060272, "learning_rate": 1.364944677056488e-05, "loss": 0.5881, "step": 24874 }, { "epoch": 0.7641384818603508, "grad_norm": 0.3712425231933594, "learning_rate": 1.3648996820988348e-05, "loss": 0.4697, "step": 24875 }, { "epoch": 0.7641692009953, "grad_norm": 0.34565141797065735, "learning_rate": 1.3648546862889327e-05, "loss": 0.5767, "step": 24876 }, { "epoch": 0.7641999201302492, "grad_norm": 0.3542563319206238, "learning_rate": 1.364809689626887e-05, "loss": 0.5463, "step": 24877 }, { "epoch": 0.7642306392651983, "grad_norm": 0.3915903866291046, "learning_rate": 1.364764692112803e-05, "loss": 0.5885, "step": 24878 }, { "epoch": 0.7642613584001474, "grad_norm": 0.3511504828929901, "learning_rate": 1.3647196937467858e-05, "loss": 0.5469, "step": 24879 }, { "epoch": 0.7642920775350966, "grad_norm": 0.42392992973327637, "learning_rate": 1.3646746945289398e-05, "loss": 0.4637, "step": 24880 }, { "epoch": 0.7643227966700458, "grad_norm": 0.33258017897605896, "learning_rate": 1.3646296944593708e-05, "loss": 0.5171, "step": 24881 }, { "epoch": 0.7643535158049949, "grad_norm": 0.34693050384521484, "learning_rate": 1.3645846935381836e-05, "loss": 0.5361, "step": 24882 }, { "epoch": 0.7643842349399441, "grad_norm": 0.36420267820358276, "learning_rate": 1.3645396917654836e-05, "loss": 0.5725, "step": 24883 }, { "epoch": 0.7644149540748932, "grad_norm": 0.37480929493904114, "learning_rate": 1.3644946891413755e-05, "loss": 0.5902, "step": 24884 }, { "epoch": 0.7644456732098424, "grad_norm": 0.3494318723678589, "learning_rate": 1.3644496856659648e-05, "loss": 0.5322, "step": 24885 }, { "epoch": 0.7644763923447916, "grad_norm": 0.536396324634552, "learning_rate": 1.3644046813393562e-05, "loss": 0.5114, "step": 24886 }, { "epoch": 0.7645071114797407, "grad_norm": 0.3565390408039093, "learning_rate": 1.364359676161655e-05, "loss": 0.5254, "step": 24887 }, { "epoch": 0.7645378306146899, "grad_norm": 0.3672274649143219, "learning_rate": 1.3643146701329664e-05, "loss": 0.6287, "step": 24888 }, { "epoch": 0.7645685497496391, "grad_norm": 0.34631332755088806, "learning_rate": 1.3642696632533955e-05, "loss": 0.5738, "step": 24889 }, { "epoch": 0.7645992688845882, "grad_norm": 0.39493557810783386, "learning_rate": 1.3642246555230477e-05, "loss": 0.6058, "step": 24890 }, { "epoch": 0.7646299880195374, "grad_norm": 0.37140390276908875, "learning_rate": 1.3641796469420272e-05, "loss": 0.5652, "step": 24891 }, { "epoch": 0.7646607071544865, "grad_norm": 0.4242677390575409, "learning_rate": 1.3641346375104399e-05, "loss": 0.6004, "step": 24892 }, { "epoch": 0.7646914262894356, "grad_norm": 0.35220348834991455, "learning_rate": 1.3640896272283905e-05, "loss": 0.571, "step": 24893 }, { "epoch": 0.7647221454243849, "grad_norm": 0.3675151765346527, "learning_rate": 1.3640446160959846e-05, "loss": 0.5334, "step": 24894 }, { "epoch": 0.764752864559334, "grad_norm": 0.39021554589271545, "learning_rate": 1.3639996041133267e-05, "loss": 0.5827, "step": 24895 }, { "epoch": 0.7647835836942831, "grad_norm": 0.3392220735549927, "learning_rate": 1.363954591280523e-05, "loss": 0.4851, "step": 24896 }, { "epoch": 0.7648143028292324, "grad_norm": 0.3838150203227997, "learning_rate": 1.3639095775976776e-05, "loss": 0.5299, "step": 24897 }, { "epoch": 0.7648450219641815, "grad_norm": 0.3797084093093872, "learning_rate": 1.3638645630648958e-05, "loss": 0.5719, "step": 24898 }, { "epoch": 0.7648757410991307, "grad_norm": 0.382255882024765, "learning_rate": 1.3638195476822831e-05, "loss": 0.5584, "step": 24899 }, { "epoch": 0.7649064602340798, "grad_norm": 0.3634386360645294, "learning_rate": 1.3637745314499445e-05, "loss": 0.6002, "step": 24900 }, { "epoch": 0.7649371793690289, "grad_norm": 0.36770886182785034, "learning_rate": 1.363729514367985e-05, "loss": 0.5702, "step": 24901 }, { "epoch": 0.7649678985039782, "grad_norm": 0.3594289720058441, "learning_rate": 1.3636844964365095e-05, "loss": 0.5788, "step": 24902 }, { "epoch": 0.7649986176389273, "grad_norm": 0.3725540339946747, "learning_rate": 1.3636394776556239e-05, "loss": 0.4814, "step": 24903 }, { "epoch": 0.7650293367738764, "grad_norm": 0.45093923807144165, "learning_rate": 1.3635944580254328e-05, "loss": 0.5841, "step": 24904 }, { "epoch": 0.7650600559088256, "grad_norm": 0.35699784755706787, "learning_rate": 1.3635494375460414e-05, "loss": 0.4861, "step": 24905 }, { "epoch": 0.7650907750437748, "grad_norm": 0.3692987561225891, "learning_rate": 1.3635044162175547e-05, "loss": 0.5406, "step": 24906 }, { "epoch": 0.7651214941787239, "grad_norm": 0.4092206656932831, "learning_rate": 1.3634593940400785e-05, "loss": 0.6119, "step": 24907 }, { "epoch": 0.7651522133136731, "grad_norm": 0.35651594400405884, "learning_rate": 1.3634143710137173e-05, "loss": 0.4718, "step": 24908 }, { "epoch": 0.7651829324486222, "grad_norm": 0.3802582323551178, "learning_rate": 1.3633693471385764e-05, "loss": 0.5691, "step": 24909 }, { "epoch": 0.7652136515835714, "grad_norm": 0.3687051832675934, "learning_rate": 1.3633243224147608e-05, "loss": 0.5911, "step": 24910 }, { "epoch": 0.7652443707185206, "grad_norm": 0.3307221531867981, "learning_rate": 1.363279296842376e-05, "loss": 0.5207, "step": 24911 }, { "epoch": 0.7652750898534697, "grad_norm": 0.3604053258895874, "learning_rate": 1.363234270421527e-05, "loss": 0.5842, "step": 24912 }, { "epoch": 0.7653058089884189, "grad_norm": 0.36072754859924316, "learning_rate": 1.363189243152319e-05, "loss": 0.5535, "step": 24913 }, { "epoch": 0.765336528123368, "grad_norm": 0.37767407298088074, "learning_rate": 1.3631442150348573e-05, "loss": 0.5667, "step": 24914 }, { "epoch": 0.7653672472583172, "grad_norm": 0.4119238257408142, "learning_rate": 1.363099186069247e-05, "loss": 0.6069, "step": 24915 }, { "epoch": 0.7653979663932664, "grad_norm": 0.32469120621681213, "learning_rate": 1.363054156255593e-05, "loss": 0.5063, "step": 24916 }, { "epoch": 0.7654286855282155, "grad_norm": 0.3818756937980652, "learning_rate": 1.363009125594001e-05, "loss": 0.6019, "step": 24917 }, { "epoch": 0.7654594046631646, "grad_norm": 0.31135043501853943, "learning_rate": 1.3629640940845755e-05, "loss": 0.5072, "step": 24918 }, { "epoch": 0.7654901237981139, "grad_norm": 0.39141392707824707, "learning_rate": 1.3629190617274221e-05, "loss": 0.5416, "step": 24919 }, { "epoch": 0.765520842933063, "grad_norm": 0.36572346091270447, "learning_rate": 1.3628740285226458e-05, "loss": 0.361, "step": 24920 }, { "epoch": 0.7655515620680121, "grad_norm": 0.348009318113327, "learning_rate": 1.3628289944703518e-05, "loss": 0.6037, "step": 24921 }, { "epoch": 0.7655822812029613, "grad_norm": 0.3482336401939392, "learning_rate": 1.3627839595706456e-05, "loss": 0.586, "step": 24922 }, { "epoch": 0.7656130003379105, "grad_norm": 0.41492336988449097, "learning_rate": 1.3627389238236322e-05, "loss": 0.5347, "step": 24923 }, { "epoch": 0.7656437194728597, "grad_norm": 0.3689221441745758, "learning_rate": 1.3626938872294162e-05, "loss": 0.5379, "step": 24924 }, { "epoch": 0.7656744386078088, "grad_norm": 0.36454999446868896, "learning_rate": 1.3626488497881036e-05, "loss": 0.6837, "step": 24925 }, { "epoch": 0.7657051577427579, "grad_norm": 0.4205811023712158, "learning_rate": 1.3626038114997994e-05, "loss": 0.6159, "step": 24926 }, { "epoch": 0.7657358768777072, "grad_norm": 0.3462379574775696, "learning_rate": 1.3625587723646085e-05, "loss": 0.5324, "step": 24927 }, { "epoch": 0.7657665960126563, "grad_norm": 0.41707101464271545, "learning_rate": 1.3625137323826365e-05, "loss": 0.5761, "step": 24928 }, { "epoch": 0.7657973151476054, "grad_norm": 0.3887665569782257, "learning_rate": 1.3624686915539881e-05, "loss": 0.5603, "step": 24929 }, { "epoch": 0.7658280342825546, "grad_norm": 0.3955906629562378, "learning_rate": 1.3624236498787688e-05, "loss": 0.554, "step": 24930 }, { "epoch": 0.7658587534175038, "grad_norm": 0.39125382900238037, "learning_rate": 1.3623786073570839e-05, "loss": 0.6618, "step": 24931 }, { "epoch": 0.7658894725524529, "grad_norm": 0.3610444664955139, "learning_rate": 1.3623335639890382e-05, "loss": 0.4791, "step": 24932 }, { "epoch": 0.7659201916874021, "grad_norm": 0.3974887430667877, "learning_rate": 1.3622885197747375e-05, "loss": 0.6531, "step": 24933 }, { "epoch": 0.7659509108223512, "grad_norm": 0.3536822199821472, "learning_rate": 1.3622434747142863e-05, "loss": 0.6179, "step": 24934 }, { "epoch": 0.7659816299573003, "grad_norm": 0.3788353502750397, "learning_rate": 1.3621984288077905e-05, "loss": 0.5601, "step": 24935 }, { "epoch": 0.7660123490922496, "grad_norm": 0.35983753204345703, "learning_rate": 1.3621533820553547e-05, "loss": 0.5597, "step": 24936 }, { "epoch": 0.7660430682271987, "grad_norm": 0.4721589982509613, "learning_rate": 1.3621083344570844e-05, "loss": 0.5364, "step": 24937 }, { "epoch": 0.7660737873621479, "grad_norm": 0.41371145844459534, "learning_rate": 1.3620632860130849e-05, "loss": 0.5409, "step": 24938 }, { "epoch": 0.766104506497097, "grad_norm": 0.4147651791572571, "learning_rate": 1.3620182367234613e-05, "loss": 0.5407, "step": 24939 }, { "epoch": 0.7661352256320462, "grad_norm": 0.5441293716430664, "learning_rate": 1.3619731865883189e-05, "loss": 0.5402, "step": 24940 }, { "epoch": 0.7661659447669954, "grad_norm": 0.3566250801086426, "learning_rate": 1.3619281356077628e-05, "loss": 0.5502, "step": 24941 }, { "epoch": 0.7661966639019445, "grad_norm": 0.38307908177375793, "learning_rate": 1.3618830837818983e-05, "loss": 0.512, "step": 24942 }, { "epoch": 0.7662273830368936, "grad_norm": 0.3521329164505005, "learning_rate": 1.3618380311108305e-05, "loss": 0.6146, "step": 24943 }, { "epoch": 0.7662581021718429, "grad_norm": 0.3550097346305847, "learning_rate": 1.3617929775946647e-05, "loss": 0.5118, "step": 24944 }, { "epoch": 0.766288821306792, "grad_norm": 0.3726860582828522, "learning_rate": 1.361747923233506e-05, "loss": 0.54, "step": 24945 }, { "epoch": 0.7663195404417411, "grad_norm": 0.404603511095047, "learning_rate": 1.3617028680274601e-05, "loss": 0.5014, "step": 24946 }, { "epoch": 0.7663502595766903, "grad_norm": 0.39679834246635437, "learning_rate": 1.3616578119766318e-05, "loss": 0.5567, "step": 24947 }, { "epoch": 0.7663809787116395, "grad_norm": 0.356781005859375, "learning_rate": 1.3616127550811262e-05, "loss": 0.5022, "step": 24948 }, { "epoch": 0.7664116978465887, "grad_norm": 0.36006176471710205, "learning_rate": 1.361567697341049e-05, "loss": 0.5413, "step": 24949 }, { "epoch": 0.7664424169815378, "grad_norm": 0.3754239082336426, "learning_rate": 1.3615226387565049e-05, "loss": 0.5836, "step": 24950 }, { "epoch": 0.7664731361164869, "grad_norm": 0.3386780321598053, "learning_rate": 1.3614775793275997e-05, "loss": 0.465, "step": 24951 }, { "epoch": 0.7665038552514362, "grad_norm": 0.36957573890686035, "learning_rate": 1.3614325190544383e-05, "loss": 0.4945, "step": 24952 }, { "epoch": 0.7665345743863853, "grad_norm": 0.35073724389076233, "learning_rate": 1.3613874579371258e-05, "loss": 0.5885, "step": 24953 }, { "epoch": 0.7665652935213344, "grad_norm": 0.3627093732357025, "learning_rate": 1.361342395975768e-05, "loss": 0.5718, "step": 24954 }, { "epoch": 0.7665960126562836, "grad_norm": 0.35836711525917053, "learning_rate": 1.3612973331704695e-05, "loss": 0.5182, "step": 24955 }, { "epoch": 0.7666267317912328, "grad_norm": 0.38514816761016846, "learning_rate": 1.361252269521336e-05, "loss": 0.5838, "step": 24956 }, { "epoch": 0.7666574509261819, "grad_norm": 0.3539328873157501, "learning_rate": 1.3612072050284726e-05, "loss": 0.5341, "step": 24957 }, { "epoch": 0.7666881700611311, "grad_norm": 0.3474467396736145, "learning_rate": 1.3611621396919845e-05, "loss": 0.5582, "step": 24958 }, { "epoch": 0.7667188891960802, "grad_norm": 0.3332984447479248, "learning_rate": 1.361117073511977e-05, "loss": 0.5812, "step": 24959 }, { "epoch": 0.7667496083310295, "grad_norm": 0.3503728210926056, "learning_rate": 1.3610720064885556e-05, "loss": 0.5687, "step": 24960 }, { "epoch": 0.7667803274659786, "grad_norm": 0.3708641231060028, "learning_rate": 1.361026938621825e-05, "loss": 0.5411, "step": 24961 }, { "epoch": 0.7668110466009277, "grad_norm": 0.4060436189174652, "learning_rate": 1.3609818699118908e-05, "loss": 0.6188, "step": 24962 }, { "epoch": 0.7668417657358769, "grad_norm": 0.36980435252189636, "learning_rate": 1.3609368003588583e-05, "loss": 0.5654, "step": 24963 }, { "epoch": 0.766872484870826, "grad_norm": 0.4734106659889221, "learning_rate": 1.3608917299628328e-05, "loss": 0.5094, "step": 24964 }, { "epoch": 0.7669032040057752, "grad_norm": 0.47517213225364685, "learning_rate": 1.3608466587239191e-05, "loss": 0.5358, "step": 24965 }, { "epoch": 0.7669339231407244, "grad_norm": 0.37680187821388245, "learning_rate": 1.3608015866422233e-05, "loss": 0.5151, "step": 24966 }, { "epoch": 0.7669646422756735, "grad_norm": 0.34504809975624084, "learning_rate": 1.3607565137178498e-05, "loss": 0.4921, "step": 24967 }, { "epoch": 0.7669953614106226, "grad_norm": 0.3435715436935425, "learning_rate": 1.3607114399509045e-05, "loss": 0.6485, "step": 24968 }, { "epoch": 0.7670260805455719, "grad_norm": 0.5246020555496216, "learning_rate": 1.3606663653414924e-05, "loss": 0.6898, "step": 24969 }, { "epoch": 0.767056799680521, "grad_norm": 0.3481222689151764, "learning_rate": 1.3606212898897186e-05, "loss": 0.605, "step": 24970 }, { "epoch": 0.7670875188154701, "grad_norm": 0.39010292291641235, "learning_rate": 1.3605762135956888e-05, "loss": 0.5744, "step": 24971 }, { "epoch": 0.7671182379504193, "grad_norm": 0.3232949674129486, "learning_rate": 1.3605311364595081e-05, "loss": 0.488, "step": 24972 }, { "epoch": 0.7671489570853685, "grad_norm": 0.3646180033683777, "learning_rate": 1.3604860584812817e-05, "loss": 0.5927, "step": 24973 }, { "epoch": 0.7671796762203177, "grad_norm": 0.34251669049263, "learning_rate": 1.3604409796611149e-05, "loss": 0.5533, "step": 24974 }, { "epoch": 0.7672103953552668, "grad_norm": 0.3801148533821106, "learning_rate": 1.3603958999991132e-05, "loss": 0.595, "step": 24975 }, { "epoch": 0.7672411144902159, "grad_norm": 0.3655979633331299, "learning_rate": 1.3603508194953814e-05, "loss": 0.6026, "step": 24976 }, { "epoch": 0.7672718336251652, "grad_norm": 0.5854457020759583, "learning_rate": 1.3603057381500256e-05, "loss": 0.5321, "step": 24977 }, { "epoch": 0.7673025527601143, "grad_norm": 0.36726412177085876, "learning_rate": 1.3602606559631501e-05, "loss": 0.5757, "step": 24978 }, { "epoch": 0.7673332718950634, "grad_norm": 0.37196359038352966, "learning_rate": 1.3602155729348607e-05, "loss": 0.5442, "step": 24979 }, { "epoch": 0.7673639910300126, "grad_norm": 0.3720220625400543, "learning_rate": 1.360170489065263e-05, "loss": 0.6243, "step": 24980 }, { "epoch": 0.7673947101649617, "grad_norm": 0.38065969944000244, "learning_rate": 1.3601254043544616e-05, "loss": 0.6159, "step": 24981 }, { "epoch": 0.7674254292999109, "grad_norm": 0.3727025091648102, "learning_rate": 1.3600803188025624e-05, "loss": 0.5636, "step": 24982 }, { "epoch": 0.7674561484348601, "grad_norm": 0.38669082522392273, "learning_rate": 1.3600352324096705e-05, "loss": 0.5129, "step": 24983 }, { "epoch": 0.7674868675698092, "grad_norm": 0.3852042853832245, "learning_rate": 1.359990145175891e-05, "loss": 0.5824, "step": 24984 }, { "epoch": 0.7675175867047584, "grad_norm": 0.37443387508392334, "learning_rate": 1.3599450571013293e-05, "loss": 0.5913, "step": 24985 }, { "epoch": 0.7675483058397076, "grad_norm": 0.3735333979129791, "learning_rate": 1.359899968186091e-05, "loss": 0.5921, "step": 24986 }, { "epoch": 0.7675790249746567, "grad_norm": 0.348197877407074, "learning_rate": 1.359854878430281e-05, "loss": 0.472, "step": 24987 }, { "epoch": 0.7676097441096059, "grad_norm": 0.36187252402305603, "learning_rate": 1.3598097878340049e-05, "loss": 0.5717, "step": 24988 }, { "epoch": 0.767640463244555, "grad_norm": 0.32942429184913635, "learning_rate": 1.3597646963973682e-05, "loss": 0.5842, "step": 24989 }, { "epoch": 0.7676711823795042, "grad_norm": 0.3603527545928955, "learning_rate": 1.3597196041204754e-05, "loss": 0.5273, "step": 24990 }, { "epoch": 0.7677019015144534, "grad_norm": 0.3738291561603546, "learning_rate": 1.3596745110034326e-05, "loss": 0.5776, "step": 24991 }, { "epoch": 0.7677326206494025, "grad_norm": 0.4039291739463806, "learning_rate": 1.3596294170463449e-05, "loss": 0.5451, "step": 24992 }, { "epoch": 0.7677633397843516, "grad_norm": 0.3611345887184143, "learning_rate": 1.3595843222493175e-05, "loss": 0.578, "step": 24993 }, { "epoch": 0.7677940589193009, "grad_norm": 0.35797181725502014, "learning_rate": 1.359539226612456e-05, "loss": 0.5121, "step": 24994 }, { "epoch": 0.76782477805425, "grad_norm": 0.36148926615715027, "learning_rate": 1.3594941301358652e-05, "loss": 0.5011, "step": 24995 }, { "epoch": 0.7678554971891991, "grad_norm": 0.4143373370170593, "learning_rate": 1.359449032819651e-05, "loss": 0.6353, "step": 24996 }, { "epoch": 0.7678862163241483, "grad_norm": 0.3831959068775177, "learning_rate": 1.3594039346639183e-05, "loss": 0.5515, "step": 24997 }, { "epoch": 0.7679169354590974, "grad_norm": 0.34459251165390015, "learning_rate": 1.3593588356687728e-05, "loss": 0.5905, "step": 24998 }, { "epoch": 0.7679476545940467, "grad_norm": 0.38996800780296326, "learning_rate": 1.3593137358343195e-05, "loss": 0.5182, "step": 24999 }, { "epoch": 0.7679783737289958, "grad_norm": 0.349935919046402, "learning_rate": 1.359268635160664e-05, "loss": 0.529, "step": 25000 }, { "epoch": 0.7680090928639449, "grad_norm": 0.5437840223312378, "learning_rate": 1.3592235336479114e-05, "loss": 0.5372, "step": 25001 }, { "epoch": 0.7680398119988942, "grad_norm": 0.3480723202228546, "learning_rate": 1.3591784312961673e-05, "loss": 0.6336, "step": 25002 }, { "epoch": 0.7680705311338433, "grad_norm": 0.4094555675983429, "learning_rate": 1.3591333281055366e-05, "loss": 0.575, "step": 25003 }, { "epoch": 0.7681012502687924, "grad_norm": 0.3662281632423401, "learning_rate": 1.359088224076125e-05, "loss": 0.5528, "step": 25004 }, { "epoch": 0.7681319694037416, "grad_norm": 0.32674098014831543, "learning_rate": 1.359043119208038e-05, "loss": 0.4938, "step": 25005 }, { "epoch": 0.7681626885386907, "grad_norm": 0.3768030107021332, "learning_rate": 1.3589980135013805e-05, "loss": 0.6062, "step": 25006 }, { "epoch": 0.7681934076736399, "grad_norm": 0.36800655722618103, "learning_rate": 1.3589529069562581e-05, "loss": 0.5024, "step": 25007 }, { "epoch": 0.7682241268085891, "grad_norm": 0.408626526594162, "learning_rate": 1.3589077995727762e-05, "loss": 0.6229, "step": 25008 }, { "epoch": 0.7682548459435382, "grad_norm": 0.8404573798179626, "learning_rate": 1.3588626913510404e-05, "loss": 0.4169, "step": 25009 }, { "epoch": 0.7682855650784874, "grad_norm": 0.3987136781215668, "learning_rate": 1.3588175822911551e-05, "loss": 0.5684, "step": 25010 }, { "epoch": 0.7683162842134366, "grad_norm": 0.3881964385509491, "learning_rate": 1.3587724723932264e-05, "loss": 0.5264, "step": 25011 }, { "epoch": 0.7683470033483857, "grad_norm": 0.3535960614681244, "learning_rate": 1.35872736165736e-05, "loss": 0.5642, "step": 25012 }, { "epoch": 0.7683777224833349, "grad_norm": 0.369853675365448, "learning_rate": 1.3586822500836603e-05, "loss": 0.5477, "step": 25013 }, { "epoch": 0.768408441618284, "grad_norm": 0.40143442153930664, "learning_rate": 1.3586371376722337e-05, "loss": 0.5625, "step": 25014 }, { "epoch": 0.7684391607532332, "grad_norm": 0.3828197717666626, "learning_rate": 1.3585920244231843e-05, "loss": 0.5269, "step": 25015 }, { "epoch": 0.7684698798881824, "grad_norm": 0.35121649503707886, "learning_rate": 1.3585469103366186e-05, "loss": 0.5423, "step": 25016 }, { "epoch": 0.7685005990231315, "grad_norm": 0.38839641213417053, "learning_rate": 1.3585017954126414e-05, "loss": 0.4885, "step": 25017 }, { "epoch": 0.7685313181580806, "grad_norm": 0.43228060007095337, "learning_rate": 1.3584566796513585e-05, "loss": 0.5616, "step": 25018 }, { "epoch": 0.7685620372930299, "grad_norm": 0.3588654100894928, "learning_rate": 1.3584115630528746e-05, "loss": 0.5539, "step": 25019 }, { "epoch": 0.768592756427979, "grad_norm": 0.3803045153617859, "learning_rate": 1.3583664456172958e-05, "loss": 0.6331, "step": 25020 }, { "epoch": 0.7686234755629281, "grad_norm": 0.3838726282119751, "learning_rate": 1.3583213273447268e-05, "loss": 0.5212, "step": 25021 }, { "epoch": 0.7686541946978773, "grad_norm": 0.4725419878959656, "learning_rate": 1.3582762082352737e-05, "loss": 0.5444, "step": 25022 }, { "epoch": 0.7686849138328264, "grad_norm": 0.3355001211166382, "learning_rate": 1.3582310882890413e-05, "loss": 0.4858, "step": 25023 }, { "epoch": 0.7687156329677757, "grad_norm": 0.41640859842300415, "learning_rate": 1.3581859675061351e-05, "loss": 0.4675, "step": 25024 }, { "epoch": 0.7687463521027248, "grad_norm": 0.422065794467926, "learning_rate": 1.3581408458866605e-05, "loss": 0.6257, "step": 25025 }, { "epoch": 0.7687770712376739, "grad_norm": 0.4078989326953888, "learning_rate": 1.3580957234307231e-05, "loss": 0.6903, "step": 25026 }, { "epoch": 0.7688077903726231, "grad_norm": 0.3419201672077179, "learning_rate": 1.3580506001384282e-05, "loss": 0.5876, "step": 25027 }, { "epoch": 0.7688385095075723, "grad_norm": 0.34321269392967224, "learning_rate": 1.3580054760098808e-05, "loss": 0.5553, "step": 25028 }, { "epoch": 0.7688692286425214, "grad_norm": 0.3356737792491913, "learning_rate": 1.357960351045187e-05, "loss": 0.5528, "step": 25029 }, { "epoch": 0.7688999477774706, "grad_norm": 0.3701646625995636, "learning_rate": 1.3579152252444518e-05, "loss": 0.5955, "step": 25030 }, { "epoch": 0.7689306669124197, "grad_norm": 0.3505585491657257, "learning_rate": 1.3578700986077804e-05, "loss": 0.5652, "step": 25031 }, { "epoch": 0.7689613860473689, "grad_norm": 0.4406530559062958, "learning_rate": 1.3578249711352784e-05, "loss": 0.4708, "step": 25032 }, { "epoch": 0.7689921051823181, "grad_norm": 0.3742580711841583, "learning_rate": 1.3577798428270512e-05, "loss": 0.5343, "step": 25033 }, { "epoch": 0.7690228243172672, "grad_norm": 0.3983992338180542, "learning_rate": 1.3577347136832042e-05, "loss": 0.5841, "step": 25034 }, { "epoch": 0.7690535434522164, "grad_norm": 0.34734708070755005, "learning_rate": 1.3576895837038426e-05, "loss": 0.5747, "step": 25035 }, { "epoch": 0.7690842625871656, "grad_norm": 0.594036877155304, "learning_rate": 1.3576444528890724e-05, "loss": 0.4733, "step": 25036 }, { "epoch": 0.7691149817221147, "grad_norm": 0.40043583512306213, "learning_rate": 1.3575993212389982e-05, "loss": 0.5978, "step": 25037 }, { "epoch": 0.7691457008570639, "grad_norm": 0.4173559248447418, "learning_rate": 1.3575541887537263e-05, "loss": 0.5418, "step": 25038 }, { "epoch": 0.769176419992013, "grad_norm": 0.37327417731285095, "learning_rate": 1.357509055433361e-05, "loss": 0.5809, "step": 25039 }, { "epoch": 0.7692071391269621, "grad_norm": 0.4071439504623413, "learning_rate": 1.3574639212780088e-05, "loss": 0.5188, "step": 25040 }, { "epoch": 0.7692378582619114, "grad_norm": 0.4102451503276825, "learning_rate": 1.3574187862877746e-05, "loss": 0.5193, "step": 25041 }, { "epoch": 0.7692685773968605, "grad_norm": 0.35419175028800964, "learning_rate": 1.3573736504627634e-05, "loss": 0.6053, "step": 25042 }, { "epoch": 0.7692992965318096, "grad_norm": 0.3791486918926239, "learning_rate": 1.3573285138030817e-05, "loss": 0.4754, "step": 25043 }, { "epoch": 0.7693300156667588, "grad_norm": 0.3579414486885071, "learning_rate": 1.3572833763088338e-05, "loss": 0.5068, "step": 25044 }, { "epoch": 0.769360734801708, "grad_norm": 0.37055540084838867, "learning_rate": 1.357238237980126e-05, "loss": 0.5995, "step": 25045 }, { "epoch": 0.7693914539366572, "grad_norm": 0.3754219114780426, "learning_rate": 1.3571930988170632e-05, "loss": 0.6239, "step": 25046 }, { "epoch": 0.7694221730716063, "grad_norm": 0.3256882131099701, "learning_rate": 1.3571479588197512e-05, "loss": 0.4934, "step": 25047 }, { "epoch": 0.7694528922065554, "grad_norm": 0.5269814133644104, "learning_rate": 1.357102817988295e-05, "loss": 0.4834, "step": 25048 }, { "epoch": 0.7694836113415047, "grad_norm": 0.3708003759384155, "learning_rate": 1.3570576763228e-05, "loss": 0.5517, "step": 25049 }, { "epoch": 0.7695143304764538, "grad_norm": 0.36905425786972046, "learning_rate": 1.3570125338233721e-05, "loss": 0.5501, "step": 25050 }, { "epoch": 0.7695450496114029, "grad_norm": 0.3507625460624695, "learning_rate": 1.3569673904901164e-05, "loss": 0.6024, "step": 25051 }, { "epoch": 0.7695757687463521, "grad_norm": 0.3424147963523865, "learning_rate": 1.3569222463231386e-05, "loss": 0.5295, "step": 25052 }, { "epoch": 0.7696064878813013, "grad_norm": 0.3464011251926422, "learning_rate": 1.3568771013225439e-05, "loss": 0.5378, "step": 25053 }, { "epoch": 0.7696372070162504, "grad_norm": 0.373953253030777, "learning_rate": 1.356831955488438e-05, "loss": 0.5842, "step": 25054 }, { "epoch": 0.7696679261511996, "grad_norm": 0.4674237370491028, "learning_rate": 1.3567868088209256e-05, "loss": 0.49, "step": 25055 }, { "epoch": 0.7696986452861487, "grad_norm": 0.3209557831287384, "learning_rate": 1.3567416613201132e-05, "loss": 0.5008, "step": 25056 }, { "epoch": 0.7697293644210979, "grad_norm": 0.455188512802124, "learning_rate": 1.3566965129861054e-05, "loss": 0.5455, "step": 25057 }, { "epoch": 0.7697600835560471, "grad_norm": 0.37587565183639526, "learning_rate": 1.356651363819008e-05, "loss": 0.5359, "step": 25058 }, { "epoch": 0.7697908026909962, "grad_norm": 0.4054117500782013, "learning_rate": 1.3566062138189266e-05, "loss": 0.5234, "step": 25059 }, { "epoch": 0.7698215218259454, "grad_norm": 0.35899609327316284, "learning_rate": 1.3565610629859664e-05, "loss": 0.6039, "step": 25060 }, { "epoch": 0.7698522409608946, "grad_norm": 0.36488184332847595, "learning_rate": 1.3565159113202331e-05, "loss": 0.586, "step": 25061 }, { "epoch": 0.7698829600958437, "grad_norm": 0.46729856729507446, "learning_rate": 1.3564707588218318e-05, "loss": 0.5373, "step": 25062 }, { "epoch": 0.7699136792307929, "grad_norm": 0.3484896123409271, "learning_rate": 1.3564256054908682e-05, "loss": 0.5485, "step": 25063 }, { "epoch": 0.769944398365742, "grad_norm": 0.364341139793396, "learning_rate": 1.356380451327448e-05, "loss": 0.5555, "step": 25064 }, { "epoch": 0.7699751175006911, "grad_norm": 0.35936087369918823, "learning_rate": 1.3563352963316757e-05, "loss": 0.4535, "step": 25065 }, { "epoch": 0.7700058366356404, "grad_norm": 0.3345872163772583, "learning_rate": 1.3562901405036581e-05, "loss": 0.5324, "step": 25066 }, { "epoch": 0.7700365557705895, "grad_norm": 0.334243506193161, "learning_rate": 1.3562449838434995e-05, "loss": 0.5422, "step": 25067 }, { "epoch": 0.7700672749055386, "grad_norm": 0.3703859746456146, "learning_rate": 1.3561998263513062e-05, "loss": 0.5794, "step": 25068 }, { "epoch": 0.7700979940404878, "grad_norm": 0.388470321893692, "learning_rate": 1.3561546680271831e-05, "loss": 0.5125, "step": 25069 }, { "epoch": 0.770128713175437, "grad_norm": 0.37088581919670105, "learning_rate": 1.3561095088712362e-05, "loss": 0.5267, "step": 25070 }, { "epoch": 0.7701594323103862, "grad_norm": 0.3982499837875366, "learning_rate": 1.3560643488835703e-05, "loss": 0.4806, "step": 25071 }, { "epoch": 0.7701901514453353, "grad_norm": 0.3556261360645294, "learning_rate": 1.3560191880642914e-05, "loss": 0.5942, "step": 25072 }, { "epoch": 0.7702208705802844, "grad_norm": 0.38089048862457275, "learning_rate": 1.3559740264135049e-05, "loss": 0.6151, "step": 25073 }, { "epoch": 0.7702515897152337, "grad_norm": 0.3883792757987976, "learning_rate": 1.3559288639313162e-05, "loss": 0.5362, "step": 25074 }, { "epoch": 0.7702823088501828, "grad_norm": 0.36609527468681335, "learning_rate": 1.3558837006178307e-05, "loss": 0.5533, "step": 25075 }, { "epoch": 0.7703130279851319, "grad_norm": 0.3827510178089142, "learning_rate": 1.3558385364731536e-05, "loss": 0.5107, "step": 25076 }, { "epoch": 0.7703437471200811, "grad_norm": 0.3830660283565521, "learning_rate": 1.3557933714973914e-05, "loss": 0.5885, "step": 25077 }, { "epoch": 0.7703744662550303, "grad_norm": 0.3626924753189087, "learning_rate": 1.3557482056906487e-05, "loss": 0.5572, "step": 25078 }, { "epoch": 0.7704051853899794, "grad_norm": 0.377551794052124, "learning_rate": 1.355703039053031e-05, "loss": 0.6172, "step": 25079 }, { "epoch": 0.7704359045249286, "grad_norm": 0.4394228160381317, "learning_rate": 1.3556578715846442e-05, "loss": 0.5889, "step": 25080 }, { "epoch": 0.7704666236598777, "grad_norm": 0.38759759068489075, "learning_rate": 1.3556127032855936e-05, "loss": 0.5705, "step": 25081 }, { "epoch": 0.7704973427948268, "grad_norm": 0.35815930366516113, "learning_rate": 1.3555675341559849e-05, "loss": 0.5252, "step": 25082 }, { "epoch": 0.7705280619297761, "grad_norm": 0.3438185751438141, "learning_rate": 1.355522364195923e-05, "loss": 0.5092, "step": 25083 }, { "epoch": 0.7705587810647252, "grad_norm": 0.42313769459724426, "learning_rate": 1.355477193405514e-05, "loss": 0.5257, "step": 25084 }, { "epoch": 0.7705895001996744, "grad_norm": 0.3913126587867737, "learning_rate": 1.355432021784863e-05, "loss": 0.5003, "step": 25085 }, { "epoch": 0.7706202193346235, "grad_norm": 0.3594985008239746, "learning_rate": 1.355386849334076e-05, "loss": 0.6641, "step": 25086 }, { "epoch": 0.7706509384695727, "grad_norm": 0.34534427523612976, "learning_rate": 1.355341676053258e-05, "loss": 0.3843, "step": 25087 }, { "epoch": 0.7706816576045219, "grad_norm": 0.3628765344619751, "learning_rate": 1.3552965019425149e-05, "loss": 0.5709, "step": 25088 }, { "epoch": 0.770712376739471, "grad_norm": 0.3634449243545532, "learning_rate": 1.3552513270019516e-05, "loss": 0.6352, "step": 25089 }, { "epoch": 0.7707430958744201, "grad_norm": 0.3578571379184723, "learning_rate": 1.3552061512316743e-05, "loss": 0.5961, "step": 25090 }, { "epoch": 0.7707738150093694, "grad_norm": 0.3263482451438904, "learning_rate": 1.3551609746317883e-05, "loss": 0.5034, "step": 25091 }, { "epoch": 0.7708045341443185, "grad_norm": 0.4853779673576355, "learning_rate": 1.355115797202399e-05, "loss": 0.6338, "step": 25092 }, { "epoch": 0.7708352532792676, "grad_norm": 0.3736148178577423, "learning_rate": 1.355070618943612e-05, "loss": 0.5122, "step": 25093 }, { "epoch": 0.7708659724142168, "grad_norm": 0.3628641963005066, "learning_rate": 1.3550254398555325e-05, "loss": 0.6082, "step": 25094 }, { "epoch": 0.770896691549166, "grad_norm": 0.38551342487335205, "learning_rate": 1.3549802599382664e-05, "loss": 0.6026, "step": 25095 }, { "epoch": 0.7709274106841152, "grad_norm": 0.3508957624435425, "learning_rate": 1.354935079191919e-05, "loss": 0.5711, "step": 25096 }, { "epoch": 0.7709581298190643, "grad_norm": 0.4063415229320526, "learning_rate": 1.3548898976165962e-05, "loss": 0.5377, "step": 25097 }, { "epoch": 0.7709888489540134, "grad_norm": 0.4346368610858917, "learning_rate": 1.3548447152124032e-05, "loss": 0.5806, "step": 25098 }, { "epoch": 0.7710195680889627, "grad_norm": 0.3506107032299042, "learning_rate": 1.3547995319794454e-05, "loss": 0.5792, "step": 25099 }, { "epoch": 0.7710502872239118, "grad_norm": 0.3506418466567993, "learning_rate": 1.3547543479178286e-05, "loss": 0.5508, "step": 25100 }, { "epoch": 0.7710810063588609, "grad_norm": 0.38244837522506714, "learning_rate": 1.3547091630276582e-05, "loss": 0.5672, "step": 25101 }, { "epoch": 0.7711117254938101, "grad_norm": 0.5369490385055542, "learning_rate": 1.3546639773090401e-05, "loss": 0.5284, "step": 25102 }, { "epoch": 0.7711424446287592, "grad_norm": 0.3638518750667572, "learning_rate": 1.354618790762079e-05, "loss": 0.4987, "step": 25103 }, { "epoch": 0.7711731637637084, "grad_norm": 0.3941265642642975, "learning_rate": 1.3545736033868816e-05, "loss": 0.5434, "step": 25104 }, { "epoch": 0.7712038828986576, "grad_norm": 0.3749350309371948, "learning_rate": 1.3545284151835523e-05, "loss": 0.5687, "step": 25105 }, { "epoch": 0.7712346020336067, "grad_norm": 0.5561928749084473, "learning_rate": 1.3544832261521972e-05, "loss": 0.5415, "step": 25106 }, { "epoch": 0.7712653211685558, "grad_norm": 0.33463022112846375, "learning_rate": 1.3544380362929217e-05, "loss": 0.5041, "step": 25107 }, { "epoch": 0.7712960403035051, "grad_norm": 0.3655843138694763, "learning_rate": 1.3543928456058318e-05, "loss": 0.5679, "step": 25108 }, { "epoch": 0.7713267594384542, "grad_norm": 0.3489049971103668, "learning_rate": 1.3543476540910323e-05, "loss": 0.5075, "step": 25109 }, { "epoch": 0.7713574785734034, "grad_norm": 0.3696572184562683, "learning_rate": 1.3543024617486291e-05, "loss": 0.5419, "step": 25110 }, { "epoch": 0.7713881977083525, "grad_norm": 0.33624032139778137, "learning_rate": 1.3542572685787281e-05, "loss": 0.5379, "step": 25111 }, { "epoch": 0.7714189168433017, "grad_norm": 0.32390686869621277, "learning_rate": 1.354212074581434e-05, "loss": 0.534, "step": 25112 }, { "epoch": 0.7714496359782509, "grad_norm": 0.3823825418949127, "learning_rate": 1.354166879756853e-05, "loss": 0.6085, "step": 25113 }, { "epoch": 0.7714803551132, "grad_norm": 0.3734080493450165, "learning_rate": 1.3541216841050906e-05, "loss": 0.556, "step": 25114 }, { "epoch": 0.7715110742481491, "grad_norm": 0.3479909896850586, "learning_rate": 1.3540764876262524e-05, "loss": 0.4938, "step": 25115 }, { "epoch": 0.7715417933830984, "grad_norm": 0.34333518147468567, "learning_rate": 1.3540312903204438e-05, "loss": 0.4661, "step": 25116 }, { "epoch": 0.7715725125180475, "grad_norm": 0.3543761372566223, "learning_rate": 1.3539860921877703e-05, "loss": 0.5604, "step": 25117 }, { "epoch": 0.7716032316529966, "grad_norm": 0.35131391882896423, "learning_rate": 1.3539408932283378e-05, "loss": 0.5088, "step": 25118 }, { "epoch": 0.7716339507879458, "grad_norm": 0.40542617440223694, "learning_rate": 1.3538956934422512e-05, "loss": 0.5251, "step": 25119 }, { "epoch": 0.771664669922895, "grad_norm": 0.3643665611743927, "learning_rate": 1.3538504928296169e-05, "loss": 0.5212, "step": 25120 }, { "epoch": 0.7716953890578442, "grad_norm": 0.3585774600505829, "learning_rate": 1.3538052913905398e-05, "loss": 0.6189, "step": 25121 }, { "epoch": 0.7717261081927933, "grad_norm": 0.30950918793678284, "learning_rate": 1.353760089125126e-05, "loss": 0.5015, "step": 25122 }, { "epoch": 0.7717568273277424, "grad_norm": 0.37892773747444153, "learning_rate": 1.3537148860334803e-05, "loss": 0.6053, "step": 25123 }, { "epoch": 0.7717875464626917, "grad_norm": 0.39721906185150146, "learning_rate": 1.3536696821157091e-05, "loss": 0.5765, "step": 25124 }, { "epoch": 0.7718182655976408, "grad_norm": 0.3827485740184784, "learning_rate": 1.3536244773719176e-05, "loss": 0.6277, "step": 25125 }, { "epoch": 0.7718489847325899, "grad_norm": 0.47324299812316895, "learning_rate": 1.3535792718022116e-05, "loss": 0.5368, "step": 25126 }, { "epoch": 0.7718797038675391, "grad_norm": 0.34427133202552795, "learning_rate": 1.3535340654066965e-05, "loss": 0.5347, "step": 25127 }, { "epoch": 0.7719104230024882, "grad_norm": 0.37673231959342957, "learning_rate": 1.3534888581854777e-05, "loss": 0.5814, "step": 25128 }, { "epoch": 0.7719411421374374, "grad_norm": 0.42019209265708923, "learning_rate": 1.3534436501386612e-05, "loss": 0.4875, "step": 25129 }, { "epoch": 0.7719718612723866, "grad_norm": 0.36115792393684387, "learning_rate": 1.3533984412663521e-05, "loss": 0.5817, "step": 25130 }, { "epoch": 0.7720025804073357, "grad_norm": 0.3674725592136383, "learning_rate": 1.3533532315686563e-05, "loss": 0.5623, "step": 25131 }, { "epoch": 0.7720332995422848, "grad_norm": 0.37219691276550293, "learning_rate": 1.3533080210456796e-05, "loss": 0.5794, "step": 25132 }, { "epoch": 0.7720640186772341, "grad_norm": 0.36996641755104065, "learning_rate": 1.353262809697527e-05, "loss": 0.6163, "step": 25133 }, { "epoch": 0.7720947378121832, "grad_norm": 0.42371422052383423, "learning_rate": 1.353217597524305e-05, "loss": 0.5416, "step": 25134 }, { "epoch": 0.7721254569471324, "grad_norm": 0.4252770245075226, "learning_rate": 1.353172384526118e-05, "loss": 0.4982, "step": 25135 }, { "epoch": 0.7721561760820815, "grad_norm": 0.3493882715702057, "learning_rate": 1.3531271707030726e-05, "loss": 0.4959, "step": 25136 }, { "epoch": 0.7721868952170307, "grad_norm": 0.3705718517303467, "learning_rate": 1.353081956055274e-05, "loss": 0.5663, "step": 25137 }, { "epoch": 0.7722176143519799, "grad_norm": 0.3655765652656555, "learning_rate": 1.3530367405828277e-05, "loss": 0.5779, "step": 25138 }, { "epoch": 0.772248333486929, "grad_norm": 0.3717970848083496, "learning_rate": 1.3529915242858395e-05, "loss": 0.506, "step": 25139 }, { "epoch": 0.7722790526218781, "grad_norm": 0.3697565197944641, "learning_rate": 1.3529463071644151e-05, "loss": 0.5324, "step": 25140 }, { "epoch": 0.7723097717568274, "grad_norm": 0.4754319190979004, "learning_rate": 1.3529010892186596e-05, "loss": 0.591, "step": 25141 }, { "epoch": 0.7723404908917765, "grad_norm": 0.35305097699165344, "learning_rate": 1.3528558704486794e-05, "loss": 0.561, "step": 25142 }, { "epoch": 0.7723712100267256, "grad_norm": 0.3333267271518707, "learning_rate": 1.3528106508545791e-05, "loss": 0.5396, "step": 25143 }, { "epoch": 0.7724019291616748, "grad_norm": 0.360700786113739, "learning_rate": 1.3527654304364654e-05, "loss": 0.6128, "step": 25144 }, { "epoch": 0.772432648296624, "grad_norm": 0.4018217623233795, "learning_rate": 1.3527202091944434e-05, "loss": 0.5343, "step": 25145 }, { "epoch": 0.7724633674315732, "grad_norm": 0.515845537185669, "learning_rate": 1.3526749871286185e-05, "loss": 0.6422, "step": 25146 }, { "epoch": 0.7724940865665223, "grad_norm": 0.37371689081192017, "learning_rate": 1.3526297642390964e-05, "loss": 0.5668, "step": 25147 }, { "epoch": 0.7725248057014714, "grad_norm": 0.41571044921875, "learning_rate": 1.352584540525983e-05, "loss": 0.5498, "step": 25148 }, { "epoch": 0.7725555248364206, "grad_norm": 0.39307358860969543, "learning_rate": 1.3525393159893839e-05, "loss": 0.57, "step": 25149 }, { "epoch": 0.7725862439713698, "grad_norm": 0.32149896025657654, "learning_rate": 1.3524940906294041e-05, "loss": 0.5905, "step": 25150 }, { "epoch": 0.7726169631063189, "grad_norm": 0.3698086738586426, "learning_rate": 1.3524488644461504e-05, "loss": 0.4559, "step": 25151 }, { "epoch": 0.7726476822412681, "grad_norm": 0.3495207130908966, "learning_rate": 1.3524036374397275e-05, "loss": 0.5564, "step": 25152 }, { "epoch": 0.7726784013762172, "grad_norm": 0.3402637541294098, "learning_rate": 1.3523584096102413e-05, "loss": 0.5643, "step": 25153 }, { "epoch": 0.7727091205111664, "grad_norm": 0.3745013475418091, "learning_rate": 1.3523131809577976e-05, "loss": 0.5529, "step": 25154 }, { "epoch": 0.7727398396461156, "grad_norm": 0.362822562456131, "learning_rate": 1.3522679514825017e-05, "loss": 0.5845, "step": 25155 }, { "epoch": 0.7727705587810647, "grad_norm": 0.35359346866607666, "learning_rate": 1.3522227211844595e-05, "loss": 0.4878, "step": 25156 }, { "epoch": 0.7728012779160139, "grad_norm": 0.37099695205688477, "learning_rate": 1.3521774900637761e-05, "loss": 0.4887, "step": 25157 }, { "epoch": 0.7728319970509631, "grad_norm": 0.36537429690361023, "learning_rate": 1.3521322581205581e-05, "loss": 0.587, "step": 25158 }, { "epoch": 0.7728627161859122, "grad_norm": 0.31292474269866943, "learning_rate": 1.3520870253549103e-05, "loss": 0.5283, "step": 25159 }, { "epoch": 0.7728934353208614, "grad_norm": 0.34704798460006714, "learning_rate": 1.3520417917669391e-05, "loss": 0.5599, "step": 25160 }, { "epoch": 0.7729241544558105, "grad_norm": 0.33407044410705566, "learning_rate": 1.3519965573567491e-05, "loss": 0.5661, "step": 25161 }, { "epoch": 0.7729548735907597, "grad_norm": 0.3482835590839386, "learning_rate": 1.351951322124447e-05, "loss": 0.527, "step": 25162 }, { "epoch": 0.7729855927257089, "grad_norm": 0.4636492133140564, "learning_rate": 1.3519060860701379e-05, "loss": 0.6227, "step": 25163 }, { "epoch": 0.773016311860658, "grad_norm": 0.4204336404800415, "learning_rate": 1.3518608491939275e-05, "loss": 0.5296, "step": 25164 }, { "epoch": 0.7730470309956071, "grad_norm": 0.42802518606185913, "learning_rate": 1.3518156114959215e-05, "loss": 0.6183, "step": 25165 }, { "epoch": 0.7730777501305564, "grad_norm": 0.34051018953323364, "learning_rate": 1.3517703729762255e-05, "loss": 0.4807, "step": 25166 }, { "epoch": 0.7731084692655055, "grad_norm": 0.3681139349937439, "learning_rate": 1.3517251336349454e-05, "loss": 0.5061, "step": 25167 }, { "epoch": 0.7731391884004546, "grad_norm": 0.31925177574157715, "learning_rate": 1.3516798934721866e-05, "loss": 0.5194, "step": 25168 }, { "epoch": 0.7731699075354038, "grad_norm": 0.340689092874527, "learning_rate": 1.3516346524880548e-05, "loss": 0.5146, "step": 25169 }, { "epoch": 0.7732006266703529, "grad_norm": 0.4468967020511627, "learning_rate": 1.3515894106826558e-05, "loss": 0.5951, "step": 25170 }, { "epoch": 0.7732313458053022, "grad_norm": 0.37360602617263794, "learning_rate": 1.351544168056095e-05, "loss": 0.6344, "step": 25171 }, { "epoch": 0.7732620649402513, "grad_norm": 0.360920250415802, "learning_rate": 1.3514989246084786e-05, "loss": 0.5862, "step": 25172 }, { "epoch": 0.7732927840752004, "grad_norm": 0.3904268443584442, "learning_rate": 1.3514536803399115e-05, "loss": 0.5005, "step": 25173 }, { "epoch": 0.7733235032101496, "grad_norm": 0.424314945936203, "learning_rate": 1.3514084352505002e-05, "loss": 0.5092, "step": 25174 }, { "epoch": 0.7733542223450988, "grad_norm": 0.349274218082428, "learning_rate": 1.3513631893403495e-05, "loss": 0.5403, "step": 25175 }, { "epoch": 0.7733849414800479, "grad_norm": 0.35791221261024475, "learning_rate": 1.3513179426095659e-05, "loss": 0.5171, "step": 25176 }, { "epoch": 0.7734156606149971, "grad_norm": 0.33968386054039, "learning_rate": 1.3512726950582543e-05, "loss": 0.5408, "step": 25177 }, { "epoch": 0.7734463797499462, "grad_norm": 0.36292874813079834, "learning_rate": 1.3512274466865214e-05, "loss": 0.5715, "step": 25178 }, { "epoch": 0.7734770988848954, "grad_norm": 0.3391778767108917, "learning_rate": 1.3511821974944722e-05, "loss": 0.458, "step": 25179 }, { "epoch": 0.7735078180198446, "grad_norm": 0.35848307609558105, "learning_rate": 1.351136947482212e-05, "loss": 0.5543, "step": 25180 }, { "epoch": 0.7735385371547937, "grad_norm": 0.3925538957118988, "learning_rate": 1.3510916966498471e-05, "loss": 0.5665, "step": 25181 }, { "epoch": 0.7735692562897429, "grad_norm": 0.3666202425956726, "learning_rate": 1.3510464449974832e-05, "loss": 0.5702, "step": 25182 }, { "epoch": 0.773599975424692, "grad_norm": 0.5064810514450073, "learning_rate": 1.3510011925252255e-05, "loss": 0.5822, "step": 25183 }, { "epoch": 0.7736306945596412, "grad_norm": 0.36120423674583435, "learning_rate": 1.3509559392331802e-05, "loss": 0.5246, "step": 25184 }, { "epoch": 0.7736614136945904, "grad_norm": 0.3710765838623047, "learning_rate": 1.350910685121453e-05, "loss": 0.528, "step": 25185 }, { "epoch": 0.7736921328295395, "grad_norm": 0.39113208651542664, "learning_rate": 1.350865430190149e-05, "loss": 0.5967, "step": 25186 }, { "epoch": 0.7737228519644886, "grad_norm": 0.39366084337234497, "learning_rate": 1.3508201744393743e-05, "loss": 0.531, "step": 25187 }, { "epoch": 0.7737535710994379, "grad_norm": 0.5768016576766968, "learning_rate": 1.3507749178692346e-05, "loss": 0.5561, "step": 25188 }, { "epoch": 0.773784290234387, "grad_norm": 0.33897095918655396, "learning_rate": 1.3507296604798356e-05, "loss": 0.4866, "step": 25189 }, { "epoch": 0.7738150093693361, "grad_norm": 0.439957857131958, "learning_rate": 1.3506844022712832e-05, "loss": 0.5446, "step": 25190 }, { "epoch": 0.7738457285042853, "grad_norm": 0.3660014271736145, "learning_rate": 1.3506391432436827e-05, "loss": 0.5418, "step": 25191 }, { "epoch": 0.7738764476392345, "grad_norm": 0.36046698689460754, "learning_rate": 1.3505938833971402e-05, "loss": 0.5936, "step": 25192 }, { "epoch": 0.7739071667741836, "grad_norm": 0.37256497144699097, "learning_rate": 1.350548622731761e-05, "loss": 0.5671, "step": 25193 }, { "epoch": 0.7739378859091328, "grad_norm": 0.37172457575798035, "learning_rate": 1.3505033612476513e-05, "loss": 0.5398, "step": 25194 }, { "epoch": 0.7739686050440819, "grad_norm": 0.4176674485206604, "learning_rate": 1.350458098944916e-05, "loss": 0.601, "step": 25195 }, { "epoch": 0.7739993241790312, "grad_norm": 0.3282027840614319, "learning_rate": 1.3504128358236617e-05, "loss": 0.5401, "step": 25196 }, { "epoch": 0.7740300433139803, "grad_norm": 0.35868483781814575, "learning_rate": 1.350367571883994e-05, "loss": 0.5697, "step": 25197 }, { "epoch": 0.7740607624489294, "grad_norm": 0.33146336674690247, "learning_rate": 1.3503223071260178e-05, "loss": 0.4998, "step": 25198 }, { "epoch": 0.7740914815838786, "grad_norm": 0.4379097819328308, "learning_rate": 1.3502770415498396e-05, "loss": 0.5447, "step": 25199 }, { "epoch": 0.7741222007188278, "grad_norm": 0.34099557995796204, "learning_rate": 1.3502317751555649e-05, "loss": 0.5353, "step": 25200 }, { "epoch": 0.7741529198537769, "grad_norm": 0.4512154161930084, "learning_rate": 1.3501865079432994e-05, "loss": 0.5791, "step": 25201 }, { "epoch": 0.7741836389887261, "grad_norm": 0.35762810707092285, "learning_rate": 1.3501412399131488e-05, "loss": 0.5141, "step": 25202 }, { "epoch": 0.7742143581236752, "grad_norm": 0.4140133261680603, "learning_rate": 1.3500959710652192e-05, "loss": 0.5952, "step": 25203 }, { "epoch": 0.7742450772586243, "grad_norm": 0.3543786406517029, "learning_rate": 1.3500507013996159e-05, "loss": 0.5366, "step": 25204 }, { "epoch": 0.7742757963935736, "grad_norm": 0.3493157625198364, "learning_rate": 1.3500054309164443e-05, "loss": 0.5672, "step": 25205 }, { "epoch": 0.7743065155285227, "grad_norm": 0.3714223802089691, "learning_rate": 1.3499601596158112e-05, "loss": 0.5149, "step": 25206 }, { "epoch": 0.7743372346634719, "grad_norm": 0.4733324646949768, "learning_rate": 1.3499148874978211e-05, "loss": 0.5661, "step": 25207 }, { "epoch": 0.774367953798421, "grad_norm": 0.38066887855529785, "learning_rate": 1.349869614562581e-05, "loss": 0.5969, "step": 25208 }, { "epoch": 0.7743986729333702, "grad_norm": 0.36567452549934387, "learning_rate": 1.3498243408101955e-05, "loss": 0.5041, "step": 25209 }, { "epoch": 0.7744293920683194, "grad_norm": 0.31159812211990356, "learning_rate": 1.349779066240771e-05, "loss": 0.5187, "step": 25210 }, { "epoch": 0.7744601112032685, "grad_norm": 0.3633589744567871, "learning_rate": 1.3497337908544127e-05, "loss": 0.5678, "step": 25211 }, { "epoch": 0.7744908303382176, "grad_norm": 0.4474671483039856, "learning_rate": 1.3496885146512273e-05, "loss": 0.5063, "step": 25212 }, { "epoch": 0.7745215494731669, "grad_norm": 0.37399858236312866, "learning_rate": 1.3496432376313195e-05, "loss": 0.5879, "step": 25213 }, { "epoch": 0.774552268608116, "grad_norm": 0.3540458381175995, "learning_rate": 1.3495979597947959e-05, "loss": 0.5402, "step": 25214 }, { "epoch": 0.7745829877430651, "grad_norm": 0.3617362380027771, "learning_rate": 1.3495526811417618e-05, "loss": 0.5363, "step": 25215 }, { "epoch": 0.7746137068780143, "grad_norm": 0.3555610179901123, "learning_rate": 1.3495074016723227e-05, "loss": 0.5039, "step": 25216 }, { "epoch": 0.7746444260129635, "grad_norm": 0.39198052883148193, "learning_rate": 1.3494621213865848e-05, "loss": 0.49, "step": 25217 }, { "epoch": 0.7746751451479126, "grad_norm": 0.3302285671234131, "learning_rate": 1.3494168402846536e-05, "loss": 0.4957, "step": 25218 }, { "epoch": 0.7747058642828618, "grad_norm": 0.37303435802459717, "learning_rate": 1.3493715583666354e-05, "loss": 0.5541, "step": 25219 }, { "epoch": 0.7747365834178109, "grad_norm": 0.40773919224739075, "learning_rate": 1.3493262756326348e-05, "loss": 0.5536, "step": 25220 }, { "epoch": 0.7747673025527602, "grad_norm": 0.34209415316581726, "learning_rate": 1.349280992082759e-05, "loss": 0.5144, "step": 25221 }, { "epoch": 0.7747980216877093, "grad_norm": 0.33332929015159607, "learning_rate": 1.3492357077171126e-05, "loss": 0.5483, "step": 25222 }, { "epoch": 0.7748287408226584, "grad_norm": 0.35871657729148865, "learning_rate": 1.3491904225358016e-05, "loss": 0.5767, "step": 25223 }, { "epoch": 0.7748594599576076, "grad_norm": 0.36609935760498047, "learning_rate": 1.3491451365389326e-05, "loss": 0.6108, "step": 25224 }, { "epoch": 0.7748901790925568, "grad_norm": 0.34706488251686096, "learning_rate": 1.3490998497266104e-05, "loss": 0.5314, "step": 25225 }, { "epoch": 0.7749208982275059, "grad_norm": 0.4993201792240143, "learning_rate": 1.3490545620989412e-05, "loss": 0.5685, "step": 25226 }, { "epoch": 0.7749516173624551, "grad_norm": 0.3881640136241913, "learning_rate": 1.3490092736560304e-05, "loss": 0.5411, "step": 25227 }, { "epoch": 0.7749823364974042, "grad_norm": 0.3876967132091522, "learning_rate": 1.3489639843979846e-05, "loss": 0.52, "step": 25228 }, { "epoch": 0.7750130556323533, "grad_norm": 0.402572900056839, "learning_rate": 1.3489186943249086e-05, "loss": 0.5259, "step": 25229 }, { "epoch": 0.7750437747673026, "grad_norm": 0.39318907260894775, "learning_rate": 1.348873403436909e-05, "loss": 0.5199, "step": 25230 }, { "epoch": 0.7750744939022517, "grad_norm": 0.354442298412323, "learning_rate": 1.3488281117340911e-05, "loss": 0.5766, "step": 25231 }, { "epoch": 0.7751052130372009, "grad_norm": 0.5182688236236572, "learning_rate": 1.3487828192165606e-05, "loss": 0.5915, "step": 25232 }, { "epoch": 0.77513593217215, "grad_norm": 0.32040834426879883, "learning_rate": 1.3487375258844238e-05, "loss": 0.5427, "step": 25233 }, { "epoch": 0.7751666513070992, "grad_norm": 0.38991475105285645, "learning_rate": 1.3486922317377858e-05, "loss": 0.6304, "step": 25234 }, { "epoch": 0.7751973704420484, "grad_norm": 0.35334259271621704, "learning_rate": 1.348646936776753e-05, "loss": 0.5827, "step": 25235 }, { "epoch": 0.7752280895769975, "grad_norm": 0.4270765781402588, "learning_rate": 1.3486016410014306e-05, "loss": 0.6221, "step": 25236 }, { "epoch": 0.7752588087119466, "grad_norm": 0.36150509119033813, "learning_rate": 1.348556344411925e-05, "loss": 0.5587, "step": 25237 }, { "epoch": 0.7752895278468959, "grad_norm": 0.36569827795028687, "learning_rate": 1.3485110470083416e-05, "loss": 0.5961, "step": 25238 }, { "epoch": 0.775320246981845, "grad_norm": 0.33766064047813416, "learning_rate": 1.3484657487907865e-05, "loss": 0.5735, "step": 25239 }, { "epoch": 0.7753509661167941, "grad_norm": 0.37152284383773804, "learning_rate": 1.348420449759365e-05, "loss": 0.6312, "step": 25240 }, { "epoch": 0.7753816852517433, "grad_norm": 0.3427976369857788, "learning_rate": 1.3483751499141832e-05, "loss": 0.5607, "step": 25241 }, { "epoch": 0.7754124043866925, "grad_norm": 0.5267860889434814, "learning_rate": 1.3483298492553472e-05, "loss": 0.5698, "step": 25242 }, { "epoch": 0.7754431235216416, "grad_norm": 0.38991230726242065, "learning_rate": 1.3482845477829623e-05, "loss": 0.5176, "step": 25243 }, { "epoch": 0.7754738426565908, "grad_norm": 0.3636607825756073, "learning_rate": 1.3482392454971345e-05, "loss": 0.5887, "step": 25244 }, { "epoch": 0.7755045617915399, "grad_norm": 0.3607715666294098, "learning_rate": 1.3481939423979698e-05, "loss": 0.5677, "step": 25245 }, { "epoch": 0.7755352809264892, "grad_norm": 0.3278428614139557, "learning_rate": 1.3481486384855738e-05, "loss": 0.4561, "step": 25246 }, { "epoch": 0.7755660000614383, "grad_norm": 0.3420860469341278, "learning_rate": 1.348103333760052e-05, "loss": 0.5597, "step": 25247 }, { "epoch": 0.7755967191963874, "grad_norm": 0.38649651408195496, "learning_rate": 1.348058028221511e-05, "loss": 0.575, "step": 25248 }, { "epoch": 0.7756274383313366, "grad_norm": 0.3594468832015991, "learning_rate": 1.348012721870056e-05, "loss": 0.5464, "step": 25249 }, { "epoch": 0.7756581574662857, "grad_norm": 0.35271602869033813, "learning_rate": 1.3479674147057931e-05, "loss": 0.5613, "step": 25250 }, { "epoch": 0.7756888766012349, "grad_norm": 0.3671829402446747, "learning_rate": 1.3479221067288277e-05, "loss": 0.5094, "step": 25251 }, { "epoch": 0.7757195957361841, "grad_norm": 0.4051421284675598, "learning_rate": 1.3478767979392662e-05, "loss": 0.5995, "step": 25252 }, { "epoch": 0.7757503148711332, "grad_norm": 0.33397966623306274, "learning_rate": 1.347831488337214e-05, "loss": 0.4742, "step": 25253 }, { "epoch": 0.7757810340060823, "grad_norm": 0.4159320592880249, "learning_rate": 1.347786177922777e-05, "loss": 0.5466, "step": 25254 }, { "epoch": 0.7758117531410316, "grad_norm": 0.4010430872440338, "learning_rate": 1.3477408666960615e-05, "loss": 0.5608, "step": 25255 }, { "epoch": 0.7758424722759807, "grad_norm": 0.33290719985961914, "learning_rate": 1.3476955546571725e-05, "loss": 0.5529, "step": 25256 }, { "epoch": 0.7758731914109299, "grad_norm": 0.357727974653244, "learning_rate": 1.3476502418062163e-05, "loss": 0.5522, "step": 25257 }, { "epoch": 0.775903910545879, "grad_norm": 0.4090959429740906, "learning_rate": 1.3476049281432989e-05, "loss": 0.6148, "step": 25258 }, { "epoch": 0.7759346296808282, "grad_norm": 0.3502054810523987, "learning_rate": 1.3475596136685254e-05, "loss": 0.6157, "step": 25259 }, { "epoch": 0.7759653488157774, "grad_norm": 0.33966565132141113, "learning_rate": 1.3475142983820026e-05, "loss": 0.6282, "step": 25260 }, { "epoch": 0.7759960679507265, "grad_norm": 0.5535008311271667, "learning_rate": 1.3474689822838359e-05, "loss": 0.5119, "step": 25261 }, { "epoch": 0.7760267870856756, "grad_norm": 0.3758619725704193, "learning_rate": 1.347423665374131e-05, "loss": 0.4925, "step": 25262 }, { "epoch": 0.7760575062206249, "grad_norm": 0.40378236770629883, "learning_rate": 1.3473783476529938e-05, "loss": 0.518, "step": 25263 }, { "epoch": 0.776088225355574, "grad_norm": 0.362070769071579, "learning_rate": 1.3473330291205305e-05, "loss": 0.546, "step": 25264 }, { "epoch": 0.7761189444905231, "grad_norm": 0.4217730462551117, "learning_rate": 1.3472877097768463e-05, "loss": 0.5476, "step": 25265 }, { "epoch": 0.7761496636254723, "grad_norm": 0.34437698125839233, "learning_rate": 1.3472423896220477e-05, "loss": 0.5564, "step": 25266 }, { "epoch": 0.7761803827604215, "grad_norm": 0.32641080021858215, "learning_rate": 1.3471970686562402e-05, "loss": 0.4162, "step": 25267 }, { "epoch": 0.7762111018953707, "grad_norm": 0.37982118129730225, "learning_rate": 1.3471517468795294e-05, "loss": 0.5248, "step": 25268 }, { "epoch": 0.7762418210303198, "grad_norm": 0.3894161880016327, "learning_rate": 1.3471064242920218e-05, "loss": 0.5507, "step": 25269 }, { "epoch": 0.7762725401652689, "grad_norm": 0.36163344979286194, "learning_rate": 1.3470611008938227e-05, "loss": 0.5499, "step": 25270 }, { "epoch": 0.7763032593002182, "grad_norm": 0.37006235122680664, "learning_rate": 1.347015776685038e-05, "loss": 0.5574, "step": 25271 }, { "epoch": 0.7763339784351673, "grad_norm": 0.4831543266773224, "learning_rate": 1.346970451665774e-05, "loss": 0.5848, "step": 25272 }, { "epoch": 0.7763646975701164, "grad_norm": 0.38679325580596924, "learning_rate": 1.3469251258361362e-05, "loss": 0.5414, "step": 25273 }, { "epoch": 0.7763954167050656, "grad_norm": 0.39228931069374084, "learning_rate": 1.3468797991962306e-05, "loss": 0.5582, "step": 25274 }, { "epoch": 0.7764261358400147, "grad_norm": 0.3644556999206543, "learning_rate": 1.346834471746163e-05, "loss": 0.5057, "step": 25275 }, { "epoch": 0.7764568549749639, "grad_norm": 0.33305203914642334, "learning_rate": 1.3467891434860392e-05, "loss": 0.5701, "step": 25276 }, { "epoch": 0.7764875741099131, "grad_norm": 0.39877599477767944, "learning_rate": 1.346743814415965e-05, "loss": 0.6347, "step": 25277 }, { "epoch": 0.7765182932448622, "grad_norm": 0.8412812352180481, "learning_rate": 1.3466984845360467e-05, "loss": 0.5292, "step": 25278 }, { "epoch": 0.7765490123798113, "grad_norm": 0.4234369695186615, "learning_rate": 1.3466531538463897e-05, "loss": 0.5933, "step": 25279 }, { "epoch": 0.7765797315147606, "grad_norm": 0.36390137672424316, "learning_rate": 1.3466078223471e-05, "loss": 0.4922, "step": 25280 }, { "epoch": 0.7766104506497097, "grad_norm": 0.34518471360206604, "learning_rate": 1.3465624900382834e-05, "loss": 0.5283, "step": 25281 }, { "epoch": 0.7766411697846589, "grad_norm": 0.34135758876800537, "learning_rate": 1.3465171569200464e-05, "loss": 0.5156, "step": 25282 }, { "epoch": 0.776671888919608, "grad_norm": 0.4083661139011383, "learning_rate": 1.346471822992494e-05, "loss": 0.5727, "step": 25283 }, { "epoch": 0.7767026080545572, "grad_norm": 0.3926020562648773, "learning_rate": 1.3464264882557325e-05, "loss": 0.6841, "step": 25284 }, { "epoch": 0.7767333271895064, "grad_norm": 0.38974612951278687, "learning_rate": 1.346381152709868e-05, "loss": 0.5533, "step": 25285 }, { "epoch": 0.7767640463244555, "grad_norm": 0.3562687933444977, "learning_rate": 1.3463358163550057e-05, "loss": 0.463, "step": 25286 }, { "epoch": 0.7767947654594046, "grad_norm": 0.35048019886016846, "learning_rate": 1.3462904791912523e-05, "loss": 0.5052, "step": 25287 }, { "epoch": 0.7768254845943539, "grad_norm": 0.3371111750602722, "learning_rate": 1.346245141218713e-05, "loss": 0.5763, "step": 25288 }, { "epoch": 0.776856203729303, "grad_norm": 0.3992452025413513, "learning_rate": 1.3461998024374942e-05, "loss": 0.5979, "step": 25289 }, { "epoch": 0.7768869228642521, "grad_norm": 0.3532593548297882, "learning_rate": 1.3461544628477015e-05, "loss": 0.5271, "step": 25290 }, { "epoch": 0.7769176419992013, "grad_norm": 0.40844422578811646, "learning_rate": 1.346109122449441e-05, "loss": 0.5994, "step": 25291 }, { "epoch": 0.7769483611341504, "grad_norm": 0.3614770472049713, "learning_rate": 1.3460637812428181e-05, "loss": 0.5314, "step": 25292 }, { "epoch": 0.7769790802690997, "grad_norm": 0.3540353775024414, "learning_rate": 1.3460184392279396e-05, "loss": 0.5258, "step": 25293 }, { "epoch": 0.7770097994040488, "grad_norm": 0.38987648487091064, "learning_rate": 1.3459730964049105e-05, "loss": 0.5714, "step": 25294 }, { "epoch": 0.7770405185389979, "grad_norm": 0.34863176941871643, "learning_rate": 1.345927752773837e-05, "loss": 0.5835, "step": 25295 }, { "epoch": 0.7770712376739471, "grad_norm": 0.3301045596599579, "learning_rate": 1.3458824083348253e-05, "loss": 0.5025, "step": 25296 }, { "epoch": 0.7771019568088963, "grad_norm": 0.6660440564155579, "learning_rate": 1.3458370630879808e-05, "loss": 0.5078, "step": 25297 }, { "epoch": 0.7771326759438454, "grad_norm": 0.3936895430088043, "learning_rate": 1.3457917170334103e-05, "loss": 0.5008, "step": 25298 }, { "epoch": 0.7771633950787946, "grad_norm": 0.41538479924201965, "learning_rate": 1.3457463701712183e-05, "loss": 0.4924, "step": 25299 }, { "epoch": 0.7771941142137437, "grad_norm": 0.3807576894760132, "learning_rate": 1.3457010225015121e-05, "loss": 0.4949, "step": 25300 }, { "epoch": 0.7772248333486929, "grad_norm": 0.41542530059814453, "learning_rate": 1.3456556740243972e-05, "loss": 0.5867, "step": 25301 }, { "epoch": 0.7772555524836421, "grad_norm": 0.3442547619342804, "learning_rate": 1.3456103247399787e-05, "loss": 0.4642, "step": 25302 }, { "epoch": 0.7772862716185912, "grad_norm": 0.32935598492622375, "learning_rate": 1.3455649746483634e-05, "loss": 0.5288, "step": 25303 }, { "epoch": 0.7773169907535403, "grad_norm": 0.3589381277561188, "learning_rate": 1.3455196237496568e-05, "loss": 0.5045, "step": 25304 }, { "epoch": 0.7773477098884896, "grad_norm": 0.44792893528938293, "learning_rate": 1.3454742720439655e-05, "loss": 0.5426, "step": 25305 }, { "epoch": 0.7773784290234387, "grad_norm": 0.3484170138835907, "learning_rate": 1.3454289195313943e-05, "loss": 0.5586, "step": 25306 }, { "epoch": 0.7774091481583879, "grad_norm": 0.6329813599586487, "learning_rate": 1.3453835662120502e-05, "loss": 0.5679, "step": 25307 }, { "epoch": 0.777439867293337, "grad_norm": 0.3425394296646118, "learning_rate": 1.3453382120860383e-05, "loss": 0.5536, "step": 25308 }, { "epoch": 0.7774705864282861, "grad_norm": 0.39550167322158813, "learning_rate": 1.345292857153465e-05, "loss": 0.5024, "step": 25309 }, { "epoch": 0.7775013055632354, "grad_norm": 0.3701869547367096, "learning_rate": 1.3452475014144365e-05, "loss": 0.5074, "step": 25310 }, { "epoch": 0.7775320246981845, "grad_norm": 0.3423631489276886, "learning_rate": 1.345202144869058e-05, "loss": 0.5745, "step": 25311 }, { "epoch": 0.7775627438331336, "grad_norm": 0.34731659293174744, "learning_rate": 1.3451567875174358e-05, "loss": 0.5815, "step": 25312 }, { "epoch": 0.7775934629680828, "grad_norm": 0.34888166189193726, "learning_rate": 1.3451114293596757e-05, "loss": 0.5359, "step": 25313 }, { "epoch": 0.777624182103032, "grad_norm": 0.3701856732368469, "learning_rate": 1.3450660703958837e-05, "loss": 0.5845, "step": 25314 }, { "epoch": 0.7776549012379811, "grad_norm": 0.35856732726097107, "learning_rate": 1.3450207106261658e-05, "loss": 0.51, "step": 25315 }, { "epoch": 0.7776856203729303, "grad_norm": 0.3475412130355835, "learning_rate": 1.3449753500506284e-05, "loss": 0.5518, "step": 25316 }, { "epoch": 0.7777163395078794, "grad_norm": 0.37135612964630127, "learning_rate": 1.3449299886693765e-05, "loss": 0.5372, "step": 25317 }, { "epoch": 0.7777470586428287, "grad_norm": 0.3626139163970947, "learning_rate": 1.3448846264825168e-05, "loss": 0.6045, "step": 25318 }, { "epoch": 0.7777777777777778, "grad_norm": 0.3501361906528473, "learning_rate": 1.344839263490155e-05, "loss": 0.4851, "step": 25319 }, { "epoch": 0.7778084969127269, "grad_norm": 0.33188533782958984, "learning_rate": 1.3447938996923968e-05, "loss": 0.5425, "step": 25320 }, { "epoch": 0.7778392160476761, "grad_norm": 0.361068993806839, "learning_rate": 1.3447485350893485e-05, "loss": 0.5761, "step": 25321 }, { "epoch": 0.7778699351826253, "grad_norm": 0.37324652075767517, "learning_rate": 1.344703169681116e-05, "loss": 0.5271, "step": 25322 }, { "epoch": 0.7779006543175744, "grad_norm": 0.33266377449035645, "learning_rate": 1.344657803467805e-05, "loss": 0.5228, "step": 25323 }, { "epoch": 0.7779313734525236, "grad_norm": 0.358826607465744, "learning_rate": 1.3446124364495217e-05, "loss": 0.6264, "step": 25324 }, { "epoch": 0.7779620925874727, "grad_norm": 0.3595719337463379, "learning_rate": 1.3445670686263719e-05, "loss": 0.5891, "step": 25325 }, { "epoch": 0.7779928117224219, "grad_norm": 0.3739510774612427, "learning_rate": 1.3445216999984616e-05, "loss": 0.6293, "step": 25326 }, { "epoch": 0.7780235308573711, "grad_norm": 0.31744933128356934, "learning_rate": 1.344476330565897e-05, "loss": 0.5296, "step": 25327 }, { "epoch": 0.7780542499923202, "grad_norm": 0.3563578724861145, "learning_rate": 1.3444309603287837e-05, "loss": 0.5909, "step": 25328 }, { "epoch": 0.7780849691272693, "grad_norm": 0.3548218309879303, "learning_rate": 1.344385589287228e-05, "loss": 0.5721, "step": 25329 }, { "epoch": 0.7781156882622186, "grad_norm": 0.3794254660606384, "learning_rate": 1.3443402174413358e-05, "loss": 0.5629, "step": 25330 }, { "epoch": 0.7781464073971677, "grad_norm": 0.4093839228153229, "learning_rate": 1.3442948447912128e-05, "loss": 0.5785, "step": 25331 }, { "epoch": 0.7781771265321169, "grad_norm": 0.4015212953090668, "learning_rate": 1.344249471336965e-05, "loss": 0.5161, "step": 25332 }, { "epoch": 0.778207845667066, "grad_norm": 0.3633307218551636, "learning_rate": 1.3442040970786985e-05, "loss": 0.6327, "step": 25333 }, { "epoch": 0.7782385648020151, "grad_norm": 0.37398844957351685, "learning_rate": 1.3441587220165196e-05, "loss": 0.5962, "step": 25334 }, { "epoch": 0.7782692839369644, "grad_norm": 0.4088019132614136, "learning_rate": 1.3441133461505338e-05, "loss": 0.5339, "step": 25335 }, { "epoch": 0.7783000030719135, "grad_norm": 0.47408124804496765, "learning_rate": 1.3440679694808471e-05, "loss": 0.6156, "step": 25336 }, { "epoch": 0.7783307222068626, "grad_norm": 0.3817709684371948, "learning_rate": 1.344022592007566e-05, "loss": 0.5322, "step": 25337 }, { "epoch": 0.7783614413418118, "grad_norm": 0.3480789065361023, "learning_rate": 1.3439772137307959e-05, "loss": 0.5046, "step": 25338 }, { "epoch": 0.778392160476761, "grad_norm": 0.5870785713195801, "learning_rate": 1.3439318346506431e-05, "loss": 0.5641, "step": 25339 }, { "epoch": 0.7784228796117101, "grad_norm": 0.4098789095878601, "learning_rate": 1.3438864547672133e-05, "loss": 0.6001, "step": 25340 }, { "epoch": 0.7784535987466593, "grad_norm": 0.342734694480896, "learning_rate": 1.343841074080613e-05, "loss": 0.4918, "step": 25341 }, { "epoch": 0.7784843178816084, "grad_norm": 0.3593689203262329, "learning_rate": 1.3437956925909474e-05, "loss": 0.5742, "step": 25342 }, { "epoch": 0.7785150370165577, "grad_norm": 0.3903767466545105, "learning_rate": 1.3437503102983231e-05, "loss": 0.5695, "step": 25343 }, { "epoch": 0.7785457561515068, "grad_norm": 0.33146557211875916, "learning_rate": 1.3437049272028459e-05, "loss": 0.4993, "step": 25344 }, { "epoch": 0.7785764752864559, "grad_norm": 0.38158559799194336, "learning_rate": 1.3436595433046222e-05, "loss": 0.5992, "step": 25345 }, { "epoch": 0.7786071944214051, "grad_norm": 0.35384446382522583, "learning_rate": 1.3436141586037572e-05, "loss": 0.6058, "step": 25346 }, { "epoch": 0.7786379135563543, "grad_norm": 0.3497174084186554, "learning_rate": 1.3435687731003577e-05, "loss": 0.5294, "step": 25347 }, { "epoch": 0.7786686326913034, "grad_norm": 0.37381163239479065, "learning_rate": 1.3435233867945293e-05, "loss": 0.5505, "step": 25348 }, { "epoch": 0.7786993518262526, "grad_norm": 0.38258105516433716, "learning_rate": 1.3434779996863778e-05, "loss": 0.6102, "step": 25349 }, { "epoch": 0.7787300709612017, "grad_norm": 0.3897209167480469, "learning_rate": 1.3434326117760094e-05, "loss": 0.5376, "step": 25350 }, { "epoch": 0.7787607900961508, "grad_norm": 0.38428884744644165, "learning_rate": 1.3433872230635301e-05, "loss": 0.5249, "step": 25351 }, { "epoch": 0.7787915092311001, "grad_norm": 0.35507258772850037, "learning_rate": 1.3433418335490462e-05, "loss": 0.5398, "step": 25352 }, { "epoch": 0.7788222283660492, "grad_norm": 0.3902254104614258, "learning_rate": 1.3432964432326636e-05, "loss": 0.5503, "step": 25353 }, { "epoch": 0.7788529475009984, "grad_norm": 0.35821232199668884, "learning_rate": 1.3432510521144879e-05, "loss": 0.4916, "step": 25354 }, { "epoch": 0.7788836666359475, "grad_norm": 0.33157098293304443, "learning_rate": 1.3432056601946255e-05, "loss": 0.5194, "step": 25355 }, { "epoch": 0.7789143857708967, "grad_norm": 0.37467464804649353, "learning_rate": 1.3431602674731823e-05, "loss": 0.6041, "step": 25356 }, { "epoch": 0.7789451049058459, "grad_norm": 0.34330224990844727, "learning_rate": 1.3431148739502644e-05, "loss": 0.6805, "step": 25357 }, { "epoch": 0.778975824040795, "grad_norm": 0.3360547423362732, "learning_rate": 1.3430694796259775e-05, "loss": 0.5523, "step": 25358 }, { "epoch": 0.7790065431757441, "grad_norm": 0.3709743618965149, "learning_rate": 1.3430240845004283e-05, "loss": 0.5489, "step": 25359 }, { "epoch": 0.7790372623106934, "grad_norm": 3.0571234226226807, "learning_rate": 1.342978688573722e-05, "loss": 0.6264, "step": 25360 }, { "epoch": 0.7790679814456425, "grad_norm": 0.34793245792388916, "learning_rate": 1.3429332918459652e-05, "loss": 0.5765, "step": 25361 }, { "epoch": 0.7790987005805916, "grad_norm": 0.33626753091812134, "learning_rate": 1.3428878943172636e-05, "loss": 0.4937, "step": 25362 }, { "epoch": 0.7791294197155408, "grad_norm": 0.4144015610218048, "learning_rate": 1.3428424959877235e-05, "loss": 0.5499, "step": 25363 }, { "epoch": 0.77916013885049, "grad_norm": 0.36853718757629395, "learning_rate": 1.342797096857451e-05, "loss": 0.478, "step": 25364 }, { "epoch": 0.7791908579854391, "grad_norm": 0.37357810139656067, "learning_rate": 1.3427516969265518e-05, "loss": 0.5454, "step": 25365 }, { "epoch": 0.7792215771203883, "grad_norm": 0.39185628294944763, "learning_rate": 1.342706296195132e-05, "loss": 0.5605, "step": 25366 }, { "epoch": 0.7792522962553374, "grad_norm": 0.3228795528411865, "learning_rate": 1.3426608946632975e-05, "loss": 0.5232, "step": 25367 }, { "epoch": 0.7792830153902867, "grad_norm": 0.3419444262981415, "learning_rate": 1.3426154923311547e-05, "loss": 0.5758, "step": 25368 }, { "epoch": 0.7793137345252358, "grad_norm": 0.3828301727771759, "learning_rate": 1.3425700891988095e-05, "loss": 0.5693, "step": 25369 }, { "epoch": 0.7793444536601849, "grad_norm": 0.3619670271873474, "learning_rate": 1.342524685266368e-05, "loss": 0.5018, "step": 25370 }, { "epoch": 0.7793751727951341, "grad_norm": 0.3937762677669525, "learning_rate": 1.3424792805339364e-05, "loss": 0.5588, "step": 25371 }, { "epoch": 0.7794058919300832, "grad_norm": 0.34634658694267273, "learning_rate": 1.3424338750016201e-05, "loss": 0.495, "step": 25372 }, { "epoch": 0.7794366110650324, "grad_norm": 0.33378610014915466, "learning_rate": 1.3423884686695258e-05, "loss": 0.4685, "step": 25373 }, { "epoch": 0.7794673301999816, "grad_norm": 0.3674440383911133, "learning_rate": 1.3423430615377593e-05, "loss": 0.6711, "step": 25374 }, { "epoch": 0.7794980493349307, "grad_norm": 0.37254300713539124, "learning_rate": 1.3422976536064267e-05, "loss": 0.5487, "step": 25375 }, { "epoch": 0.7795287684698798, "grad_norm": 0.44890666007995605, "learning_rate": 1.3422522448756339e-05, "loss": 0.5595, "step": 25376 }, { "epoch": 0.7795594876048291, "grad_norm": 0.3581266701221466, "learning_rate": 1.342206835345487e-05, "loss": 0.5095, "step": 25377 }, { "epoch": 0.7795902067397782, "grad_norm": 0.3864741325378418, "learning_rate": 1.3421614250160924e-05, "loss": 0.5161, "step": 25378 }, { "epoch": 0.7796209258747274, "grad_norm": 0.39340147376060486, "learning_rate": 1.342116013887556e-05, "loss": 0.5378, "step": 25379 }, { "epoch": 0.7796516450096765, "grad_norm": 0.34029507637023926, "learning_rate": 1.3420706019599834e-05, "loss": 0.6134, "step": 25380 }, { "epoch": 0.7796823641446257, "grad_norm": 0.5762830376625061, "learning_rate": 1.3420251892334812e-05, "loss": 0.6358, "step": 25381 }, { "epoch": 0.7797130832795749, "grad_norm": 0.4355362355709076, "learning_rate": 1.3419797757081555e-05, "loss": 0.5924, "step": 25382 }, { "epoch": 0.779743802414524, "grad_norm": 0.3613542914390564, "learning_rate": 1.3419343613841116e-05, "loss": 0.5315, "step": 25383 }, { "epoch": 0.7797745215494731, "grad_norm": 0.38934892416000366, "learning_rate": 1.3418889462614565e-05, "loss": 0.5269, "step": 25384 }, { "epoch": 0.7798052406844224, "grad_norm": 0.33399951457977295, "learning_rate": 1.3418435303402957e-05, "loss": 0.4478, "step": 25385 }, { "epoch": 0.7798359598193715, "grad_norm": 0.39937764406204224, "learning_rate": 1.3417981136207356e-05, "loss": 0.582, "step": 25386 }, { "epoch": 0.7798666789543206, "grad_norm": 0.3945847153663635, "learning_rate": 1.341752696102882e-05, "loss": 0.5138, "step": 25387 }, { "epoch": 0.7798973980892698, "grad_norm": 0.3713791072368622, "learning_rate": 1.3417072777868412e-05, "loss": 0.5303, "step": 25388 }, { "epoch": 0.779928117224219, "grad_norm": 0.38290295004844666, "learning_rate": 1.3416618586727192e-05, "loss": 0.5922, "step": 25389 }, { "epoch": 0.7799588363591681, "grad_norm": 0.3691234588623047, "learning_rate": 1.341616438760622e-05, "loss": 0.4907, "step": 25390 }, { "epoch": 0.7799895554941173, "grad_norm": 0.39937642216682434, "learning_rate": 1.341571018050656e-05, "loss": 0.5287, "step": 25391 }, { "epoch": 0.7800202746290664, "grad_norm": 0.391257107257843, "learning_rate": 1.3415255965429266e-05, "loss": 0.5953, "step": 25392 }, { "epoch": 0.7800509937640157, "grad_norm": 0.40183404088020325, "learning_rate": 1.3414801742375408e-05, "loss": 0.523, "step": 25393 }, { "epoch": 0.7800817128989648, "grad_norm": 0.3319677412509918, "learning_rate": 1.341434751134604e-05, "loss": 0.4851, "step": 25394 }, { "epoch": 0.7801124320339139, "grad_norm": 0.4038982689380646, "learning_rate": 1.3413893272342224e-05, "loss": 0.6328, "step": 25395 }, { "epoch": 0.7801431511688631, "grad_norm": 0.41185614466667175, "learning_rate": 1.3413439025365023e-05, "loss": 0.5466, "step": 25396 }, { "epoch": 0.7801738703038122, "grad_norm": 0.37210994958877563, "learning_rate": 1.3412984770415497e-05, "loss": 0.518, "step": 25397 }, { "epoch": 0.7802045894387614, "grad_norm": 1.0328165292739868, "learning_rate": 1.3412530507494703e-05, "loss": 0.5442, "step": 25398 }, { "epoch": 0.7802353085737106, "grad_norm": 0.3443508446216583, "learning_rate": 1.341207623660371e-05, "loss": 0.5892, "step": 25399 }, { "epoch": 0.7802660277086597, "grad_norm": 0.3988948464393616, "learning_rate": 1.3411621957743575e-05, "loss": 0.5849, "step": 25400 }, { "epoch": 0.7802967468436088, "grad_norm": 0.3709220886230469, "learning_rate": 1.3411167670915354e-05, "loss": 0.5511, "step": 25401 }, { "epoch": 0.7803274659785581, "grad_norm": 0.35383281111717224, "learning_rate": 1.3410713376120117e-05, "loss": 0.53, "step": 25402 }, { "epoch": 0.7803581851135072, "grad_norm": 0.36785727739334106, "learning_rate": 1.341025907335892e-05, "loss": 0.5733, "step": 25403 }, { "epoch": 0.7803889042484564, "grad_norm": 0.398689329624176, "learning_rate": 1.3409804762632823e-05, "loss": 0.5556, "step": 25404 }, { "epoch": 0.7804196233834055, "grad_norm": 0.3944772779941559, "learning_rate": 1.340935044394289e-05, "loss": 0.5523, "step": 25405 }, { "epoch": 0.7804503425183547, "grad_norm": 0.3519741892814636, "learning_rate": 1.340889611729018e-05, "loss": 0.5836, "step": 25406 }, { "epoch": 0.7804810616533039, "grad_norm": 0.5836843848228455, "learning_rate": 1.3408441782675756e-05, "loss": 0.5682, "step": 25407 }, { "epoch": 0.780511780788253, "grad_norm": 0.36111968755722046, "learning_rate": 1.3407987440100678e-05, "loss": 0.5138, "step": 25408 }, { "epoch": 0.7805424999232021, "grad_norm": 0.3941628336906433, "learning_rate": 1.3407533089566008e-05, "loss": 0.5831, "step": 25409 }, { "epoch": 0.7805732190581514, "grad_norm": 0.3433554470539093, "learning_rate": 1.3407078731072804e-05, "loss": 0.5351, "step": 25410 }, { "epoch": 0.7806039381931005, "grad_norm": 0.3623279929161072, "learning_rate": 1.340662436462213e-05, "loss": 0.4839, "step": 25411 }, { "epoch": 0.7806346573280496, "grad_norm": 0.3555039167404175, "learning_rate": 1.340616999021505e-05, "loss": 0.5366, "step": 25412 }, { "epoch": 0.7806653764629988, "grad_norm": 0.37102562189102173, "learning_rate": 1.340571560785262e-05, "loss": 0.6106, "step": 25413 }, { "epoch": 0.780696095597948, "grad_norm": 0.37807053327560425, "learning_rate": 1.3405261217535905e-05, "loss": 0.6196, "step": 25414 }, { "epoch": 0.7807268147328971, "grad_norm": 0.3432677686214447, "learning_rate": 1.3404806819265963e-05, "loss": 0.442, "step": 25415 }, { "epoch": 0.7807575338678463, "grad_norm": 0.40235623717308044, "learning_rate": 1.340435241304386e-05, "loss": 0.6275, "step": 25416 }, { "epoch": 0.7807882530027954, "grad_norm": 0.39316585659980774, "learning_rate": 1.340389799887065e-05, "loss": 0.585, "step": 25417 }, { "epoch": 0.7808189721377446, "grad_norm": 0.39260178804397583, "learning_rate": 1.34034435767474e-05, "loss": 0.5572, "step": 25418 }, { "epoch": 0.7808496912726938, "grad_norm": 0.4002392590045929, "learning_rate": 1.340298914667517e-05, "loss": 0.5039, "step": 25419 }, { "epoch": 0.7808804104076429, "grad_norm": 0.3448773920536041, "learning_rate": 1.3402534708655022e-05, "loss": 0.561, "step": 25420 }, { "epoch": 0.7809111295425921, "grad_norm": 0.3654865622520447, "learning_rate": 1.3402080262688016e-05, "loss": 0.4893, "step": 25421 }, { "epoch": 0.7809418486775412, "grad_norm": 0.3474367558956146, "learning_rate": 1.3401625808775212e-05, "loss": 0.5516, "step": 25422 }, { "epoch": 0.7809725678124904, "grad_norm": 0.4402119815349579, "learning_rate": 1.3401171346917674e-05, "loss": 0.476, "step": 25423 }, { "epoch": 0.7810032869474396, "grad_norm": 0.38286375999450684, "learning_rate": 1.3400716877116465e-05, "loss": 0.5569, "step": 25424 }, { "epoch": 0.7810340060823887, "grad_norm": 0.3321911096572876, "learning_rate": 1.3400262399372642e-05, "loss": 0.5495, "step": 25425 }, { "epoch": 0.7810647252173378, "grad_norm": 0.36268606781959534, "learning_rate": 1.3399807913687268e-05, "loss": 0.4847, "step": 25426 }, { "epoch": 0.7810954443522871, "grad_norm": 0.8800076842308044, "learning_rate": 1.339935342006141e-05, "loss": 0.4389, "step": 25427 }, { "epoch": 0.7811261634872362, "grad_norm": 0.3670199513435364, "learning_rate": 1.339889891849612e-05, "loss": 0.5766, "step": 25428 }, { "epoch": 0.7811568826221854, "grad_norm": 0.35813069343566895, "learning_rate": 1.3398444408992466e-05, "loss": 0.554, "step": 25429 }, { "epoch": 0.7811876017571345, "grad_norm": 0.3663506805896759, "learning_rate": 1.3397989891551506e-05, "loss": 0.555, "step": 25430 }, { "epoch": 0.7812183208920837, "grad_norm": 0.37882521748542786, "learning_rate": 1.3397535366174306e-05, "loss": 0.5579, "step": 25431 }, { "epoch": 0.7812490400270329, "grad_norm": 0.3580639362335205, "learning_rate": 1.3397080832861921e-05, "loss": 0.5868, "step": 25432 }, { "epoch": 0.781279759161982, "grad_norm": 0.3240402340888977, "learning_rate": 1.339662629161542e-05, "loss": 0.5613, "step": 25433 }, { "epoch": 0.7813104782969311, "grad_norm": 0.3988301157951355, "learning_rate": 1.3396171742435862e-05, "loss": 0.6174, "step": 25434 }, { "epoch": 0.7813411974318804, "grad_norm": 0.36444029211997986, "learning_rate": 1.3395717185324305e-05, "loss": 0.605, "step": 25435 }, { "epoch": 0.7813719165668295, "grad_norm": 0.38440564274787903, "learning_rate": 1.3395262620281815e-05, "loss": 0.6787, "step": 25436 }, { "epoch": 0.7814026357017786, "grad_norm": 0.3899966776371002, "learning_rate": 1.3394808047309451e-05, "loss": 0.5839, "step": 25437 }, { "epoch": 0.7814333548367278, "grad_norm": 0.4687120318412781, "learning_rate": 1.3394353466408277e-05, "loss": 0.5937, "step": 25438 }, { "epoch": 0.7814640739716769, "grad_norm": 0.35310161113739014, "learning_rate": 1.339389887757935e-05, "loss": 0.5446, "step": 25439 }, { "epoch": 0.7814947931066261, "grad_norm": 0.3946397006511688, "learning_rate": 1.3393444280823738e-05, "loss": 0.5814, "step": 25440 }, { "epoch": 0.7815255122415753, "grad_norm": 0.3907114267349243, "learning_rate": 1.3392989676142499e-05, "loss": 0.4759, "step": 25441 }, { "epoch": 0.7815562313765244, "grad_norm": 0.399709016084671, "learning_rate": 1.3392535063536695e-05, "loss": 0.5818, "step": 25442 }, { "epoch": 0.7815869505114736, "grad_norm": 0.38215410709381104, "learning_rate": 1.339208044300739e-05, "loss": 0.5633, "step": 25443 }, { "epoch": 0.7816176696464228, "grad_norm": 0.35187140107154846, "learning_rate": 1.3391625814555641e-05, "loss": 0.5452, "step": 25444 }, { "epoch": 0.7816483887813719, "grad_norm": 0.3553260266780853, "learning_rate": 1.3391171178182519e-05, "loss": 0.569, "step": 25445 }, { "epoch": 0.7816791079163211, "grad_norm": 0.37788867950439453, "learning_rate": 1.3390716533889074e-05, "loss": 0.5418, "step": 25446 }, { "epoch": 0.7817098270512702, "grad_norm": 0.3705982267856598, "learning_rate": 1.3390261881676379e-05, "loss": 0.5927, "step": 25447 }, { "epoch": 0.7817405461862194, "grad_norm": 0.488834023475647, "learning_rate": 1.3389807221545485e-05, "loss": 0.576, "step": 25448 }, { "epoch": 0.7817712653211686, "grad_norm": 0.36718320846557617, "learning_rate": 1.3389352553497465e-05, "loss": 0.6011, "step": 25449 }, { "epoch": 0.7818019844561177, "grad_norm": 0.3808114528656006, "learning_rate": 1.338889787753337e-05, "loss": 0.5389, "step": 25450 }, { "epoch": 0.7818327035910668, "grad_norm": 0.37641847133636475, "learning_rate": 1.3388443193654273e-05, "loss": 0.5609, "step": 25451 }, { "epoch": 0.781863422726016, "grad_norm": 0.40877464413642883, "learning_rate": 1.338798850186123e-05, "loss": 0.6189, "step": 25452 }, { "epoch": 0.7818941418609652, "grad_norm": 0.413413405418396, "learning_rate": 1.3387533802155297e-05, "loss": 0.5168, "step": 25453 }, { "epoch": 0.7819248609959144, "grad_norm": 0.34732913970947266, "learning_rate": 1.3387079094537547e-05, "loss": 0.5551, "step": 25454 }, { "epoch": 0.7819555801308635, "grad_norm": 0.34031808376312256, "learning_rate": 1.3386624379009035e-05, "loss": 0.4886, "step": 25455 }, { "epoch": 0.7819862992658126, "grad_norm": 0.4412764310836792, "learning_rate": 1.3386169655570829e-05, "loss": 0.5736, "step": 25456 }, { "epoch": 0.7820170184007619, "grad_norm": 0.3408191204071045, "learning_rate": 1.3385714924223983e-05, "loss": 0.5199, "step": 25457 }, { "epoch": 0.782047737535711, "grad_norm": 0.3460613787174225, "learning_rate": 1.3385260184969564e-05, "loss": 0.5542, "step": 25458 }, { "epoch": 0.7820784566706601, "grad_norm": 0.3856275677680969, "learning_rate": 1.3384805437808635e-05, "loss": 0.5806, "step": 25459 }, { "epoch": 0.7821091758056093, "grad_norm": 0.3641132116317749, "learning_rate": 1.3384350682742256e-05, "loss": 0.5459, "step": 25460 }, { "epoch": 0.7821398949405585, "grad_norm": 0.3307967483997345, "learning_rate": 1.338389591977149e-05, "loss": 0.583, "step": 25461 }, { "epoch": 0.7821706140755076, "grad_norm": 0.37387216091156006, "learning_rate": 1.3383441148897396e-05, "loss": 0.6185, "step": 25462 }, { "epoch": 0.7822013332104568, "grad_norm": 0.35643720626831055, "learning_rate": 1.3382986370121043e-05, "loss": 0.5202, "step": 25463 }, { "epoch": 0.7822320523454059, "grad_norm": 0.36681830883026123, "learning_rate": 1.3382531583443486e-05, "loss": 0.5486, "step": 25464 }, { "epoch": 0.7822627714803552, "grad_norm": 0.42116814851760864, "learning_rate": 1.3382076788865792e-05, "loss": 0.5417, "step": 25465 }, { "epoch": 0.7822934906153043, "grad_norm": 0.39442339539527893, "learning_rate": 1.338162198638902e-05, "loss": 0.4897, "step": 25466 }, { "epoch": 0.7823242097502534, "grad_norm": 0.41067278385162354, "learning_rate": 1.3381167176014235e-05, "loss": 0.6592, "step": 25467 }, { "epoch": 0.7823549288852026, "grad_norm": 0.3425329327583313, "learning_rate": 1.3380712357742498e-05, "loss": 0.5348, "step": 25468 }, { "epoch": 0.7823856480201518, "grad_norm": 0.45461419224739075, "learning_rate": 1.3380257531574871e-05, "loss": 0.6171, "step": 25469 }, { "epoch": 0.7824163671551009, "grad_norm": 0.40245959162712097, "learning_rate": 1.3379802697512417e-05, "loss": 0.5347, "step": 25470 }, { "epoch": 0.7824470862900501, "grad_norm": 0.36355966329574585, "learning_rate": 1.3379347855556195e-05, "loss": 0.6856, "step": 25471 }, { "epoch": 0.7824778054249992, "grad_norm": 0.3683556616306305, "learning_rate": 1.3378893005707275e-05, "loss": 0.6031, "step": 25472 }, { "epoch": 0.7825085245599483, "grad_norm": 0.9150704145431519, "learning_rate": 1.337843814796671e-05, "loss": 0.6635, "step": 25473 }, { "epoch": 0.7825392436948976, "grad_norm": 0.34667113423347473, "learning_rate": 1.3377983282335566e-05, "loss": 0.5691, "step": 25474 }, { "epoch": 0.7825699628298467, "grad_norm": 0.3663831353187561, "learning_rate": 1.3377528408814907e-05, "loss": 0.5481, "step": 25475 }, { "epoch": 0.7826006819647958, "grad_norm": 0.39653897285461426, "learning_rate": 1.3377073527405799e-05, "loss": 0.5314, "step": 25476 }, { "epoch": 0.782631401099745, "grad_norm": 0.3745523691177368, "learning_rate": 1.3376618638109295e-05, "loss": 0.5317, "step": 25477 }, { "epoch": 0.7826621202346942, "grad_norm": 0.37393254041671753, "learning_rate": 1.3376163740926464e-05, "loss": 0.5437, "step": 25478 }, { "epoch": 0.7826928393696434, "grad_norm": 0.36288779973983765, "learning_rate": 1.3375708835858367e-05, "loss": 0.5361, "step": 25479 }, { "epoch": 0.7827235585045925, "grad_norm": 0.3399887681007385, "learning_rate": 1.3375253922906066e-05, "loss": 0.5658, "step": 25480 }, { "epoch": 0.7827542776395416, "grad_norm": 0.40628135204315186, "learning_rate": 1.3374799002070625e-05, "loss": 0.5388, "step": 25481 }, { "epoch": 0.7827849967744909, "grad_norm": 0.3708997070789337, "learning_rate": 1.3374344073353103e-05, "loss": 0.4977, "step": 25482 }, { "epoch": 0.78281571590944, "grad_norm": 0.38399815559387207, "learning_rate": 1.3373889136754568e-05, "loss": 0.5027, "step": 25483 }, { "epoch": 0.7828464350443891, "grad_norm": 0.36928945779800415, "learning_rate": 1.3373434192276075e-05, "loss": 0.5671, "step": 25484 }, { "epoch": 0.7828771541793383, "grad_norm": 0.38718757033348083, "learning_rate": 1.3372979239918692e-05, "loss": 0.5085, "step": 25485 }, { "epoch": 0.7829078733142875, "grad_norm": 0.3805525302886963, "learning_rate": 1.3372524279683486e-05, "loss": 0.4707, "step": 25486 }, { "epoch": 0.7829385924492366, "grad_norm": 0.35944536328315735, "learning_rate": 1.3372069311571507e-05, "loss": 0.5301, "step": 25487 }, { "epoch": 0.7829693115841858, "grad_norm": 0.3874491751194, "learning_rate": 1.3371614335583828e-05, "loss": 0.5973, "step": 25488 }, { "epoch": 0.7830000307191349, "grad_norm": 0.38892674446105957, "learning_rate": 1.3371159351721505e-05, "loss": 0.6247, "step": 25489 }, { "epoch": 0.7830307498540842, "grad_norm": 0.38408342003822327, "learning_rate": 1.337070435998561e-05, "loss": 0.5587, "step": 25490 }, { "epoch": 0.7830614689890333, "grad_norm": 0.7183984518051147, "learning_rate": 1.3370249360377195e-05, "loss": 0.6235, "step": 25491 }, { "epoch": 0.7830921881239824, "grad_norm": 0.6154394745826721, "learning_rate": 1.3369794352897327e-05, "loss": 0.5902, "step": 25492 }, { "epoch": 0.7831229072589316, "grad_norm": 0.37668076157569885, "learning_rate": 1.3369339337547069e-05, "loss": 0.5712, "step": 25493 }, { "epoch": 0.7831536263938808, "grad_norm": 0.3805103898048401, "learning_rate": 1.3368884314327485e-05, "loss": 0.6355, "step": 25494 }, { "epoch": 0.7831843455288299, "grad_norm": 0.5883638858795166, "learning_rate": 1.3368429283239635e-05, "loss": 0.5083, "step": 25495 }, { "epoch": 0.7832150646637791, "grad_norm": 0.33270472288131714, "learning_rate": 1.3367974244284585e-05, "loss": 0.5334, "step": 25496 }, { "epoch": 0.7832457837987282, "grad_norm": 0.4229500889778137, "learning_rate": 1.3367519197463394e-05, "loss": 0.551, "step": 25497 }, { "epoch": 0.7832765029336773, "grad_norm": 0.36030712723731995, "learning_rate": 1.3367064142777127e-05, "loss": 0.5755, "step": 25498 }, { "epoch": 0.7833072220686266, "grad_norm": 0.39602363109588623, "learning_rate": 1.3366609080226849e-05, "loss": 0.6312, "step": 25499 }, { "epoch": 0.7833379412035757, "grad_norm": 0.3805014193058014, "learning_rate": 1.3366154009813617e-05, "loss": 0.518, "step": 25500 }, { "epoch": 0.7833686603385248, "grad_norm": 0.4507044851779938, "learning_rate": 1.33656989315385e-05, "loss": 0.6166, "step": 25501 }, { "epoch": 0.783399379473474, "grad_norm": 0.3746791481971741, "learning_rate": 1.3365243845402556e-05, "loss": 0.5281, "step": 25502 }, { "epoch": 0.7834300986084232, "grad_norm": 0.400005578994751, "learning_rate": 1.3364788751406854e-05, "loss": 0.5958, "step": 25503 }, { "epoch": 0.7834608177433724, "grad_norm": 0.5186564326286316, "learning_rate": 1.336433364955245e-05, "loss": 0.557, "step": 25504 }, { "epoch": 0.7834915368783215, "grad_norm": 0.34143975377082825, "learning_rate": 1.3363878539840409e-05, "loss": 0.5423, "step": 25505 }, { "epoch": 0.7835222560132706, "grad_norm": 0.4372069835662842, "learning_rate": 1.3363423422271795e-05, "loss": 0.5469, "step": 25506 }, { "epoch": 0.7835529751482199, "grad_norm": 0.3531036078929901, "learning_rate": 1.3362968296847672e-05, "loss": 0.5663, "step": 25507 }, { "epoch": 0.783583694283169, "grad_norm": 0.405367910861969, "learning_rate": 1.33625131635691e-05, "loss": 0.5053, "step": 25508 }, { "epoch": 0.7836144134181181, "grad_norm": 0.3817085027694702, "learning_rate": 1.3362058022437143e-05, "loss": 0.5888, "step": 25509 }, { "epoch": 0.7836451325530673, "grad_norm": 0.3639458417892456, "learning_rate": 1.3361602873452869e-05, "loss": 0.5509, "step": 25510 }, { "epoch": 0.7836758516880165, "grad_norm": 0.4658116400241852, "learning_rate": 1.3361147716617333e-05, "loss": 0.5312, "step": 25511 }, { "epoch": 0.7837065708229656, "grad_norm": 0.3359285295009613, "learning_rate": 1.3360692551931602e-05, "loss": 0.5825, "step": 25512 }, { "epoch": 0.7837372899579148, "grad_norm": 0.3234119415283203, "learning_rate": 1.3360237379396739e-05, "loss": 0.5878, "step": 25513 }, { "epoch": 0.7837680090928639, "grad_norm": 0.35607561469078064, "learning_rate": 1.3359782199013807e-05, "loss": 0.6053, "step": 25514 }, { "epoch": 0.7837987282278132, "grad_norm": 0.38064688444137573, "learning_rate": 1.3359327010783871e-05, "loss": 0.5842, "step": 25515 }, { "epoch": 0.7838294473627623, "grad_norm": 0.38499417901039124, "learning_rate": 1.335887181470799e-05, "loss": 0.5048, "step": 25516 }, { "epoch": 0.7838601664977114, "grad_norm": 0.46147620677948, "learning_rate": 1.335841661078723e-05, "loss": 0.6135, "step": 25517 }, { "epoch": 0.7838908856326606, "grad_norm": 0.3723396956920624, "learning_rate": 1.3357961399022652e-05, "loss": 0.571, "step": 25518 }, { "epoch": 0.7839216047676097, "grad_norm": 0.3888065814971924, "learning_rate": 1.3357506179415322e-05, "loss": 0.5377, "step": 25519 }, { "epoch": 0.7839523239025589, "grad_norm": 0.3613230884075165, "learning_rate": 1.3357050951966304e-05, "loss": 0.5107, "step": 25520 }, { "epoch": 0.7839830430375081, "grad_norm": 0.40241968631744385, "learning_rate": 1.3356595716676655e-05, "loss": 0.6104, "step": 25521 }, { "epoch": 0.7840137621724572, "grad_norm": 0.37505683302879333, "learning_rate": 1.3356140473547446e-05, "loss": 0.5634, "step": 25522 }, { "epoch": 0.7840444813074063, "grad_norm": 0.3414255976676941, "learning_rate": 1.3355685222579732e-05, "loss": 0.4435, "step": 25523 }, { "epoch": 0.7840752004423556, "grad_norm": 0.3712668716907501, "learning_rate": 1.3355229963774585e-05, "loss": 0.6068, "step": 25524 }, { "epoch": 0.7841059195773047, "grad_norm": 0.45417168736457825, "learning_rate": 1.3354774697133061e-05, "loss": 0.5091, "step": 25525 }, { "epoch": 0.7841366387122538, "grad_norm": 0.4303506016731262, "learning_rate": 1.3354319422656231e-05, "loss": 0.4593, "step": 25526 }, { "epoch": 0.784167357847203, "grad_norm": 0.38060033321380615, "learning_rate": 1.3353864140345149e-05, "loss": 0.554, "step": 25527 }, { "epoch": 0.7841980769821522, "grad_norm": 0.38089561462402344, "learning_rate": 1.3353408850200886e-05, "loss": 0.5286, "step": 25528 }, { "epoch": 0.7842287961171014, "grad_norm": 0.4334641695022583, "learning_rate": 1.3352953552224501e-05, "loss": 0.5695, "step": 25529 }, { "epoch": 0.7842595152520505, "grad_norm": 0.44873523712158203, "learning_rate": 1.3352498246417059e-05, "loss": 0.575, "step": 25530 }, { "epoch": 0.7842902343869996, "grad_norm": 0.3432571589946747, "learning_rate": 1.3352042932779626e-05, "loss": 0.4923, "step": 25531 }, { "epoch": 0.7843209535219489, "grad_norm": 0.35618212819099426, "learning_rate": 1.3351587611313257e-05, "loss": 0.5668, "step": 25532 }, { "epoch": 0.784351672656898, "grad_norm": 0.3585471212863922, "learning_rate": 1.3351132282019024e-05, "loss": 0.5695, "step": 25533 }, { "epoch": 0.7843823917918471, "grad_norm": 0.3715992569923401, "learning_rate": 1.3350676944897987e-05, "loss": 0.5914, "step": 25534 }, { "epoch": 0.7844131109267963, "grad_norm": 0.37443631887435913, "learning_rate": 1.335022159995121e-05, "loss": 0.5123, "step": 25535 }, { "epoch": 0.7844438300617455, "grad_norm": 0.3763824999332428, "learning_rate": 1.3349766247179756e-05, "loss": 0.5333, "step": 25536 }, { "epoch": 0.7844745491966946, "grad_norm": 0.4436018168926239, "learning_rate": 1.3349310886584692e-05, "loss": 0.5567, "step": 25537 }, { "epoch": 0.7845052683316438, "grad_norm": 0.5318166613578796, "learning_rate": 1.3348855518167076e-05, "loss": 0.5224, "step": 25538 }, { "epoch": 0.7845359874665929, "grad_norm": 0.34165671467781067, "learning_rate": 1.3348400141927972e-05, "loss": 0.4988, "step": 25539 }, { "epoch": 0.7845667066015422, "grad_norm": 0.3528457581996918, "learning_rate": 1.3347944757868452e-05, "loss": 0.5272, "step": 25540 }, { "epoch": 0.7845974257364913, "grad_norm": 0.3273635506629944, "learning_rate": 1.3347489365989567e-05, "loss": 0.5707, "step": 25541 }, { "epoch": 0.7846281448714404, "grad_norm": 0.38448965549468994, "learning_rate": 1.334703396629239e-05, "loss": 0.6032, "step": 25542 }, { "epoch": 0.7846588640063896, "grad_norm": 0.3486146330833435, "learning_rate": 1.3346578558777981e-05, "loss": 0.5538, "step": 25543 }, { "epoch": 0.7846895831413387, "grad_norm": 0.3923783302307129, "learning_rate": 1.3346123143447402e-05, "loss": 0.5272, "step": 25544 }, { "epoch": 0.7847203022762879, "grad_norm": 0.44027969241142273, "learning_rate": 1.334566772030172e-05, "loss": 0.5656, "step": 25545 }, { "epoch": 0.7847510214112371, "grad_norm": 0.3876269459724426, "learning_rate": 1.3345212289341998e-05, "loss": 0.56, "step": 25546 }, { "epoch": 0.7847817405461862, "grad_norm": 0.34452760219573975, "learning_rate": 1.3344756850569299e-05, "loss": 0.5405, "step": 25547 }, { "epoch": 0.7848124596811353, "grad_norm": 0.34904757142066956, "learning_rate": 1.3344301403984688e-05, "loss": 0.5814, "step": 25548 }, { "epoch": 0.7848431788160846, "grad_norm": 0.35193687677383423, "learning_rate": 1.3343845949589227e-05, "loss": 0.5327, "step": 25549 }, { "epoch": 0.7848738979510337, "grad_norm": 0.39284974336624146, "learning_rate": 1.3343390487383979e-05, "loss": 0.5728, "step": 25550 }, { "epoch": 0.7849046170859829, "grad_norm": 0.5599719285964966, "learning_rate": 1.3342935017370006e-05, "loss": 0.5192, "step": 25551 }, { "epoch": 0.784935336220932, "grad_norm": 0.3482486307621002, "learning_rate": 1.3342479539548379e-05, "loss": 0.5887, "step": 25552 }, { "epoch": 0.7849660553558812, "grad_norm": 0.34623146057128906, "learning_rate": 1.3342024053920155e-05, "loss": 0.4702, "step": 25553 }, { "epoch": 0.7849967744908304, "grad_norm": 0.3524731993675232, "learning_rate": 1.3341568560486402e-05, "loss": 0.5602, "step": 25554 }, { "epoch": 0.7850274936257795, "grad_norm": 0.40665990114212036, "learning_rate": 1.3341113059248183e-05, "loss": 0.5959, "step": 25555 }, { "epoch": 0.7850582127607286, "grad_norm": 0.3921956717967987, "learning_rate": 1.3340657550206561e-05, "loss": 0.5435, "step": 25556 }, { "epoch": 0.7850889318956779, "grad_norm": 0.38502198457717896, "learning_rate": 1.3340202033362599e-05, "loss": 0.597, "step": 25557 }, { "epoch": 0.785119651030627, "grad_norm": 0.38371628522872925, "learning_rate": 1.3339746508717364e-05, "loss": 0.5348, "step": 25558 }, { "epoch": 0.7851503701655761, "grad_norm": 0.33413758873939514, "learning_rate": 1.3339290976271915e-05, "loss": 0.5403, "step": 25559 }, { "epoch": 0.7851810893005253, "grad_norm": 0.4093566834926605, "learning_rate": 1.333883543602732e-05, "loss": 0.542, "step": 25560 }, { "epoch": 0.7852118084354744, "grad_norm": 0.3369514048099518, "learning_rate": 1.333837988798464e-05, "loss": 0.5459, "step": 25561 }, { "epoch": 0.7852425275704236, "grad_norm": 0.43897727131843567, "learning_rate": 1.3337924332144945e-05, "loss": 0.5079, "step": 25562 }, { "epoch": 0.7852732467053728, "grad_norm": 0.35562804341316223, "learning_rate": 1.333746876850929e-05, "loss": 0.462, "step": 25563 }, { "epoch": 0.7853039658403219, "grad_norm": 0.32126665115356445, "learning_rate": 1.3337013197078746e-05, "loss": 0.4749, "step": 25564 }, { "epoch": 0.7853346849752711, "grad_norm": 0.3852425217628479, "learning_rate": 1.3336557617854373e-05, "loss": 0.5025, "step": 25565 }, { "epoch": 0.7853654041102203, "grad_norm": 0.3827793002128601, "learning_rate": 1.3336102030837239e-05, "loss": 0.6432, "step": 25566 }, { "epoch": 0.7853961232451694, "grad_norm": 0.3752584755420685, "learning_rate": 1.3335646436028405e-05, "loss": 0.5119, "step": 25567 }, { "epoch": 0.7854268423801186, "grad_norm": 0.616121232509613, "learning_rate": 1.3335190833428934e-05, "loss": 0.532, "step": 25568 }, { "epoch": 0.7854575615150677, "grad_norm": 0.46701109409332275, "learning_rate": 1.3334735223039893e-05, "loss": 0.6175, "step": 25569 }, { "epoch": 0.7854882806500169, "grad_norm": 0.3490441143512726, "learning_rate": 1.3334279604862341e-05, "loss": 0.5154, "step": 25570 }, { "epoch": 0.7855189997849661, "grad_norm": 0.5582562685012817, "learning_rate": 1.3333823978897353e-05, "loss": 0.6458, "step": 25571 }, { "epoch": 0.7855497189199152, "grad_norm": 0.36094221472740173, "learning_rate": 1.3333368345145983e-05, "loss": 0.6117, "step": 25572 }, { "epoch": 0.7855804380548643, "grad_norm": 0.368044376373291, "learning_rate": 1.3332912703609297e-05, "loss": 0.5629, "step": 25573 }, { "epoch": 0.7856111571898136, "grad_norm": 0.4347415864467621, "learning_rate": 1.3332457054288367e-05, "loss": 0.5508, "step": 25574 }, { "epoch": 0.7856418763247627, "grad_norm": 0.5349265336990356, "learning_rate": 1.3332001397184242e-05, "loss": 0.5642, "step": 25575 }, { "epoch": 0.7856725954597119, "grad_norm": 0.3700239062309265, "learning_rate": 1.3331545732298002e-05, "loss": 0.5327, "step": 25576 }, { "epoch": 0.785703314594661, "grad_norm": 0.4159432649612427, "learning_rate": 1.33310900596307e-05, "loss": 0.5061, "step": 25577 }, { "epoch": 0.7857340337296101, "grad_norm": 0.3447383940219879, "learning_rate": 1.3330634379183407e-05, "loss": 0.5369, "step": 25578 }, { "epoch": 0.7857647528645594, "grad_norm": 0.424851655960083, "learning_rate": 1.3330178690957182e-05, "loss": 0.5402, "step": 25579 }, { "epoch": 0.7857954719995085, "grad_norm": 0.35713207721710205, "learning_rate": 1.3329722994953097e-05, "loss": 0.5325, "step": 25580 }, { "epoch": 0.7858261911344576, "grad_norm": 0.346017062664032, "learning_rate": 1.3329267291172205e-05, "loss": 0.5939, "step": 25581 }, { "epoch": 0.7858569102694068, "grad_norm": 0.3509264886379242, "learning_rate": 1.3328811579615581e-05, "loss": 0.5824, "step": 25582 }, { "epoch": 0.785887629404356, "grad_norm": 0.32563287019729614, "learning_rate": 1.3328355860284283e-05, "loss": 0.5326, "step": 25583 }, { "epoch": 0.7859183485393051, "grad_norm": 0.3614368140697479, "learning_rate": 1.3327900133179382e-05, "loss": 0.6141, "step": 25584 }, { "epoch": 0.7859490676742543, "grad_norm": 0.347596675157547, "learning_rate": 1.3327444398301933e-05, "loss": 0.5293, "step": 25585 }, { "epoch": 0.7859797868092034, "grad_norm": 0.33221226930618286, "learning_rate": 1.3326988655653005e-05, "loss": 0.5534, "step": 25586 }, { "epoch": 0.7860105059441526, "grad_norm": 0.32850930094718933, "learning_rate": 1.3326532905233665e-05, "loss": 0.4745, "step": 25587 }, { "epoch": 0.7860412250791018, "grad_norm": 0.37539973855018616, "learning_rate": 1.3326077147044974e-05, "loss": 0.5832, "step": 25588 }, { "epoch": 0.7860719442140509, "grad_norm": 0.36711615324020386, "learning_rate": 1.3325621381088e-05, "loss": 0.5192, "step": 25589 }, { "epoch": 0.7861026633490001, "grad_norm": 0.4215741753578186, "learning_rate": 1.3325165607363801e-05, "loss": 0.5945, "step": 25590 }, { "epoch": 0.7861333824839493, "grad_norm": 0.3396335542201996, "learning_rate": 1.3324709825873447e-05, "loss": 0.5985, "step": 25591 }, { "epoch": 0.7861641016188984, "grad_norm": 0.41550081968307495, "learning_rate": 1.3324254036618003e-05, "loss": 0.5855, "step": 25592 }, { "epoch": 0.7861948207538476, "grad_norm": 0.36707574129104614, "learning_rate": 1.3323798239598528e-05, "loss": 0.5548, "step": 25593 }, { "epoch": 0.7862255398887967, "grad_norm": 0.35108357667922974, "learning_rate": 1.3323342434816093e-05, "loss": 0.6027, "step": 25594 }, { "epoch": 0.7862562590237459, "grad_norm": 0.4170188903808594, "learning_rate": 1.3322886622271758e-05, "loss": 0.541, "step": 25595 }, { "epoch": 0.7862869781586951, "grad_norm": 0.5123753547668457, "learning_rate": 1.332243080196659e-05, "loss": 0.5869, "step": 25596 }, { "epoch": 0.7863176972936442, "grad_norm": 0.4324592649936676, "learning_rate": 1.3321974973901652e-05, "loss": 0.5148, "step": 25597 }, { "epoch": 0.7863484164285933, "grad_norm": 0.3376709222793579, "learning_rate": 1.3321519138078012e-05, "loss": 0.5309, "step": 25598 }, { "epoch": 0.7863791355635426, "grad_norm": 0.3790147304534912, "learning_rate": 1.3321063294496728e-05, "loss": 0.5958, "step": 25599 }, { "epoch": 0.7864098546984917, "grad_norm": 0.3540298640727997, "learning_rate": 1.3320607443158873e-05, "loss": 0.553, "step": 25600 }, { "epoch": 0.7864405738334409, "grad_norm": 0.3628934323787689, "learning_rate": 1.3320151584065507e-05, "loss": 0.6074, "step": 25601 }, { "epoch": 0.78647129296839, "grad_norm": 0.36749911308288574, "learning_rate": 1.3319695717217691e-05, "loss": 0.6347, "step": 25602 }, { "epoch": 0.7865020121033391, "grad_norm": 0.3608871400356293, "learning_rate": 1.3319239842616498e-05, "loss": 0.5884, "step": 25603 }, { "epoch": 0.7865327312382884, "grad_norm": 0.3629334270954132, "learning_rate": 1.3318783960262986e-05, "loss": 0.5447, "step": 25604 }, { "epoch": 0.7865634503732375, "grad_norm": 0.40241578221321106, "learning_rate": 1.3318328070158222e-05, "loss": 0.5937, "step": 25605 }, { "epoch": 0.7865941695081866, "grad_norm": 0.36860406398773193, "learning_rate": 1.331787217230327e-05, "loss": 0.4874, "step": 25606 }, { "epoch": 0.7866248886431358, "grad_norm": 0.3694966435432434, "learning_rate": 1.3317416266699199e-05, "loss": 0.6023, "step": 25607 }, { "epoch": 0.786655607778085, "grad_norm": 0.4055890738964081, "learning_rate": 1.331696035334707e-05, "loss": 0.5523, "step": 25608 }, { "epoch": 0.7866863269130341, "grad_norm": 0.36765769124031067, "learning_rate": 1.3316504432247948e-05, "loss": 0.5516, "step": 25609 }, { "epoch": 0.7867170460479833, "grad_norm": 0.3507416844367981, "learning_rate": 1.33160485034029e-05, "loss": 0.5798, "step": 25610 }, { "epoch": 0.7867477651829324, "grad_norm": 0.3906136751174927, "learning_rate": 1.3315592566812984e-05, "loss": 0.6149, "step": 25611 }, { "epoch": 0.7867784843178816, "grad_norm": 0.3383846580982208, "learning_rate": 1.3315136622479274e-05, "loss": 0.5051, "step": 25612 }, { "epoch": 0.7868092034528308, "grad_norm": 0.38725757598876953, "learning_rate": 1.331468067040283e-05, "loss": 0.5909, "step": 25613 }, { "epoch": 0.7868399225877799, "grad_norm": 0.412456750869751, "learning_rate": 1.331422471058472e-05, "loss": 0.61, "step": 25614 }, { "epoch": 0.7868706417227291, "grad_norm": 0.3624573051929474, "learning_rate": 1.3313768743026004e-05, "loss": 0.5139, "step": 25615 }, { "epoch": 0.7869013608576783, "grad_norm": 0.494440495967865, "learning_rate": 1.331331276772775e-05, "loss": 0.5903, "step": 25616 }, { "epoch": 0.7869320799926274, "grad_norm": 0.40452221035957336, "learning_rate": 1.3312856784691025e-05, "loss": 0.5814, "step": 25617 }, { "epoch": 0.7869627991275766, "grad_norm": 0.40734562277793884, "learning_rate": 1.3312400793916888e-05, "loss": 0.5173, "step": 25618 }, { "epoch": 0.7869935182625257, "grad_norm": 0.342227965593338, "learning_rate": 1.331194479540641e-05, "loss": 0.5355, "step": 25619 }, { "epoch": 0.7870242373974748, "grad_norm": 0.3684552311897278, "learning_rate": 1.3311488789160654e-05, "loss": 0.516, "step": 25620 }, { "epoch": 0.7870549565324241, "grad_norm": 0.34446555376052856, "learning_rate": 1.3311032775180683e-05, "loss": 0.6284, "step": 25621 }, { "epoch": 0.7870856756673732, "grad_norm": 0.36331692337989807, "learning_rate": 1.3310576753467564e-05, "loss": 0.6126, "step": 25622 }, { "epoch": 0.7871163948023223, "grad_norm": 0.4586779475212097, "learning_rate": 1.3310120724022363e-05, "loss": 0.5283, "step": 25623 }, { "epoch": 0.7871471139372715, "grad_norm": 0.3868986666202545, "learning_rate": 1.3309664686846139e-05, "loss": 0.5896, "step": 25624 }, { "epoch": 0.7871778330722207, "grad_norm": 0.3517029583454132, "learning_rate": 1.3309208641939968e-05, "loss": 0.5768, "step": 25625 }, { "epoch": 0.7872085522071699, "grad_norm": 0.34237027168273926, "learning_rate": 1.3308752589304907e-05, "loss": 0.5752, "step": 25626 }, { "epoch": 0.787239271342119, "grad_norm": 0.3666422367095947, "learning_rate": 1.3308296528942025e-05, "loss": 0.5357, "step": 25627 }, { "epoch": 0.7872699904770681, "grad_norm": 0.3896860182285309, "learning_rate": 1.3307840460852384e-05, "loss": 0.4713, "step": 25628 }, { "epoch": 0.7873007096120174, "grad_norm": 0.35154592990875244, "learning_rate": 1.3307384385037048e-05, "loss": 0.5133, "step": 25629 }, { "epoch": 0.7873314287469665, "grad_norm": 0.36977192759513855, "learning_rate": 1.3306928301497088e-05, "loss": 0.572, "step": 25630 }, { "epoch": 0.7873621478819156, "grad_norm": 0.35299184918403625, "learning_rate": 1.3306472210233564e-05, "loss": 0.6855, "step": 25631 }, { "epoch": 0.7873928670168648, "grad_norm": 0.3907211124897003, "learning_rate": 1.3306016111247547e-05, "loss": 0.6053, "step": 25632 }, { "epoch": 0.787423586151814, "grad_norm": 0.4332434833049774, "learning_rate": 1.3305560004540094e-05, "loss": 0.6298, "step": 25633 }, { "epoch": 0.7874543052867631, "grad_norm": 0.387882262468338, "learning_rate": 1.3305103890112276e-05, "loss": 0.5204, "step": 25634 }, { "epoch": 0.7874850244217123, "grad_norm": 0.3816657066345215, "learning_rate": 1.3304647767965157e-05, "loss": 0.5184, "step": 25635 }, { "epoch": 0.7875157435566614, "grad_norm": 0.41527900099754333, "learning_rate": 1.3304191638099803e-05, "loss": 0.6147, "step": 25636 }, { "epoch": 0.7875464626916105, "grad_norm": 0.42363908886909485, "learning_rate": 1.3303735500517281e-05, "loss": 0.5283, "step": 25637 }, { "epoch": 0.7875771818265598, "grad_norm": 0.4128238558769226, "learning_rate": 1.330327935521865e-05, "loss": 0.6157, "step": 25638 }, { "epoch": 0.7876079009615089, "grad_norm": 0.35600385069847107, "learning_rate": 1.3302823202204981e-05, "loss": 0.5162, "step": 25639 }, { "epoch": 0.7876386200964581, "grad_norm": 0.39153972268104553, "learning_rate": 1.3302367041477338e-05, "loss": 0.5236, "step": 25640 }, { "epoch": 0.7876693392314073, "grad_norm": 0.453346848487854, "learning_rate": 1.3301910873036787e-05, "loss": 0.6279, "step": 25641 }, { "epoch": 0.7877000583663564, "grad_norm": 0.39570149779319763, "learning_rate": 1.3301454696884391e-05, "loss": 0.5172, "step": 25642 }, { "epoch": 0.7877307775013056, "grad_norm": 0.37996286153793335, "learning_rate": 1.3300998513021217e-05, "loss": 0.5045, "step": 25643 }, { "epoch": 0.7877614966362547, "grad_norm": 0.3277568221092224, "learning_rate": 1.3300542321448332e-05, "loss": 0.5344, "step": 25644 }, { "epoch": 0.7877922157712038, "grad_norm": 0.3520147204399109, "learning_rate": 1.3300086122166798e-05, "loss": 0.5181, "step": 25645 }, { "epoch": 0.7878229349061531, "grad_norm": 0.439515620470047, "learning_rate": 1.3299629915177683e-05, "loss": 0.5806, "step": 25646 }, { "epoch": 0.7878536540411022, "grad_norm": 0.38588863611221313, "learning_rate": 1.329917370048205e-05, "loss": 0.5606, "step": 25647 }, { "epoch": 0.7878843731760513, "grad_norm": 0.3634164333343506, "learning_rate": 1.329871747808097e-05, "loss": 0.5357, "step": 25648 }, { "epoch": 0.7879150923110005, "grad_norm": 0.44586917757987976, "learning_rate": 1.3298261247975503e-05, "loss": 0.5408, "step": 25649 }, { "epoch": 0.7879458114459497, "grad_norm": 0.355695903301239, "learning_rate": 1.3297805010166718e-05, "loss": 0.544, "step": 25650 }, { "epoch": 0.7879765305808989, "grad_norm": 0.38222232460975647, "learning_rate": 1.3297348764655675e-05, "loss": 0.5517, "step": 25651 }, { "epoch": 0.788007249715848, "grad_norm": 0.38252341747283936, "learning_rate": 1.3296892511443448e-05, "loss": 0.5422, "step": 25652 }, { "epoch": 0.7880379688507971, "grad_norm": 0.32421037554740906, "learning_rate": 1.32964362505311e-05, "loss": 0.4887, "step": 25653 }, { "epoch": 0.7880686879857464, "grad_norm": 0.4235456585884094, "learning_rate": 1.3295979981919688e-05, "loss": 0.628, "step": 25654 }, { "epoch": 0.7880994071206955, "grad_norm": 0.3408637046813965, "learning_rate": 1.3295523705610291e-05, "loss": 0.553, "step": 25655 }, { "epoch": 0.7881301262556446, "grad_norm": 0.3882433772087097, "learning_rate": 1.3295067421603964e-05, "loss": 0.576, "step": 25656 }, { "epoch": 0.7881608453905938, "grad_norm": 0.38968291878700256, "learning_rate": 1.329461112990178e-05, "loss": 0.51, "step": 25657 }, { "epoch": 0.788191564525543, "grad_norm": 0.35534653067588806, "learning_rate": 1.32941548305048e-05, "loss": 0.4957, "step": 25658 }, { "epoch": 0.7882222836604921, "grad_norm": 0.3757345378398895, "learning_rate": 1.3293698523414093e-05, "loss": 0.5031, "step": 25659 }, { "epoch": 0.7882530027954413, "grad_norm": 0.341940313577652, "learning_rate": 1.3293242208630722e-05, "loss": 0.5049, "step": 25660 }, { "epoch": 0.7882837219303904, "grad_norm": 0.36503955721855164, "learning_rate": 1.329278588615575e-05, "loss": 0.5593, "step": 25661 }, { "epoch": 0.7883144410653397, "grad_norm": 0.3988998234272003, "learning_rate": 1.329232955599025e-05, "loss": 0.5971, "step": 25662 }, { "epoch": 0.7883451602002888, "grad_norm": 0.3681340217590332, "learning_rate": 1.3291873218135285e-05, "loss": 0.5609, "step": 25663 }, { "epoch": 0.7883758793352379, "grad_norm": 0.3537129759788513, "learning_rate": 1.329141687259192e-05, "loss": 0.4897, "step": 25664 }, { "epoch": 0.7884065984701871, "grad_norm": 0.4013456106185913, "learning_rate": 1.329096051936122e-05, "loss": 0.6066, "step": 25665 }, { "epoch": 0.7884373176051362, "grad_norm": 0.3670867085456848, "learning_rate": 1.3290504158444254e-05, "loss": 0.561, "step": 25666 }, { "epoch": 0.7884680367400854, "grad_norm": 0.40838295221328735, "learning_rate": 1.3290047789842082e-05, "loss": 0.6163, "step": 25667 }, { "epoch": 0.7884987558750346, "grad_norm": 0.35899460315704346, "learning_rate": 1.3289591413555777e-05, "loss": 0.4897, "step": 25668 }, { "epoch": 0.7885294750099837, "grad_norm": 0.40172243118286133, "learning_rate": 1.32891350295864e-05, "loss": 0.5566, "step": 25669 }, { "epoch": 0.7885601941449328, "grad_norm": 0.40334561467170715, "learning_rate": 1.328867863793502e-05, "loss": 0.4813, "step": 25670 }, { "epoch": 0.7885909132798821, "grad_norm": 0.38538166880607605, "learning_rate": 1.3288222238602701e-05, "loss": 0.5803, "step": 25671 }, { "epoch": 0.7886216324148312, "grad_norm": 0.3578324317932129, "learning_rate": 1.3287765831590506e-05, "loss": 0.6156, "step": 25672 }, { "epoch": 0.7886523515497803, "grad_norm": 0.3929175138473511, "learning_rate": 1.3287309416899508e-05, "loss": 0.6036, "step": 25673 }, { "epoch": 0.7886830706847295, "grad_norm": 0.3469280004501343, "learning_rate": 1.3286852994530767e-05, "loss": 0.6175, "step": 25674 }, { "epoch": 0.7887137898196787, "grad_norm": 0.4527817964553833, "learning_rate": 1.3286396564485353e-05, "loss": 0.6764, "step": 25675 }, { "epoch": 0.7887445089546279, "grad_norm": 0.362690269947052, "learning_rate": 1.3285940126764329e-05, "loss": 0.5472, "step": 25676 }, { "epoch": 0.788775228089577, "grad_norm": 0.3603293299674988, "learning_rate": 1.3285483681368765e-05, "loss": 0.5026, "step": 25677 }, { "epoch": 0.7888059472245261, "grad_norm": 0.40015462040901184, "learning_rate": 1.3285027228299721e-05, "loss": 0.4586, "step": 25678 }, { "epoch": 0.7888366663594754, "grad_norm": 0.34264475107192993, "learning_rate": 1.3284570767558268e-05, "loss": 0.5516, "step": 25679 }, { "epoch": 0.7888673854944245, "grad_norm": 0.42345890402793884, "learning_rate": 1.328411429914547e-05, "loss": 0.4954, "step": 25680 }, { "epoch": 0.7888981046293736, "grad_norm": 0.40134525299072266, "learning_rate": 1.3283657823062393e-05, "loss": 0.5502, "step": 25681 }, { "epoch": 0.7889288237643228, "grad_norm": 0.3862386643886566, "learning_rate": 1.3283201339310106e-05, "loss": 0.5858, "step": 25682 }, { "epoch": 0.788959542899272, "grad_norm": 0.3684404790401459, "learning_rate": 1.328274484788967e-05, "loss": 0.5139, "step": 25683 }, { "epoch": 0.7889902620342211, "grad_norm": 0.4124489724636078, "learning_rate": 1.3282288348802158e-05, "loss": 0.6384, "step": 25684 }, { "epoch": 0.7890209811691703, "grad_norm": 0.40197429060935974, "learning_rate": 1.328183184204863e-05, "loss": 0.6033, "step": 25685 }, { "epoch": 0.7890517003041194, "grad_norm": 0.354043573141098, "learning_rate": 1.3281375327630155e-05, "loss": 0.5751, "step": 25686 }, { "epoch": 0.7890824194390686, "grad_norm": 0.37822332978248596, "learning_rate": 1.3280918805547797e-05, "loss": 0.5161, "step": 25687 }, { "epoch": 0.7891131385740178, "grad_norm": 0.38954412937164307, "learning_rate": 1.3280462275802629e-05, "loss": 0.535, "step": 25688 }, { "epoch": 0.7891438577089669, "grad_norm": 0.3428371846675873, "learning_rate": 1.3280005738395707e-05, "loss": 0.6032, "step": 25689 }, { "epoch": 0.7891745768439161, "grad_norm": 0.5000455975532532, "learning_rate": 1.3279549193328106e-05, "loss": 0.5393, "step": 25690 }, { "epoch": 0.7892052959788652, "grad_norm": 0.35214370489120483, "learning_rate": 1.3279092640600885e-05, "loss": 0.5841, "step": 25691 }, { "epoch": 0.7892360151138144, "grad_norm": 0.4078379273414612, "learning_rate": 1.3278636080215117e-05, "loss": 0.5308, "step": 25692 }, { "epoch": 0.7892667342487636, "grad_norm": 0.39192599058151245, "learning_rate": 1.3278179512171865e-05, "loss": 0.5735, "step": 25693 }, { "epoch": 0.7892974533837127, "grad_norm": 0.5683619976043701, "learning_rate": 1.3277722936472195e-05, "loss": 0.5356, "step": 25694 }, { "epoch": 0.7893281725186618, "grad_norm": 0.39576098322868347, "learning_rate": 1.3277266353117174e-05, "loss": 0.597, "step": 25695 }, { "epoch": 0.7893588916536111, "grad_norm": 0.5175675749778748, "learning_rate": 1.327680976210787e-05, "loss": 0.5486, "step": 25696 }, { "epoch": 0.7893896107885602, "grad_norm": 0.41663670539855957, "learning_rate": 1.3276353163445346e-05, "loss": 0.6124, "step": 25697 }, { "epoch": 0.7894203299235093, "grad_norm": 0.3682381808757782, "learning_rate": 1.3275896557130671e-05, "loss": 0.5381, "step": 25698 }, { "epoch": 0.7894510490584585, "grad_norm": 0.3717355728149414, "learning_rate": 1.3275439943164914e-05, "loss": 0.5721, "step": 25699 }, { "epoch": 0.7894817681934077, "grad_norm": 0.39973074197769165, "learning_rate": 1.3274983321549136e-05, "loss": 0.5524, "step": 25700 }, { "epoch": 0.7895124873283569, "grad_norm": 0.37077632546424866, "learning_rate": 1.3274526692284404e-05, "loss": 0.558, "step": 25701 }, { "epoch": 0.789543206463306, "grad_norm": 0.32797327637672424, "learning_rate": 1.3274070055371789e-05, "loss": 0.5279, "step": 25702 }, { "epoch": 0.7895739255982551, "grad_norm": 0.36330369114875793, "learning_rate": 1.3273613410812352e-05, "loss": 0.57, "step": 25703 }, { "epoch": 0.7896046447332044, "grad_norm": 0.3982851505279541, "learning_rate": 1.3273156758607165e-05, "loss": 0.5124, "step": 25704 }, { "epoch": 0.7896353638681535, "grad_norm": 0.3433174192905426, "learning_rate": 1.3272700098757292e-05, "loss": 0.5569, "step": 25705 }, { "epoch": 0.7896660830031026, "grad_norm": 0.5805022120475769, "learning_rate": 1.3272243431263797e-05, "loss": 0.5159, "step": 25706 }, { "epoch": 0.7896968021380518, "grad_norm": 0.3628769814968109, "learning_rate": 1.3271786756127749e-05, "loss": 0.5532, "step": 25707 }, { "epoch": 0.7897275212730009, "grad_norm": 0.437651127576828, "learning_rate": 1.3271330073350216e-05, "loss": 0.5841, "step": 25708 }, { "epoch": 0.7897582404079501, "grad_norm": 0.3609328269958496, "learning_rate": 1.3270873382932262e-05, "loss": 0.5224, "step": 25709 }, { "epoch": 0.7897889595428993, "grad_norm": 0.39422059059143066, "learning_rate": 1.3270416684874954e-05, "loss": 0.5222, "step": 25710 }, { "epoch": 0.7898196786778484, "grad_norm": 0.35076776146888733, "learning_rate": 1.3269959979179362e-05, "loss": 0.5892, "step": 25711 }, { "epoch": 0.7898503978127976, "grad_norm": 0.37264102697372437, "learning_rate": 1.3269503265846548e-05, "loss": 0.5012, "step": 25712 }, { "epoch": 0.7898811169477468, "grad_norm": 0.33443158864974976, "learning_rate": 1.3269046544877581e-05, "loss": 0.4609, "step": 25713 }, { "epoch": 0.7899118360826959, "grad_norm": 0.35535380244255066, "learning_rate": 1.3268589816273529e-05, "loss": 0.5216, "step": 25714 }, { "epoch": 0.7899425552176451, "grad_norm": 0.32718202471733093, "learning_rate": 1.3268133080035455e-05, "loss": 0.4343, "step": 25715 }, { "epoch": 0.7899732743525942, "grad_norm": 0.4873031675815582, "learning_rate": 1.326767633616443e-05, "loss": 0.5398, "step": 25716 }, { "epoch": 0.7900039934875434, "grad_norm": 1.0549070835113525, "learning_rate": 1.3267219584661515e-05, "loss": 0.6101, "step": 25717 }, { "epoch": 0.7900347126224926, "grad_norm": 0.4245976507663727, "learning_rate": 1.3266762825527786e-05, "loss": 0.5209, "step": 25718 }, { "epoch": 0.7900654317574417, "grad_norm": 0.3408292531967163, "learning_rate": 1.32663060587643e-05, "loss": 0.5389, "step": 25719 }, { "epoch": 0.7900961508923908, "grad_norm": 0.3595948815345764, "learning_rate": 1.326584928437213e-05, "loss": 0.5231, "step": 25720 }, { "epoch": 0.7901268700273401, "grad_norm": 0.37688174843788147, "learning_rate": 1.326539250235234e-05, "loss": 0.4399, "step": 25721 }, { "epoch": 0.7901575891622892, "grad_norm": 0.45100268721580505, "learning_rate": 1.3264935712706e-05, "loss": 0.54, "step": 25722 }, { "epoch": 0.7901883082972383, "grad_norm": 0.39235901832580566, "learning_rate": 1.3264478915434174e-05, "loss": 0.5067, "step": 25723 }, { "epoch": 0.7902190274321875, "grad_norm": 0.3500441312789917, "learning_rate": 1.3264022110537926e-05, "loss": 0.5207, "step": 25724 }, { "epoch": 0.7902497465671366, "grad_norm": 0.3739815950393677, "learning_rate": 1.3263565298018331e-05, "loss": 0.5692, "step": 25725 }, { "epoch": 0.7902804657020859, "grad_norm": 0.33621975779533386, "learning_rate": 1.326310847787645e-05, "loss": 0.5457, "step": 25726 }, { "epoch": 0.790311184837035, "grad_norm": 0.3923107385635376, "learning_rate": 1.3262651650113349e-05, "loss": 0.5466, "step": 25727 }, { "epoch": 0.7903419039719841, "grad_norm": 0.3825068175792694, "learning_rate": 1.3262194814730099e-05, "loss": 0.5081, "step": 25728 }, { "epoch": 0.7903726231069333, "grad_norm": 0.4155884087085724, "learning_rate": 1.3261737971727766e-05, "loss": 0.5324, "step": 25729 }, { "epoch": 0.7904033422418825, "grad_norm": 0.38793569803237915, "learning_rate": 1.3261281121107415e-05, "loss": 0.6692, "step": 25730 }, { "epoch": 0.7904340613768316, "grad_norm": 0.551550030708313, "learning_rate": 1.3260824262870113e-05, "loss": 0.5863, "step": 25731 }, { "epoch": 0.7904647805117808, "grad_norm": 0.3852141499519348, "learning_rate": 1.3260367397016928e-05, "loss": 0.5807, "step": 25732 }, { "epoch": 0.7904954996467299, "grad_norm": 0.3602704703807831, "learning_rate": 1.3259910523548929e-05, "loss": 0.5963, "step": 25733 }, { "epoch": 0.7905262187816791, "grad_norm": 0.34892114996910095, "learning_rate": 1.3259453642467181e-05, "loss": 0.5612, "step": 25734 }, { "epoch": 0.7905569379166283, "grad_norm": 0.35050442814826965, "learning_rate": 1.325899675377275e-05, "loss": 0.6125, "step": 25735 }, { "epoch": 0.7905876570515774, "grad_norm": 0.4609145224094391, "learning_rate": 1.3258539857466708e-05, "loss": 0.5933, "step": 25736 }, { "epoch": 0.7906183761865266, "grad_norm": 0.4693754315376282, "learning_rate": 1.3258082953550115e-05, "loss": 0.5888, "step": 25737 }, { "epoch": 0.7906490953214758, "grad_norm": 0.3798951506614685, "learning_rate": 1.3257626042024043e-05, "loss": 0.5383, "step": 25738 }, { "epoch": 0.7906798144564249, "grad_norm": 0.3959641754627228, "learning_rate": 1.3257169122889559e-05, "loss": 0.5176, "step": 25739 }, { "epoch": 0.7907105335913741, "grad_norm": 0.4093362092971802, "learning_rate": 1.3256712196147727e-05, "loss": 0.5824, "step": 25740 }, { "epoch": 0.7907412527263232, "grad_norm": 0.41426965594291687, "learning_rate": 1.325625526179962e-05, "loss": 0.532, "step": 25741 }, { "epoch": 0.7907719718612723, "grad_norm": 0.3576471507549286, "learning_rate": 1.3255798319846296e-05, "loss": 0.5223, "step": 25742 }, { "epoch": 0.7908026909962216, "grad_norm": 0.34835365414619446, "learning_rate": 1.3255341370288832e-05, "loss": 0.4967, "step": 25743 }, { "epoch": 0.7908334101311707, "grad_norm": 0.36119282245635986, "learning_rate": 1.3254884413128289e-05, "loss": 0.4606, "step": 25744 }, { "epoch": 0.7908641292661198, "grad_norm": 0.3695414662361145, "learning_rate": 1.3254427448365737e-05, "loss": 0.4368, "step": 25745 }, { "epoch": 0.790894848401069, "grad_norm": 0.35363277792930603, "learning_rate": 1.325397047600224e-05, "loss": 0.4992, "step": 25746 }, { "epoch": 0.7909255675360182, "grad_norm": 0.3889347016811371, "learning_rate": 1.3253513496038872e-05, "loss": 0.5834, "step": 25747 }, { "epoch": 0.7909562866709673, "grad_norm": 0.35414406657218933, "learning_rate": 1.3253056508476692e-05, "loss": 0.5022, "step": 25748 }, { "epoch": 0.7909870058059165, "grad_norm": 0.392501562833786, "learning_rate": 1.3252599513316774e-05, "loss": 0.4581, "step": 25749 }, { "epoch": 0.7910177249408656, "grad_norm": 0.3868059515953064, "learning_rate": 1.325214251056018e-05, "loss": 0.5468, "step": 25750 }, { "epoch": 0.7910484440758149, "grad_norm": 0.3780817985534668, "learning_rate": 1.3251685500207983e-05, "loss": 0.4762, "step": 25751 }, { "epoch": 0.791079163210764, "grad_norm": 0.37030836939811707, "learning_rate": 1.3251228482261245e-05, "loss": 0.5527, "step": 25752 }, { "epoch": 0.7911098823457131, "grad_norm": 0.3759697377681732, "learning_rate": 1.3250771456721035e-05, "loss": 0.6467, "step": 25753 }, { "epoch": 0.7911406014806623, "grad_norm": 0.3712805509567261, "learning_rate": 1.3250314423588425e-05, "loss": 0.5707, "step": 25754 }, { "epoch": 0.7911713206156115, "grad_norm": 0.37761417031288147, "learning_rate": 1.3249857382864475e-05, "loss": 0.62, "step": 25755 }, { "epoch": 0.7912020397505606, "grad_norm": 0.36121633648872375, "learning_rate": 1.3249400334550257e-05, "loss": 0.4944, "step": 25756 }, { "epoch": 0.7912327588855098, "grad_norm": 0.364177405834198, "learning_rate": 1.324894327864684e-05, "loss": 0.5036, "step": 25757 }, { "epoch": 0.7912634780204589, "grad_norm": 0.4014582633972168, "learning_rate": 1.3248486215155286e-05, "loss": 0.4842, "step": 25758 }, { "epoch": 0.791294197155408, "grad_norm": 0.3554246127605438, "learning_rate": 1.3248029144076669e-05, "loss": 0.536, "step": 25759 }, { "epoch": 0.7913249162903573, "grad_norm": 0.3712151050567627, "learning_rate": 1.3247572065412048e-05, "loss": 0.6394, "step": 25760 }, { "epoch": 0.7913556354253064, "grad_norm": 0.3768034279346466, "learning_rate": 1.3247114979162502e-05, "loss": 0.5661, "step": 25761 }, { "epoch": 0.7913863545602556, "grad_norm": 0.3731251358985901, "learning_rate": 1.3246657885329088e-05, "loss": 0.5934, "step": 25762 }, { "epoch": 0.7914170736952048, "grad_norm": 0.36354610323905945, "learning_rate": 1.3246200783912879e-05, "loss": 0.6468, "step": 25763 }, { "epoch": 0.7914477928301539, "grad_norm": 0.35715755820274353, "learning_rate": 1.324574367491494e-05, "loss": 0.5824, "step": 25764 }, { "epoch": 0.7914785119651031, "grad_norm": 0.33757272362709045, "learning_rate": 1.3245286558336341e-05, "loss": 0.4847, "step": 25765 }, { "epoch": 0.7915092311000522, "grad_norm": 0.34051695466041565, "learning_rate": 1.3244829434178148e-05, "loss": 0.5242, "step": 25766 }, { "epoch": 0.7915399502350013, "grad_norm": 0.40342119336128235, "learning_rate": 1.324437230244143e-05, "loss": 0.5896, "step": 25767 }, { "epoch": 0.7915706693699506, "grad_norm": 0.35796675086021423, "learning_rate": 1.3243915163127254e-05, "loss": 0.5204, "step": 25768 }, { "epoch": 0.7916013885048997, "grad_norm": 0.32412853837013245, "learning_rate": 1.3243458016236688e-05, "loss": 0.566, "step": 25769 }, { "epoch": 0.7916321076398488, "grad_norm": 0.3665205240249634, "learning_rate": 1.3243000861770799e-05, "loss": 0.5088, "step": 25770 }, { "epoch": 0.791662826774798, "grad_norm": 0.3635343015193939, "learning_rate": 1.3242543699730652e-05, "loss": 0.5856, "step": 25771 }, { "epoch": 0.7916935459097472, "grad_norm": 0.5605050325393677, "learning_rate": 1.3242086530117322e-05, "loss": 0.5953, "step": 25772 }, { "epoch": 0.7917242650446964, "grad_norm": 0.3521329164505005, "learning_rate": 1.3241629352931868e-05, "loss": 0.5163, "step": 25773 }, { "epoch": 0.7917549841796455, "grad_norm": 0.3583928942680359, "learning_rate": 1.3241172168175367e-05, "loss": 0.5724, "step": 25774 }, { "epoch": 0.7917857033145946, "grad_norm": 0.389596551656723, "learning_rate": 1.3240714975848881e-05, "loss": 0.5748, "step": 25775 }, { "epoch": 0.7918164224495439, "grad_norm": 0.3750671148300171, "learning_rate": 1.3240257775953478e-05, "loss": 0.6031, "step": 25776 }, { "epoch": 0.791847141584493, "grad_norm": 0.3380883038043976, "learning_rate": 1.3239800568490226e-05, "loss": 0.5337, "step": 25777 }, { "epoch": 0.7918778607194421, "grad_norm": 0.4212368428707123, "learning_rate": 1.3239343353460194e-05, "loss": 0.5117, "step": 25778 }, { "epoch": 0.7919085798543913, "grad_norm": 0.37040096521377563, "learning_rate": 1.323888613086445e-05, "loss": 0.5522, "step": 25779 }, { "epoch": 0.7919392989893405, "grad_norm": 0.40121498703956604, "learning_rate": 1.3238428900704058e-05, "loss": 0.5291, "step": 25780 }, { "epoch": 0.7919700181242896, "grad_norm": 0.4006633162498474, "learning_rate": 1.3237971662980096e-05, "loss": 0.5585, "step": 25781 }, { "epoch": 0.7920007372592388, "grad_norm": 0.441283643245697, "learning_rate": 1.3237514417693618e-05, "loss": 0.4688, "step": 25782 }, { "epoch": 0.7920314563941879, "grad_norm": 0.3466672897338867, "learning_rate": 1.3237057164845703e-05, "loss": 0.5098, "step": 25783 }, { "epoch": 0.792062175529137, "grad_norm": 0.3659134805202484, "learning_rate": 1.3236599904437412e-05, "loss": 0.5358, "step": 25784 }, { "epoch": 0.7920928946640863, "grad_norm": 0.3746253252029419, "learning_rate": 1.323614263646982e-05, "loss": 0.6053, "step": 25785 }, { "epoch": 0.7921236137990354, "grad_norm": 0.40822792053222656, "learning_rate": 1.323568536094399e-05, "loss": 0.4377, "step": 25786 }, { "epoch": 0.7921543329339846, "grad_norm": 0.3702104389667511, "learning_rate": 1.3235228077860986e-05, "loss": 0.476, "step": 25787 }, { "epoch": 0.7921850520689337, "grad_norm": 0.44354337453842163, "learning_rate": 1.3234770787221884e-05, "loss": 0.5691, "step": 25788 }, { "epoch": 0.7922157712038829, "grad_norm": 0.36377689242362976, "learning_rate": 1.323431348902775e-05, "loss": 0.5475, "step": 25789 }, { "epoch": 0.7922464903388321, "grad_norm": 0.477884441614151, "learning_rate": 1.323385618327965e-05, "loss": 0.5385, "step": 25790 }, { "epoch": 0.7922772094737812, "grad_norm": 0.34985437989234924, "learning_rate": 1.3233398869978654e-05, "loss": 0.4797, "step": 25791 }, { "epoch": 0.7923079286087303, "grad_norm": 0.3667285442352295, "learning_rate": 1.3232941549125829e-05, "loss": 0.5403, "step": 25792 }, { "epoch": 0.7923386477436796, "grad_norm": 0.3726455271244049, "learning_rate": 1.3232484220722241e-05, "loss": 0.5188, "step": 25793 }, { "epoch": 0.7923693668786287, "grad_norm": 0.36684057116508484, "learning_rate": 1.3232026884768962e-05, "loss": 0.5665, "step": 25794 }, { "epoch": 0.7924000860135778, "grad_norm": 0.33736908435821533, "learning_rate": 1.323156954126706e-05, "loss": 0.5464, "step": 25795 }, { "epoch": 0.792430805148527, "grad_norm": 0.32632485032081604, "learning_rate": 1.3231112190217598e-05, "loss": 0.5839, "step": 25796 }, { "epoch": 0.7924615242834762, "grad_norm": 0.359298437833786, "learning_rate": 1.3230654831621651e-05, "loss": 0.4757, "step": 25797 }, { "epoch": 0.7924922434184254, "grad_norm": 0.4403110444545746, "learning_rate": 1.323019746548028e-05, "loss": 0.5741, "step": 25798 }, { "epoch": 0.7925229625533745, "grad_norm": 0.33675453066825867, "learning_rate": 1.3229740091794562e-05, "loss": 0.4781, "step": 25799 }, { "epoch": 0.7925536816883236, "grad_norm": 0.39627501368522644, "learning_rate": 1.3229282710565558e-05, "loss": 0.5577, "step": 25800 }, { "epoch": 0.7925844008232729, "grad_norm": 0.3867175877094269, "learning_rate": 1.322882532179434e-05, "loss": 0.6455, "step": 25801 }, { "epoch": 0.792615119958222, "grad_norm": 0.3626076877117157, "learning_rate": 1.3228367925481976e-05, "loss": 0.5679, "step": 25802 }, { "epoch": 0.7926458390931711, "grad_norm": 0.3795565962791443, "learning_rate": 1.322791052162953e-05, "loss": 0.5153, "step": 25803 }, { "epoch": 0.7926765582281203, "grad_norm": 0.3664473593235016, "learning_rate": 1.3227453110238075e-05, "loss": 0.635, "step": 25804 }, { "epoch": 0.7927072773630695, "grad_norm": 0.3738345801830292, "learning_rate": 1.322699569130868e-05, "loss": 0.5548, "step": 25805 }, { "epoch": 0.7927379964980186, "grad_norm": 0.3252347409725189, "learning_rate": 1.3226538264842405e-05, "loss": 0.4367, "step": 25806 }, { "epoch": 0.7927687156329678, "grad_norm": 0.3404231667518616, "learning_rate": 1.3226080830840329e-05, "loss": 0.5742, "step": 25807 }, { "epoch": 0.7927994347679169, "grad_norm": 0.40306469798088074, "learning_rate": 1.3225623389303517e-05, "loss": 0.4877, "step": 25808 }, { "epoch": 0.792830153902866, "grad_norm": 0.4006347954273224, "learning_rate": 1.3225165940233034e-05, "loss": 0.6347, "step": 25809 }, { "epoch": 0.7928608730378153, "grad_norm": 0.3108697533607483, "learning_rate": 1.322470848362995e-05, "loss": 0.5192, "step": 25810 }, { "epoch": 0.7928915921727644, "grad_norm": 0.3681524395942688, "learning_rate": 1.3224251019495338e-05, "loss": 0.6011, "step": 25811 }, { "epoch": 0.7929223113077136, "grad_norm": 0.4632471799850464, "learning_rate": 1.3223793547830259e-05, "loss": 0.5304, "step": 25812 }, { "epoch": 0.7929530304426627, "grad_norm": 0.38758862018585205, "learning_rate": 1.3223336068635786e-05, "loss": 0.5394, "step": 25813 }, { "epoch": 0.7929837495776119, "grad_norm": 0.3653997778892517, "learning_rate": 1.3222878581912988e-05, "loss": 0.5041, "step": 25814 }, { "epoch": 0.7930144687125611, "grad_norm": 0.36733362078666687, "learning_rate": 1.3222421087662931e-05, "loss": 0.5632, "step": 25815 }, { "epoch": 0.7930451878475102, "grad_norm": 0.34324222803115845, "learning_rate": 1.3221963585886681e-05, "loss": 0.5638, "step": 25816 }, { "epoch": 0.7930759069824593, "grad_norm": 0.40598562359809875, "learning_rate": 1.3221506076585316e-05, "loss": 0.5857, "step": 25817 }, { "epoch": 0.7931066261174086, "grad_norm": 0.3438378572463989, "learning_rate": 1.3221048559759894e-05, "loss": 0.5819, "step": 25818 }, { "epoch": 0.7931373452523577, "grad_norm": 0.32699480652809143, "learning_rate": 1.3220591035411492e-05, "loss": 0.5356, "step": 25819 }, { "epoch": 0.7931680643873068, "grad_norm": 0.39893078804016113, "learning_rate": 1.3220133503541171e-05, "loss": 0.5315, "step": 25820 }, { "epoch": 0.793198783522256, "grad_norm": 0.3583940267562866, "learning_rate": 1.3219675964150006e-05, "loss": 0.5902, "step": 25821 }, { "epoch": 0.7932295026572052, "grad_norm": 0.34539905190467834, "learning_rate": 1.3219218417239063e-05, "loss": 0.552, "step": 25822 }, { "epoch": 0.7932602217921544, "grad_norm": 0.3656444251537323, "learning_rate": 1.3218760862809407e-05, "loss": 0.5045, "step": 25823 }, { "epoch": 0.7932909409271035, "grad_norm": 0.3478304445743561, "learning_rate": 1.3218303300862115e-05, "loss": 0.5424, "step": 25824 }, { "epoch": 0.7933216600620526, "grad_norm": 0.41325902938842773, "learning_rate": 1.3217845731398244e-05, "loss": 0.6544, "step": 25825 }, { "epoch": 0.7933523791970019, "grad_norm": 0.35479438304901123, "learning_rate": 1.3217388154418877e-05, "loss": 0.5674, "step": 25826 }, { "epoch": 0.793383098331951, "grad_norm": 0.48120903968811035, "learning_rate": 1.3216930569925072e-05, "loss": 0.5562, "step": 25827 }, { "epoch": 0.7934138174669001, "grad_norm": 0.3582687973976135, "learning_rate": 1.3216472977917901e-05, "loss": 0.5696, "step": 25828 }, { "epoch": 0.7934445366018493, "grad_norm": 0.3155936300754547, "learning_rate": 1.3216015378398435e-05, "loss": 0.4876, "step": 25829 }, { "epoch": 0.7934752557367984, "grad_norm": 0.3492187559604645, "learning_rate": 1.3215557771367738e-05, "loss": 0.58, "step": 25830 }, { "epoch": 0.7935059748717476, "grad_norm": 0.36287569999694824, "learning_rate": 1.3215100156826881e-05, "loss": 0.4786, "step": 25831 }, { "epoch": 0.7935366940066968, "grad_norm": 0.3723464012145996, "learning_rate": 1.3214642534776933e-05, "loss": 0.5879, "step": 25832 }, { "epoch": 0.7935674131416459, "grad_norm": 0.36767345666885376, "learning_rate": 1.3214184905218965e-05, "loss": 0.5485, "step": 25833 }, { "epoch": 0.793598132276595, "grad_norm": 0.3693479895591736, "learning_rate": 1.3213727268154042e-05, "loss": 0.5334, "step": 25834 }, { "epoch": 0.7936288514115443, "grad_norm": 0.34294751286506653, "learning_rate": 1.3213269623583233e-05, "loss": 0.5726, "step": 25835 }, { "epoch": 0.7936595705464934, "grad_norm": 0.3199531137943268, "learning_rate": 1.321281197150761e-05, "loss": 0.5757, "step": 25836 }, { "epoch": 0.7936902896814426, "grad_norm": 0.3703114986419678, "learning_rate": 1.3212354311928245e-05, "loss": 0.5612, "step": 25837 }, { "epoch": 0.7937210088163917, "grad_norm": 0.36325645446777344, "learning_rate": 1.3211896644846197e-05, "loss": 0.5201, "step": 25838 }, { "epoch": 0.7937517279513409, "grad_norm": 0.3574269413948059, "learning_rate": 1.321143897026254e-05, "loss": 0.5289, "step": 25839 }, { "epoch": 0.7937824470862901, "grad_norm": 0.395268976688385, "learning_rate": 1.3210981288178345e-05, "loss": 0.5009, "step": 25840 }, { "epoch": 0.7938131662212392, "grad_norm": 0.3448356091976166, "learning_rate": 1.3210523598594675e-05, "loss": 0.4567, "step": 25841 }, { "epoch": 0.7938438853561883, "grad_norm": 0.4270251393318176, "learning_rate": 1.3210065901512607e-05, "loss": 0.6268, "step": 25842 }, { "epoch": 0.7938746044911376, "grad_norm": 0.3678332567214966, "learning_rate": 1.3209608196933202e-05, "loss": 0.5621, "step": 25843 }, { "epoch": 0.7939053236260867, "grad_norm": 0.39322003722190857, "learning_rate": 1.3209150484857536e-05, "loss": 0.5349, "step": 25844 }, { "epoch": 0.7939360427610358, "grad_norm": 0.3558408319950104, "learning_rate": 1.3208692765286674e-05, "loss": 0.6106, "step": 25845 }, { "epoch": 0.793966761895985, "grad_norm": 0.38039734959602356, "learning_rate": 1.3208235038221687e-05, "loss": 0.5281, "step": 25846 }, { "epoch": 0.7939974810309341, "grad_norm": 0.34674689173698425, "learning_rate": 1.3207777303663642e-05, "loss": 0.5728, "step": 25847 }, { "epoch": 0.7940282001658834, "grad_norm": 0.39170360565185547, "learning_rate": 1.3207319561613607e-05, "loss": 0.5011, "step": 25848 }, { "epoch": 0.7940589193008325, "grad_norm": 0.345059871673584, "learning_rate": 1.3206861812072657e-05, "loss": 0.6412, "step": 25849 }, { "epoch": 0.7940896384357816, "grad_norm": 0.3976093828678131, "learning_rate": 1.3206404055041855e-05, "loss": 0.6255, "step": 25850 }, { "epoch": 0.7941203575707309, "grad_norm": 0.3727046251296997, "learning_rate": 1.3205946290522276e-05, "loss": 0.5899, "step": 25851 }, { "epoch": 0.79415107670568, "grad_norm": 0.38945960998535156, "learning_rate": 1.320548851851498e-05, "loss": 0.4975, "step": 25852 }, { "epoch": 0.7941817958406291, "grad_norm": 0.39285561442375183, "learning_rate": 1.3205030739021046e-05, "loss": 0.519, "step": 25853 }, { "epoch": 0.7942125149755783, "grad_norm": 0.3303069472312927, "learning_rate": 1.3204572952041537e-05, "loss": 0.5136, "step": 25854 }, { "epoch": 0.7942432341105274, "grad_norm": 0.37648889422416687, "learning_rate": 1.3204115157577526e-05, "loss": 0.5636, "step": 25855 }, { "epoch": 0.7942739532454766, "grad_norm": 0.4016236960887909, "learning_rate": 1.3203657355630077e-05, "loss": 0.602, "step": 25856 }, { "epoch": 0.7943046723804258, "grad_norm": 0.40242651104927063, "learning_rate": 1.3203199546200265e-05, "loss": 0.5425, "step": 25857 }, { "epoch": 0.7943353915153749, "grad_norm": 0.3945392668247223, "learning_rate": 1.3202741729289157e-05, "loss": 0.519, "step": 25858 }, { "epoch": 0.7943661106503241, "grad_norm": 0.39229291677474976, "learning_rate": 1.320228390489782e-05, "loss": 0.5966, "step": 25859 }, { "epoch": 0.7943968297852733, "grad_norm": 0.34062322974205017, "learning_rate": 1.3201826073027326e-05, "loss": 0.4611, "step": 25860 }, { "epoch": 0.7944275489202224, "grad_norm": 0.3792220652103424, "learning_rate": 1.3201368233678743e-05, "loss": 0.4805, "step": 25861 }, { "epoch": 0.7944582680551716, "grad_norm": 0.34507840871810913, "learning_rate": 1.320091038685314e-05, "loss": 0.5377, "step": 25862 }, { "epoch": 0.7944889871901207, "grad_norm": 0.3438222110271454, "learning_rate": 1.3200452532551592e-05, "loss": 0.5447, "step": 25863 }, { "epoch": 0.7945197063250699, "grad_norm": 0.39573249220848083, "learning_rate": 1.3199994670775158e-05, "loss": 0.5694, "step": 25864 }, { "epoch": 0.7945504254600191, "grad_norm": 0.35836952924728394, "learning_rate": 1.3199536801524918e-05, "loss": 0.5106, "step": 25865 }, { "epoch": 0.7945811445949682, "grad_norm": 0.3538137376308441, "learning_rate": 1.319907892480193e-05, "loss": 0.5042, "step": 25866 }, { "epoch": 0.7946118637299173, "grad_norm": 0.41393041610717773, "learning_rate": 1.3198621040607276e-05, "loss": 0.6169, "step": 25867 }, { "epoch": 0.7946425828648666, "grad_norm": 0.8836017847061157, "learning_rate": 1.3198163148942017e-05, "loss": 0.5839, "step": 25868 }, { "epoch": 0.7946733019998157, "grad_norm": 0.35661768913269043, "learning_rate": 1.3197705249807224e-05, "loss": 0.5868, "step": 25869 }, { "epoch": 0.7947040211347648, "grad_norm": 0.35603585839271545, "learning_rate": 1.3197247343203967e-05, "loss": 0.47, "step": 25870 }, { "epoch": 0.794734740269714, "grad_norm": 0.3621756434440613, "learning_rate": 1.3196789429133317e-05, "loss": 0.5984, "step": 25871 }, { "epoch": 0.7947654594046631, "grad_norm": 0.3830057978630066, "learning_rate": 1.3196331507596341e-05, "loss": 0.6385, "step": 25872 }, { "epoch": 0.7947961785396124, "grad_norm": 0.3489990234375, "learning_rate": 1.319587357859411e-05, "loss": 0.5929, "step": 25873 }, { "epoch": 0.7948268976745615, "grad_norm": 0.3625670373439789, "learning_rate": 1.3195415642127693e-05, "loss": 0.6166, "step": 25874 }, { "epoch": 0.7948576168095106, "grad_norm": 0.8529596924781799, "learning_rate": 1.3194957698198158e-05, "loss": 0.5415, "step": 25875 }, { "epoch": 0.7948883359444598, "grad_norm": 0.39071017503738403, "learning_rate": 1.3194499746806577e-05, "loss": 0.4914, "step": 25876 }, { "epoch": 0.794919055079409, "grad_norm": 0.3501439392566681, "learning_rate": 1.319404178795402e-05, "loss": 0.5455, "step": 25877 }, { "epoch": 0.7949497742143581, "grad_norm": 0.36604151129722595, "learning_rate": 1.3193583821641556e-05, "loss": 0.5413, "step": 25878 }, { "epoch": 0.7949804933493073, "grad_norm": 0.44482243061065674, "learning_rate": 1.3193125847870252e-05, "loss": 0.5816, "step": 25879 }, { "epoch": 0.7950112124842564, "grad_norm": 0.334824800491333, "learning_rate": 1.3192667866641179e-05, "loss": 0.5343, "step": 25880 }, { "epoch": 0.7950419316192056, "grad_norm": 0.4012795388698578, "learning_rate": 1.3192209877955406e-05, "loss": 0.505, "step": 25881 }, { "epoch": 0.7950726507541548, "grad_norm": 0.37336716055870056, "learning_rate": 1.3191751881814008e-05, "loss": 0.5681, "step": 25882 }, { "epoch": 0.7951033698891039, "grad_norm": 0.39793428778648376, "learning_rate": 1.3191293878218049e-05, "loss": 0.5389, "step": 25883 }, { "epoch": 0.7951340890240531, "grad_norm": 0.4709588885307312, "learning_rate": 1.3190835867168599e-05, "loss": 0.4914, "step": 25884 }, { "epoch": 0.7951648081590023, "grad_norm": 0.3957739472389221, "learning_rate": 1.3190377848666728e-05, "loss": 0.6156, "step": 25885 }, { "epoch": 0.7951955272939514, "grad_norm": 0.648902177810669, "learning_rate": 1.318991982271351e-05, "loss": 0.54, "step": 25886 }, { "epoch": 0.7952262464289006, "grad_norm": 0.33387696743011475, "learning_rate": 1.3189461789310011e-05, "loss": 0.5741, "step": 25887 }, { "epoch": 0.7952569655638497, "grad_norm": 0.32171788811683655, "learning_rate": 1.31890037484573e-05, "loss": 0.5665, "step": 25888 }, { "epoch": 0.7952876846987988, "grad_norm": 0.3639090359210968, "learning_rate": 1.3188545700156447e-05, "loss": 0.4912, "step": 25889 }, { "epoch": 0.7953184038337481, "grad_norm": 0.36800283193588257, "learning_rate": 1.3188087644408527e-05, "loss": 0.4694, "step": 25890 }, { "epoch": 0.7953491229686972, "grad_norm": 0.35165005922317505, "learning_rate": 1.3187629581214601e-05, "loss": 0.53, "step": 25891 }, { "epoch": 0.7953798421036463, "grad_norm": 0.36563125252723694, "learning_rate": 1.3187171510575747e-05, "loss": 0.5528, "step": 25892 }, { "epoch": 0.7954105612385955, "grad_norm": 0.3461935520172119, "learning_rate": 1.3186713432493028e-05, "loss": 0.4923, "step": 25893 }, { "epoch": 0.7954412803735447, "grad_norm": 0.35816389322280884, "learning_rate": 1.318625534696752e-05, "loss": 0.5931, "step": 25894 }, { "epoch": 0.7954719995084938, "grad_norm": 0.5125569701194763, "learning_rate": 1.3185797254000288e-05, "loss": 0.5042, "step": 25895 }, { "epoch": 0.795502718643443, "grad_norm": 0.3476797044277191, "learning_rate": 1.3185339153592407e-05, "loss": 0.5541, "step": 25896 }, { "epoch": 0.7955334377783921, "grad_norm": 0.46420225501060486, "learning_rate": 1.3184881045744939e-05, "loss": 0.5535, "step": 25897 }, { "epoch": 0.7955641569133414, "grad_norm": 0.354490727186203, "learning_rate": 1.3184422930458963e-05, "loss": 0.5629, "step": 25898 }, { "epoch": 0.7955948760482905, "grad_norm": 0.3906576335430145, "learning_rate": 1.3183964807735542e-05, "loss": 0.6078, "step": 25899 }, { "epoch": 0.7956255951832396, "grad_norm": 0.40138643980026245, "learning_rate": 1.3183506677575752e-05, "loss": 0.4725, "step": 25900 }, { "epoch": 0.7956563143181888, "grad_norm": 0.3969191312789917, "learning_rate": 1.3183048539980658e-05, "loss": 0.5899, "step": 25901 }, { "epoch": 0.795687033453138, "grad_norm": 0.3644534945487976, "learning_rate": 1.3182590394951332e-05, "loss": 0.5621, "step": 25902 }, { "epoch": 0.7957177525880871, "grad_norm": 0.3749605119228363, "learning_rate": 1.3182132242488846e-05, "loss": 0.5198, "step": 25903 }, { "epoch": 0.7957484717230363, "grad_norm": 0.35019445419311523, "learning_rate": 1.3181674082594264e-05, "loss": 0.5607, "step": 25904 }, { "epoch": 0.7957791908579854, "grad_norm": 0.35686853528022766, "learning_rate": 1.3181215915268664e-05, "loss": 0.6409, "step": 25905 }, { "epoch": 0.7958099099929345, "grad_norm": 0.3461194634437561, "learning_rate": 1.3180757740513108e-05, "loss": 0.5445, "step": 25906 }, { "epoch": 0.7958406291278838, "grad_norm": 0.40002989768981934, "learning_rate": 1.3180299558328673e-05, "loss": 0.4958, "step": 25907 }, { "epoch": 0.7958713482628329, "grad_norm": 0.3387254476547241, "learning_rate": 1.3179841368716426e-05, "loss": 0.5379, "step": 25908 }, { "epoch": 0.7959020673977821, "grad_norm": 0.36108672618865967, "learning_rate": 1.3179383171677435e-05, "loss": 0.5326, "step": 25909 }, { "epoch": 0.7959327865327313, "grad_norm": 0.4868420958518982, "learning_rate": 1.3178924967212776e-05, "loss": 0.5795, "step": 25910 }, { "epoch": 0.7959635056676804, "grad_norm": 0.41119539737701416, "learning_rate": 1.3178466755323514e-05, "loss": 0.5419, "step": 25911 }, { "epoch": 0.7959942248026296, "grad_norm": 0.35152876377105713, "learning_rate": 1.3178008536010723e-05, "loss": 0.6099, "step": 25912 }, { "epoch": 0.7960249439375787, "grad_norm": 0.40046218037605286, "learning_rate": 1.3177550309275467e-05, "loss": 0.5164, "step": 25913 }, { "epoch": 0.7960556630725278, "grad_norm": 0.6004210710525513, "learning_rate": 1.3177092075118823e-05, "loss": 0.556, "step": 25914 }, { "epoch": 0.7960863822074771, "grad_norm": 0.31721651554107666, "learning_rate": 1.317663383354186e-05, "loss": 0.4676, "step": 25915 }, { "epoch": 0.7961171013424262, "grad_norm": 0.4093199074268341, "learning_rate": 1.317617558454564e-05, "loss": 0.5447, "step": 25916 }, { "epoch": 0.7961478204773753, "grad_norm": 0.37297341227531433, "learning_rate": 1.3175717328131248e-05, "loss": 0.5748, "step": 25917 }, { "epoch": 0.7961785396123245, "grad_norm": 0.4421272277832031, "learning_rate": 1.3175259064299742e-05, "loss": 0.6304, "step": 25918 }, { "epoch": 0.7962092587472737, "grad_norm": 0.3593054711818695, "learning_rate": 1.3174800793052198e-05, "loss": 0.5739, "step": 25919 }, { "epoch": 0.7962399778822228, "grad_norm": 0.35300108790397644, "learning_rate": 1.3174342514389684e-05, "loss": 0.514, "step": 25920 }, { "epoch": 0.796270697017172, "grad_norm": 0.3637191951274872, "learning_rate": 1.3173884228313274e-05, "loss": 0.5216, "step": 25921 }, { "epoch": 0.7963014161521211, "grad_norm": 0.373343825340271, "learning_rate": 1.3173425934824033e-05, "loss": 0.5721, "step": 25922 }, { "epoch": 0.7963321352870704, "grad_norm": 0.405061811208725, "learning_rate": 1.3172967633923038e-05, "loss": 0.5421, "step": 25923 }, { "epoch": 0.7963628544220195, "grad_norm": 0.35237690806388855, "learning_rate": 1.3172509325611351e-05, "loss": 0.5633, "step": 25924 }, { "epoch": 0.7963935735569686, "grad_norm": 0.3478228747844696, "learning_rate": 1.3172051009890048e-05, "loss": 0.5429, "step": 25925 }, { "epoch": 0.7964242926919178, "grad_norm": 0.34268006682395935, "learning_rate": 1.3171592686760201e-05, "loss": 0.5617, "step": 25926 }, { "epoch": 0.796455011826867, "grad_norm": 0.3770497441291809, "learning_rate": 1.3171134356222875e-05, "loss": 0.635, "step": 25927 }, { "epoch": 0.7964857309618161, "grad_norm": 0.3874342739582062, "learning_rate": 1.3170676018279146e-05, "loss": 0.5387, "step": 25928 }, { "epoch": 0.7965164500967653, "grad_norm": 0.3645528256893158, "learning_rate": 1.3170217672930076e-05, "loss": 0.486, "step": 25929 }, { "epoch": 0.7965471692317144, "grad_norm": 0.36777979135513306, "learning_rate": 1.3169759320176746e-05, "loss": 0.5982, "step": 25930 }, { "epoch": 0.7965778883666635, "grad_norm": 0.4138661324977875, "learning_rate": 1.3169300960020222e-05, "loss": 0.5446, "step": 25931 }, { "epoch": 0.7966086075016128, "grad_norm": 0.43886759877204895, "learning_rate": 1.3168842592461572e-05, "loss": 0.4749, "step": 25932 }, { "epoch": 0.7966393266365619, "grad_norm": 0.3796290159225464, "learning_rate": 1.3168384217501871e-05, "loss": 0.5898, "step": 25933 }, { "epoch": 0.7966700457715111, "grad_norm": 0.36930012702941895, "learning_rate": 1.3167925835142183e-05, "loss": 0.5209, "step": 25934 }, { "epoch": 0.7967007649064602, "grad_norm": 0.3713112771511078, "learning_rate": 1.3167467445383588e-05, "loss": 0.6039, "step": 25935 }, { "epoch": 0.7967314840414094, "grad_norm": 0.3340182602405548, "learning_rate": 1.3167009048227148e-05, "loss": 0.5045, "step": 25936 }, { "epoch": 0.7967622031763586, "grad_norm": 0.43085676431655884, "learning_rate": 1.316655064367394e-05, "loss": 0.4896, "step": 25937 }, { "epoch": 0.7967929223113077, "grad_norm": 0.35012221336364746, "learning_rate": 1.316609223172503e-05, "loss": 0.5828, "step": 25938 }, { "epoch": 0.7968236414462568, "grad_norm": 0.37341564893722534, "learning_rate": 1.3165633812381494e-05, "loss": 0.5654, "step": 25939 }, { "epoch": 0.7968543605812061, "grad_norm": 0.34541675448417664, "learning_rate": 1.3165175385644393e-05, "loss": 0.5491, "step": 25940 }, { "epoch": 0.7968850797161552, "grad_norm": 0.3932779133319855, "learning_rate": 1.3164716951514809e-05, "loss": 0.5298, "step": 25941 }, { "epoch": 0.7969157988511043, "grad_norm": 0.346660315990448, "learning_rate": 1.3164258509993806e-05, "loss": 0.5222, "step": 25942 }, { "epoch": 0.7969465179860535, "grad_norm": 0.419208288192749, "learning_rate": 1.3163800061082457e-05, "loss": 0.6187, "step": 25943 }, { "epoch": 0.7969772371210027, "grad_norm": 0.35131463408470154, "learning_rate": 1.3163341604781831e-05, "loss": 0.5315, "step": 25944 }, { "epoch": 0.7970079562559518, "grad_norm": 0.36767035722732544, "learning_rate": 1.3162883141093e-05, "loss": 0.5724, "step": 25945 }, { "epoch": 0.797038675390901, "grad_norm": 0.36650556325912476, "learning_rate": 1.3162424670017032e-05, "loss": 0.526, "step": 25946 }, { "epoch": 0.7970693945258501, "grad_norm": 0.34720495343208313, "learning_rate": 1.3161966191555004e-05, "loss": 0.4715, "step": 25947 }, { "epoch": 0.7971001136607994, "grad_norm": 0.4589812457561493, "learning_rate": 1.3161507705707984e-05, "loss": 0.5772, "step": 25948 }, { "epoch": 0.7971308327957485, "grad_norm": 0.3617023825645447, "learning_rate": 1.3161049212477038e-05, "loss": 0.594, "step": 25949 }, { "epoch": 0.7971615519306976, "grad_norm": 0.325928658246994, "learning_rate": 1.3160590711863241e-05, "loss": 0.4992, "step": 25950 }, { "epoch": 0.7971922710656468, "grad_norm": 0.39157634973526, "learning_rate": 1.3160132203867666e-05, "loss": 0.5727, "step": 25951 }, { "epoch": 0.797222990200596, "grad_norm": 0.3368477523326874, "learning_rate": 1.315967368849138e-05, "loss": 0.5172, "step": 25952 }, { "epoch": 0.7972537093355451, "grad_norm": 0.37543144822120667, "learning_rate": 1.3159215165735458e-05, "loss": 0.6124, "step": 25953 }, { "epoch": 0.7972844284704943, "grad_norm": 0.3722783029079437, "learning_rate": 1.3158756635600964e-05, "loss": 0.6216, "step": 25954 }, { "epoch": 0.7973151476054434, "grad_norm": 0.3719848096370697, "learning_rate": 1.3158298098088977e-05, "loss": 0.5072, "step": 25955 }, { "epoch": 0.7973458667403925, "grad_norm": 0.3670866787433624, "learning_rate": 1.3157839553200562e-05, "loss": 0.6064, "step": 25956 }, { "epoch": 0.7973765858753418, "grad_norm": 0.3615095913410187, "learning_rate": 1.3157381000936793e-05, "loss": 0.6143, "step": 25957 }, { "epoch": 0.7974073050102909, "grad_norm": 0.4489337205886841, "learning_rate": 1.315692244129874e-05, "loss": 0.5191, "step": 25958 }, { "epoch": 0.7974380241452401, "grad_norm": 0.3746497631072998, "learning_rate": 1.3156463874287475e-05, "loss": 0.6196, "step": 25959 }, { "epoch": 0.7974687432801892, "grad_norm": 0.36237776279449463, "learning_rate": 1.315600529990407e-05, "loss": 0.5764, "step": 25960 }, { "epoch": 0.7974994624151384, "grad_norm": 0.36777809262275696, "learning_rate": 1.315554671814959e-05, "loss": 0.565, "step": 25961 }, { "epoch": 0.7975301815500876, "grad_norm": 0.4179181158542633, "learning_rate": 1.3155088129025111e-05, "loss": 0.5269, "step": 25962 }, { "epoch": 0.7975609006850367, "grad_norm": 0.33473071455955505, "learning_rate": 1.3154629532531705e-05, "loss": 0.4891, "step": 25963 }, { "epoch": 0.7975916198199858, "grad_norm": 0.36183544993400574, "learning_rate": 1.3154170928670442e-05, "loss": 0.5305, "step": 25964 }, { "epoch": 0.7976223389549351, "grad_norm": 0.36269935965538025, "learning_rate": 1.3153712317442389e-05, "loss": 0.4534, "step": 25965 }, { "epoch": 0.7976530580898842, "grad_norm": 0.37418845295906067, "learning_rate": 1.3153253698848624e-05, "loss": 0.6281, "step": 25966 }, { "epoch": 0.7976837772248333, "grad_norm": 0.3524051904678345, "learning_rate": 1.3152795072890214e-05, "loss": 0.4773, "step": 25967 }, { "epoch": 0.7977144963597825, "grad_norm": 0.355947345495224, "learning_rate": 1.3152336439568232e-05, "loss": 0.5422, "step": 25968 }, { "epoch": 0.7977452154947317, "grad_norm": 0.3754839599132538, "learning_rate": 1.3151877798883745e-05, "loss": 0.6105, "step": 25969 }, { "epoch": 0.7977759346296809, "grad_norm": 0.351158082485199, "learning_rate": 1.315141915083783e-05, "loss": 0.5574, "step": 25970 }, { "epoch": 0.79780665376463, "grad_norm": 0.3891996443271637, "learning_rate": 1.3150960495431553e-05, "loss": 0.4584, "step": 25971 }, { "epoch": 0.7978373728995791, "grad_norm": 0.4051453769207001, "learning_rate": 1.3150501832665988e-05, "loss": 0.5332, "step": 25972 }, { "epoch": 0.7978680920345284, "grad_norm": 0.7068103551864624, "learning_rate": 1.3150043162542208e-05, "loss": 0.5822, "step": 25973 }, { "epoch": 0.7978988111694775, "grad_norm": 0.4201067388057709, "learning_rate": 1.3149584485061283e-05, "loss": 0.5172, "step": 25974 }, { "epoch": 0.7979295303044266, "grad_norm": 0.42194539308547974, "learning_rate": 1.3149125800224281e-05, "loss": 0.5604, "step": 25975 }, { "epoch": 0.7979602494393758, "grad_norm": 0.3290785253047943, "learning_rate": 1.3148667108032276e-05, "loss": 0.5293, "step": 25976 }, { "epoch": 0.7979909685743249, "grad_norm": 0.33781561255455017, "learning_rate": 1.314820840848634e-05, "loss": 0.4256, "step": 25977 }, { "epoch": 0.7980216877092741, "grad_norm": 0.4012170135974884, "learning_rate": 1.3147749701587545e-05, "loss": 0.5301, "step": 25978 }, { "epoch": 0.7980524068442233, "grad_norm": 0.3817545473575592, "learning_rate": 1.3147290987336956e-05, "loss": 0.5707, "step": 25979 }, { "epoch": 0.7980831259791724, "grad_norm": 0.3623744249343872, "learning_rate": 1.3146832265735654e-05, "loss": 0.525, "step": 25980 }, { "epoch": 0.7981138451141215, "grad_norm": 0.3681064546108246, "learning_rate": 1.3146373536784702e-05, "loss": 0.55, "step": 25981 }, { "epoch": 0.7981445642490708, "grad_norm": 0.3796258568763733, "learning_rate": 1.3145914800485175e-05, "loss": 0.5583, "step": 25982 }, { "epoch": 0.7981752833840199, "grad_norm": 0.4165703058242798, "learning_rate": 1.3145456056838146e-05, "loss": 0.5049, "step": 25983 }, { "epoch": 0.7982060025189691, "grad_norm": 0.30720359086990356, "learning_rate": 1.3144997305844687e-05, "loss": 0.5534, "step": 25984 }, { "epoch": 0.7982367216539182, "grad_norm": 0.3921777904033661, "learning_rate": 1.3144538547505863e-05, "loss": 0.5402, "step": 25985 }, { "epoch": 0.7982674407888674, "grad_norm": 0.38557904958724976, "learning_rate": 1.314407978182275e-05, "loss": 0.5538, "step": 25986 }, { "epoch": 0.7982981599238166, "grad_norm": 0.3743959367275238, "learning_rate": 1.3143621008796418e-05, "loss": 0.6154, "step": 25987 }, { "epoch": 0.7983288790587657, "grad_norm": 0.351915180683136, "learning_rate": 1.3143162228427944e-05, "loss": 0.5138, "step": 25988 }, { "epoch": 0.7983595981937148, "grad_norm": 0.3843056559562683, "learning_rate": 1.3142703440718392e-05, "loss": 0.4554, "step": 25989 }, { "epoch": 0.7983903173286641, "grad_norm": 0.3413323760032654, "learning_rate": 1.3142244645668836e-05, "loss": 0.5444, "step": 25990 }, { "epoch": 0.7984210364636132, "grad_norm": 0.3556300103664398, "learning_rate": 1.314178584328035e-05, "loss": 0.4655, "step": 25991 }, { "epoch": 0.7984517555985623, "grad_norm": 0.39269739389419556, "learning_rate": 1.3141327033554004e-05, "loss": 0.5096, "step": 25992 }, { "epoch": 0.7984824747335115, "grad_norm": 0.3481486141681671, "learning_rate": 1.3140868216490868e-05, "loss": 0.5791, "step": 25993 }, { "epoch": 0.7985131938684606, "grad_norm": 0.35920044779777527, "learning_rate": 1.3140409392092017e-05, "loss": 0.5273, "step": 25994 }, { "epoch": 0.7985439130034099, "grad_norm": 0.3546205461025238, "learning_rate": 1.3139950560358518e-05, "loss": 0.5304, "step": 25995 }, { "epoch": 0.798574632138359, "grad_norm": 0.34377312660217285, "learning_rate": 1.3139491721291449e-05, "loss": 0.5021, "step": 25996 }, { "epoch": 0.7986053512733081, "grad_norm": 0.3557325303554535, "learning_rate": 1.3139032874891873e-05, "loss": 0.5431, "step": 25997 }, { "epoch": 0.7986360704082573, "grad_norm": 0.9006661176681519, "learning_rate": 1.3138574021160869e-05, "loss": 0.5306, "step": 25998 }, { "epoch": 0.7986667895432065, "grad_norm": 0.3597590923309326, "learning_rate": 1.3138115160099505e-05, "loss": 0.5717, "step": 25999 }, { "epoch": 0.7986975086781556, "grad_norm": 0.34672948718070984, "learning_rate": 1.3137656291708856e-05, "loss": 0.5783, "step": 26000 }, { "epoch": 0.7987282278131048, "grad_norm": 0.3330441117286682, "learning_rate": 1.3137197415989988e-05, "loss": 0.5581, "step": 26001 }, { "epoch": 0.7987589469480539, "grad_norm": 0.36343884468078613, "learning_rate": 1.313673853294398e-05, "loss": 0.5037, "step": 26002 }, { "epoch": 0.7987896660830031, "grad_norm": 0.34563183784484863, "learning_rate": 1.3136279642571897e-05, "loss": 0.5492, "step": 26003 }, { "epoch": 0.7988203852179523, "grad_norm": 0.3613867461681366, "learning_rate": 1.3135820744874816e-05, "loss": 0.5662, "step": 26004 }, { "epoch": 0.7988511043529014, "grad_norm": 0.35942599177360535, "learning_rate": 1.3135361839853809e-05, "loss": 0.5943, "step": 26005 }, { "epoch": 0.7988818234878505, "grad_norm": 0.38746094703674316, "learning_rate": 1.313490292750994e-05, "loss": 0.5341, "step": 26006 }, { "epoch": 0.7989125426227998, "grad_norm": 0.41687777638435364, "learning_rate": 1.3134444007844289e-05, "loss": 0.5818, "step": 26007 }, { "epoch": 0.7989432617577489, "grad_norm": 0.3636980354785919, "learning_rate": 1.3133985080857923e-05, "loss": 0.5501, "step": 26008 }, { "epoch": 0.7989739808926981, "grad_norm": 0.35696107149124146, "learning_rate": 1.3133526146551918e-05, "loss": 0.604, "step": 26009 }, { "epoch": 0.7990047000276472, "grad_norm": 0.3698446452617645, "learning_rate": 1.3133067204927344e-05, "loss": 0.5697, "step": 26010 }, { "epoch": 0.7990354191625963, "grad_norm": 0.38343730568885803, "learning_rate": 1.3132608255985273e-05, "loss": 0.515, "step": 26011 }, { "epoch": 0.7990661382975456, "grad_norm": 0.3417481482028961, "learning_rate": 1.3132149299726777e-05, "loss": 0.5512, "step": 26012 }, { "epoch": 0.7990968574324947, "grad_norm": 0.40238040685653687, "learning_rate": 1.3131690336152927e-05, "loss": 0.6578, "step": 26013 }, { "epoch": 0.7991275765674438, "grad_norm": 0.37843987345695496, "learning_rate": 1.3131231365264797e-05, "loss": 0.4885, "step": 26014 }, { "epoch": 0.799158295702393, "grad_norm": 0.32668381929397583, "learning_rate": 1.3130772387063453e-05, "loss": 0.5503, "step": 26015 }, { "epoch": 0.7991890148373422, "grad_norm": 0.4286276400089264, "learning_rate": 1.3130313401549974e-05, "loss": 0.5791, "step": 26016 }, { "epoch": 0.7992197339722913, "grad_norm": 0.36461856961250305, "learning_rate": 1.3129854408725429e-05, "loss": 0.5349, "step": 26017 }, { "epoch": 0.7992504531072405, "grad_norm": 0.3559931516647339, "learning_rate": 1.3129395408590892e-05, "loss": 0.6079, "step": 26018 }, { "epoch": 0.7992811722421896, "grad_norm": 0.34531503915786743, "learning_rate": 1.3128936401147432e-05, "loss": 0.4731, "step": 26019 }, { "epoch": 0.7993118913771389, "grad_norm": 0.3795378506183624, "learning_rate": 1.3128477386396124e-05, "loss": 0.502, "step": 26020 }, { "epoch": 0.799342610512088, "grad_norm": 0.3717140853404999, "learning_rate": 1.3128018364338035e-05, "loss": 0.5116, "step": 26021 }, { "epoch": 0.7993733296470371, "grad_norm": 0.417337030172348, "learning_rate": 1.3127559334974245e-05, "loss": 0.551, "step": 26022 }, { "epoch": 0.7994040487819863, "grad_norm": 0.33806276321411133, "learning_rate": 1.3127100298305822e-05, "loss": 0.5385, "step": 26023 }, { "epoch": 0.7994347679169355, "grad_norm": 0.5566702485084534, "learning_rate": 1.3126641254333834e-05, "loss": 0.5249, "step": 26024 }, { "epoch": 0.7994654870518846, "grad_norm": 0.37143343687057495, "learning_rate": 1.3126182203059358e-05, "loss": 0.4764, "step": 26025 }, { "epoch": 0.7994962061868338, "grad_norm": 0.38810285925865173, "learning_rate": 1.3125723144483465e-05, "loss": 0.5605, "step": 26026 }, { "epoch": 0.7995269253217829, "grad_norm": 0.34514349699020386, "learning_rate": 1.312526407860723e-05, "loss": 0.5472, "step": 26027 }, { "epoch": 0.799557644456732, "grad_norm": 0.34701666235923767, "learning_rate": 1.3124805005431718e-05, "loss": 0.5455, "step": 26028 }, { "epoch": 0.7995883635916813, "grad_norm": 0.3594312369823456, "learning_rate": 1.312434592495801e-05, "loss": 0.5794, "step": 26029 }, { "epoch": 0.7996190827266304, "grad_norm": 0.46964624524116516, "learning_rate": 1.3123886837187173e-05, "loss": 0.494, "step": 26030 }, { "epoch": 0.7996498018615795, "grad_norm": 0.4257698059082031, "learning_rate": 1.312342774212028e-05, "loss": 0.5138, "step": 26031 }, { "epoch": 0.7996805209965288, "grad_norm": 0.3353979289531708, "learning_rate": 1.3122968639758402e-05, "loss": 0.5041, "step": 26032 }, { "epoch": 0.7997112401314779, "grad_norm": 0.3489910364151001, "learning_rate": 1.3122509530102614e-05, "loss": 0.529, "step": 26033 }, { "epoch": 0.7997419592664271, "grad_norm": 0.4353380799293518, "learning_rate": 1.3122050413153987e-05, "loss": 0.5471, "step": 26034 }, { "epoch": 0.7997726784013762, "grad_norm": 0.351635605096817, "learning_rate": 1.312159128891359e-05, "loss": 0.5006, "step": 26035 }, { "epoch": 0.7998033975363253, "grad_norm": 0.4392237663269043, "learning_rate": 1.3121132157382504e-05, "loss": 0.5391, "step": 26036 }, { "epoch": 0.7998341166712746, "grad_norm": 0.4381001889705658, "learning_rate": 1.3120673018561791e-05, "loss": 0.5026, "step": 26037 }, { "epoch": 0.7998648358062237, "grad_norm": 0.3264738619327545, "learning_rate": 1.3120213872452532e-05, "loss": 0.5123, "step": 26038 }, { "epoch": 0.7998955549411728, "grad_norm": 0.360623300075531, "learning_rate": 1.3119754719055793e-05, "loss": 0.5545, "step": 26039 }, { "epoch": 0.799926274076122, "grad_norm": 0.3634301722049713, "learning_rate": 1.311929555837265e-05, "loss": 0.4933, "step": 26040 }, { "epoch": 0.7999569932110712, "grad_norm": 0.3439560830593109, "learning_rate": 1.3118836390404174e-05, "loss": 0.5777, "step": 26041 }, { "epoch": 0.7999877123460203, "grad_norm": 0.38237085938453674, "learning_rate": 1.3118377215151439e-05, "loss": 0.6134, "step": 26042 }, { "epoch": 0.8000184314809695, "grad_norm": 0.3699914813041687, "learning_rate": 1.3117918032615515e-05, "loss": 0.4554, "step": 26043 }, { "epoch": 0.8000491506159186, "grad_norm": 0.3734308183193207, "learning_rate": 1.3117458842797472e-05, "loss": 0.5808, "step": 26044 }, { "epoch": 0.8000798697508679, "grad_norm": 0.38362058997154236, "learning_rate": 1.3116999645698392e-05, "loss": 0.5005, "step": 26045 }, { "epoch": 0.800110588885817, "grad_norm": 0.41260799765586853, "learning_rate": 1.3116540441319341e-05, "loss": 0.5286, "step": 26046 }, { "epoch": 0.8001413080207661, "grad_norm": 0.40700340270996094, "learning_rate": 1.3116081229661389e-05, "loss": 0.5099, "step": 26047 }, { "epoch": 0.8001720271557153, "grad_norm": 0.4948415756225586, "learning_rate": 1.3115622010725614e-05, "loss": 0.5313, "step": 26048 }, { "epoch": 0.8002027462906645, "grad_norm": 0.39953237771987915, "learning_rate": 1.3115162784513085e-05, "loss": 0.4943, "step": 26049 }, { "epoch": 0.8002334654256136, "grad_norm": 0.3643600046634674, "learning_rate": 1.3114703551024876e-05, "loss": 0.4957, "step": 26050 }, { "epoch": 0.8002641845605628, "grad_norm": 0.370609849691391, "learning_rate": 1.311424431026206e-05, "loss": 0.4733, "step": 26051 }, { "epoch": 0.8002949036955119, "grad_norm": 0.3524817228317261, "learning_rate": 1.311378506222571e-05, "loss": 0.6039, "step": 26052 }, { "epoch": 0.800325622830461, "grad_norm": 0.34567371010780334, "learning_rate": 1.3113325806916895e-05, "loss": 0.5226, "step": 26053 }, { "epoch": 0.8003563419654103, "grad_norm": 0.37508171796798706, "learning_rate": 1.311286654433669e-05, "loss": 0.5484, "step": 26054 }, { "epoch": 0.8003870611003594, "grad_norm": 0.441914826631546, "learning_rate": 1.3112407274486169e-05, "loss": 0.576, "step": 26055 }, { "epoch": 0.8004177802353086, "grad_norm": 0.3712744414806366, "learning_rate": 1.3111947997366405e-05, "loss": 0.607, "step": 26056 }, { "epoch": 0.8004484993702577, "grad_norm": 0.37870481610298157, "learning_rate": 1.3111488712978469e-05, "loss": 0.6215, "step": 26057 }, { "epoch": 0.8004792185052069, "grad_norm": 0.34891700744628906, "learning_rate": 1.311102942132343e-05, "loss": 0.5602, "step": 26058 }, { "epoch": 0.8005099376401561, "grad_norm": 0.3529869616031647, "learning_rate": 1.3110570122402369e-05, "loss": 0.5471, "step": 26059 }, { "epoch": 0.8005406567751052, "grad_norm": 0.35381096601486206, "learning_rate": 1.3110110816216351e-05, "loss": 0.5831, "step": 26060 }, { "epoch": 0.8005713759100543, "grad_norm": 0.5019901990890503, "learning_rate": 1.3109651502766454e-05, "loss": 0.5873, "step": 26061 }, { "epoch": 0.8006020950450036, "grad_norm": 0.3516576588153839, "learning_rate": 1.3109192182053745e-05, "loss": 0.5347, "step": 26062 }, { "epoch": 0.8006328141799527, "grad_norm": 0.3638651967048645, "learning_rate": 1.3108732854079304e-05, "loss": 0.5682, "step": 26063 }, { "epoch": 0.8006635333149018, "grad_norm": 0.38570958375930786, "learning_rate": 1.3108273518844202e-05, "loss": 0.5175, "step": 26064 }, { "epoch": 0.800694252449851, "grad_norm": 0.3804757297039032, "learning_rate": 1.3107814176349507e-05, "loss": 0.5777, "step": 26065 }, { "epoch": 0.8007249715848002, "grad_norm": 0.3805074691772461, "learning_rate": 1.3107354826596297e-05, "loss": 0.5204, "step": 26066 }, { "epoch": 0.8007556907197493, "grad_norm": 0.3425505459308624, "learning_rate": 1.3106895469585638e-05, "loss": 0.5762, "step": 26067 }, { "epoch": 0.8007864098546985, "grad_norm": 0.4042263329029083, "learning_rate": 1.3106436105318614e-05, "loss": 0.537, "step": 26068 }, { "epoch": 0.8008171289896476, "grad_norm": 0.41676095128059387, "learning_rate": 1.3105976733796286e-05, "loss": 0.6434, "step": 26069 }, { "epoch": 0.8008478481245969, "grad_norm": 0.3552660644054413, "learning_rate": 1.310551735501974e-05, "loss": 0.6455, "step": 26070 }, { "epoch": 0.800878567259546, "grad_norm": 0.40283024311065674, "learning_rate": 1.3105057968990033e-05, "loss": 0.4653, "step": 26071 }, { "epoch": 0.8009092863944951, "grad_norm": 0.3518514037132263, "learning_rate": 1.3104598575708251e-05, "loss": 0.5296, "step": 26072 }, { "epoch": 0.8009400055294443, "grad_norm": 0.3493010401725769, "learning_rate": 1.310413917517546e-05, "loss": 0.4753, "step": 26073 }, { "epoch": 0.8009707246643935, "grad_norm": 0.39399948716163635, "learning_rate": 1.3103679767392737e-05, "loss": 0.5953, "step": 26074 }, { "epoch": 0.8010014437993426, "grad_norm": 0.34893447160720825, "learning_rate": 1.3103220352361152e-05, "loss": 0.5921, "step": 26075 }, { "epoch": 0.8010321629342918, "grad_norm": 0.3695739805698395, "learning_rate": 1.310276093008178e-05, "loss": 0.5171, "step": 26076 }, { "epoch": 0.8010628820692409, "grad_norm": 0.37786665558815, "learning_rate": 1.3102301500555692e-05, "loss": 0.6175, "step": 26077 }, { "epoch": 0.80109360120419, "grad_norm": 0.44746172428131104, "learning_rate": 1.3101842063783963e-05, "loss": 0.5858, "step": 26078 }, { "epoch": 0.8011243203391393, "grad_norm": 0.36504459381103516, "learning_rate": 1.3101382619767667e-05, "loss": 0.5267, "step": 26079 }, { "epoch": 0.8011550394740884, "grad_norm": 0.43510743975639343, "learning_rate": 1.3100923168507869e-05, "loss": 0.6664, "step": 26080 }, { "epoch": 0.8011857586090376, "grad_norm": 0.5376330018043518, "learning_rate": 1.3100463710005652e-05, "loss": 0.55, "step": 26081 }, { "epoch": 0.8012164777439867, "grad_norm": 0.33635497093200684, "learning_rate": 1.310000424426209e-05, "loss": 0.534, "step": 26082 }, { "epoch": 0.8012471968789359, "grad_norm": 0.33453264832496643, "learning_rate": 1.3099544771278245e-05, "loss": 0.464, "step": 26083 }, { "epoch": 0.8012779160138851, "grad_norm": 0.3376138210296631, "learning_rate": 1.30990852910552e-05, "loss": 0.6122, "step": 26084 }, { "epoch": 0.8013086351488342, "grad_norm": 0.4851231873035431, "learning_rate": 1.3098625803594023e-05, "loss": 0.6001, "step": 26085 }, { "epoch": 0.8013393542837833, "grad_norm": 0.342579185962677, "learning_rate": 1.3098166308895792e-05, "loss": 0.4462, "step": 26086 }, { "epoch": 0.8013700734187326, "grad_norm": 0.3580623269081116, "learning_rate": 1.3097706806961575e-05, "loss": 0.5604, "step": 26087 }, { "epoch": 0.8014007925536817, "grad_norm": 0.3634633719921112, "learning_rate": 1.3097247297792448e-05, "loss": 0.585, "step": 26088 }, { "epoch": 0.8014315116886308, "grad_norm": 0.3509052097797394, "learning_rate": 1.3096787781389484e-05, "loss": 0.5446, "step": 26089 }, { "epoch": 0.80146223082358, "grad_norm": 0.4009198546409607, "learning_rate": 1.3096328257753756e-05, "loss": 0.5178, "step": 26090 }, { "epoch": 0.8014929499585292, "grad_norm": 0.36034858226776123, "learning_rate": 1.3095868726886333e-05, "loss": 0.5305, "step": 26091 }, { "epoch": 0.8015236690934783, "grad_norm": 0.34434840083122253, "learning_rate": 1.3095409188788299e-05, "loss": 0.5737, "step": 26092 }, { "epoch": 0.8015543882284275, "grad_norm": 0.35694393515586853, "learning_rate": 1.309494964346072e-05, "loss": 0.5447, "step": 26093 }, { "epoch": 0.8015851073633766, "grad_norm": 0.3767606019973755, "learning_rate": 1.3094490090904666e-05, "loss": 0.6028, "step": 26094 }, { "epoch": 0.8016158264983259, "grad_norm": 0.43184271454811096, "learning_rate": 1.3094030531121216e-05, "loss": 0.4991, "step": 26095 }, { "epoch": 0.801646545633275, "grad_norm": 0.37133118510246277, "learning_rate": 1.309357096411144e-05, "loss": 0.5939, "step": 26096 }, { "epoch": 0.8016772647682241, "grad_norm": 0.36673563718795776, "learning_rate": 1.3093111389876416e-05, "loss": 0.5366, "step": 26097 }, { "epoch": 0.8017079839031733, "grad_norm": 0.33350828289985657, "learning_rate": 1.309265180841721e-05, "loss": 0.545, "step": 26098 }, { "epoch": 0.8017387030381224, "grad_norm": 0.349079966545105, "learning_rate": 1.3092192219734903e-05, "loss": 0.5359, "step": 26099 }, { "epoch": 0.8017694221730716, "grad_norm": 0.4071907699108124, "learning_rate": 1.3091732623830566e-05, "loss": 0.5414, "step": 26100 }, { "epoch": 0.8018001413080208, "grad_norm": 0.3710370361804962, "learning_rate": 1.3091273020705269e-05, "loss": 0.5169, "step": 26101 }, { "epoch": 0.8018308604429699, "grad_norm": 0.39904922246932983, "learning_rate": 1.3090813410360088e-05, "loss": 0.6757, "step": 26102 }, { "epoch": 0.801861579577919, "grad_norm": 0.3046227693557739, "learning_rate": 1.3090353792796098e-05, "loss": 0.4833, "step": 26103 }, { "epoch": 0.8018922987128683, "grad_norm": 0.35173580050468445, "learning_rate": 1.308989416801437e-05, "loss": 0.588, "step": 26104 }, { "epoch": 0.8019230178478174, "grad_norm": 0.35749971866607666, "learning_rate": 1.308943453601598e-05, "loss": 0.5499, "step": 26105 }, { "epoch": 0.8019537369827666, "grad_norm": 0.35244616866111755, "learning_rate": 1.3088974896801998e-05, "loss": 0.5191, "step": 26106 }, { "epoch": 0.8019844561177157, "grad_norm": 0.41570258140563965, "learning_rate": 1.30885152503735e-05, "loss": 0.5274, "step": 26107 }, { "epoch": 0.8020151752526649, "grad_norm": 0.3391374349594116, "learning_rate": 1.308805559673156e-05, "loss": 0.5548, "step": 26108 }, { "epoch": 0.8020458943876141, "grad_norm": 0.3799046576023102, "learning_rate": 1.3087595935877247e-05, "loss": 0.5868, "step": 26109 }, { "epoch": 0.8020766135225632, "grad_norm": 0.4321231245994568, "learning_rate": 1.3087136267811642e-05, "loss": 0.5322, "step": 26110 }, { "epoch": 0.8021073326575123, "grad_norm": 0.3721866309642792, "learning_rate": 1.3086676592535814e-05, "loss": 0.5174, "step": 26111 }, { "epoch": 0.8021380517924616, "grad_norm": 0.6453045010566711, "learning_rate": 1.3086216910050835e-05, "loss": 0.6006, "step": 26112 }, { "epoch": 0.8021687709274107, "grad_norm": 0.33022141456604004, "learning_rate": 1.3085757220357785e-05, "loss": 0.4718, "step": 26113 }, { "epoch": 0.8021994900623598, "grad_norm": 0.36995232105255127, "learning_rate": 1.308529752345773e-05, "loss": 0.5654, "step": 26114 }, { "epoch": 0.802230209197309, "grad_norm": 0.3938498795032501, "learning_rate": 1.3084837819351748e-05, "loss": 0.5507, "step": 26115 }, { "epoch": 0.8022609283322581, "grad_norm": 0.35462838411331177, "learning_rate": 1.3084378108040909e-05, "loss": 0.4581, "step": 26116 }, { "epoch": 0.8022916474672073, "grad_norm": 0.4495857357978821, "learning_rate": 1.3083918389526291e-05, "loss": 0.5661, "step": 26117 }, { "epoch": 0.8023223666021565, "grad_norm": 0.41356581449508667, "learning_rate": 1.308345866380897e-05, "loss": 0.5226, "step": 26118 }, { "epoch": 0.8023530857371056, "grad_norm": 0.3567899465560913, "learning_rate": 1.3082998930890011e-05, "loss": 0.5365, "step": 26119 }, { "epoch": 0.8023838048720549, "grad_norm": 0.4289098083972931, "learning_rate": 1.3082539190770497e-05, "loss": 0.5753, "step": 26120 }, { "epoch": 0.802414524007004, "grad_norm": 0.35690388083457947, "learning_rate": 1.3082079443451495e-05, "loss": 0.5955, "step": 26121 }, { "epoch": 0.8024452431419531, "grad_norm": 0.3410249650478363, "learning_rate": 1.3081619688934082e-05, "loss": 0.5562, "step": 26122 }, { "epoch": 0.8024759622769023, "grad_norm": 0.47192901372909546, "learning_rate": 1.308115992721933e-05, "loss": 0.5877, "step": 26123 }, { "epoch": 0.8025066814118514, "grad_norm": 0.362843781709671, "learning_rate": 1.3080700158308315e-05, "loss": 0.5638, "step": 26124 }, { "epoch": 0.8025374005468006, "grad_norm": 0.3768976926803589, "learning_rate": 1.3080240382202108e-05, "loss": 0.5355, "step": 26125 }, { "epoch": 0.8025681196817498, "grad_norm": 0.3481595814228058, "learning_rate": 1.3079780598901788e-05, "loss": 0.4967, "step": 26126 }, { "epoch": 0.8025988388166989, "grad_norm": 0.3105914890766144, "learning_rate": 1.3079320808408423e-05, "loss": 0.5132, "step": 26127 }, { "epoch": 0.802629557951648, "grad_norm": 0.36178484559059143, "learning_rate": 1.3078861010723088e-05, "loss": 0.5824, "step": 26128 }, { "epoch": 0.8026602770865973, "grad_norm": 0.594438374042511, "learning_rate": 1.307840120584686e-05, "loss": 0.5404, "step": 26129 }, { "epoch": 0.8026909962215464, "grad_norm": 0.35874930024147034, "learning_rate": 1.307794139378081e-05, "loss": 0.5563, "step": 26130 }, { "epoch": 0.8027217153564956, "grad_norm": 0.36837640404701233, "learning_rate": 1.3077481574526013e-05, "loss": 0.5924, "step": 26131 }, { "epoch": 0.8027524344914447, "grad_norm": 0.38493528962135315, "learning_rate": 1.3077021748083542e-05, "loss": 0.6269, "step": 26132 }, { "epoch": 0.8027831536263939, "grad_norm": 0.3791649043560028, "learning_rate": 1.3076561914454474e-05, "loss": 0.6111, "step": 26133 }, { "epoch": 0.8028138727613431, "grad_norm": 0.3735343813896179, "learning_rate": 1.3076102073639878e-05, "loss": 0.5442, "step": 26134 }, { "epoch": 0.8028445918962922, "grad_norm": 0.3755734860897064, "learning_rate": 1.3075642225640833e-05, "loss": 0.6674, "step": 26135 }, { "epoch": 0.8028753110312413, "grad_norm": 0.377521276473999, "learning_rate": 1.307518237045841e-05, "loss": 0.457, "step": 26136 }, { "epoch": 0.8029060301661906, "grad_norm": 0.4080037772655487, "learning_rate": 1.3074722508093683e-05, "loss": 0.6039, "step": 26137 }, { "epoch": 0.8029367493011397, "grad_norm": 1.3671406507492065, "learning_rate": 1.3074262638547728e-05, "loss": 0.61, "step": 26138 }, { "epoch": 0.8029674684360888, "grad_norm": 0.3870183527469635, "learning_rate": 1.307380276182162e-05, "loss": 0.5089, "step": 26139 }, { "epoch": 0.802998187571038, "grad_norm": 0.45058515667915344, "learning_rate": 1.3073342877916426e-05, "loss": 0.5216, "step": 26140 }, { "epoch": 0.8030289067059871, "grad_norm": 0.42971301078796387, "learning_rate": 1.3072882986833229e-05, "loss": 0.5788, "step": 26141 }, { "epoch": 0.8030596258409363, "grad_norm": 0.37529247999191284, "learning_rate": 1.3072423088573099e-05, "loss": 0.5662, "step": 26142 }, { "epoch": 0.8030903449758855, "grad_norm": 0.3305055797100067, "learning_rate": 1.3071963183137109e-05, "loss": 0.4941, "step": 26143 }, { "epoch": 0.8031210641108346, "grad_norm": 0.3648013174533844, "learning_rate": 1.3071503270526333e-05, "loss": 0.5809, "step": 26144 }, { "epoch": 0.8031517832457838, "grad_norm": 0.4816353917121887, "learning_rate": 1.307104335074185e-05, "loss": 0.5126, "step": 26145 }, { "epoch": 0.803182502380733, "grad_norm": 0.4044342339038849, "learning_rate": 1.3070583423784729e-05, "loss": 0.5257, "step": 26146 }, { "epoch": 0.8032132215156821, "grad_norm": 0.38759416341781616, "learning_rate": 1.3070123489656045e-05, "loss": 0.5658, "step": 26147 }, { "epoch": 0.8032439406506313, "grad_norm": 0.4582498073577881, "learning_rate": 1.3069663548356875e-05, "loss": 0.5421, "step": 26148 }, { "epoch": 0.8032746597855804, "grad_norm": 0.36160436272621155, "learning_rate": 1.3069203599888291e-05, "loss": 0.5037, "step": 26149 }, { "epoch": 0.8033053789205296, "grad_norm": 0.3275303840637207, "learning_rate": 1.3068743644251364e-05, "loss": 0.495, "step": 26150 }, { "epoch": 0.8033360980554788, "grad_norm": 0.35767847299575806, "learning_rate": 1.3068283681447178e-05, "loss": 0.6357, "step": 26151 }, { "epoch": 0.8033668171904279, "grad_norm": 0.37351754307746887, "learning_rate": 1.3067823711476796e-05, "loss": 0.4879, "step": 26152 }, { "epoch": 0.803397536325377, "grad_norm": 0.3640378713607788, "learning_rate": 1.30673637343413e-05, "loss": 0.5204, "step": 26153 }, { "epoch": 0.8034282554603263, "grad_norm": 0.4045173227787018, "learning_rate": 1.3066903750041764e-05, "loss": 0.5193, "step": 26154 }, { "epoch": 0.8034589745952754, "grad_norm": 0.38443228602409363, "learning_rate": 1.3066443758579255e-05, "loss": 0.5428, "step": 26155 }, { "epoch": 0.8034896937302246, "grad_norm": 0.38330134749412537, "learning_rate": 1.3065983759954857e-05, "loss": 0.508, "step": 26156 }, { "epoch": 0.8035204128651737, "grad_norm": 0.335918128490448, "learning_rate": 1.3065523754169635e-05, "loss": 0.538, "step": 26157 }, { "epoch": 0.8035511320001228, "grad_norm": 0.36141470074653625, "learning_rate": 1.3065063741224673e-05, "loss": 0.5355, "step": 26158 }, { "epoch": 0.8035818511350721, "grad_norm": 0.40230101346969604, "learning_rate": 1.3064603721121039e-05, "loss": 0.519, "step": 26159 }, { "epoch": 0.8036125702700212, "grad_norm": 0.3505796492099762, "learning_rate": 1.3064143693859807e-05, "loss": 0.5701, "step": 26160 }, { "epoch": 0.8036432894049703, "grad_norm": 0.363002210855484, "learning_rate": 1.3063683659442055e-05, "loss": 0.4814, "step": 26161 }, { "epoch": 0.8036740085399195, "grad_norm": 0.41682595014572144, "learning_rate": 1.3063223617868856e-05, "loss": 0.5254, "step": 26162 }, { "epoch": 0.8037047276748687, "grad_norm": 0.34676799178123474, "learning_rate": 1.3062763569141285e-05, "loss": 0.5791, "step": 26163 }, { "epoch": 0.8037354468098178, "grad_norm": 0.36504411697387695, "learning_rate": 1.3062303513260414e-05, "loss": 0.5741, "step": 26164 }, { "epoch": 0.803766165944767, "grad_norm": 0.4885893762111664, "learning_rate": 1.3061843450227322e-05, "loss": 0.536, "step": 26165 }, { "epoch": 0.8037968850797161, "grad_norm": 0.33469223976135254, "learning_rate": 1.3061383380043077e-05, "loss": 0.4773, "step": 26166 }, { "epoch": 0.8038276042146654, "grad_norm": 0.34790417551994324, "learning_rate": 1.306092330270876e-05, "loss": 0.5168, "step": 26167 }, { "epoch": 0.8038583233496145, "grad_norm": 0.5030750036239624, "learning_rate": 1.3060463218225444e-05, "loss": 0.5441, "step": 26168 }, { "epoch": 0.8038890424845636, "grad_norm": 0.3610546290874481, "learning_rate": 1.30600031265942e-05, "loss": 0.5446, "step": 26169 }, { "epoch": 0.8039197616195128, "grad_norm": 0.36830395460128784, "learning_rate": 1.3059543027816105e-05, "loss": 0.4705, "step": 26170 }, { "epoch": 0.803950480754462, "grad_norm": 0.38100942969322205, "learning_rate": 1.3059082921892233e-05, "loss": 0.4993, "step": 26171 }, { "epoch": 0.8039811998894111, "grad_norm": 0.34796422719955444, "learning_rate": 1.3058622808823663e-05, "loss": 0.5547, "step": 26172 }, { "epoch": 0.8040119190243603, "grad_norm": 0.33574172854423523, "learning_rate": 1.305816268861146e-05, "loss": 0.5567, "step": 26173 }, { "epoch": 0.8040426381593094, "grad_norm": 0.3602200448513031, "learning_rate": 1.3057702561256708e-05, "loss": 0.4804, "step": 26174 }, { "epoch": 0.8040733572942586, "grad_norm": 0.4978688359260559, "learning_rate": 1.3057242426760479e-05, "loss": 0.6546, "step": 26175 }, { "epoch": 0.8041040764292078, "grad_norm": 0.36030077934265137, "learning_rate": 1.3056782285123846e-05, "loss": 0.4954, "step": 26176 }, { "epoch": 0.8041347955641569, "grad_norm": 0.3359937071800232, "learning_rate": 1.3056322136347884e-05, "loss": 0.492, "step": 26177 }, { "epoch": 0.804165514699106, "grad_norm": 0.38371211290359497, "learning_rate": 1.3055861980433669e-05, "loss": 0.5896, "step": 26178 }, { "epoch": 0.8041962338340553, "grad_norm": 0.40420010685920715, "learning_rate": 1.3055401817382274e-05, "loss": 0.5055, "step": 26179 }, { "epoch": 0.8042269529690044, "grad_norm": 0.35213908553123474, "learning_rate": 1.3054941647194775e-05, "loss": 0.5602, "step": 26180 }, { "epoch": 0.8042576721039536, "grad_norm": 0.3941834568977356, "learning_rate": 1.3054481469872247e-05, "loss": 0.492, "step": 26181 }, { "epoch": 0.8042883912389027, "grad_norm": 0.3993501365184784, "learning_rate": 1.3054021285415761e-05, "loss": 0.5977, "step": 26182 }, { "epoch": 0.8043191103738518, "grad_norm": 0.37732118368148804, "learning_rate": 1.3053561093826398e-05, "loss": 0.5892, "step": 26183 }, { "epoch": 0.8043498295088011, "grad_norm": 0.3478439748287201, "learning_rate": 1.305310089510523e-05, "loss": 0.5641, "step": 26184 }, { "epoch": 0.8043805486437502, "grad_norm": 0.3879576325416565, "learning_rate": 1.305264068925333e-05, "loss": 0.5004, "step": 26185 }, { "epoch": 0.8044112677786993, "grad_norm": 0.3448277711868286, "learning_rate": 1.3052180476271777e-05, "loss": 0.5618, "step": 26186 }, { "epoch": 0.8044419869136485, "grad_norm": 0.3645698130130768, "learning_rate": 1.3051720256161642e-05, "loss": 0.5525, "step": 26187 }, { "epoch": 0.8044727060485977, "grad_norm": 0.4729974567890167, "learning_rate": 1.3051260028924e-05, "loss": 0.5716, "step": 26188 }, { "epoch": 0.8045034251835468, "grad_norm": 0.36281731724739075, "learning_rate": 1.305079979455993e-05, "loss": 0.5964, "step": 26189 }, { "epoch": 0.804534144318496, "grad_norm": 0.36487072706222534, "learning_rate": 1.30503395530705e-05, "loss": 0.6258, "step": 26190 }, { "epoch": 0.8045648634534451, "grad_norm": 0.3819264769554138, "learning_rate": 1.304987930445679e-05, "loss": 0.5361, "step": 26191 }, { "epoch": 0.8045955825883944, "grad_norm": 0.41823437809944153, "learning_rate": 1.3049419048719876e-05, "loss": 0.5231, "step": 26192 }, { "epoch": 0.8046263017233435, "grad_norm": 0.47325199842453003, "learning_rate": 1.3048958785860828e-05, "loss": 0.5794, "step": 26193 }, { "epoch": 0.8046570208582926, "grad_norm": 0.5372823476791382, "learning_rate": 1.3048498515880726e-05, "loss": 0.4905, "step": 26194 }, { "epoch": 0.8046877399932418, "grad_norm": 0.39971253275871277, "learning_rate": 1.3048038238780643e-05, "loss": 0.5551, "step": 26195 }, { "epoch": 0.804718459128191, "grad_norm": 0.3594813942909241, "learning_rate": 1.3047577954561653e-05, "loss": 0.564, "step": 26196 }, { "epoch": 0.8047491782631401, "grad_norm": 0.36493849754333496, "learning_rate": 1.3047117663224833e-05, "loss": 0.5949, "step": 26197 }, { "epoch": 0.8047798973980893, "grad_norm": 0.3660360872745514, "learning_rate": 1.3046657364771256e-05, "loss": 0.595, "step": 26198 }, { "epoch": 0.8048106165330384, "grad_norm": 0.37878596782684326, "learning_rate": 1.3046197059201996e-05, "loss": 0.6114, "step": 26199 }, { "epoch": 0.8048413356679875, "grad_norm": 0.3497733175754547, "learning_rate": 1.3045736746518132e-05, "loss": 0.6151, "step": 26200 }, { "epoch": 0.8048720548029368, "grad_norm": 0.3922208845615387, "learning_rate": 1.304527642672074e-05, "loss": 0.5216, "step": 26201 }, { "epoch": 0.8049027739378859, "grad_norm": 0.3552209734916687, "learning_rate": 1.3044816099810886e-05, "loss": 0.5395, "step": 26202 }, { "epoch": 0.804933493072835, "grad_norm": 0.3840649425983429, "learning_rate": 1.3044355765789655e-05, "loss": 0.6501, "step": 26203 }, { "epoch": 0.8049642122077842, "grad_norm": 0.38087403774261475, "learning_rate": 1.304389542465812e-05, "loss": 0.6219, "step": 26204 }, { "epoch": 0.8049949313427334, "grad_norm": 0.3819849491119385, "learning_rate": 1.3043435076417353e-05, "loss": 0.5644, "step": 26205 }, { "epoch": 0.8050256504776826, "grad_norm": 0.39014339447021484, "learning_rate": 1.3042974721068429e-05, "loss": 0.5543, "step": 26206 }, { "epoch": 0.8050563696126317, "grad_norm": 0.3366779386997223, "learning_rate": 1.3042514358612426e-05, "loss": 0.5379, "step": 26207 }, { "epoch": 0.8050870887475808, "grad_norm": 0.34676599502563477, "learning_rate": 1.3042053989050418e-05, "loss": 0.5719, "step": 26208 }, { "epoch": 0.8051178078825301, "grad_norm": 0.4481033980846405, "learning_rate": 1.304159361238348e-05, "loss": 0.586, "step": 26209 }, { "epoch": 0.8051485270174792, "grad_norm": 0.43807855248451233, "learning_rate": 1.304113322861269e-05, "loss": 0.5269, "step": 26210 }, { "epoch": 0.8051792461524283, "grad_norm": 0.347015380859375, "learning_rate": 1.304067283773912e-05, "loss": 0.627, "step": 26211 }, { "epoch": 0.8052099652873775, "grad_norm": 0.3601894676685333, "learning_rate": 1.3040212439763847e-05, "loss": 0.5724, "step": 26212 }, { "epoch": 0.8052406844223267, "grad_norm": 0.3456360995769501, "learning_rate": 1.3039752034687943e-05, "loss": 0.5606, "step": 26213 }, { "epoch": 0.8052714035572758, "grad_norm": 0.3674483299255371, "learning_rate": 1.3039291622512488e-05, "loss": 0.5624, "step": 26214 }, { "epoch": 0.805302122692225, "grad_norm": 0.3995487689971924, "learning_rate": 1.3038831203238559e-05, "loss": 0.5758, "step": 26215 }, { "epoch": 0.8053328418271741, "grad_norm": 0.37542200088500977, "learning_rate": 1.303837077686722e-05, "loss": 0.6329, "step": 26216 }, { "epoch": 0.8053635609621234, "grad_norm": 0.39776766300201416, "learning_rate": 1.3037910343399557e-05, "loss": 0.5393, "step": 26217 }, { "epoch": 0.8053942800970725, "grad_norm": 0.4224998652935028, "learning_rate": 1.303744990283664e-05, "loss": 0.5237, "step": 26218 }, { "epoch": 0.8054249992320216, "grad_norm": 0.3587382435798645, "learning_rate": 1.3036989455179552e-05, "loss": 0.5405, "step": 26219 }, { "epoch": 0.8054557183669708, "grad_norm": 0.3469826579093933, "learning_rate": 1.3036529000429358e-05, "loss": 0.5155, "step": 26220 }, { "epoch": 0.80548643750192, "grad_norm": 0.3534233272075653, "learning_rate": 1.3036068538587143e-05, "loss": 0.5303, "step": 26221 }, { "epoch": 0.8055171566368691, "grad_norm": 0.36357125639915466, "learning_rate": 1.3035608069653974e-05, "loss": 0.5416, "step": 26222 }, { "epoch": 0.8055478757718183, "grad_norm": 0.3614296317100525, "learning_rate": 1.3035147593630932e-05, "loss": 0.5017, "step": 26223 }, { "epoch": 0.8055785949067674, "grad_norm": 0.351402223110199, "learning_rate": 1.3034687110519089e-05, "loss": 0.5689, "step": 26224 }, { "epoch": 0.8056093140417165, "grad_norm": 0.4191986620426178, "learning_rate": 1.3034226620319523e-05, "loss": 0.5439, "step": 26225 }, { "epoch": 0.8056400331766658, "grad_norm": 0.4044738709926605, "learning_rate": 1.3033766123033308e-05, "loss": 0.6235, "step": 26226 }, { "epoch": 0.8056707523116149, "grad_norm": 0.3714693784713745, "learning_rate": 1.3033305618661523e-05, "loss": 0.5336, "step": 26227 }, { "epoch": 0.805701471446564, "grad_norm": 0.346048504114151, "learning_rate": 1.3032845107205241e-05, "loss": 0.5203, "step": 26228 }, { "epoch": 0.8057321905815132, "grad_norm": 1.2490400075912476, "learning_rate": 1.3032384588665534e-05, "loss": 0.5734, "step": 26229 }, { "epoch": 0.8057629097164624, "grad_norm": 0.41657087206840515, "learning_rate": 1.3031924063043487e-05, "loss": 0.5711, "step": 26230 }, { "epoch": 0.8057936288514116, "grad_norm": 0.36753466725349426, "learning_rate": 1.3031463530340165e-05, "loss": 0.5534, "step": 26231 }, { "epoch": 0.8058243479863607, "grad_norm": 0.3476009666919708, "learning_rate": 1.3031002990556647e-05, "loss": 0.5323, "step": 26232 }, { "epoch": 0.8058550671213098, "grad_norm": 0.3581063747406006, "learning_rate": 1.3030542443694015e-05, "loss": 0.5128, "step": 26233 }, { "epoch": 0.8058857862562591, "grad_norm": 0.36520400643348694, "learning_rate": 1.3030081889753334e-05, "loss": 0.5186, "step": 26234 }, { "epoch": 0.8059165053912082, "grad_norm": 0.36810168623924255, "learning_rate": 1.3029621328735688e-05, "loss": 0.5307, "step": 26235 }, { "epoch": 0.8059472245261573, "grad_norm": 0.3962430953979492, "learning_rate": 1.3029160760642147e-05, "loss": 0.4835, "step": 26236 }, { "epoch": 0.8059779436611065, "grad_norm": 0.38799992203712463, "learning_rate": 1.3028700185473794e-05, "loss": 0.4842, "step": 26237 }, { "epoch": 0.8060086627960557, "grad_norm": 0.39240726828575134, "learning_rate": 1.3028239603231696e-05, "loss": 0.5753, "step": 26238 }, { "epoch": 0.8060393819310048, "grad_norm": 0.38567131757736206, "learning_rate": 1.3027779013916936e-05, "loss": 0.5616, "step": 26239 }, { "epoch": 0.806070101065954, "grad_norm": 0.3760822117328644, "learning_rate": 1.3027318417530586e-05, "loss": 0.5594, "step": 26240 }, { "epoch": 0.8061008202009031, "grad_norm": 0.3950330317020416, "learning_rate": 1.3026857814073721e-05, "loss": 0.6016, "step": 26241 }, { "epoch": 0.8061315393358524, "grad_norm": 0.354851633310318, "learning_rate": 1.3026397203547421e-05, "loss": 0.5425, "step": 26242 }, { "epoch": 0.8061622584708015, "grad_norm": 0.365953266620636, "learning_rate": 1.3025936585952756e-05, "loss": 0.5183, "step": 26243 }, { "epoch": 0.8061929776057506, "grad_norm": 0.3861527144908905, "learning_rate": 1.3025475961290806e-05, "loss": 0.5637, "step": 26244 }, { "epoch": 0.8062236967406998, "grad_norm": 0.3649355173110962, "learning_rate": 1.3025015329562645e-05, "loss": 0.5151, "step": 26245 }, { "epoch": 0.8062544158756489, "grad_norm": 0.3794938921928406, "learning_rate": 1.302455469076935e-05, "loss": 0.5429, "step": 26246 }, { "epoch": 0.8062851350105981, "grad_norm": 0.36975228786468506, "learning_rate": 1.3024094044911994e-05, "loss": 0.514, "step": 26247 }, { "epoch": 0.8063158541455473, "grad_norm": 0.3514556288719177, "learning_rate": 1.302363339199166e-05, "loss": 0.6141, "step": 26248 }, { "epoch": 0.8063465732804964, "grad_norm": 0.4300982654094696, "learning_rate": 1.3023172732009416e-05, "loss": 0.5451, "step": 26249 }, { "epoch": 0.8063772924154455, "grad_norm": 0.36475369334220886, "learning_rate": 1.302271206496634e-05, "loss": 0.5807, "step": 26250 }, { "epoch": 0.8064080115503948, "grad_norm": 0.34604331851005554, "learning_rate": 1.3022251390863511e-05, "loss": 0.5997, "step": 26251 }, { "epoch": 0.8064387306853439, "grad_norm": 0.4339134991168976, "learning_rate": 1.3021790709702002e-05, "loss": 0.5491, "step": 26252 }, { "epoch": 0.806469449820293, "grad_norm": 0.3714050352573395, "learning_rate": 1.3021330021482889e-05, "loss": 0.5728, "step": 26253 }, { "epoch": 0.8065001689552422, "grad_norm": 0.4229991137981415, "learning_rate": 1.3020869326207248e-05, "loss": 0.6213, "step": 26254 }, { "epoch": 0.8065308880901914, "grad_norm": 2.1319212913513184, "learning_rate": 1.302040862387616e-05, "loss": 0.5435, "step": 26255 }, { "epoch": 0.8065616072251406, "grad_norm": 0.3969366252422333, "learning_rate": 1.3019947914490691e-05, "loss": 0.5429, "step": 26256 }, { "epoch": 0.8065923263600897, "grad_norm": 0.3642822206020355, "learning_rate": 1.3019487198051928e-05, "loss": 0.5327, "step": 26257 }, { "epoch": 0.8066230454950388, "grad_norm": 0.3263793885707855, "learning_rate": 1.3019026474560939e-05, "loss": 0.5879, "step": 26258 }, { "epoch": 0.8066537646299881, "grad_norm": 0.4166395366191864, "learning_rate": 1.3018565744018803e-05, "loss": 0.637, "step": 26259 }, { "epoch": 0.8066844837649372, "grad_norm": 0.39320409297943115, "learning_rate": 1.3018105006426598e-05, "loss": 0.5853, "step": 26260 }, { "epoch": 0.8067152028998863, "grad_norm": 0.3255317509174347, "learning_rate": 1.3017644261785396e-05, "loss": 0.5394, "step": 26261 }, { "epoch": 0.8067459220348355, "grad_norm": 0.35818132758140564, "learning_rate": 1.3017183510096275e-05, "loss": 0.5608, "step": 26262 }, { "epoch": 0.8067766411697846, "grad_norm": 0.3707267642021179, "learning_rate": 1.3016722751360309e-05, "loss": 0.5753, "step": 26263 }, { "epoch": 0.8068073603047338, "grad_norm": 0.8200882077217102, "learning_rate": 1.301626198557858e-05, "loss": 0.5981, "step": 26264 }, { "epoch": 0.806838079439683, "grad_norm": 0.37741178274154663, "learning_rate": 1.3015801212752156e-05, "loss": 0.5609, "step": 26265 }, { "epoch": 0.8068687985746321, "grad_norm": 0.3725721538066864, "learning_rate": 1.3015340432882124e-05, "loss": 0.5925, "step": 26266 }, { "epoch": 0.8068995177095813, "grad_norm": 0.35568422079086304, "learning_rate": 1.301487964596955e-05, "loss": 0.5644, "step": 26267 }, { "epoch": 0.8069302368445305, "grad_norm": 0.380314439535141, "learning_rate": 1.3014418852015513e-05, "loss": 0.5009, "step": 26268 }, { "epoch": 0.8069609559794796, "grad_norm": 0.3544039726257324, "learning_rate": 1.3013958051021093e-05, "loss": 0.5912, "step": 26269 }, { "epoch": 0.8069916751144288, "grad_norm": 0.9892155528068542, "learning_rate": 1.3013497242987363e-05, "loss": 0.5895, "step": 26270 }, { "epoch": 0.8070223942493779, "grad_norm": 0.3749963939189911, "learning_rate": 1.30130364279154e-05, "loss": 0.5706, "step": 26271 }, { "epoch": 0.8070531133843271, "grad_norm": 0.4023978114128113, "learning_rate": 1.301257560580628e-05, "loss": 0.5451, "step": 26272 }, { "epoch": 0.8070838325192763, "grad_norm": 0.3741403818130493, "learning_rate": 1.3012114776661078e-05, "loss": 0.5553, "step": 26273 }, { "epoch": 0.8071145516542254, "grad_norm": 0.5140959024429321, "learning_rate": 1.301165394048087e-05, "loss": 0.5552, "step": 26274 }, { "epoch": 0.8071452707891745, "grad_norm": 0.3407035768032074, "learning_rate": 1.3011193097266737e-05, "loss": 0.4742, "step": 26275 }, { "epoch": 0.8071759899241238, "grad_norm": 0.39074766635894775, "learning_rate": 1.301073224701975e-05, "loss": 0.5629, "step": 26276 }, { "epoch": 0.8072067090590729, "grad_norm": 0.371873140335083, "learning_rate": 1.301027138974099e-05, "loss": 0.6285, "step": 26277 }, { "epoch": 0.8072374281940221, "grad_norm": 0.32973986864089966, "learning_rate": 1.300981052543153e-05, "loss": 0.4187, "step": 26278 }, { "epoch": 0.8072681473289712, "grad_norm": 0.38441959023475647, "learning_rate": 1.3009349654092447e-05, "loss": 0.6055, "step": 26279 }, { "epoch": 0.8072988664639204, "grad_norm": 0.38520804047584534, "learning_rate": 1.3008888775724817e-05, "loss": 0.5275, "step": 26280 }, { "epoch": 0.8073295855988696, "grad_norm": 0.4668167233467102, "learning_rate": 1.3008427890329715e-05, "loss": 0.6372, "step": 26281 }, { "epoch": 0.8073603047338187, "grad_norm": 0.4099523723125458, "learning_rate": 1.3007966997908223e-05, "loss": 0.5475, "step": 26282 }, { "epoch": 0.8073910238687678, "grad_norm": 0.36631104350090027, "learning_rate": 1.3007506098461416e-05, "loss": 0.5622, "step": 26283 }, { "epoch": 0.807421743003717, "grad_norm": 0.4290277063846588, "learning_rate": 1.3007045191990364e-05, "loss": 0.5694, "step": 26284 }, { "epoch": 0.8074524621386662, "grad_norm": 0.4658887982368469, "learning_rate": 1.300658427849615e-05, "loss": 0.5024, "step": 26285 }, { "epoch": 0.8074831812736153, "grad_norm": 0.34043169021606445, "learning_rate": 1.3006123357979848e-05, "loss": 0.5207, "step": 26286 }, { "epoch": 0.8075139004085645, "grad_norm": 0.3901040554046631, "learning_rate": 1.3005662430442536e-05, "loss": 0.4945, "step": 26287 }, { "epoch": 0.8075446195435136, "grad_norm": 0.36909550428390503, "learning_rate": 1.3005201495885286e-05, "loss": 0.4783, "step": 26288 }, { "epoch": 0.8075753386784628, "grad_norm": 0.3665211796760559, "learning_rate": 1.3004740554309183e-05, "loss": 0.5604, "step": 26289 }, { "epoch": 0.807606057813412, "grad_norm": 0.3449922800064087, "learning_rate": 1.3004279605715295e-05, "loss": 0.57, "step": 26290 }, { "epoch": 0.8076367769483611, "grad_norm": 0.3206842243671417, "learning_rate": 1.3003818650104706e-05, "loss": 0.5172, "step": 26291 }, { "epoch": 0.8076674960833103, "grad_norm": 0.3982529640197754, "learning_rate": 1.3003357687478483e-05, "loss": 0.6048, "step": 26292 }, { "epoch": 0.8076982152182595, "grad_norm": 0.33728912472724915, "learning_rate": 1.3002896717837712e-05, "loss": 0.5122, "step": 26293 }, { "epoch": 0.8077289343532086, "grad_norm": 0.35261106491088867, "learning_rate": 1.3002435741183468e-05, "loss": 0.5546, "step": 26294 }, { "epoch": 0.8077596534881578, "grad_norm": 0.3608912229537964, "learning_rate": 1.3001974757516822e-05, "loss": 0.5086, "step": 26295 }, { "epoch": 0.8077903726231069, "grad_norm": 0.3399486541748047, "learning_rate": 1.3001513766838856e-05, "loss": 0.4999, "step": 26296 }, { "epoch": 0.807821091758056, "grad_norm": 0.40286314487457275, "learning_rate": 1.3001052769150643e-05, "loss": 0.5487, "step": 26297 }, { "epoch": 0.8078518108930053, "grad_norm": 0.36024102568626404, "learning_rate": 1.3000591764453265e-05, "loss": 0.5752, "step": 26298 }, { "epoch": 0.8078825300279544, "grad_norm": 0.3642594516277313, "learning_rate": 1.3000130752747789e-05, "loss": 0.5376, "step": 26299 }, { "epoch": 0.8079132491629035, "grad_norm": 0.40295639634132385, "learning_rate": 1.2999669734035303e-05, "loss": 0.5943, "step": 26300 }, { "epoch": 0.8079439682978528, "grad_norm": 0.3413909375667572, "learning_rate": 1.299920870831688e-05, "loss": 0.6137, "step": 26301 }, { "epoch": 0.8079746874328019, "grad_norm": 0.3756406009197235, "learning_rate": 1.2998747675593592e-05, "loss": 0.5672, "step": 26302 }, { "epoch": 0.8080054065677511, "grad_norm": 0.36127954721450806, "learning_rate": 1.2998286635866524e-05, "loss": 0.5861, "step": 26303 }, { "epoch": 0.8080361257027002, "grad_norm": 0.32230839133262634, "learning_rate": 1.2997825589136744e-05, "loss": 0.4741, "step": 26304 }, { "epoch": 0.8080668448376493, "grad_norm": 0.39252856373786926, "learning_rate": 1.2997364535405335e-05, "loss": 0.5421, "step": 26305 }, { "epoch": 0.8080975639725986, "grad_norm": 0.38859912753105164, "learning_rate": 1.299690347467337e-05, "loss": 0.5028, "step": 26306 }, { "epoch": 0.8081282831075477, "grad_norm": 0.36077508330345154, "learning_rate": 1.299644240694193e-05, "loss": 0.5906, "step": 26307 }, { "epoch": 0.8081590022424968, "grad_norm": 0.5037927627563477, "learning_rate": 1.2995981332212089e-05, "loss": 0.5641, "step": 26308 }, { "epoch": 0.808189721377446, "grad_norm": 0.3409503698348999, "learning_rate": 1.2995520250484923e-05, "loss": 0.5466, "step": 26309 }, { "epoch": 0.8082204405123952, "grad_norm": 0.3528132140636444, "learning_rate": 1.299505916176151e-05, "loss": 0.538, "step": 26310 }, { "epoch": 0.8082511596473443, "grad_norm": 0.36082446575164795, "learning_rate": 1.2994598066042929e-05, "loss": 0.6009, "step": 26311 }, { "epoch": 0.8082818787822935, "grad_norm": 0.40234968066215515, "learning_rate": 1.2994136963330256e-05, "loss": 0.6248, "step": 26312 }, { "epoch": 0.8083125979172426, "grad_norm": 0.44480109214782715, "learning_rate": 1.2993675853624565e-05, "loss": 0.6477, "step": 26313 }, { "epoch": 0.8083433170521918, "grad_norm": 0.345110148191452, "learning_rate": 1.2993214736926936e-05, "loss": 0.4832, "step": 26314 }, { "epoch": 0.808374036187141, "grad_norm": 0.46998482942581177, "learning_rate": 1.2992753613238442e-05, "loss": 0.586, "step": 26315 }, { "epoch": 0.8084047553220901, "grad_norm": 0.39987924695014954, "learning_rate": 1.2992292482560166e-05, "loss": 0.6204, "step": 26316 }, { "epoch": 0.8084354744570393, "grad_norm": 0.38310760259628296, "learning_rate": 1.2991831344893178e-05, "loss": 0.571, "step": 26317 }, { "epoch": 0.8084661935919885, "grad_norm": 0.3928704857826233, "learning_rate": 1.2991370200238564e-05, "loss": 0.5458, "step": 26318 }, { "epoch": 0.8084969127269376, "grad_norm": 0.37309587001800537, "learning_rate": 1.2990909048597395e-05, "loss": 0.5808, "step": 26319 }, { "epoch": 0.8085276318618868, "grad_norm": 0.3715684711933136, "learning_rate": 1.2990447889970747e-05, "loss": 0.6439, "step": 26320 }, { "epoch": 0.8085583509968359, "grad_norm": 0.360910028219223, "learning_rate": 1.29899867243597e-05, "loss": 0.5646, "step": 26321 }, { "epoch": 0.808589070131785, "grad_norm": 0.3752129077911377, "learning_rate": 1.2989525551765329e-05, "loss": 0.5117, "step": 26322 }, { "epoch": 0.8086197892667343, "grad_norm": 0.3989008963108063, "learning_rate": 1.2989064372188715e-05, "loss": 0.5585, "step": 26323 }, { "epoch": 0.8086505084016834, "grad_norm": 0.3436627984046936, "learning_rate": 1.298860318563093e-05, "loss": 0.543, "step": 26324 }, { "epoch": 0.8086812275366325, "grad_norm": 0.36354485154151917, "learning_rate": 1.2988141992093056e-05, "loss": 0.5593, "step": 26325 }, { "epoch": 0.8087119466715817, "grad_norm": 0.5585031509399414, "learning_rate": 1.2987680791576165e-05, "loss": 0.5655, "step": 26326 }, { "epoch": 0.8087426658065309, "grad_norm": 0.4120101034641266, "learning_rate": 1.2987219584081339e-05, "loss": 0.6, "step": 26327 }, { "epoch": 0.8087733849414801, "grad_norm": 0.35381197929382324, "learning_rate": 1.2986758369609651e-05, "loss": 0.5473, "step": 26328 }, { "epoch": 0.8088041040764292, "grad_norm": 0.35954567790031433, "learning_rate": 1.2986297148162183e-05, "loss": 0.6057, "step": 26329 }, { "epoch": 0.8088348232113783, "grad_norm": 0.35674044489860535, "learning_rate": 1.2985835919740008e-05, "loss": 0.6093, "step": 26330 }, { "epoch": 0.8088655423463276, "grad_norm": 0.3696783483028412, "learning_rate": 1.2985374684344203e-05, "loss": 0.572, "step": 26331 }, { "epoch": 0.8088962614812767, "grad_norm": 0.32367846369743347, "learning_rate": 1.298491344197585e-05, "loss": 0.4889, "step": 26332 }, { "epoch": 0.8089269806162258, "grad_norm": 0.35038328170776367, "learning_rate": 1.2984452192636022e-05, "loss": 0.4857, "step": 26333 }, { "epoch": 0.808957699751175, "grad_norm": 0.37545809149742126, "learning_rate": 1.2983990936325796e-05, "loss": 0.5241, "step": 26334 }, { "epoch": 0.8089884188861242, "grad_norm": 0.35379937291145325, "learning_rate": 1.2983529673046252e-05, "loss": 0.5247, "step": 26335 }, { "epoch": 0.8090191380210733, "grad_norm": 0.38808172941207886, "learning_rate": 1.2983068402798465e-05, "loss": 0.5723, "step": 26336 }, { "epoch": 0.8090498571560225, "grad_norm": 0.420814573764801, "learning_rate": 1.2982607125583515e-05, "loss": 0.5781, "step": 26337 }, { "epoch": 0.8090805762909716, "grad_norm": 0.33358269929885864, "learning_rate": 1.2982145841402476e-05, "loss": 0.5932, "step": 26338 }, { "epoch": 0.8091112954259208, "grad_norm": 0.35613033175468445, "learning_rate": 1.2981684550256429e-05, "loss": 0.6279, "step": 26339 }, { "epoch": 0.80914201456087, "grad_norm": 0.6786279678344727, "learning_rate": 1.2981223252146446e-05, "loss": 0.5746, "step": 26340 }, { "epoch": 0.8091727336958191, "grad_norm": 0.3769492208957672, "learning_rate": 1.2980761947073613e-05, "loss": 0.5314, "step": 26341 }, { "epoch": 0.8092034528307683, "grad_norm": 0.36453139781951904, "learning_rate": 1.2980300635038998e-05, "loss": 0.5235, "step": 26342 }, { "epoch": 0.8092341719657175, "grad_norm": 0.3504563868045807, "learning_rate": 1.2979839316043685e-05, "loss": 0.5143, "step": 26343 }, { "epoch": 0.8092648911006666, "grad_norm": 0.4593275785446167, "learning_rate": 1.2979377990088748e-05, "loss": 0.5181, "step": 26344 }, { "epoch": 0.8092956102356158, "grad_norm": 0.343403697013855, "learning_rate": 1.2978916657175268e-05, "loss": 0.5199, "step": 26345 }, { "epoch": 0.8093263293705649, "grad_norm": 0.37772324681282043, "learning_rate": 1.2978455317304317e-05, "loss": 0.5475, "step": 26346 }, { "epoch": 0.809357048505514, "grad_norm": 0.3801419138908386, "learning_rate": 1.2977993970476979e-05, "loss": 0.5361, "step": 26347 }, { "epoch": 0.8093877676404633, "grad_norm": 0.3731255829334259, "learning_rate": 1.2977532616694326e-05, "loss": 0.5706, "step": 26348 }, { "epoch": 0.8094184867754124, "grad_norm": 0.49654605984687805, "learning_rate": 1.2977071255957436e-05, "loss": 0.4813, "step": 26349 }, { "epoch": 0.8094492059103615, "grad_norm": 0.34187424182891846, "learning_rate": 1.2976609888267394e-05, "loss": 0.579, "step": 26350 }, { "epoch": 0.8094799250453107, "grad_norm": 0.3656806945800781, "learning_rate": 1.2976148513625266e-05, "loss": 0.54, "step": 26351 }, { "epoch": 0.8095106441802599, "grad_norm": 0.37535804510116577, "learning_rate": 1.2975687132032138e-05, "loss": 0.5417, "step": 26352 }, { "epoch": 0.8095413633152091, "grad_norm": 0.3643534779548645, "learning_rate": 1.297522574348908e-05, "loss": 0.5631, "step": 26353 }, { "epoch": 0.8095720824501582, "grad_norm": 0.38546738028526306, "learning_rate": 1.2974764347997179e-05, "loss": 0.5468, "step": 26354 }, { "epoch": 0.8096028015851073, "grad_norm": 0.41131043434143066, "learning_rate": 1.2974302945557508e-05, "loss": 0.548, "step": 26355 }, { "epoch": 0.8096335207200566, "grad_norm": 0.3809976875782013, "learning_rate": 1.2973841536171144e-05, "loss": 0.5961, "step": 26356 }, { "epoch": 0.8096642398550057, "grad_norm": 0.36341017484664917, "learning_rate": 1.2973380119839165e-05, "loss": 0.566, "step": 26357 }, { "epoch": 0.8096949589899548, "grad_norm": 0.3617106080055237, "learning_rate": 1.297291869656265e-05, "loss": 0.5716, "step": 26358 }, { "epoch": 0.809725678124904, "grad_norm": 0.34949472546577454, "learning_rate": 1.2972457266342677e-05, "loss": 0.4913, "step": 26359 }, { "epoch": 0.8097563972598532, "grad_norm": 0.35772091150283813, "learning_rate": 1.297199582918032e-05, "loss": 0.5918, "step": 26360 }, { "epoch": 0.8097871163948023, "grad_norm": 0.40720921754837036, "learning_rate": 1.297153438507666e-05, "loss": 0.478, "step": 26361 }, { "epoch": 0.8098178355297515, "grad_norm": 0.3368711769580841, "learning_rate": 1.2971072934032774e-05, "loss": 0.4303, "step": 26362 }, { "epoch": 0.8098485546647006, "grad_norm": 0.35987818241119385, "learning_rate": 1.2970611476049739e-05, "loss": 0.549, "step": 26363 }, { "epoch": 0.8098792737996499, "grad_norm": 0.40766483545303345, "learning_rate": 1.2970150011128637e-05, "loss": 0.5048, "step": 26364 }, { "epoch": 0.809909992934599, "grad_norm": 0.3607076406478882, "learning_rate": 1.2969688539270537e-05, "loss": 0.5574, "step": 26365 }, { "epoch": 0.8099407120695481, "grad_norm": 0.3565358817577362, "learning_rate": 1.2969227060476526e-05, "loss": 0.4964, "step": 26366 }, { "epoch": 0.8099714312044973, "grad_norm": 0.3727165460586548, "learning_rate": 1.2968765574747675e-05, "loss": 0.5178, "step": 26367 }, { "epoch": 0.8100021503394464, "grad_norm": 0.39156100153923035, "learning_rate": 1.2968304082085065e-05, "loss": 0.5006, "step": 26368 }, { "epoch": 0.8100328694743956, "grad_norm": 0.3437104821205139, "learning_rate": 1.2967842582489775e-05, "loss": 0.5329, "step": 26369 }, { "epoch": 0.8100635886093448, "grad_norm": 0.3287113308906555, "learning_rate": 1.2967381075962882e-05, "loss": 0.5884, "step": 26370 }, { "epoch": 0.8100943077442939, "grad_norm": 0.3492245674133301, "learning_rate": 1.296691956250546e-05, "loss": 0.4598, "step": 26371 }, { "epoch": 0.810125026879243, "grad_norm": 0.33483439683914185, "learning_rate": 1.2966458042118593e-05, "loss": 0.5274, "step": 26372 }, { "epoch": 0.8101557460141923, "grad_norm": 0.3432069420814514, "learning_rate": 1.2965996514803354e-05, "loss": 0.5544, "step": 26373 }, { "epoch": 0.8101864651491414, "grad_norm": 0.3242409825325012, "learning_rate": 1.2965534980560823e-05, "loss": 0.4676, "step": 26374 }, { "epoch": 0.8102171842840905, "grad_norm": 0.37942391633987427, "learning_rate": 1.296507343939208e-05, "loss": 0.5614, "step": 26375 }, { "epoch": 0.8102479034190397, "grad_norm": 0.403598815202713, "learning_rate": 1.29646118912982e-05, "loss": 0.5785, "step": 26376 }, { "epoch": 0.8102786225539889, "grad_norm": 0.4394424557685852, "learning_rate": 1.2964150336280263e-05, "loss": 0.6241, "step": 26377 }, { "epoch": 0.8103093416889381, "grad_norm": 0.3628803491592407, "learning_rate": 1.2963688774339343e-05, "loss": 0.5805, "step": 26378 }, { "epoch": 0.8103400608238872, "grad_norm": 0.4794370234012604, "learning_rate": 1.2963227205476522e-05, "loss": 0.6002, "step": 26379 }, { "epoch": 0.8103707799588363, "grad_norm": 0.3566838800907135, "learning_rate": 1.296276562969288e-05, "loss": 0.5959, "step": 26380 }, { "epoch": 0.8104014990937856, "grad_norm": 0.3458081781864166, "learning_rate": 1.2962304046989489e-05, "loss": 0.4802, "step": 26381 }, { "epoch": 0.8104322182287347, "grad_norm": 0.4249017834663391, "learning_rate": 1.2961842457367432e-05, "loss": 0.5007, "step": 26382 }, { "epoch": 0.8104629373636838, "grad_norm": 0.3667864203453064, "learning_rate": 1.2961380860827783e-05, "loss": 0.4958, "step": 26383 }, { "epoch": 0.810493656498633, "grad_norm": 0.39660921692848206, "learning_rate": 1.2960919257371622e-05, "loss": 0.5127, "step": 26384 }, { "epoch": 0.8105243756335822, "grad_norm": 0.38112813234329224, "learning_rate": 1.2960457647000028e-05, "loss": 0.6122, "step": 26385 }, { "epoch": 0.8105550947685313, "grad_norm": 0.3655250370502472, "learning_rate": 1.2959996029714079e-05, "loss": 0.5702, "step": 26386 }, { "epoch": 0.8105858139034805, "grad_norm": 0.3464469015598297, "learning_rate": 1.2959534405514852e-05, "loss": 0.5618, "step": 26387 }, { "epoch": 0.8106165330384296, "grad_norm": 0.3917478919029236, "learning_rate": 1.2959072774403427e-05, "loss": 0.4267, "step": 26388 }, { "epoch": 0.8106472521733789, "grad_norm": 0.3730996251106262, "learning_rate": 1.2958611136380878e-05, "loss": 0.6136, "step": 26389 }, { "epoch": 0.810677971308328, "grad_norm": 0.36949342489242554, "learning_rate": 1.2958149491448287e-05, "loss": 0.6091, "step": 26390 }, { "epoch": 0.8107086904432771, "grad_norm": 0.37642398476600647, "learning_rate": 1.2957687839606732e-05, "loss": 0.6213, "step": 26391 }, { "epoch": 0.8107394095782263, "grad_norm": 0.37730175256729126, "learning_rate": 1.295722618085729e-05, "loss": 0.5827, "step": 26392 }, { "epoch": 0.8107701287131754, "grad_norm": 0.3733924925327301, "learning_rate": 1.2956764515201042e-05, "loss": 0.5765, "step": 26393 }, { "epoch": 0.8108008478481246, "grad_norm": 0.355344295501709, "learning_rate": 1.295630284263906e-05, "loss": 0.5821, "step": 26394 }, { "epoch": 0.8108315669830738, "grad_norm": 0.3903282582759857, "learning_rate": 1.295584116317243e-05, "loss": 0.5312, "step": 26395 }, { "epoch": 0.8108622861180229, "grad_norm": 0.3222629725933075, "learning_rate": 1.2955379476802224e-05, "loss": 0.5252, "step": 26396 }, { "epoch": 0.810893005252972, "grad_norm": 0.38473957777023315, "learning_rate": 1.2954917783529525e-05, "loss": 0.5738, "step": 26397 }, { "epoch": 0.8109237243879213, "grad_norm": 0.37645223736763, "learning_rate": 1.2954456083355407e-05, "loss": 0.5932, "step": 26398 }, { "epoch": 0.8109544435228704, "grad_norm": 0.42383983731269836, "learning_rate": 1.2953994376280951e-05, "loss": 0.6152, "step": 26399 }, { "epoch": 0.8109851626578195, "grad_norm": 0.3605952560901642, "learning_rate": 1.2953532662307238e-05, "loss": 0.5709, "step": 26400 }, { "epoch": 0.8110158817927687, "grad_norm": 0.4155566096305847, "learning_rate": 1.2953070941435338e-05, "loss": 0.6133, "step": 26401 }, { "epoch": 0.8110466009277179, "grad_norm": 0.41319355368614197, "learning_rate": 1.2952609213666338e-05, "loss": 0.5096, "step": 26402 }, { "epoch": 0.8110773200626671, "grad_norm": 0.6090933680534363, "learning_rate": 1.2952147479001312e-05, "loss": 0.4993, "step": 26403 }, { "epoch": 0.8111080391976162, "grad_norm": 0.3590015769004822, "learning_rate": 1.295168573744134e-05, "loss": 0.5549, "step": 26404 }, { "epoch": 0.8111387583325653, "grad_norm": 0.6072195768356323, "learning_rate": 1.2951223988987497e-05, "loss": 0.5314, "step": 26405 }, { "epoch": 0.8111694774675146, "grad_norm": 0.4618387520313263, "learning_rate": 1.2950762233640868e-05, "loss": 0.5276, "step": 26406 }, { "epoch": 0.8112001966024637, "grad_norm": 0.33752164244651794, "learning_rate": 1.2950300471402526e-05, "loss": 0.5752, "step": 26407 }, { "epoch": 0.8112309157374128, "grad_norm": 0.3628162443637848, "learning_rate": 1.2949838702273551e-05, "loss": 0.6347, "step": 26408 }, { "epoch": 0.811261634872362, "grad_norm": 0.3517531454563141, "learning_rate": 1.2949376926255022e-05, "loss": 0.5017, "step": 26409 }, { "epoch": 0.8112923540073111, "grad_norm": 0.3824778199195862, "learning_rate": 1.2948915143348016e-05, "loss": 0.557, "step": 26410 }, { "epoch": 0.8113230731422603, "grad_norm": 0.39080628752708435, "learning_rate": 1.2948453353553613e-05, "loss": 0.544, "step": 26411 }, { "epoch": 0.8113537922772095, "grad_norm": 0.3370012044906616, "learning_rate": 1.2947991556872893e-05, "loss": 0.4743, "step": 26412 }, { "epoch": 0.8113845114121586, "grad_norm": 0.3486691415309906, "learning_rate": 1.2947529753306933e-05, "loss": 0.5406, "step": 26413 }, { "epoch": 0.8114152305471078, "grad_norm": 0.3955516219139099, "learning_rate": 1.294706794285681e-05, "loss": 0.5893, "step": 26414 }, { "epoch": 0.811445949682057, "grad_norm": 0.38106635212898254, "learning_rate": 1.2946606125523604e-05, "loss": 0.6065, "step": 26415 }, { "epoch": 0.8114766688170061, "grad_norm": 0.39322054386138916, "learning_rate": 1.2946144301308396e-05, "loss": 0.545, "step": 26416 }, { "epoch": 0.8115073879519553, "grad_norm": 0.36962664127349854, "learning_rate": 1.294568247021226e-05, "loss": 0.5193, "step": 26417 }, { "epoch": 0.8115381070869044, "grad_norm": 0.36737972497940063, "learning_rate": 1.2945220632236279e-05, "loss": 0.4888, "step": 26418 }, { "epoch": 0.8115688262218536, "grad_norm": 0.3425534665584564, "learning_rate": 1.2944758787381526e-05, "loss": 0.6118, "step": 26419 }, { "epoch": 0.8115995453568028, "grad_norm": 0.31854334473609924, "learning_rate": 1.2944296935649085e-05, "loss": 0.552, "step": 26420 }, { "epoch": 0.8116302644917519, "grad_norm": 0.38726574182510376, "learning_rate": 1.2943835077040032e-05, "loss": 0.5528, "step": 26421 }, { "epoch": 0.811660983626701, "grad_norm": 0.3809078335762024, "learning_rate": 1.294337321155545e-05, "loss": 0.5427, "step": 26422 }, { "epoch": 0.8116917027616503, "grad_norm": 0.3932734727859497, "learning_rate": 1.2942911339196408e-05, "loss": 0.5409, "step": 26423 }, { "epoch": 0.8117224218965994, "grad_norm": 0.37386205792427063, "learning_rate": 1.2942449459963997e-05, "loss": 0.5135, "step": 26424 }, { "epoch": 0.8117531410315485, "grad_norm": 0.3925045430660248, "learning_rate": 1.2941987573859287e-05, "loss": 0.5477, "step": 26425 }, { "epoch": 0.8117838601664977, "grad_norm": 0.34404581785202026, "learning_rate": 1.294152568088336e-05, "loss": 0.5514, "step": 26426 }, { "epoch": 0.8118145793014468, "grad_norm": 0.35686731338500977, "learning_rate": 1.2941063781037296e-05, "loss": 0.5341, "step": 26427 }, { "epoch": 0.8118452984363961, "grad_norm": 0.3774053454399109, "learning_rate": 1.2940601874322167e-05, "loss": 0.4639, "step": 26428 }, { "epoch": 0.8118760175713452, "grad_norm": 0.3856010138988495, "learning_rate": 1.2940139960739062e-05, "loss": 0.6271, "step": 26429 }, { "epoch": 0.8119067367062943, "grad_norm": 0.40336117148399353, "learning_rate": 1.2939678040289052e-05, "loss": 0.6133, "step": 26430 }, { "epoch": 0.8119374558412435, "grad_norm": 0.4569160044193268, "learning_rate": 1.2939216112973221e-05, "loss": 0.5108, "step": 26431 }, { "epoch": 0.8119681749761927, "grad_norm": 0.3374609351158142, "learning_rate": 1.2938754178792643e-05, "loss": 0.4877, "step": 26432 }, { "epoch": 0.8119988941111418, "grad_norm": 0.3516600728034973, "learning_rate": 1.2938292237748402e-05, "loss": 0.5516, "step": 26433 }, { "epoch": 0.812029613246091, "grad_norm": 0.3998529016971588, "learning_rate": 1.2937830289841575e-05, "loss": 0.4923, "step": 26434 }, { "epoch": 0.8120603323810401, "grad_norm": 0.35559195280075073, "learning_rate": 1.2937368335073237e-05, "loss": 0.5183, "step": 26435 }, { "epoch": 0.8120910515159893, "grad_norm": 0.5950182676315308, "learning_rate": 1.2936906373444473e-05, "loss": 0.6565, "step": 26436 }, { "epoch": 0.8121217706509385, "grad_norm": 0.38230201601982117, "learning_rate": 1.2936444404956356e-05, "loss": 0.5523, "step": 26437 }, { "epoch": 0.8121524897858876, "grad_norm": 0.3262674808502197, "learning_rate": 1.293598242960997e-05, "loss": 0.5279, "step": 26438 }, { "epoch": 0.8121832089208368, "grad_norm": 0.39603301882743835, "learning_rate": 1.2935520447406389e-05, "loss": 0.5503, "step": 26439 }, { "epoch": 0.812213928055786, "grad_norm": 0.42001083493232727, "learning_rate": 1.2935058458346702e-05, "loss": 0.6036, "step": 26440 }, { "epoch": 0.8122446471907351, "grad_norm": 0.40994277596473694, "learning_rate": 1.2934596462431971e-05, "loss": 0.622, "step": 26441 }, { "epoch": 0.8122753663256843, "grad_norm": 0.36905398964881897, "learning_rate": 1.2934134459663295e-05, "loss": 0.5778, "step": 26442 }, { "epoch": 0.8123060854606334, "grad_norm": 0.34382495284080505, "learning_rate": 1.2933672450041735e-05, "loss": 0.5611, "step": 26443 }, { "epoch": 0.8123368045955826, "grad_norm": 0.3495948910713196, "learning_rate": 1.2933210433568384e-05, "loss": 0.5097, "step": 26444 }, { "epoch": 0.8123675237305318, "grad_norm": 0.3726136088371277, "learning_rate": 1.2932748410244311e-05, "loss": 0.5751, "step": 26445 }, { "epoch": 0.8123982428654809, "grad_norm": 0.37144482135772705, "learning_rate": 1.29322863800706e-05, "loss": 0.5462, "step": 26446 }, { "epoch": 0.81242896200043, "grad_norm": 0.3718896210193634, "learning_rate": 1.2931824343048331e-05, "loss": 0.6286, "step": 26447 }, { "epoch": 0.8124596811353793, "grad_norm": 0.38879191875457764, "learning_rate": 1.293136229917858e-05, "loss": 0.5112, "step": 26448 }, { "epoch": 0.8124904002703284, "grad_norm": 0.37452811002731323, "learning_rate": 1.293090024846243e-05, "loss": 0.5208, "step": 26449 }, { "epoch": 0.8125211194052775, "grad_norm": 0.3608669340610504, "learning_rate": 1.2930438190900953e-05, "loss": 0.5454, "step": 26450 }, { "epoch": 0.8125518385402267, "grad_norm": 0.38066473603248596, "learning_rate": 1.2929976126495239e-05, "loss": 0.6008, "step": 26451 }, { "epoch": 0.8125825576751758, "grad_norm": 0.3458561301231384, "learning_rate": 1.2929514055246359e-05, "loss": 0.5672, "step": 26452 }, { "epoch": 0.8126132768101251, "grad_norm": 0.3984225392341614, "learning_rate": 1.2929051977155394e-05, "loss": 0.5391, "step": 26453 }, { "epoch": 0.8126439959450742, "grad_norm": 0.3725435733795166, "learning_rate": 1.2928589892223422e-05, "loss": 0.5661, "step": 26454 }, { "epoch": 0.8126747150800233, "grad_norm": 0.36203479766845703, "learning_rate": 1.2928127800451524e-05, "loss": 0.5956, "step": 26455 }, { "epoch": 0.8127054342149725, "grad_norm": 0.4190383553504944, "learning_rate": 1.2927665701840781e-05, "loss": 0.5763, "step": 26456 }, { "epoch": 0.8127361533499217, "grad_norm": 0.37946683168411255, "learning_rate": 1.292720359639227e-05, "loss": 0.489, "step": 26457 }, { "epoch": 0.8127668724848708, "grad_norm": 0.3249615728855133, "learning_rate": 1.292674148410707e-05, "loss": 0.4806, "step": 26458 }, { "epoch": 0.81279759161982, "grad_norm": 0.38206392526626587, "learning_rate": 1.2926279364986261e-05, "loss": 0.5961, "step": 26459 }, { "epoch": 0.8128283107547691, "grad_norm": 0.3396950960159302, "learning_rate": 1.2925817239030923e-05, "loss": 0.5884, "step": 26460 }, { "epoch": 0.8128590298897183, "grad_norm": 0.43572553992271423, "learning_rate": 1.2925355106242132e-05, "loss": 0.4921, "step": 26461 }, { "epoch": 0.8128897490246675, "grad_norm": 0.3288643956184387, "learning_rate": 1.2924892966620972e-05, "loss": 0.5422, "step": 26462 }, { "epoch": 0.8129204681596166, "grad_norm": 0.3555660545825958, "learning_rate": 1.2924430820168516e-05, "loss": 0.4688, "step": 26463 }, { "epoch": 0.8129511872945658, "grad_norm": 0.35203567147254944, "learning_rate": 1.292396866688585e-05, "loss": 0.5118, "step": 26464 }, { "epoch": 0.812981906429515, "grad_norm": 0.3439578711986542, "learning_rate": 1.2923506506774055e-05, "loss": 0.525, "step": 26465 }, { "epoch": 0.8130126255644641, "grad_norm": 0.33699071407318115, "learning_rate": 1.2923044339834201e-05, "loss": 0.5196, "step": 26466 }, { "epoch": 0.8130433446994133, "grad_norm": 0.3710973560810089, "learning_rate": 1.2922582166067377e-05, "loss": 0.6124, "step": 26467 }, { "epoch": 0.8130740638343624, "grad_norm": 0.39785996079444885, "learning_rate": 1.2922119985474656e-05, "loss": 0.5164, "step": 26468 }, { "epoch": 0.8131047829693115, "grad_norm": 0.4112301468849182, "learning_rate": 1.292165779805712e-05, "loss": 0.5954, "step": 26469 }, { "epoch": 0.8131355021042608, "grad_norm": 0.35945817828178406, "learning_rate": 1.292119560381585e-05, "loss": 0.5036, "step": 26470 }, { "epoch": 0.8131662212392099, "grad_norm": 0.33008959889411926, "learning_rate": 1.2920733402751922e-05, "loss": 0.5604, "step": 26471 }, { "epoch": 0.813196940374159, "grad_norm": 0.505324125289917, "learning_rate": 1.2920271194866416e-05, "loss": 0.5596, "step": 26472 }, { "epoch": 0.8132276595091082, "grad_norm": 0.36151811480522156, "learning_rate": 1.2919808980160414e-05, "loss": 0.4865, "step": 26473 }, { "epoch": 0.8132583786440574, "grad_norm": 0.3582923710346222, "learning_rate": 1.2919346758634994e-05, "loss": 0.5736, "step": 26474 }, { "epoch": 0.8132890977790066, "grad_norm": 0.33583956956863403, "learning_rate": 1.2918884530291235e-05, "loss": 0.499, "step": 26475 }, { "epoch": 0.8133198169139557, "grad_norm": 0.3300650417804718, "learning_rate": 1.291842229513022e-05, "loss": 0.5082, "step": 26476 }, { "epoch": 0.8133505360489048, "grad_norm": 0.3728289008140564, "learning_rate": 1.2917960053153023e-05, "loss": 0.5404, "step": 26477 }, { "epoch": 0.8133812551838541, "grad_norm": 0.43847864866256714, "learning_rate": 1.2917497804360728e-05, "loss": 0.4874, "step": 26478 }, { "epoch": 0.8134119743188032, "grad_norm": 0.36215412616729736, "learning_rate": 1.2917035548754412e-05, "loss": 0.485, "step": 26479 }, { "epoch": 0.8134426934537523, "grad_norm": 0.3328584134578705, "learning_rate": 1.2916573286335157e-05, "loss": 0.5356, "step": 26480 }, { "epoch": 0.8134734125887015, "grad_norm": 0.34985485672950745, "learning_rate": 1.2916111017104043e-05, "loss": 0.5691, "step": 26481 }, { "epoch": 0.8135041317236507, "grad_norm": 0.3631986975669861, "learning_rate": 1.2915648741062141e-05, "loss": 0.6083, "step": 26482 }, { "epoch": 0.8135348508585998, "grad_norm": 0.3691447973251343, "learning_rate": 1.2915186458210545e-05, "loss": 0.5315, "step": 26483 }, { "epoch": 0.813565569993549, "grad_norm": 0.3760109543800354, "learning_rate": 1.2914724168550323e-05, "loss": 0.6082, "step": 26484 }, { "epoch": 0.8135962891284981, "grad_norm": 0.36880791187286377, "learning_rate": 1.291426187208256e-05, "loss": 0.5094, "step": 26485 }, { "epoch": 0.8136270082634472, "grad_norm": 0.37761619687080383, "learning_rate": 1.2913799568808339e-05, "loss": 0.4661, "step": 26486 }, { "epoch": 0.8136577273983965, "grad_norm": 0.45436012744903564, "learning_rate": 1.2913337258728731e-05, "loss": 0.4583, "step": 26487 }, { "epoch": 0.8136884465333456, "grad_norm": 0.39030030369758606, "learning_rate": 1.2912874941844825e-05, "loss": 0.5959, "step": 26488 }, { "epoch": 0.8137191656682948, "grad_norm": 0.3501146733760834, "learning_rate": 1.291241261815769e-05, "loss": 0.5018, "step": 26489 }, { "epoch": 0.813749884803244, "grad_norm": 0.39768216013908386, "learning_rate": 1.2911950287668416e-05, "loss": 0.5639, "step": 26490 }, { "epoch": 0.8137806039381931, "grad_norm": 0.42301541566848755, "learning_rate": 1.2911487950378076e-05, "loss": 0.6498, "step": 26491 }, { "epoch": 0.8138113230731423, "grad_norm": 0.49064961075782776, "learning_rate": 1.2911025606287756e-05, "loss": 0.5881, "step": 26492 }, { "epoch": 0.8138420422080914, "grad_norm": 0.3504241108894348, "learning_rate": 1.2910563255398531e-05, "loss": 0.5785, "step": 26493 }, { "epoch": 0.8138727613430405, "grad_norm": 0.3845399022102356, "learning_rate": 1.2910100897711481e-05, "loss": 0.4906, "step": 26494 }, { "epoch": 0.8139034804779898, "grad_norm": 0.366380900144577, "learning_rate": 1.290963853322769e-05, "loss": 0.5909, "step": 26495 }, { "epoch": 0.8139341996129389, "grad_norm": 0.3511548638343811, "learning_rate": 1.2909176161948232e-05, "loss": 0.6026, "step": 26496 }, { "epoch": 0.813964918747888, "grad_norm": 0.3456345200538635, "learning_rate": 1.2908713783874194e-05, "loss": 0.5059, "step": 26497 }, { "epoch": 0.8139956378828372, "grad_norm": 0.3841927945613861, "learning_rate": 1.2908251399006646e-05, "loss": 0.5836, "step": 26498 }, { "epoch": 0.8140263570177864, "grad_norm": 0.3587930202484131, "learning_rate": 1.2907789007346678e-05, "loss": 0.6113, "step": 26499 }, { "epoch": 0.8140570761527356, "grad_norm": 0.39908716082572937, "learning_rate": 1.290732660889536e-05, "loss": 0.6484, "step": 26500 }, { "epoch": 0.8140877952876847, "grad_norm": 0.3345057964324951, "learning_rate": 1.2906864203653783e-05, "loss": 0.5312, "step": 26501 }, { "epoch": 0.8141185144226338, "grad_norm": 0.36177539825439453, "learning_rate": 1.290640179162302e-05, "loss": 0.5538, "step": 26502 }, { "epoch": 0.8141492335575831, "grad_norm": 0.3504860997200012, "learning_rate": 1.2905939372804155e-05, "loss": 0.4696, "step": 26503 }, { "epoch": 0.8141799526925322, "grad_norm": 0.38284510374069214, "learning_rate": 1.2905476947198264e-05, "loss": 0.6032, "step": 26504 }, { "epoch": 0.8142106718274813, "grad_norm": 0.36135542392730713, "learning_rate": 1.2905014514806426e-05, "loss": 0.5325, "step": 26505 }, { "epoch": 0.8142413909624305, "grad_norm": 0.43138477206230164, "learning_rate": 1.2904552075629727e-05, "loss": 0.5441, "step": 26506 }, { "epoch": 0.8142721100973797, "grad_norm": 0.3645302355289459, "learning_rate": 1.2904089629669243e-05, "loss": 0.6413, "step": 26507 }, { "epoch": 0.8143028292323288, "grad_norm": 0.36785924434661865, "learning_rate": 1.2903627176926057e-05, "loss": 0.5964, "step": 26508 }, { "epoch": 0.814333548367278, "grad_norm": 0.3558421730995178, "learning_rate": 1.2903164717401245e-05, "loss": 0.5124, "step": 26509 }, { "epoch": 0.8143642675022271, "grad_norm": 0.3688459098339081, "learning_rate": 1.2902702251095888e-05, "loss": 0.563, "step": 26510 }, { "epoch": 0.8143949866371762, "grad_norm": 0.4209260642528534, "learning_rate": 1.2902239778011068e-05, "loss": 0.5608, "step": 26511 }, { "epoch": 0.8144257057721255, "grad_norm": 0.3600713312625885, "learning_rate": 1.2901777298147863e-05, "loss": 0.5274, "step": 26512 }, { "epoch": 0.8144564249070746, "grad_norm": 0.38266992568969727, "learning_rate": 1.2901314811507355e-05, "loss": 0.6188, "step": 26513 }, { "epoch": 0.8144871440420238, "grad_norm": 0.42566370964050293, "learning_rate": 1.2900852318090627e-05, "loss": 0.5332, "step": 26514 }, { "epoch": 0.814517863176973, "grad_norm": 0.33958351612091064, "learning_rate": 1.2900389817898754e-05, "loss": 0.5488, "step": 26515 }, { "epoch": 0.8145485823119221, "grad_norm": 0.4252163767814636, "learning_rate": 1.2899927310932815e-05, "loss": 0.574, "step": 26516 }, { "epoch": 0.8145793014468713, "grad_norm": 0.40581971406936646, "learning_rate": 1.2899464797193897e-05, "loss": 0.4814, "step": 26517 }, { "epoch": 0.8146100205818204, "grad_norm": 0.4008352756500244, "learning_rate": 1.2899002276683072e-05, "loss": 0.5334, "step": 26518 }, { "epoch": 0.8146407397167695, "grad_norm": 0.33051058650016785, "learning_rate": 1.2898539749401428e-05, "loss": 0.5349, "step": 26519 }, { "epoch": 0.8146714588517188, "grad_norm": 0.3394036591053009, "learning_rate": 1.2898077215350044e-05, "loss": 0.6085, "step": 26520 }, { "epoch": 0.8147021779866679, "grad_norm": 0.3550262749195099, "learning_rate": 1.2897614674529995e-05, "loss": 0.5865, "step": 26521 }, { "epoch": 0.814732897121617, "grad_norm": 0.3818245232105255, "learning_rate": 1.2897152126942367e-05, "loss": 0.5331, "step": 26522 }, { "epoch": 0.8147636162565662, "grad_norm": 0.3538561165332794, "learning_rate": 1.2896689572588234e-05, "loss": 0.5566, "step": 26523 }, { "epoch": 0.8147943353915154, "grad_norm": 0.37494024634361267, "learning_rate": 1.2896227011468683e-05, "loss": 0.6082, "step": 26524 }, { "epoch": 0.8148250545264646, "grad_norm": 0.3490696847438812, "learning_rate": 1.289576444358479e-05, "loss": 0.528, "step": 26525 }, { "epoch": 0.8148557736614137, "grad_norm": 0.39498940110206604, "learning_rate": 1.289530186893764e-05, "loss": 0.5165, "step": 26526 }, { "epoch": 0.8148864927963628, "grad_norm": 0.36816808581352234, "learning_rate": 1.2894839287528308e-05, "loss": 0.5535, "step": 26527 }, { "epoch": 0.8149172119313121, "grad_norm": 0.3549196422100067, "learning_rate": 1.2894376699357878e-05, "loss": 0.5556, "step": 26528 }, { "epoch": 0.8149479310662612, "grad_norm": 0.3507249355316162, "learning_rate": 1.2893914104427428e-05, "loss": 0.5948, "step": 26529 }, { "epoch": 0.8149786502012103, "grad_norm": 0.40923547744750977, "learning_rate": 1.289345150273804e-05, "loss": 0.5729, "step": 26530 }, { "epoch": 0.8150093693361595, "grad_norm": 0.41361674666404724, "learning_rate": 1.2892988894290795e-05, "loss": 0.6025, "step": 26531 }, { "epoch": 0.8150400884711086, "grad_norm": 0.364076167345047, "learning_rate": 1.2892526279086768e-05, "loss": 0.5275, "step": 26532 }, { "epoch": 0.8150708076060578, "grad_norm": 0.3750988841056824, "learning_rate": 1.2892063657127048e-05, "loss": 0.6123, "step": 26533 }, { "epoch": 0.815101526741007, "grad_norm": 0.40364697575569153, "learning_rate": 1.2891601028412709e-05, "loss": 0.5955, "step": 26534 }, { "epoch": 0.8151322458759561, "grad_norm": 0.3940364122390747, "learning_rate": 1.2891138392944835e-05, "loss": 0.5669, "step": 26535 }, { "epoch": 0.8151629650109052, "grad_norm": 0.4586775302886963, "learning_rate": 1.2890675750724503e-05, "loss": 0.5108, "step": 26536 }, { "epoch": 0.8151936841458545, "grad_norm": 0.3610551953315735, "learning_rate": 1.28902131017528e-05, "loss": 0.5835, "step": 26537 }, { "epoch": 0.8152244032808036, "grad_norm": 0.3765886127948761, "learning_rate": 1.2889750446030802e-05, "loss": 0.6289, "step": 26538 }, { "epoch": 0.8152551224157528, "grad_norm": 0.36427947878837585, "learning_rate": 1.2889287783559588e-05, "loss": 0.5642, "step": 26539 }, { "epoch": 0.8152858415507019, "grad_norm": 0.3973386287689209, "learning_rate": 1.288882511434024e-05, "loss": 0.6083, "step": 26540 }, { "epoch": 0.8153165606856511, "grad_norm": 0.36149242520332336, "learning_rate": 1.2888362438373841e-05, "loss": 0.626, "step": 26541 }, { "epoch": 0.8153472798206003, "grad_norm": 0.37453633546829224, "learning_rate": 1.288789975566147e-05, "loss": 0.5114, "step": 26542 }, { "epoch": 0.8153779989555494, "grad_norm": 0.4469817280769348, "learning_rate": 1.2887437066204204e-05, "loss": 0.6581, "step": 26543 }, { "epoch": 0.8154087180904985, "grad_norm": 0.3579249680042267, "learning_rate": 1.2886974370003132e-05, "loss": 0.5587, "step": 26544 }, { "epoch": 0.8154394372254478, "grad_norm": 0.3796764314174652, "learning_rate": 1.2886511667059325e-05, "loss": 0.5354, "step": 26545 }, { "epoch": 0.8154701563603969, "grad_norm": 0.43063852190971375, "learning_rate": 1.2886048957373874e-05, "loss": 0.5293, "step": 26546 }, { "epoch": 0.815500875495346, "grad_norm": 0.6034186482429504, "learning_rate": 1.288558624094785e-05, "loss": 0.5988, "step": 26547 }, { "epoch": 0.8155315946302952, "grad_norm": 0.3926079273223877, "learning_rate": 1.288512351778234e-05, "loss": 0.5524, "step": 26548 }, { "epoch": 0.8155623137652444, "grad_norm": 0.35242095589637756, "learning_rate": 1.2884660787878422e-05, "loss": 0.6642, "step": 26549 }, { "epoch": 0.8155930329001936, "grad_norm": 0.3455394506454468, "learning_rate": 1.2884198051237173e-05, "loss": 0.5254, "step": 26550 }, { "epoch": 0.8156237520351427, "grad_norm": 0.39459022879600525, "learning_rate": 1.2883735307859684e-05, "loss": 0.6228, "step": 26551 }, { "epoch": 0.8156544711700918, "grad_norm": 0.3630228340625763, "learning_rate": 1.2883272557747026e-05, "loss": 0.4882, "step": 26552 }, { "epoch": 0.815685190305041, "grad_norm": 0.3363417983055115, "learning_rate": 1.2882809800900286e-05, "loss": 0.5372, "step": 26553 }, { "epoch": 0.8157159094399902, "grad_norm": 0.3423668444156647, "learning_rate": 1.2882347037320542e-05, "loss": 0.5151, "step": 26554 }, { "epoch": 0.8157466285749393, "grad_norm": 0.38148602843284607, "learning_rate": 1.2881884267008874e-05, "loss": 0.5726, "step": 26555 }, { "epoch": 0.8157773477098885, "grad_norm": 0.3850660026073456, "learning_rate": 1.2881421489966363e-05, "loss": 0.5145, "step": 26556 }, { "epoch": 0.8158080668448376, "grad_norm": 0.34062284231185913, "learning_rate": 1.2880958706194093e-05, "loss": 0.5523, "step": 26557 }, { "epoch": 0.8158387859797868, "grad_norm": 0.3429887890815735, "learning_rate": 1.2880495915693143e-05, "loss": 0.626, "step": 26558 }, { "epoch": 0.815869505114736, "grad_norm": 0.4187089204788208, "learning_rate": 1.2880033118464592e-05, "loss": 0.5098, "step": 26559 }, { "epoch": 0.8159002242496851, "grad_norm": 0.37302738428115845, "learning_rate": 1.2879570314509524e-05, "loss": 0.5715, "step": 26560 }, { "epoch": 0.8159309433846343, "grad_norm": 0.49190664291381836, "learning_rate": 1.2879107503829016e-05, "loss": 0.5842, "step": 26561 }, { "epoch": 0.8159616625195835, "grad_norm": 0.3454671800136566, "learning_rate": 1.2878644686424155e-05, "loss": 0.57, "step": 26562 }, { "epoch": 0.8159923816545326, "grad_norm": 0.35619622468948364, "learning_rate": 1.2878181862296015e-05, "loss": 0.5482, "step": 26563 }, { "epoch": 0.8160231007894818, "grad_norm": 0.3326692283153534, "learning_rate": 1.2877719031445684e-05, "loss": 0.5441, "step": 26564 }, { "epoch": 0.8160538199244309, "grad_norm": 0.42644599080085754, "learning_rate": 1.2877256193874235e-05, "loss": 0.6054, "step": 26565 }, { "epoch": 0.81608453905938, "grad_norm": 0.36348623037338257, "learning_rate": 1.2876793349582757e-05, "loss": 0.5061, "step": 26566 }, { "epoch": 0.8161152581943293, "grad_norm": 0.3471420109272003, "learning_rate": 1.2876330498572325e-05, "loss": 0.5818, "step": 26567 }, { "epoch": 0.8161459773292784, "grad_norm": 0.3448346257209778, "learning_rate": 1.2875867640844023e-05, "loss": 0.537, "step": 26568 }, { "epoch": 0.8161766964642275, "grad_norm": 0.4122471809387207, "learning_rate": 1.287540477639893e-05, "loss": 0.55, "step": 26569 }, { "epoch": 0.8162074155991768, "grad_norm": 0.3715798556804657, "learning_rate": 1.2874941905238129e-05, "loss": 0.5283, "step": 26570 }, { "epoch": 0.8162381347341259, "grad_norm": 0.3450283706188202, "learning_rate": 1.28744790273627e-05, "loss": 0.6106, "step": 26571 }, { "epoch": 0.816268853869075, "grad_norm": 0.346669465303421, "learning_rate": 1.2874016142773725e-05, "loss": 0.5686, "step": 26572 }, { "epoch": 0.8162995730040242, "grad_norm": 0.38140857219696045, "learning_rate": 1.2873553251472284e-05, "loss": 0.5552, "step": 26573 }, { "epoch": 0.8163302921389733, "grad_norm": 0.4900709390640259, "learning_rate": 1.2873090353459459e-05, "loss": 0.5673, "step": 26574 }, { "epoch": 0.8163610112739226, "grad_norm": 0.36131322383880615, "learning_rate": 1.287262744873633e-05, "loss": 0.5903, "step": 26575 }, { "epoch": 0.8163917304088717, "grad_norm": 0.3640042543411255, "learning_rate": 1.2872164537303981e-05, "loss": 0.559, "step": 26576 }, { "epoch": 0.8164224495438208, "grad_norm": 0.36333274841308594, "learning_rate": 1.287170161916349e-05, "loss": 0.5635, "step": 26577 }, { "epoch": 0.81645316867877, "grad_norm": 0.38221627473831177, "learning_rate": 1.2871238694315941e-05, "loss": 0.5525, "step": 26578 }, { "epoch": 0.8164838878137192, "grad_norm": 0.35699403285980225, "learning_rate": 1.287077576276241e-05, "loss": 0.6229, "step": 26579 }, { "epoch": 0.8165146069486683, "grad_norm": 0.3956488370895386, "learning_rate": 1.2870312824503986e-05, "loss": 0.5577, "step": 26580 }, { "epoch": 0.8165453260836175, "grad_norm": 0.3523346781730652, "learning_rate": 1.2869849879541742e-05, "loss": 0.5678, "step": 26581 }, { "epoch": 0.8165760452185666, "grad_norm": 0.3940275311470032, "learning_rate": 1.2869386927876766e-05, "loss": 0.504, "step": 26582 }, { "epoch": 0.8166067643535158, "grad_norm": 0.4273434579372406, "learning_rate": 1.2868923969510136e-05, "loss": 0.4669, "step": 26583 }, { "epoch": 0.816637483488465, "grad_norm": 0.33706095814704895, "learning_rate": 1.2868461004442932e-05, "loss": 0.5672, "step": 26584 }, { "epoch": 0.8166682026234141, "grad_norm": 0.3759264349937439, "learning_rate": 1.2867998032676238e-05, "loss": 0.5709, "step": 26585 }, { "epoch": 0.8166989217583633, "grad_norm": 0.3641316890716553, "learning_rate": 1.2867535054211136e-05, "loss": 0.5247, "step": 26586 }, { "epoch": 0.8167296408933125, "grad_norm": 0.37306806445121765, "learning_rate": 1.2867072069048702e-05, "loss": 0.5101, "step": 26587 }, { "epoch": 0.8167603600282616, "grad_norm": 0.404131680727005, "learning_rate": 1.2866609077190022e-05, "loss": 0.6094, "step": 26588 }, { "epoch": 0.8167910791632108, "grad_norm": 0.39831849932670593, "learning_rate": 1.2866146078636179e-05, "loss": 0.4629, "step": 26589 }, { "epoch": 0.8168217982981599, "grad_norm": 0.3604658842086792, "learning_rate": 1.2865683073388249e-05, "loss": 0.5245, "step": 26590 }, { "epoch": 0.816852517433109, "grad_norm": 0.35793808102607727, "learning_rate": 1.2865220061447317e-05, "loss": 0.4985, "step": 26591 }, { "epoch": 0.8168832365680583, "grad_norm": 0.38337764143943787, "learning_rate": 1.2864757042814464e-05, "loss": 0.4935, "step": 26592 }, { "epoch": 0.8169139557030074, "grad_norm": 0.3628069758415222, "learning_rate": 1.2864294017490768e-05, "loss": 0.5581, "step": 26593 }, { "epoch": 0.8169446748379565, "grad_norm": 0.356502503156662, "learning_rate": 1.2863830985477317e-05, "loss": 0.5975, "step": 26594 }, { "epoch": 0.8169753939729058, "grad_norm": 0.37766793370246887, "learning_rate": 1.2863367946775184e-05, "loss": 0.5546, "step": 26595 }, { "epoch": 0.8170061131078549, "grad_norm": 0.38351938128471375, "learning_rate": 1.2862904901385462e-05, "loss": 0.6379, "step": 26596 }, { "epoch": 0.817036832242804, "grad_norm": 0.3637135624885559, "learning_rate": 1.2862441849309222e-05, "loss": 0.544, "step": 26597 }, { "epoch": 0.8170675513777532, "grad_norm": 0.35460320115089417, "learning_rate": 1.2861978790547549e-05, "loss": 0.6215, "step": 26598 }, { "epoch": 0.8170982705127023, "grad_norm": 0.3706585466861725, "learning_rate": 1.2861515725101523e-05, "loss": 0.5235, "step": 26599 }, { "epoch": 0.8171289896476516, "grad_norm": 0.3398427963256836, "learning_rate": 1.286105265297223e-05, "loss": 0.4705, "step": 26600 }, { "epoch": 0.8171597087826007, "grad_norm": 0.406465619802475, "learning_rate": 1.286058957416075e-05, "loss": 0.6001, "step": 26601 }, { "epoch": 0.8171904279175498, "grad_norm": 0.40822702646255493, "learning_rate": 1.286012648866816e-05, "loss": 0.5629, "step": 26602 }, { "epoch": 0.817221147052499, "grad_norm": 0.38090789318084717, "learning_rate": 1.2859663396495545e-05, "loss": 0.5019, "step": 26603 }, { "epoch": 0.8172518661874482, "grad_norm": 0.3451652228832245, "learning_rate": 1.2859200297643988e-05, "loss": 0.5171, "step": 26604 }, { "epoch": 0.8172825853223973, "grad_norm": 0.39935728907585144, "learning_rate": 1.2858737192114568e-05, "loss": 0.5459, "step": 26605 }, { "epoch": 0.8173133044573465, "grad_norm": 0.47929126024246216, "learning_rate": 1.2858274079908366e-05, "loss": 0.5064, "step": 26606 }, { "epoch": 0.8173440235922956, "grad_norm": 0.3528982996940613, "learning_rate": 1.2857810961026468e-05, "loss": 0.5763, "step": 26607 }, { "epoch": 0.8173747427272448, "grad_norm": 0.42673200368881226, "learning_rate": 1.2857347835469953e-05, "loss": 0.6216, "step": 26608 }, { "epoch": 0.817405461862194, "grad_norm": 0.36647799611091614, "learning_rate": 1.28568847032399e-05, "loss": 0.6114, "step": 26609 }, { "epoch": 0.8174361809971431, "grad_norm": 0.3945540189743042, "learning_rate": 1.2856421564337395e-05, "loss": 0.5686, "step": 26610 }, { "epoch": 0.8174669001320923, "grad_norm": 0.35531947016716003, "learning_rate": 1.2855958418763516e-05, "loss": 0.6125, "step": 26611 }, { "epoch": 0.8174976192670415, "grad_norm": 0.33505356311798096, "learning_rate": 1.2855495266519351e-05, "loss": 0.5816, "step": 26612 }, { "epoch": 0.8175283384019906, "grad_norm": 0.378243625164032, "learning_rate": 1.2855032107605974e-05, "loss": 0.548, "step": 26613 }, { "epoch": 0.8175590575369398, "grad_norm": 0.3609682023525238, "learning_rate": 1.2854568942024472e-05, "loss": 0.5256, "step": 26614 }, { "epoch": 0.8175897766718889, "grad_norm": 0.36049309372901917, "learning_rate": 1.2854105769775922e-05, "loss": 0.5476, "step": 26615 }, { "epoch": 0.817620495806838, "grad_norm": 0.3632739186286926, "learning_rate": 1.2853642590861412e-05, "loss": 0.6111, "step": 26616 }, { "epoch": 0.8176512149417873, "grad_norm": 0.35682809352874756, "learning_rate": 1.285317940528202e-05, "loss": 0.5605, "step": 26617 }, { "epoch": 0.8176819340767364, "grad_norm": 0.3773444592952728, "learning_rate": 1.2852716213038827e-05, "loss": 0.5063, "step": 26618 }, { "epoch": 0.8177126532116855, "grad_norm": 0.3831283152103424, "learning_rate": 1.2852253014132918e-05, "loss": 0.4977, "step": 26619 }, { "epoch": 0.8177433723466347, "grad_norm": 0.372177392244339, "learning_rate": 1.2851789808565369e-05, "loss": 0.6047, "step": 26620 }, { "epoch": 0.8177740914815839, "grad_norm": 0.4862440526485443, "learning_rate": 1.2851326596337268e-05, "loss": 0.5362, "step": 26621 }, { "epoch": 0.817804810616533, "grad_norm": 0.3269592523574829, "learning_rate": 1.2850863377449695e-05, "loss": 0.468, "step": 26622 }, { "epoch": 0.8178355297514822, "grad_norm": 0.3521438539028168, "learning_rate": 1.2850400151903733e-05, "loss": 0.4921, "step": 26623 }, { "epoch": 0.8178662488864313, "grad_norm": 0.3778867721557617, "learning_rate": 1.2849936919700458e-05, "loss": 0.5696, "step": 26624 }, { "epoch": 0.8178969680213806, "grad_norm": 0.43788042664527893, "learning_rate": 1.284947368084096e-05, "loss": 0.6042, "step": 26625 }, { "epoch": 0.8179276871563297, "grad_norm": 0.37505558133125305, "learning_rate": 1.2849010435326314e-05, "loss": 0.5824, "step": 26626 }, { "epoch": 0.8179584062912788, "grad_norm": 0.3833819329738617, "learning_rate": 1.284854718315761e-05, "loss": 0.5573, "step": 26627 }, { "epoch": 0.817989125426228, "grad_norm": 0.6364284753799438, "learning_rate": 1.2848083924335918e-05, "loss": 0.5017, "step": 26628 }, { "epoch": 0.8180198445611772, "grad_norm": 0.3856721520423889, "learning_rate": 1.2847620658862332e-05, "loss": 0.4806, "step": 26629 }, { "epoch": 0.8180505636961263, "grad_norm": 0.3414755165576935, "learning_rate": 1.2847157386737929e-05, "loss": 0.6052, "step": 26630 }, { "epoch": 0.8180812828310755, "grad_norm": 0.34879007935523987, "learning_rate": 1.2846694107963789e-05, "loss": 0.5214, "step": 26631 }, { "epoch": 0.8181120019660246, "grad_norm": 0.3319236636161804, "learning_rate": 1.2846230822541e-05, "loss": 0.5467, "step": 26632 }, { "epoch": 0.8181427211009737, "grad_norm": 0.4025714099407196, "learning_rate": 1.2845767530470634e-05, "loss": 0.59, "step": 26633 }, { "epoch": 0.818173440235923, "grad_norm": 0.4107726216316223, "learning_rate": 1.2845304231753786e-05, "loss": 0.6358, "step": 26634 }, { "epoch": 0.8182041593708721, "grad_norm": 0.3983345031738281, "learning_rate": 1.2844840926391526e-05, "loss": 0.5521, "step": 26635 }, { "epoch": 0.8182348785058213, "grad_norm": 0.3498138189315796, "learning_rate": 1.2844377614384944e-05, "loss": 0.5294, "step": 26636 }, { "epoch": 0.8182655976407704, "grad_norm": 0.3937561810016632, "learning_rate": 1.284391429573512e-05, "loss": 0.5336, "step": 26637 }, { "epoch": 0.8182963167757196, "grad_norm": 0.34828683733940125, "learning_rate": 1.2843450970443132e-05, "loss": 0.6055, "step": 26638 }, { "epoch": 0.8183270359106688, "grad_norm": 0.4137212336063385, "learning_rate": 1.2842987638510071e-05, "loss": 0.4964, "step": 26639 }, { "epoch": 0.8183577550456179, "grad_norm": 0.37540730834007263, "learning_rate": 1.284252429993701e-05, "loss": 0.5969, "step": 26640 }, { "epoch": 0.818388474180567, "grad_norm": 0.378143310546875, "learning_rate": 1.2842060954725035e-05, "loss": 0.5933, "step": 26641 }, { "epoch": 0.8184191933155163, "grad_norm": 0.3457762598991394, "learning_rate": 1.2841597602875229e-05, "loss": 0.4969, "step": 26642 }, { "epoch": 0.8184499124504654, "grad_norm": 0.39129266142845154, "learning_rate": 1.2841134244388673e-05, "loss": 0.4965, "step": 26643 }, { "epoch": 0.8184806315854145, "grad_norm": 0.3862467408180237, "learning_rate": 1.284067087926645e-05, "loss": 0.5003, "step": 26644 }, { "epoch": 0.8185113507203637, "grad_norm": 0.35875988006591797, "learning_rate": 1.2840207507509643e-05, "loss": 0.5127, "step": 26645 }, { "epoch": 0.8185420698553129, "grad_norm": 0.3680393397808075, "learning_rate": 1.283974412911933e-05, "loss": 0.4839, "step": 26646 }, { "epoch": 0.818572788990262, "grad_norm": 0.3366445302963257, "learning_rate": 1.2839280744096599e-05, "loss": 0.508, "step": 26647 }, { "epoch": 0.8186035081252112, "grad_norm": 0.35272228717803955, "learning_rate": 1.2838817352442528e-05, "loss": 0.5442, "step": 26648 }, { "epoch": 0.8186342272601603, "grad_norm": 0.3683136999607086, "learning_rate": 1.28383539541582e-05, "loss": 0.505, "step": 26649 }, { "epoch": 0.8186649463951096, "grad_norm": 0.33038634061813354, "learning_rate": 1.28378905492447e-05, "loss": 0.5165, "step": 26650 }, { "epoch": 0.8186956655300587, "grad_norm": 0.37008967995643616, "learning_rate": 1.2837427137703107e-05, "loss": 0.546, "step": 26651 }, { "epoch": 0.8187263846650078, "grad_norm": 0.3393779695034027, "learning_rate": 1.2836963719534507e-05, "loss": 0.5892, "step": 26652 }, { "epoch": 0.818757103799957, "grad_norm": 0.3466517925262451, "learning_rate": 1.283650029473998e-05, "loss": 0.5378, "step": 26653 }, { "epoch": 0.8187878229349062, "grad_norm": 0.35285013914108276, "learning_rate": 1.2836036863320604e-05, "loss": 0.5607, "step": 26654 }, { "epoch": 0.8188185420698553, "grad_norm": 0.36361852288246155, "learning_rate": 1.2835573425277473e-05, "loss": 0.5139, "step": 26655 }, { "epoch": 0.8188492612048045, "grad_norm": 0.3348584771156311, "learning_rate": 1.2835109980611656e-05, "loss": 0.5229, "step": 26656 }, { "epoch": 0.8188799803397536, "grad_norm": 0.3546358644962311, "learning_rate": 1.2834646529324245e-05, "loss": 0.4909, "step": 26657 }, { "epoch": 0.8189106994747027, "grad_norm": 0.35301464796066284, "learning_rate": 1.2834183071416316e-05, "loss": 0.585, "step": 26658 }, { "epoch": 0.818941418609652, "grad_norm": 0.38176482915878296, "learning_rate": 1.2833719606888957e-05, "loss": 0.509, "step": 26659 }, { "epoch": 0.8189721377446011, "grad_norm": 0.37141263484954834, "learning_rate": 1.2833256135743246e-05, "loss": 0.6004, "step": 26660 }, { "epoch": 0.8190028568795503, "grad_norm": 0.34485679864883423, "learning_rate": 1.283279265798027e-05, "loss": 0.537, "step": 26661 }, { "epoch": 0.8190335760144994, "grad_norm": 2.714688539505005, "learning_rate": 1.2832329173601105e-05, "loss": 0.4728, "step": 26662 }, { "epoch": 0.8190642951494486, "grad_norm": 0.38392239809036255, "learning_rate": 1.2831865682606843e-05, "loss": 0.5673, "step": 26663 }, { "epoch": 0.8190950142843978, "grad_norm": 0.38120341300964355, "learning_rate": 1.2831402184998556e-05, "loss": 0.4893, "step": 26664 }, { "epoch": 0.8191257334193469, "grad_norm": 0.37049153447151184, "learning_rate": 1.2830938680777334e-05, "loss": 0.4659, "step": 26665 }, { "epoch": 0.819156452554296, "grad_norm": 0.3839811682701111, "learning_rate": 1.2830475169944256e-05, "loss": 0.5283, "step": 26666 }, { "epoch": 0.8191871716892453, "grad_norm": 0.40734946727752686, "learning_rate": 1.2830011652500405e-05, "loss": 0.5939, "step": 26667 }, { "epoch": 0.8192178908241944, "grad_norm": 0.35801512002944946, "learning_rate": 1.2829548128446867e-05, "loss": 0.5537, "step": 26668 }, { "epoch": 0.8192486099591435, "grad_norm": 0.3291199803352356, "learning_rate": 1.2829084597784717e-05, "loss": 0.5551, "step": 26669 }, { "epoch": 0.8192793290940927, "grad_norm": 0.3493121266365051, "learning_rate": 1.2828621060515048e-05, "loss": 0.5811, "step": 26670 }, { "epoch": 0.8193100482290419, "grad_norm": 0.339057594537735, "learning_rate": 1.2828157516638935e-05, "loss": 0.5661, "step": 26671 }, { "epoch": 0.8193407673639911, "grad_norm": 0.3333013951778412, "learning_rate": 1.2827693966157462e-05, "loss": 0.4752, "step": 26672 }, { "epoch": 0.8193714864989402, "grad_norm": 0.3755849003791809, "learning_rate": 1.2827230409071711e-05, "loss": 0.5246, "step": 26673 }, { "epoch": 0.8194022056338893, "grad_norm": 0.35570818185806274, "learning_rate": 1.2826766845382767e-05, "loss": 0.5083, "step": 26674 }, { "epoch": 0.8194329247688386, "grad_norm": 0.34806710481643677, "learning_rate": 1.2826303275091711e-05, "loss": 0.5784, "step": 26675 }, { "epoch": 0.8194636439037877, "grad_norm": 0.3820943832397461, "learning_rate": 1.2825839698199628e-05, "loss": 0.5229, "step": 26676 }, { "epoch": 0.8194943630387368, "grad_norm": 0.4037698209285736, "learning_rate": 1.2825376114707597e-05, "loss": 0.4678, "step": 26677 }, { "epoch": 0.819525082173686, "grad_norm": 0.3750264048576355, "learning_rate": 1.2824912524616704e-05, "loss": 0.5624, "step": 26678 }, { "epoch": 0.8195558013086351, "grad_norm": 0.3362840712070465, "learning_rate": 1.2824448927928031e-05, "loss": 0.443, "step": 26679 }, { "epoch": 0.8195865204435843, "grad_norm": 0.329715758562088, "learning_rate": 1.2823985324642658e-05, "loss": 0.4974, "step": 26680 }, { "epoch": 0.8196172395785335, "grad_norm": 0.3688911199569702, "learning_rate": 1.2823521714761672e-05, "loss": 0.5821, "step": 26681 }, { "epoch": 0.8196479587134826, "grad_norm": 0.34257587790489197, "learning_rate": 1.2823058098286153e-05, "loss": 0.5317, "step": 26682 }, { "epoch": 0.8196786778484317, "grad_norm": 0.3699883818626404, "learning_rate": 1.2822594475217183e-05, "loss": 0.518, "step": 26683 }, { "epoch": 0.819709396983381, "grad_norm": 0.37947767972946167, "learning_rate": 1.282213084555585e-05, "loss": 0.5747, "step": 26684 }, { "epoch": 0.8197401161183301, "grad_norm": 0.3992612957954407, "learning_rate": 1.2821667209303233e-05, "loss": 0.579, "step": 26685 }, { "epoch": 0.8197708352532793, "grad_norm": 0.38369929790496826, "learning_rate": 1.2821203566460414e-05, "loss": 0.5787, "step": 26686 }, { "epoch": 0.8198015543882284, "grad_norm": 0.40040698647499084, "learning_rate": 1.2820739917028474e-05, "loss": 0.515, "step": 26687 }, { "epoch": 0.8198322735231776, "grad_norm": 0.3496083915233612, "learning_rate": 1.2820276261008504e-05, "loss": 0.5482, "step": 26688 }, { "epoch": 0.8198629926581268, "grad_norm": 0.3906041085720062, "learning_rate": 1.2819812598401579e-05, "loss": 0.5083, "step": 26689 }, { "epoch": 0.8198937117930759, "grad_norm": 0.5139492154121399, "learning_rate": 1.2819348929208786e-05, "loss": 0.6354, "step": 26690 }, { "epoch": 0.819924430928025, "grad_norm": 0.36296072602272034, "learning_rate": 1.2818885253431208e-05, "loss": 0.5452, "step": 26691 }, { "epoch": 0.8199551500629743, "grad_norm": 0.4085529148578644, "learning_rate": 1.2818421571069923e-05, "loss": 0.4984, "step": 26692 }, { "epoch": 0.8199858691979234, "grad_norm": 0.3664094805717468, "learning_rate": 1.2817957882126023e-05, "loss": 0.5381, "step": 26693 }, { "epoch": 0.8200165883328725, "grad_norm": 0.3694048821926117, "learning_rate": 1.2817494186600578e-05, "loss": 0.5276, "step": 26694 }, { "epoch": 0.8200473074678217, "grad_norm": 0.35093119740486145, "learning_rate": 1.2817030484494684e-05, "loss": 0.5999, "step": 26695 }, { "epoch": 0.8200780266027708, "grad_norm": 0.437137246131897, "learning_rate": 1.2816566775809415e-05, "loss": 0.6814, "step": 26696 }, { "epoch": 0.8201087457377201, "grad_norm": 0.34527620673179626, "learning_rate": 1.2816103060545862e-05, "loss": 0.5225, "step": 26697 }, { "epoch": 0.8201394648726692, "grad_norm": 0.41106754541397095, "learning_rate": 1.2815639338705099e-05, "loss": 0.6017, "step": 26698 }, { "epoch": 0.8201701840076183, "grad_norm": 0.37421828508377075, "learning_rate": 1.2815175610288218e-05, "loss": 0.5909, "step": 26699 }, { "epoch": 0.8202009031425676, "grad_norm": 0.35073021054267883, "learning_rate": 1.2814711875296297e-05, "loss": 0.565, "step": 26700 }, { "epoch": 0.8202316222775167, "grad_norm": 0.3860192596912384, "learning_rate": 1.2814248133730415e-05, "loss": 0.5472, "step": 26701 }, { "epoch": 0.8202623414124658, "grad_norm": 0.4029444754123688, "learning_rate": 1.2813784385591665e-05, "loss": 0.6072, "step": 26702 }, { "epoch": 0.820293060547415, "grad_norm": 0.4001292288303375, "learning_rate": 1.2813320630881121e-05, "loss": 0.4705, "step": 26703 }, { "epoch": 0.8203237796823641, "grad_norm": 0.3801327049732208, "learning_rate": 1.2812856869599874e-05, "loss": 0.5505, "step": 26704 }, { "epoch": 0.8203544988173133, "grad_norm": 0.33018672466278076, "learning_rate": 1.2812393101749003e-05, "loss": 0.4962, "step": 26705 }, { "epoch": 0.8203852179522625, "grad_norm": 0.37238186597824097, "learning_rate": 1.281192932732959e-05, "loss": 0.5396, "step": 26706 }, { "epoch": 0.8204159370872116, "grad_norm": 0.3814857006072998, "learning_rate": 1.281146554634272e-05, "loss": 0.5173, "step": 26707 }, { "epoch": 0.8204466562221607, "grad_norm": 0.3562465310096741, "learning_rate": 1.2811001758789476e-05, "loss": 0.6026, "step": 26708 }, { "epoch": 0.82047737535711, "grad_norm": 0.3753871023654938, "learning_rate": 1.2810537964670945e-05, "loss": 0.5792, "step": 26709 }, { "epoch": 0.8205080944920591, "grad_norm": 0.3687065839767456, "learning_rate": 1.28100741639882e-05, "loss": 0.5179, "step": 26710 }, { "epoch": 0.8205388136270083, "grad_norm": 0.36655738949775696, "learning_rate": 1.2809610356742335e-05, "loss": 0.5491, "step": 26711 }, { "epoch": 0.8205695327619574, "grad_norm": 0.3550429046154022, "learning_rate": 1.2809146542934423e-05, "loss": 0.5543, "step": 26712 }, { "epoch": 0.8206002518969066, "grad_norm": 0.3327757716178894, "learning_rate": 1.280868272256556e-05, "loss": 0.5317, "step": 26713 }, { "epoch": 0.8206309710318558, "grad_norm": 0.3832915425300598, "learning_rate": 1.2808218895636817e-05, "loss": 0.5817, "step": 26714 }, { "epoch": 0.8206616901668049, "grad_norm": 0.34611573815345764, "learning_rate": 1.2807755062149286e-05, "loss": 0.5941, "step": 26715 }, { "epoch": 0.820692409301754, "grad_norm": 0.3728242814540863, "learning_rate": 1.2807291222104045e-05, "loss": 0.6172, "step": 26716 }, { "epoch": 0.8207231284367033, "grad_norm": 0.46497318148612976, "learning_rate": 1.280682737550218e-05, "loss": 0.5529, "step": 26717 }, { "epoch": 0.8207538475716524, "grad_norm": 0.35321682691574097, "learning_rate": 1.2806363522344775e-05, "loss": 0.5499, "step": 26718 }, { "epoch": 0.8207845667066015, "grad_norm": 0.3575892746448517, "learning_rate": 1.2805899662632908e-05, "loss": 0.5575, "step": 26719 }, { "epoch": 0.8208152858415507, "grad_norm": 0.3471446931362152, "learning_rate": 1.2805435796367672e-05, "loss": 0.5099, "step": 26720 }, { "epoch": 0.8208460049764998, "grad_norm": 0.36770880222320557, "learning_rate": 1.2804971923550139e-05, "loss": 0.4932, "step": 26721 }, { "epoch": 0.8208767241114491, "grad_norm": 0.4726170599460602, "learning_rate": 1.2804508044181402e-05, "loss": 0.5631, "step": 26722 }, { "epoch": 0.8209074432463982, "grad_norm": 0.3833828866481781, "learning_rate": 1.2804044158262541e-05, "loss": 0.504, "step": 26723 }, { "epoch": 0.8209381623813473, "grad_norm": 0.4034837484359741, "learning_rate": 1.2803580265794637e-05, "loss": 0.5531, "step": 26724 }, { "epoch": 0.8209688815162965, "grad_norm": 0.3792079985141754, "learning_rate": 1.2803116366778778e-05, "loss": 0.541, "step": 26725 }, { "epoch": 0.8209996006512457, "grad_norm": 0.41782286763191223, "learning_rate": 1.280265246121604e-05, "loss": 0.476, "step": 26726 }, { "epoch": 0.8210303197861948, "grad_norm": 0.3573938310146332, "learning_rate": 1.2802188549107517e-05, "loss": 0.5147, "step": 26727 }, { "epoch": 0.821061038921144, "grad_norm": 0.38235488533973694, "learning_rate": 1.2801724630454282e-05, "loss": 0.5427, "step": 26728 }, { "epoch": 0.8210917580560931, "grad_norm": 0.3863923251628876, "learning_rate": 1.2801260705257428e-05, "loss": 0.6262, "step": 26729 }, { "epoch": 0.8211224771910423, "grad_norm": 0.37784674763679504, "learning_rate": 1.2800796773518031e-05, "loss": 0.5754, "step": 26730 }, { "epoch": 0.8211531963259915, "grad_norm": 0.3436751067638397, "learning_rate": 1.280033283523718e-05, "loss": 0.586, "step": 26731 }, { "epoch": 0.8211839154609406, "grad_norm": 0.40518346428871155, "learning_rate": 1.2799868890415952e-05, "loss": 0.5715, "step": 26732 }, { "epoch": 0.8212146345958897, "grad_norm": 0.3475019633769989, "learning_rate": 1.279940493905544e-05, "loss": 0.5247, "step": 26733 }, { "epoch": 0.821245353730839, "grad_norm": 0.34351521730422974, "learning_rate": 1.279894098115672e-05, "loss": 0.5635, "step": 26734 }, { "epoch": 0.8212760728657881, "grad_norm": 0.37029629945755005, "learning_rate": 1.2798477016720876e-05, "loss": 0.607, "step": 26735 }, { "epoch": 0.8213067920007373, "grad_norm": 0.3678818941116333, "learning_rate": 1.2798013045748996e-05, "loss": 0.5678, "step": 26736 }, { "epoch": 0.8213375111356864, "grad_norm": 0.34143924713134766, "learning_rate": 1.2797549068242156e-05, "loss": 0.5007, "step": 26737 }, { "epoch": 0.8213682302706355, "grad_norm": 0.5050284266471863, "learning_rate": 1.279708508420145e-05, "loss": 0.6013, "step": 26738 }, { "epoch": 0.8213989494055848, "grad_norm": 0.47670185565948486, "learning_rate": 1.2796621093627955e-05, "loss": 0.5924, "step": 26739 }, { "epoch": 0.8214296685405339, "grad_norm": 0.3413056433200836, "learning_rate": 1.2796157096522757e-05, "loss": 0.533, "step": 26740 }, { "epoch": 0.821460387675483, "grad_norm": 0.39173343777656555, "learning_rate": 1.2795693092886938e-05, "loss": 0.6103, "step": 26741 }, { "epoch": 0.8214911068104322, "grad_norm": 0.5585837364196777, "learning_rate": 1.2795229082721583e-05, "loss": 0.518, "step": 26742 }, { "epoch": 0.8215218259453814, "grad_norm": 0.3740701973438263, "learning_rate": 1.2794765066027776e-05, "loss": 0.5775, "step": 26743 }, { "epoch": 0.8215525450803305, "grad_norm": 0.36743295192718506, "learning_rate": 1.2794301042806599e-05, "loss": 0.4785, "step": 26744 }, { "epoch": 0.8215832642152797, "grad_norm": 0.40714138746261597, "learning_rate": 1.2793837013059136e-05, "loss": 0.7104, "step": 26745 }, { "epoch": 0.8216139833502288, "grad_norm": 0.3550049960613251, "learning_rate": 1.279337297678647e-05, "loss": 0.5227, "step": 26746 }, { "epoch": 0.8216447024851781, "grad_norm": 0.5305242538452148, "learning_rate": 1.2792908933989691e-05, "loss": 0.5546, "step": 26747 }, { "epoch": 0.8216754216201272, "grad_norm": 0.36983272433280945, "learning_rate": 1.2792444884669874e-05, "loss": 0.5626, "step": 26748 }, { "epoch": 0.8217061407550763, "grad_norm": 0.335239052772522, "learning_rate": 1.279198082882811e-05, "loss": 0.464, "step": 26749 }, { "epoch": 0.8217368598900255, "grad_norm": 0.371987521648407, "learning_rate": 1.2791516766465477e-05, "loss": 0.548, "step": 26750 }, { "epoch": 0.8217675790249747, "grad_norm": 0.406223863363266, "learning_rate": 1.2791052697583065e-05, "loss": 0.4932, "step": 26751 }, { "epoch": 0.8217982981599238, "grad_norm": 0.4156668186187744, "learning_rate": 1.2790588622181954e-05, "loss": 0.4178, "step": 26752 }, { "epoch": 0.821829017294873, "grad_norm": 0.3877778649330139, "learning_rate": 1.2790124540263227e-05, "loss": 0.5047, "step": 26753 }, { "epoch": 0.8218597364298221, "grad_norm": 0.38718461990356445, "learning_rate": 1.2789660451827968e-05, "loss": 0.582, "step": 26754 }, { "epoch": 0.8218904555647712, "grad_norm": 0.40484949946403503, "learning_rate": 1.278919635687726e-05, "loss": 0.5661, "step": 26755 }, { "epoch": 0.8219211746997205, "grad_norm": 0.4513966143131256, "learning_rate": 1.2788732255412195e-05, "loss": 0.5186, "step": 26756 }, { "epoch": 0.8219518938346696, "grad_norm": 0.3226415514945984, "learning_rate": 1.2788268147433848e-05, "loss": 0.5148, "step": 26757 }, { "epoch": 0.8219826129696188, "grad_norm": 0.3763408660888672, "learning_rate": 1.2787804032943308e-05, "loss": 0.5695, "step": 26758 }, { "epoch": 0.822013332104568, "grad_norm": 0.368598610162735, "learning_rate": 1.2787339911941653e-05, "loss": 0.5626, "step": 26759 }, { "epoch": 0.8220440512395171, "grad_norm": 0.3578270971775055, "learning_rate": 1.2786875784429975e-05, "loss": 0.5795, "step": 26760 }, { "epoch": 0.8220747703744663, "grad_norm": 0.3226490318775177, "learning_rate": 1.2786411650409353e-05, "loss": 0.5813, "step": 26761 }, { "epoch": 0.8221054895094154, "grad_norm": 0.37337321043014526, "learning_rate": 1.2785947509880869e-05, "loss": 0.557, "step": 26762 }, { "epoch": 0.8221362086443645, "grad_norm": 0.49872303009033203, "learning_rate": 1.2785483362845614e-05, "loss": 0.5425, "step": 26763 }, { "epoch": 0.8221669277793138, "grad_norm": 0.3626422882080078, "learning_rate": 1.2785019209304666e-05, "loss": 0.5195, "step": 26764 }, { "epoch": 0.8221976469142629, "grad_norm": 0.37069860100746155, "learning_rate": 1.2784555049259113e-05, "loss": 0.5335, "step": 26765 }, { "epoch": 0.822228366049212, "grad_norm": 0.3670347034931183, "learning_rate": 1.2784090882710034e-05, "loss": 0.5626, "step": 26766 }, { "epoch": 0.8222590851841612, "grad_norm": 0.3780268728733063, "learning_rate": 1.2783626709658522e-05, "loss": 0.4562, "step": 26767 }, { "epoch": 0.8222898043191104, "grad_norm": 0.37552404403686523, "learning_rate": 1.278316253010565e-05, "loss": 0.5114, "step": 26768 }, { "epoch": 0.8223205234540595, "grad_norm": 0.43956393003463745, "learning_rate": 1.2782698344052508e-05, "loss": 0.5657, "step": 26769 }, { "epoch": 0.8223512425890087, "grad_norm": 0.38764679431915283, "learning_rate": 1.2782234151500183e-05, "loss": 0.4983, "step": 26770 }, { "epoch": 0.8223819617239578, "grad_norm": 0.3429953455924988, "learning_rate": 1.2781769952449752e-05, "loss": 0.5026, "step": 26771 }, { "epoch": 0.8224126808589071, "grad_norm": 0.3669179081916809, "learning_rate": 1.2781305746902305e-05, "loss": 0.5748, "step": 26772 }, { "epoch": 0.8224433999938562, "grad_norm": 0.33010101318359375, "learning_rate": 1.2780841534858923e-05, "loss": 0.4762, "step": 26773 }, { "epoch": 0.8224741191288053, "grad_norm": 0.3292222023010254, "learning_rate": 1.2780377316320691e-05, "loss": 0.5554, "step": 26774 }, { "epoch": 0.8225048382637545, "grad_norm": 0.366773396730423, "learning_rate": 1.2779913091288698e-05, "loss": 0.5085, "step": 26775 }, { "epoch": 0.8225355573987037, "grad_norm": 0.32205116748809814, "learning_rate": 1.2779448859764018e-05, "loss": 0.4829, "step": 26776 }, { "epoch": 0.8225662765336528, "grad_norm": 0.37212973833084106, "learning_rate": 1.2778984621747745e-05, "loss": 0.4899, "step": 26777 }, { "epoch": 0.822596995668602, "grad_norm": 0.36014336347579956, "learning_rate": 1.2778520377240957e-05, "loss": 0.4827, "step": 26778 }, { "epoch": 0.8226277148035511, "grad_norm": 0.34410375356674194, "learning_rate": 1.2778056126244745e-05, "loss": 0.4871, "step": 26779 }, { "epoch": 0.8226584339385002, "grad_norm": 0.4373578429222107, "learning_rate": 1.2777591868760184e-05, "loss": 0.5882, "step": 26780 }, { "epoch": 0.8226891530734495, "grad_norm": 0.3046109676361084, "learning_rate": 1.2777127604788366e-05, "loss": 0.4645, "step": 26781 }, { "epoch": 0.8227198722083986, "grad_norm": 0.4053085446357727, "learning_rate": 1.2776663334330368e-05, "loss": 0.5191, "step": 26782 }, { "epoch": 0.8227505913433478, "grad_norm": 0.36246129870414734, "learning_rate": 1.2776199057387284e-05, "loss": 0.5998, "step": 26783 }, { "epoch": 0.822781310478297, "grad_norm": 0.33636635541915894, "learning_rate": 1.277573477396019e-05, "loss": 0.5795, "step": 26784 }, { "epoch": 0.8228120296132461, "grad_norm": 0.3409971296787262, "learning_rate": 1.2775270484050177e-05, "loss": 0.5192, "step": 26785 }, { "epoch": 0.8228427487481953, "grad_norm": 0.40819644927978516, "learning_rate": 1.2774806187658325e-05, "loss": 0.5866, "step": 26786 }, { "epoch": 0.8228734678831444, "grad_norm": 0.41970014572143555, "learning_rate": 1.2774341884785717e-05, "loss": 0.4936, "step": 26787 }, { "epoch": 0.8229041870180935, "grad_norm": 0.3668977618217468, "learning_rate": 1.2773877575433442e-05, "loss": 0.5205, "step": 26788 }, { "epoch": 0.8229349061530428, "grad_norm": 0.34369248151779175, "learning_rate": 1.277341325960258e-05, "loss": 0.5426, "step": 26789 }, { "epoch": 0.8229656252879919, "grad_norm": 0.3897152841091156, "learning_rate": 1.277294893729422e-05, "loss": 0.5675, "step": 26790 }, { "epoch": 0.822996344422941, "grad_norm": 0.3764110803604126, "learning_rate": 1.2772484608509443e-05, "loss": 0.6267, "step": 26791 }, { "epoch": 0.8230270635578902, "grad_norm": 0.4301290214061737, "learning_rate": 1.2772020273249333e-05, "loss": 0.6093, "step": 26792 }, { "epoch": 0.8230577826928394, "grad_norm": 0.6676391363143921, "learning_rate": 1.2771555931514978e-05, "loss": 0.5102, "step": 26793 }, { "epoch": 0.8230885018277885, "grad_norm": 0.3693092167377472, "learning_rate": 1.277109158330746e-05, "loss": 0.5582, "step": 26794 }, { "epoch": 0.8231192209627377, "grad_norm": 0.45971009135246277, "learning_rate": 1.2770627228627865e-05, "loss": 0.538, "step": 26795 }, { "epoch": 0.8231499400976868, "grad_norm": 0.37034544348716736, "learning_rate": 1.2770162867477276e-05, "loss": 0.6277, "step": 26796 }, { "epoch": 0.8231806592326361, "grad_norm": 0.3900626599788666, "learning_rate": 1.2769698499856777e-05, "loss": 0.5843, "step": 26797 }, { "epoch": 0.8232113783675852, "grad_norm": 0.3887786865234375, "learning_rate": 1.2769234125767453e-05, "loss": 0.5802, "step": 26798 }, { "epoch": 0.8232420975025343, "grad_norm": 0.37746381759643555, "learning_rate": 1.2768769745210394e-05, "loss": 0.5901, "step": 26799 }, { "epoch": 0.8232728166374835, "grad_norm": 0.35790348052978516, "learning_rate": 1.2768305358186677e-05, "loss": 0.6135, "step": 26800 }, { "epoch": 0.8233035357724326, "grad_norm": 0.38836193084716797, "learning_rate": 1.2767840964697388e-05, "loss": 0.5796, "step": 26801 }, { "epoch": 0.8233342549073818, "grad_norm": 0.46421217918395996, "learning_rate": 1.2767376564743614e-05, "loss": 0.5787, "step": 26802 }, { "epoch": 0.823364974042331, "grad_norm": 0.418083131313324, "learning_rate": 1.276691215832644e-05, "loss": 0.5466, "step": 26803 }, { "epoch": 0.8233956931772801, "grad_norm": 0.3977488875389099, "learning_rate": 1.276644774544695e-05, "loss": 0.5682, "step": 26804 }, { "epoch": 0.8234264123122292, "grad_norm": 0.3695371448993683, "learning_rate": 1.2765983326106226e-05, "loss": 0.51, "step": 26805 }, { "epoch": 0.8234571314471785, "grad_norm": 0.3585805594921112, "learning_rate": 1.2765518900305356e-05, "loss": 0.4962, "step": 26806 }, { "epoch": 0.8234878505821276, "grad_norm": 0.35752803087234497, "learning_rate": 1.2765054468045424e-05, "loss": 0.4904, "step": 26807 }, { "epoch": 0.8235185697170768, "grad_norm": 0.38967427611351013, "learning_rate": 1.2764590029327512e-05, "loss": 0.6543, "step": 26808 }, { "epoch": 0.8235492888520259, "grad_norm": 0.3433436155319214, "learning_rate": 1.2764125584152708e-05, "loss": 0.5978, "step": 26809 }, { "epoch": 0.8235800079869751, "grad_norm": 0.40899115800857544, "learning_rate": 1.2763661132522094e-05, "loss": 0.6373, "step": 26810 }, { "epoch": 0.8236107271219243, "grad_norm": 0.35696741938591003, "learning_rate": 1.276319667443676e-05, "loss": 0.5826, "step": 26811 }, { "epoch": 0.8236414462568734, "grad_norm": 0.41100433468818665, "learning_rate": 1.2762732209897783e-05, "loss": 0.5697, "step": 26812 }, { "epoch": 0.8236721653918225, "grad_norm": 0.44887465238571167, "learning_rate": 1.2762267738906257e-05, "loss": 0.6413, "step": 26813 }, { "epoch": 0.8237028845267718, "grad_norm": 0.3456462025642395, "learning_rate": 1.2761803261463257e-05, "loss": 0.5103, "step": 26814 }, { "epoch": 0.8237336036617209, "grad_norm": 0.35223740339279175, "learning_rate": 1.2761338777569877e-05, "loss": 0.5871, "step": 26815 }, { "epoch": 0.82376432279667, "grad_norm": 0.368351012468338, "learning_rate": 1.2760874287227194e-05, "loss": 0.5548, "step": 26816 }, { "epoch": 0.8237950419316192, "grad_norm": 0.34283551573753357, "learning_rate": 1.2760409790436298e-05, "loss": 0.5349, "step": 26817 }, { "epoch": 0.8238257610665684, "grad_norm": 0.35810235142707825, "learning_rate": 1.2759945287198272e-05, "loss": 0.5539, "step": 26818 }, { "epoch": 0.8238564802015175, "grad_norm": 0.3546234667301178, "learning_rate": 1.2759480777514201e-05, "loss": 0.5259, "step": 26819 }, { "epoch": 0.8238871993364667, "grad_norm": 0.39571818709373474, "learning_rate": 1.275901626138517e-05, "loss": 0.5375, "step": 26820 }, { "epoch": 0.8239179184714158, "grad_norm": 0.3384588658809662, "learning_rate": 1.2758551738812263e-05, "loss": 0.524, "step": 26821 }, { "epoch": 0.823948637606365, "grad_norm": 0.372892290353775, "learning_rate": 1.2758087209796568e-05, "loss": 0.6188, "step": 26822 }, { "epoch": 0.8239793567413142, "grad_norm": 0.3606509864330292, "learning_rate": 1.2757622674339163e-05, "loss": 0.587, "step": 26823 }, { "epoch": 0.8240100758762633, "grad_norm": 0.37478670477867126, "learning_rate": 1.2757158132441144e-05, "loss": 0.5401, "step": 26824 }, { "epoch": 0.8240407950112125, "grad_norm": 0.38098853826522827, "learning_rate": 1.2756693584103583e-05, "loss": 0.5314, "step": 26825 }, { "epoch": 0.8240715141461616, "grad_norm": 0.34779244661331177, "learning_rate": 1.2756229029327577e-05, "loss": 0.5331, "step": 26826 }, { "epoch": 0.8241022332811108, "grad_norm": 0.37429696321487427, "learning_rate": 1.2755764468114202e-05, "loss": 0.4587, "step": 26827 }, { "epoch": 0.82413295241606, "grad_norm": 0.35360005497932434, "learning_rate": 1.2755299900464546e-05, "loss": 0.571, "step": 26828 }, { "epoch": 0.8241636715510091, "grad_norm": 0.3332570195198059, "learning_rate": 1.2754835326379697e-05, "loss": 0.561, "step": 26829 }, { "epoch": 0.8241943906859582, "grad_norm": 0.397350937128067, "learning_rate": 1.2754370745860737e-05, "loss": 0.6306, "step": 26830 }, { "epoch": 0.8242251098209075, "grad_norm": 0.417923241853714, "learning_rate": 1.2753906158908753e-05, "loss": 0.5376, "step": 26831 }, { "epoch": 0.8242558289558566, "grad_norm": 0.333925724029541, "learning_rate": 1.2753441565524826e-05, "loss": 0.5374, "step": 26832 }, { "epoch": 0.8242865480908058, "grad_norm": 0.3473578095436096, "learning_rate": 1.2752976965710048e-05, "loss": 0.4906, "step": 26833 }, { "epoch": 0.8243172672257549, "grad_norm": 0.39683812856674194, "learning_rate": 1.2752512359465499e-05, "loss": 0.5508, "step": 26834 }, { "epoch": 0.824347986360704, "grad_norm": 0.3640992343425751, "learning_rate": 1.2752047746792264e-05, "loss": 0.5713, "step": 26835 }, { "epoch": 0.8243787054956533, "grad_norm": 0.3662235140800476, "learning_rate": 1.275158312769143e-05, "loss": 0.4979, "step": 26836 }, { "epoch": 0.8244094246306024, "grad_norm": 0.3674812614917755, "learning_rate": 1.2751118502164079e-05, "loss": 0.6574, "step": 26837 }, { "epoch": 0.8244401437655515, "grad_norm": 0.4277437925338745, "learning_rate": 1.2750653870211302e-05, "loss": 0.6209, "step": 26838 }, { "epoch": 0.8244708629005008, "grad_norm": 0.45715996623039246, "learning_rate": 1.2750189231834178e-05, "loss": 0.6132, "step": 26839 }, { "epoch": 0.8245015820354499, "grad_norm": 0.37541842460632324, "learning_rate": 1.2749724587033799e-05, "loss": 0.5107, "step": 26840 }, { "epoch": 0.824532301170399, "grad_norm": 0.39178046584129333, "learning_rate": 1.274925993581124e-05, "loss": 0.6016, "step": 26841 }, { "epoch": 0.8245630203053482, "grad_norm": 0.36148911714553833, "learning_rate": 1.2748795278167599e-05, "loss": 0.5274, "step": 26842 }, { "epoch": 0.8245937394402973, "grad_norm": 0.3702152967453003, "learning_rate": 1.2748330614103947e-05, "loss": 0.5202, "step": 26843 }, { "epoch": 0.8246244585752465, "grad_norm": 0.3676576316356659, "learning_rate": 1.2747865943621384e-05, "loss": 0.6019, "step": 26844 }, { "epoch": 0.8246551777101957, "grad_norm": 0.39211785793304443, "learning_rate": 1.2747401266720984e-05, "loss": 0.4218, "step": 26845 }, { "epoch": 0.8246858968451448, "grad_norm": 0.45239949226379395, "learning_rate": 1.274693658340384e-05, "loss": 0.5222, "step": 26846 }, { "epoch": 0.824716615980094, "grad_norm": 0.37645217776298523, "learning_rate": 1.274647189367103e-05, "loss": 0.5824, "step": 26847 }, { "epoch": 0.8247473351150432, "grad_norm": 0.363032728433609, "learning_rate": 1.2746007197523644e-05, "loss": 0.5464, "step": 26848 }, { "epoch": 0.8247780542499923, "grad_norm": 0.3432900309562683, "learning_rate": 1.2745542494962768e-05, "loss": 0.5581, "step": 26849 }, { "epoch": 0.8248087733849415, "grad_norm": 0.3754114508628845, "learning_rate": 1.2745077785989483e-05, "loss": 0.5808, "step": 26850 }, { "epoch": 0.8248394925198906, "grad_norm": 0.39439842104911804, "learning_rate": 1.274461307060488e-05, "loss": 0.5697, "step": 26851 }, { "epoch": 0.8248702116548398, "grad_norm": 0.37234196066856384, "learning_rate": 1.2744148348810041e-05, "loss": 0.5702, "step": 26852 }, { "epoch": 0.824900930789789, "grad_norm": 0.3806943893432617, "learning_rate": 1.2743683620606051e-05, "loss": 0.5859, "step": 26853 }, { "epoch": 0.8249316499247381, "grad_norm": 0.40372806787490845, "learning_rate": 1.2743218885993997e-05, "loss": 0.5703, "step": 26854 }, { "epoch": 0.8249623690596872, "grad_norm": 0.3689137399196625, "learning_rate": 1.2742754144974964e-05, "loss": 0.5235, "step": 26855 }, { "epoch": 0.8249930881946365, "grad_norm": 0.3790273666381836, "learning_rate": 1.2742289397550038e-05, "loss": 0.6097, "step": 26856 }, { "epoch": 0.8250238073295856, "grad_norm": 0.41985780000686646, "learning_rate": 1.2741824643720303e-05, "loss": 0.5953, "step": 26857 }, { "epoch": 0.8250545264645348, "grad_norm": 0.35004517436027527, "learning_rate": 1.2741359883486844e-05, "loss": 0.5216, "step": 26858 }, { "epoch": 0.8250852455994839, "grad_norm": 0.37208735942840576, "learning_rate": 1.2740895116850746e-05, "loss": 0.5778, "step": 26859 }, { "epoch": 0.825115964734433, "grad_norm": 0.37648236751556396, "learning_rate": 1.2740430343813099e-05, "loss": 0.5732, "step": 26860 }, { "epoch": 0.8251466838693823, "grad_norm": 0.40263068675994873, "learning_rate": 1.2739965564374982e-05, "loss": 0.5155, "step": 26861 }, { "epoch": 0.8251774030043314, "grad_norm": 0.3672737181186676, "learning_rate": 1.273950077853749e-05, "loss": 0.533, "step": 26862 }, { "epoch": 0.8252081221392805, "grad_norm": 0.46238163113594055, "learning_rate": 1.2739035986301698e-05, "loss": 0.4909, "step": 26863 }, { "epoch": 0.8252388412742298, "grad_norm": 0.34652939438819885, "learning_rate": 1.2738571187668699e-05, "loss": 0.5261, "step": 26864 }, { "epoch": 0.8252695604091789, "grad_norm": 0.4207092821598053, "learning_rate": 1.273810638263957e-05, "loss": 0.5476, "step": 26865 }, { "epoch": 0.825300279544128, "grad_norm": 0.37135830521583557, "learning_rate": 1.2737641571215406e-05, "loss": 0.5362, "step": 26866 }, { "epoch": 0.8253309986790772, "grad_norm": 0.3719664514064789, "learning_rate": 1.2737176753397292e-05, "loss": 0.6143, "step": 26867 }, { "epoch": 0.8253617178140263, "grad_norm": 0.3867357671260834, "learning_rate": 1.2736711929186305e-05, "loss": 0.5495, "step": 26868 }, { "epoch": 0.8253924369489756, "grad_norm": 0.3801984488964081, "learning_rate": 1.2736247098583541e-05, "loss": 0.5278, "step": 26869 }, { "epoch": 0.8254231560839247, "grad_norm": 0.34425270557403564, "learning_rate": 1.2735782261590078e-05, "loss": 0.5789, "step": 26870 }, { "epoch": 0.8254538752188738, "grad_norm": 0.4063805639743805, "learning_rate": 1.2735317418207005e-05, "loss": 0.5427, "step": 26871 }, { "epoch": 0.825484594353823, "grad_norm": 0.5455687642097473, "learning_rate": 1.2734852568435408e-05, "loss": 0.5392, "step": 26872 }, { "epoch": 0.8255153134887722, "grad_norm": 0.3391919732093811, "learning_rate": 1.273438771227637e-05, "loss": 0.5047, "step": 26873 }, { "epoch": 0.8255460326237213, "grad_norm": 0.38795292377471924, "learning_rate": 1.273392284973098e-05, "loss": 0.5467, "step": 26874 }, { "epoch": 0.8255767517586705, "grad_norm": 0.3745496869087219, "learning_rate": 1.273345798080032e-05, "loss": 0.5546, "step": 26875 }, { "epoch": 0.8256074708936196, "grad_norm": 0.39853647351264954, "learning_rate": 1.2732993105485483e-05, "loss": 0.5213, "step": 26876 }, { "epoch": 0.8256381900285688, "grad_norm": 0.3566369414329529, "learning_rate": 1.2732528223787543e-05, "loss": 0.5282, "step": 26877 }, { "epoch": 0.825668909163518, "grad_norm": 0.5854928493499756, "learning_rate": 1.2732063335707599e-05, "loss": 0.5305, "step": 26878 }, { "epoch": 0.8256996282984671, "grad_norm": 0.3376336991786957, "learning_rate": 1.2731598441246726e-05, "loss": 0.4939, "step": 26879 }, { "epoch": 0.8257303474334162, "grad_norm": 0.3565661311149597, "learning_rate": 1.2731133540406017e-05, "loss": 0.5843, "step": 26880 }, { "epoch": 0.8257610665683655, "grad_norm": 0.40404394268989563, "learning_rate": 1.2730668633186551e-05, "loss": 0.5342, "step": 26881 }, { "epoch": 0.8257917857033146, "grad_norm": 0.35204431414604187, "learning_rate": 1.2730203719589422e-05, "loss": 0.517, "step": 26882 }, { "epoch": 0.8258225048382638, "grad_norm": 0.3748473525047302, "learning_rate": 1.2729738799615708e-05, "loss": 0.6122, "step": 26883 }, { "epoch": 0.8258532239732129, "grad_norm": 0.35082295536994934, "learning_rate": 1.27292738732665e-05, "loss": 0.506, "step": 26884 }, { "epoch": 0.825883943108162, "grad_norm": 0.3450416028499603, "learning_rate": 1.2728808940542884e-05, "loss": 0.5679, "step": 26885 }, { "epoch": 0.8259146622431113, "grad_norm": 0.35869100689888, "learning_rate": 1.272834400144594e-05, "loss": 0.6147, "step": 26886 }, { "epoch": 0.8259453813780604, "grad_norm": 0.41359660029411316, "learning_rate": 1.2727879055976762e-05, "loss": 0.6054, "step": 26887 }, { "epoch": 0.8259761005130095, "grad_norm": 0.43846526741981506, "learning_rate": 1.2727414104136429e-05, "loss": 0.5724, "step": 26888 }, { "epoch": 0.8260068196479587, "grad_norm": 0.3286062777042389, "learning_rate": 1.272694914592603e-05, "loss": 0.5616, "step": 26889 }, { "epoch": 0.8260375387829079, "grad_norm": 0.33664020895957947, "learning_rate": 1.2726484181346654e-05, "loss": 0.5403, "step": 26890 }, { "epoch": 0.826068257917857, "grad_norm": 0.9134952425956726, "learning_rate": 1.2726019210399381e-05, "loss": 0.6388, "step": 26891 }, { "epoch": 0.8260989770528062, "grad_norm": 0.3833390474319458, "learning_rate": 1.2725554233085301e-05, "loss": 0.5658, "step": 26892 }, { "epoch": 0.8261296961877553, "grad_norm": 0.3463684618473053, "learning_rate": 1.2725089249405498e-05, "loss": 0.4925, "step": 26893 }, { "epoch": 0.8261604153227046, "grad_norm": 0.3991931080818176, "learning_rate": 1.272462425936106e-05, "loss": 0.5404, "step": 26894 }, { "epoch": 0.8261911344576537, "grad_norm": 0.397904634475708, "learning_rate": 1.2724159262953071e-05, "loss": 0.6205, "step": 26895 }, { "epoch": 0.8262218535926028, "grad_norm": 0.33627745509147644, "learning_rate": 1.2723694260182618e-05, "loss": 0.5245, "step": 26896 }, { "epoch": 0.826252572727552, "grad_norm": 0.375637948513031, "learning_rate": 1.2723229251050784e-05, "loss": 0.566, "step": 26897 }, { "epoch": 0.8262832918625012, "grad_norm": 0.38891738653182983, "learning_rate": 1.2722764235558663e-05, "loss": 0.6546, "step": 26898 }, { "epoch": 0.8263140109974503, "grad_norm": 0.4169718623161316, "learning_rate": 1.2722299213707332e-05, "loss": 0.5676, "step": 26899 }, { "epoch": 0.8263447301323995, "grad_norm": 0.369071364402771, "learning_rate": 1.2721834185497883e-05, "loss": 0.5779, "step": 26900 }, { "epoch": 0.8263754492673486, "grad_norm": 0.4235658347606659, "learning_rate": 1.2721369150931398e-05, "loss": 0.5904, "step": 26901 }, { "epoch": 0.8264061684022977, "grad_norm": 0.34950998425483704, "learning_rate": 1.2720904110008964e-05, "loss": 0.5123, "step": 26902 }, { "epoch": 0.826436887537247, "grad_norm": 0.39226070046424866, "learning_rate": 1.2720439062731673e-05, "loss": 0.61, "step": 26903 }, { "epoch": 0.8264676066721961, "grad_norm": 0.38144412636756897, "learning_rate": 1.2719974009100606e-05, "loss": 0.5597, "step": 26904 }, { "epoch": 0.8264983258071452, "grad_norm": 0.37281739711761475, "learning_rate": 1.2719508949116847e-05, "loss": 0.4755, "step": 26905 }, { "epoch": 0.8265290449420944, "grad_norm": 0.3847431242465973, "learning_rate": 1.2719043882781487e-05, "loss": 0.5274, "step": 26906 }, { "epoch": 0.8265597640770436, "grad_norm": 0.3974156081676483, "learning_rate": 1.2718578810095613e-05, "loss": 0.545, "step": 26907 }, { "epoch": 0.8265904832119928, "grad_norm": 0.3563803732395172, "learning_rate": 1.2718113731060305e-05, "loss": 0.562, "step": 26908 }, { "epoch": 0.8266212023469419, "grad_norm": 0.36866700649261475, "learning_rate": 1.271764864567665e-05, "loss": 0.5855, "step": 26909 }, { "epoch": 0.826651921481891, "grad_norm": 0.37142789363861084, "learning_rate": 1.271718355394574e-05, "loss": 0.4401, "step": 26910 }, { "epoch": 0.8266826406168403, "grad_norm": 0.4620428681373596, "learning_rate": 1.2716718455868655e-05, "loss": 0.6054, "step": 26911 }, { "epoch": 0.8267133597517894, "grad_norm": 0.41894206404685974, "learning_rate": 1.2716253351446488e-05, "loss": 0.5897, "step": 26912 }, { "epoch": 0.8267440788867385, "grad_norm": 0.3713472783565521, "learning_rate": 1.271578824068032e-05, "loss": 0.5835, "step": 26913 }, { "epoch": 0.8267747980216877, "grad_norm": 0.43513911962509155, "learning_rate": 1.271532312357124e-05, "loss": 0.5262, "step": 26914 }, { "epoch": 0.8268055171566369, "grad_norm": 0.41626888513565063, "learning_rate": 1.2714858000120332e-05, "loss": 0.5359, "step": 26915 }, { "epoch": 0.826836236291586, "grad_norm": 0.4104022681713104, "learning_rate": 1.2714392870328684e-05, "loss": 0.6153, "step": 26916 }, { "epoch": 0.8268669554265352, "grad_norm": 0.45463335514068604, "learning_rate": 1.2713927734197382e-05, "loss": 0.6179, "step": 26917 }, { "epoch": 0.8268976745614843, "grad_norm": 0.41913846135139465, "learning_rate": 1.2713462591727512e-05, "loss": 0.5849, "step": 26918 }, { "epoch": 0.8269283936964336, "grad_norm": 0.4297998547554016, "learning_rate": 1.271299744292016e-05, "loss": 0.4472, "step": 26919 }, { "epoch": 0.8269591128313827, "grad_norm": 0.34998464584350586, "learning_rate": 1.2712532287776411e-05, "loss": 0.4842, "step": 26920 }, { "epoch": 0.8269898319663318, "grad_norm": 0.35523539781570435, "learning_rate": 1.2712067126297359e-05, "loss": 0.5372, "step": 26921 }, { "epoch": 0.827020551101281, "grad_norm": 0.39635077118873596, "learning_rate": 1.2711601958484079e-05, "loss": 0.5092, "step": 26922 }, { "epoch": 0.8270512702362302, "grad_norm": 0.35925614833831787, "learning_rate": 1.2711136784337667e-05, "loss": 0.5561, "step": 26923 }, { "epoch": 0.8270819893711793, "grad_norm": 0.3712117373943329, "learning_rate": 1.2710671603859205e-05, "loss": 0.6216, "step": 26924 }, { "epoch": 0.8271127085061285, "grad_norm": 1.2848271131515503, "learning_rate": 1.2710206417049782e-05, "loss": 0.6035, "step": 26925 }, { "epoch": 0.8271434276410776, "grad_norm": 0.5938532948493958, "learning_rate": 1.270974122391048e-05, "loss": 0.5125, "step": 26926 }, { "epoch": 0.8271741467760267, "grad_norm": 0.370238721370697, "learning_rate": 1.2709276024442387e-05, "loss": 0.5035, "step": 26927 }, { "epoch": 0.827204865910976, "grad_norm": 0.3752968907356262, "learning_rate": 1.2708810818646593e-05, "loss": 0.5798, "step": 26928 }, { "epoch": 0.8272355850459251, "grad_norm": 0.3538978099822998, "learning_rate": 1.2708345606524183e-05, "loss": 0.5798, "step": 26929 }, { "epoch": 0.8272663041808742, "grad_norm": 0.378939151763916, "learning_rate": 1.2707880388076238e-05, "loss": 0.5222, "step": 26930 }, { "epoch": 0.8272970233158234, "grad_norm": 0.3629481792449951, "learning_rate": 1.2707415163303853e-05, "loss": 0.5349, "step": 26931 }, { "epoch": 0.8273277424507726, "grad_norm": 0.35041531920433044, "learning_rate": 1.2706949932208113e-05, "loss": 0.5024, "step": 26932 }, { "epoch": 0.8273584615857218, "grad_norm": 0.360953688621521, "learning_rate": 1.2706484694790097e-05, "loss": 0.5209, "step": 26933 }, { "epoch": 0.8273891807206709, "grad_norm": 0.3434022068977356, "learning_rate": 1.2706019451050901e-05, "loss": 0.6028, "step": 26934 }, { "epoch": 0.82741989985562, "grad_norm": 0.3644912838935852, "learning_rate": 1.2705554200991606e-05, "loss": 0.5456, "step": 26935 }, { "epoch": 0.8274506189905693, "grad_norm": 0.3548752963542938, "learning_rate": 1.27050889446133e-05, "loss": 0.5368, "step": 26936 }, { "epoch": 0.8274813381255184, "grad_norm": 0.3670947849750519, "learning_rate": 1.2704623681917073e-05, "loss": 0.5686, "step": 26937 }, { "epoch": 0.8275120572604675, "grad_norm": 0.3276214003562927, "learning_rate": 1.2704158412903999e-05, "loss": 0.5497, "step": 26938 }, { "epoch": 0.8275427763954167, "grad_norm": 0.3657810688018799, "learning_rate": 1.2703693137575184e-05, "loss": 0.5394, "step": 26939 }, { "epoch": 0.8275734955303659, "grad_norm": 0.3498515486717224, "learning_rate": 1.2703227855931702e-05, "loss": 0.5252, "step": 26940 }, { "epoch": 0.827604214665315, "grad_norm": 0.3620937764644623, "learning_rate": 1.2702762567974642e-05, "loss": 0.4921, "step": 26941 }, { "epoch": 0.8276349338002642, "grad_norm": 0.32411104440689087, "learning_rate": 1.2702297273705092e-05, "loss": 0.5242, "step": 26942 }, { "epoch": 0.8276656529352133, "grad_norm": 0.4263598620891571, "learning_rate": 1.2701831973124138e-05, "loss": 0.5979, "step": 26943 }, { "epoch": 0.8276963720701626, "grad_norm": 0.36490342020988464, "learning_rate": 1.2701366666232866e-05, "loss": 0.4859, "step": 26944 }, { "epoch": 0.8277270912051117, "grad_norm": 0.3328682482242584, "learning_rate": 1.2700901353032362e-05, "loss": 0.5341, "step": 26945 }, { "epoch": 0.8277578103400608, "grad_norm": 0.4048367738723755, "learning_rate": 1.2700436033523717e-05, "loss": 0.4723, "step": 26946 }, { "epoch": 0.82778852947501, "grad_norm": 0.352411687374115, "learning_rate": 1.2699970707708013e-05, "loss": 0.5374, "step": 26947 }, { "epoch": 0.8278192486099591, "grad_norm": 0.33094558119773865, "learning_rate": 1.2699505375586343e-05, "loss": 0.5543, "step": 26948 }, { "epoch": 0.8278499677449083, "grad_norm": 0.46349436044692993, "learning_rate": 1.2699040037159784e-05, "loss": 0.5712, "step": 26949 }, { "epoch": 0.8278806868798575, "grad_norm": 0.39765995740890503, "learning_rate": 1.2698574692429432e-05, "loss": 0.5794, "step": 26950 }, { "epoch": 0.8279114060148066, "grad_norm": 0.3699580729007721, "learning_rate": 1.269810934139637e-05, "loss": 0.6532, "step": 26951 }, { "epoch": 0.8279421251497557, "grad_norm": 0.3336356580257416, "learning_rate": 1.2697643984061685e-05, "loss": 0.5347, "step": 26952 }, { "epoch": 0.827972844284705, "grad_norm": 0.39645349979400635, "learning_rate": 1.2697178620426463e-05, "loss": 0.6186, "step": 26953 }, { "epoch": 0.8280035634196541, "grad_norm": 0.3887363076210022, "learning_rate": 1.2696713250491792e-05, "loss": 0.5183, "step": 26954 }, { "epoch": 0.8280342825546032, "grad_norm": 0.45315220952033997, "learning_rate": 1.269624787425876e-05, "loss": 0.5647, "step": 26955 }, { "epoch": 0.8280650016895524, "grad_norm": 0.3839477598667145, "learning_rate": 1.269578249172845e-05, "loss": 0.5502, "step": 26956 }, { "epoch": 0.8280957208245016, "grad_norm": 1.3484493494033813, "learning_rate": 1.2695317102901954e-05, "loss": 0.5636, "step": 26957 }, { "epoch": 0.8281264399594508, "grad_norm": 0.3261617422103882, "learning_rate": 1.2694851707780355e-05, "loss": 0.52, "step": 26958 }, { "epoch": 0.8281571590943999, "grad_norm": 0.36670929193496704, "learning_rate": 1.2694386306364746e-05, "loss": 0.5259, "step": 26959 }, { "epoch": 0.828187878229349, "grad_norm": 0.323982834815979, "learning_rate": 1.2693920898656207e-05, "loss": 0.4859, "step": 26960 }, { "epoch": 0.8282185973642983, "grad_norm": 0.37752285599708557, "learning_rate": 1.2693455484655828e-05, "loss": 0.6347, "step": 26961 }, { "epoch": 0.8282493164992474, "grad_norm": 0.3698410987854004, "learning_rate": 1.2692990064364695e-05, "loss": 0.4134, "step": 26962 }, { "epoch": 0.8282800356341965, "grad_norm": 0.37701651453971863, "learning_rate": 1.2692524637783895e-05, "loss": 0.6637, "step": 26963 }, { "epoch": 0.8283107547691457, "grad_norm": 0.39717188477516174, "learning_rate": 1.2692059204914517e-05, "loss": 0.575, "step": 26964 }, { "epoch": 0.8283414739040948, "grad_norm": 0.3470255136489868, "learning_rate": 1.2691593765757645e-05, "loss": 0.5581, "step": 26965 }, { "epoch": 0.828372193039044, "grad_norm": 0.35411548614501953, "learning_rate": 1.269112832031437e-05, "loss": 0.5888, "step": 26966 }, { "epoch": 0.8284029121739932, "grad_norm": 0.3876190185546875, "learning_rate": 1.2690662868585775e-05, "loss": 0.4909, "step": 26967 }, { "epoch": 0.8284336313089423, "grad_norm": 0.35871386528015137, "learning_rate": 1.269019741057295e-05, "loss": 0.5513, "step": 26968 }, { "epoch": 0.8284643504438916, "grad_norm": 0.4035522937774658, "learning_rate": 1.268973194627698e-05, "loss": 0.4657, "step": 26969 }, { "epoch": 0.8284950695788407, "grad_norm": 0.3887275159358978, "learning_rate": 1.2689266475698956e-05, "loss": 0.47, "step": 26970 }, { "epoch": 0.8285257887137898, "grad_norm": 0.35757237672805786, "learning_rate": 1.268880099883996e-05, "loss": 0.5761, "step": 26971 }, { "epoch": 0.828556507848739, "grad_norm": 0.5108929872512817, "learning_rate": 1.2688335515701083e-05, "loss": 0.6257, "step": 26972 }, { "epoch": 0.8285872269836881, "grad_norm": 0.3775435984134674, "learning_rate": 1.268787002628341e-05, "loss": 0.5875, "step": 26973 }, { "epoch": 0.8286179461186373, "grad_norm": 0.374016672372818, "learning_rate": 1.2687404530588024e-05, "loss": 0.548, "step": 26974 }, { "epoch": 0.8286486652535865, "grad_norm": 0.379194438457489, "learning_rate": 1.2686939028616023e-05, "loss": 0.5541, "step": 26975 }, { "epoch": 0.8286793843885356, "grad_norm": 0.40790504217147827, "learning_rate": 1.2686473520368485e-05, "loss": 0.4528, "step": 26976 }, { "epoch": 0.8287101035234847, "grad_norm": 0.6501482129096985, "learning_rate": 1.2686008005846505e-05, "loss": 0.5529, "step": 26977 }, { "epoch": 0.828740822658434, "grad_norm": 0.36176151037216187, "learning_rate": 1.2685542485051161e-05, "loss": 0.5864, "step": 26978 }, { "epoch": 0.8287715417933831, "grad_norm": 0.40803197026252747, "learning_rate": 1.2685076957983546e-05, "loss": 0.4891, "step": 26979 }, { "epoch": 0.8288022609283323, "grad_norm": 0.36752668023109436, "learning_rate": 1.2684611424644748e-05, "loss": 0.5561, "step": 26980 }, { "epoch": 0.8288329800632814, "grad_norm": 0.356103777885437, "learning_rate": 1.268414588503585e-05, "loss": 0.5374, "step": 26981 }, { "epoch": 0.8288636991982306, "grad_norm": 0.3583567142486572, "learning_rate": 1.2683680339157944e-05, "loss": 0.4895, "step": 26982 }, { "epoch": 0.8288944183331798, "grad_norm": 0.4376335144042969, "learning_rate": 1.2683214787012115e-05, "loss": 0.523, "step": 26983 }, { "epoch": 0.8289251374681289, "grad_norm": 0.368416428565979, "learning_rate": 1.268274922859945e-05, "loss": 0.5635, "step": 26984 }, { "epoch": 0.828955856603078, "grad_norm": 0.3480643033981323, "learning_rate": 1.2682283663921035e-05, "loss": 0.5654, "step": 26985 }, { "epoch": 0.8289865757380273, "grad_norm": 0.3765828311443329, "learning_rate": 1.2681818092977962e-05, "loss": 0.6123, "step": 26986 }, { "epoch": 0.8290172948729764, "grad_norm": 0.36674341559410095, "learning_rate": 1.2681352515771315e-05, "loss": 0.4789, "step": 26987 }, { "epoch": 0.8290480140079255, "grad_norm": 0.5024712681770325, "learning_rate": 1.2680886932302181e-05, "loss": 0.5623, "step": 26988 }, { "epoch": 0.8290787331428747, "grad_norm": 0.3492150902748108, "learning_rate": 1.268042134257165e-05, "loss": 0.5467, "step": 26989 }, { "epoch": 0.8291094522778238, "grad_norm": 0.4400257468223572, "learning_rate": 1.2679955746580805e-05, "loss": 0.5641, "step": 26990 }, { "epoch": 0.829140171412773, "grad_norm": 0.44692352414131165, "learning_rate": 1.267949014433074e-05, "loss": 0.5781, "step": 26991 }, { "epoch": 0.8291708905477222, "grad_norm": 0.4036007821559906, "learning_rate": 1.2679024535822535e-05, "loss": 0.5326, "step": 26992 }, { "epoch": 0.8292016096826713, "grad_norm": 0.3722107410430908, "learning_rate": 1.267855892105728e-05, "loss": 0.4937, "step": 26993 }, { "epoch": 0.8292323288176205, "grad_norm": 0.3507424294948578, "learning_rate": 1.2678093300036067e-05, "loss": 0.5224, "step": 26994 }, { "epoch": 0.8292630479525697, "grad_norm": 0.41942551732063293, "learning_rate": 1.2677627672759978e-05, "loss": 0.5678, "step": 26995 }, { "epoch": 0.8292937670875188, "grad_norm": 0.3413357436656952, "learning_rate": 1.2677162039230106e-05, "loss": 0.625, "step": 26996 }, { "epoch": 0.829324486222468, "grad_norm": 0.3853926956653595, "learning_rate": 1.267669639944753e-05, "loss": 0.5494, "step": 26997 }, { "epoch": 0.8293552053574171, "grad_norm": 0.3594391942024231, "learning_rate": 1.2676230753413348e-05, "loss": 0.5656, "step": 26998 }, { "epoch": 0.8293859244923663, "grad_norm": 0.35649624466896057, "learning_rate": 1.2675765101128637e-05, "loss": 0.53, "step": 26999 }, { "epoch": 0.8294166436273155, "grad_norm": 0.3734930157661438, "learning_rate": 1.2675299442594493e-05, "loss": 0.5625, "step": 27000 }, { "epoch": 0.8294473627622646, "grad_norm": 0.37703460454940796, "learning_rate": 1.2674833777811998e-05, "loss": 0.4822, "step": 27001 }, { "epoch": 0.8294780818972137, "grad_norm": 0.4156048595905304, "learning_rate": 1.2674368106782246e-05, "loss": 0.5677, "step": 27002 }, { "epoch": 0.829508801032163, "grad_norm": 0.3362496793270111, "learning_rate": 1.2673902429506317e-05, "loss": 0.4974, "step": 27003 }, { "epoch": 0.8295395201671121, "grad_norm": 0.3763400614261627, "learning_rate": 1.2673436745985303e-05, "loss": 0.6029, "step": 27004 }, { "epoch": 0.8295702393020613, "grad_norm": 0.3739820420742035, "learning_rate": 1.2672971056220292e-05, "loss": 0.5622, "step": 27005 }, { "epoch": 0.8296009584370104, "grad_norm": 0.3769420385360718, "learning_rate": 1.2672505360212368e-05, "loss": 0.5532, "step": 27006 }, { "epoch": 0.8296316775719595, "grad_norm": 0.3873150050640106, "learning_rate": 1.2672039657962625e-05, "loss": 0.5328, "step": 27007 }, { "epoch": 0.8296623967069088, "grad_norm": 0.35003164410591125, "learning_rate": 1.2671573949472143e-05, "loss": 0.4802, "step": 27008 }, { "epoch": 0.8296931158418579, "grad_norm": 0.3781835436820984, "learning_rate": 1.2671108234742015e-05, "loss": 0.5494, "step": 27009 }, { "epoch": 0.829723834976807, "grad_norm": 0.4039385914802551, "learning_rate": 1.2670642513773326e-05, "loss": 0.6248, "step": 27010 }, { "epoch": 0.8297545541117562, "grad_norm": 0.522290050983429, "learning_rate": 1.2670176786567164e-05, "loss": 0.5207, "step": 27011 }, { "epoch": 0.8297852732467054, "grad_norm": 0.3351229429244995, "learning_rate": 1.266971105312462e-05, "loss": 0.5169, "step": 27012 }, { "epoch": 0.8298159923816545, "grad_norm": 0.341731995344162, "learning_rate": 1.2669245313446779e-05, "loss": 0.6113, "step": 27013 }, { "epoch": 0.8298467115166037, "grad_norm": 0.34995561838150024, "learning_rate": 1.2668779567534728e-05, "loss": 0.5406, "step": 27014 }, { "epoch": 0.8298774306515528, "grad_norm": 0.3571462631225586, "learning_rate": 1.2668313815389557e-05, "loss": 0.5835, "step": 27015 }, { "epoch": 0.829908149786502, "grad_norm": 0.3654617667198181, "learning_rate": 1.2667848057012353e-05, "loss": 0.6096, "step": 27016 }, { "epoch": 0.8299388689214512, "grad_norm": 0.42585045099258423, "learning_rate": 1.2667382292404202e-05, "loss": 0.5044, "step": 27017 }, { "epoch": 0.8299695880564003, "grad_norm": 0.3374563455581665, "learning_rate": 1.2666916521566195e-05, "loss": 0.5633, "step": 27018 }, { "epoch": 0.8300003071913495, "grad_norm": 0.35860908031463623, "learning_rate": 1.2666450744499417e-05, "loss": 0.5728, "step": 27019 }, { "epoch": 0.8300310263262987, "grad_norm": 0.3166657090187073, "learning_rate": 1.2665984961204959e-05, "loss": 0.4763, "step": 27020 }, { "epoch": 0.8300617454612478, "grad_norm": 0.9564712643623352, "learning_rate": 1.2665519171683905e-05, "loss": 0.5387, "step": 27021 }, { "epoch": 0.830092464596197, "grad_norm": 0.3492712080478668, "learning_rate": 1.2665053375937344e-05, "loss": 0.4935, "step": 27022 }, { "epoch": 0.8301231837311461, "grad_norm": 0.453664094209671, "learning_rate": 1.2664587573966368e-05, "loss": 0.5151, "step": 27023 }, { "epoch": 0.8301539028660953, "grad_norm": 0.39165177941322327, "learning_rate": 1.266412176577206e-05, "loss": 0.6355, "step": 27024 }, { "epoch": 0.8301846220010445, "grad_norm": 0.33777567744255066, "learning_rate": 1.266365595135551e-05, "loss": 0.6092, "step": 27025 }, { "epoch": 0.8302153411359936, "grad_norm": 0.3641641438007355, "learning_rate": 1.2663190130717804e-05, "loss": 0.5363, "step": 27026 }, { "epoch": 0.8302460602709427, "grad_norm": 0.3698714077472687, "learning_rate": 1.2662724303860033e-05, "loss": 0.6133, "step": 27027 }, { "epoch": 0.830276779405892, "grad_norm": 0.3789611756801605, "learning_rate": 1.266225847078328e-05, "loss": 0.4482, "step": 27028 }, { "epoch": 0.8303074985408411, "grad_norm": 0.491183340549469, "learning_rate": 1.2661792631488639e-05, "loss": 0.5134, "step": 27029 }, { "epoch": 0.8303382176757903, "grad_norm": 0.5464342832565308, "learning_rate": 1.2661326785977198e-05, "loss": 0.5207, "step": 27030 }, { "epoch": 0.8303689368107394, "grad_norm": 0.3414505422115326, "learning_rate": 1.2660860934250036e-05, "loss": 0.5794, "step": 27031 }, { "epoch": 0.8303996559456885, "grad_norm": 0.33488985896110535, "learning_rate": 1.2660395076308254e-05, "loss": 0.4731, "step": 27032 }, { "epoch": 0.8304303750806378, "grad_norm": 0.394453227519989, "learning_rate": 1.2659929212152929e-05, "loss": 0.555, "step": 27033 }, { "epoch": 0.8304610942155869, "grad_norm": 0.3479311764240265, "learning_rate": 1.2659463341785157e-05, "loss": 0.5043, "step": 27034 }, { "epoch": 0.830491813350536, "grad_norm": 0.35404419898986816, "learning_rate": 1.265899746520602e-05, "loss": 0.5893, "step": 27035 }, { "epoch": 0.8305225324854852, "grad_norm": 0.4152596592903137, "learning_rate": 1.2658531582416611e-05, "loss": 0.601, "step": 27036 }, { "epoch": 0.8305532516204344, "grad_norm": 0.3402130603790283, "learning_rate": 1.2658065693418014e-05, "loss": 0.481, "step": 27037 }, { "epoch": 0.8305839707553835, "grad_norm": 0.3849560618400574, "learning_rate": 1.265759979821132e-05, "loss": 0.5607, "step": 27038 }, { "epoch": 0.8306146898903327, "grad_norm": 0.3951937258243561, "learning_rate": 1.2657133896797613e-05, "loss": 0.5913, "step": 27039 }, { "epoch": 0.8306454090252818, "grad_norm": 0.35849565267562866, "learning_rate": 1.2656667989177988e-05, "loss": 0.5908, "step": 27040 }, { "epoch": 0.830676128160231, "grad_norm": 0.3553217947483063, "learning_rate": 1.265620207535353e-05, "loss": 0.5306, "step": 27041 }, { "epoch": 0.8307068472951802, "grad_norm": 0.3615686297416687, "learning_rate": 1.2655736155325324e-05, "loss": 0.5761, "step": 27042 }, { "epoch": 0.8307375664301293, "grad_norm": 0.35458943247795105, "learning_rate": 1.2655270229094462e-05, "loss": 0.5907, "step": 27043 }, { "epoch": 0.8307682855650785, "grad_norm": 0.2981807291507721, "learning_rate": 1.2654804296662028e-05, "loss": 0.5105, "step": 27044 }, { "epoch": 0.8307990047000277, "grad_norm": 0.35650181770324707, "learning_rate": 1.265433835802912e-05, "loss": 0.5413, "step": 27045 }, { "epoch": 0.8308297238349768, "grad_norm": 0.3724871873855591, "learning_rate": 1.2653872413196813e-05, "loss": 0.5024, "step": 27046 }, { "epoch": 0.830860442969926, "grad_norm": 0.35411250591278076, "learning_rate": 1.2653406462166202e-05, "loss": 0.4997, "step": 27047 }, { "epoch": 0.8308911621048751, "grad_norm": 0.3299767076969147, "learning_rate": 1.2652940504938378e-05, "loss": 0.5003, "step": 27048 }, { "epoch": 0.8309218812398242, "grad_norm": 0.39350855350494385, "learning_rate": 1.2652474541514423e-05, "loss": 0.6175, "step": 27049 }, { "epoch": 0.8309526003747735, "grad_norm": 0.3589632213115692, "learning_rate": 1.2652008571895432e-05, "loss": 0.4628, "step": 27050 }, { "epoch": 0.8309833195097226, "grad_norm": 0.3867671489715576, "learning_rate": 1.2651542596082486e-05, "loss": 0.5726, "step": 27051 }, { "epoch": 0.8310140386446717, "grad_norm": 0.5267994999885559, "learning_rate": 1.265107661407668e-05, "loss": 0.6685, "step": 27052 }, { "epoch": 0.831044757779621, "grad_norm": 0.3496708571910858, "learning_rate": 1.2650610625879099e-05, "loss": 0.4579, "step": 27053 }, { "epoch": 0.8310754769145701, "grad_norm": 0.3627551794052124, "learning_rate": 1.2650144631490831e-05, "loss": 0.5418, "step": 27054 }, { "epoch": 0.8311061960495193, "grad_norm": 0.3928287923336029, "learning_rate": 1.2649678630912963e-05, "loss": 0.5457, "step": 27055 }, { "epoch": 0.8311369151844684, "grad_norm": 0.39567604660987854, "learning_rate": 1.264921262414659e-05, "loss": 0.5959, "step": 27056 }, { "epoch": 0.8311676343194175, "grad_norm": 0.40504854917526245, "learning_rate": 1.2648746611192795e-05, "loss": 0.5848, "step": 27057 }, { "epoch": 0.8311983534543668, "grad_norm": 0.5544378757476807, "learning_rate": 1.2648280592052665e-05, "loss": 0.62, "step": 27058 }, { "epoch": 0.8312290725893159, "grad_norm": 0.3729649484157562, "learning_rate": 1.2647814566727293e-05, "loss": 0.5392, "step": 27059 }, { "epoch": 0.831259791724265, "grad_norm": 0.35662877559661865, "learning_rate": 1.2647348535217762e-05, "loss": 0.5347, "step": 27060 }, { "epoch": 0.8312905108592142, "grad_norm": 0.3968266546726227, "learning_rate": 1.2646882497525166e-05, "loss": 0.5528, "step": 27061 }, { "epoch": 0.8313212299941634, "grad_norm": 0.3956371545791626, "learning_rate": 1.2646416453650589e-05, "loss": 0.5311, "step": 27062 }, { "epoch": 0.8313519491291125, "grad_norm": 0.356627881526947, "learning_rate": 1.2645950403595125e-05, "loss": 0.4238, "step": 27063 }, { "epoch": 0.8313826682640617, "grad_norm": 0.32638636231422424, "learning_rate": 1.2645484347359856e-05, "loss": 0.511, "step": 27064 }, { "epoch": 0.8314133873990108, "grad_norm": 0.3899553716182709, "learning_rate": 1.2645018284945876e-05, "loss": 0.5423, "step": 27065 }, { "epoch": 0.8314441065339601, "grad_norm": 0.356752872467041, "learning_rate": 1.264455221635427e-05, "loss": 0.6226, "step": 27066 }, { "epoch": 0.8314748256689092, "grad_norm": 0.4964306354522705, "learning_rate": 1.2644086141586124e-05, "loss": 0.4793, "step": 27067 }, { "epoch": 0.8315055448038583, "grad_norm": 0.3824189603328705, "learning_rate": 1.2643620060642535e-05, "loss": 0.5483, "step": 27068 }, { "epoch": 0.8315362639388075, "grad_norm": 0.4062287211418152, "learning_rate": 1.2643153973524585e-05, "loss": 0.4987, "step": 27069 }, { "epoch": 0.8315669830737566, "grad_norm": 0.40228381752967834, "learning_rate": 1.2642687880233364e-05, "loss": 0.5741, "step": 27070 }, { "epoch": 0.8315977022087058, "grad_norm": 0.35810816287994385, "learning_rate": 1.2642221780769961e-05, "loss": 0.5986, "step": 27071 }, { "epoch": 0.831628421343655, "grad_norm": 0.3371817469596863, "learning_rate": 1.2641755675135466e-05, "loss": 0.6288, "step": 27072 }, { "epoch": 0.8316591404786041, "grad_norm": 0.3353465795516968, "learning_rate": 1.2641289563330963e-05, "loss": 0.5166, "step": 27073 }, { "epoch": 0.8316898596135532, "grad_norm": 0.34841257333755493, "learning_rate": 1.2640823445357549e-05, "loss": 0.5337, "step": 27074 }, { "epoch": 0.8317205787485025, "grad_norm": 0.36266082525253296, "learning_rate": 1.2640357321216304e-05, "loss": 0.5478, "step": 27075 }, { "epoch": 0.8317512978834516, "grad_norm": 0.36563798785209656, "learning_rate": 1.263989119090832e-05, "loss": 0.4824, "step": 27076 }, { "epoch": 0.8317820170184007, "grad_norm": 0.33617064356803894, "learning_rate": 1.2639425054434686e-05, "loss": 0.4805, "step": 27077 }, { "epoch": 0.8318127361533499, "grad_norm": 0.41132470965385437, "learning_rate": 1.263895891179649e-05, "loss": 0.5066, "step": 27078 }, { "epoch": 0.8318434552882991, "grad_norm": 0.3725087642669678, "learning_rate": 1.2638492762994823e-05, "loss": 0.5931, "step": 27079 }, { "epoch": 0.8318741744232483, "grad_norm": 0.3834511339664459, "learning_rate": 1.263802660803077e-05, "loss": 0.584, "step": 27080 }, { "epoch": 0.8319048935581974, "grad_norm": 0.367012619972229, "learning_rate": 1.2637560446905421e-05, "loss": 0.647, "step": 27081 }, { "epoch": 0.8319356126931465, "grad_norm": 0.4073667526245117, "learning_rate": 1.2637094279619865e-05, "loss": 0.5097, "step": 27082 }, { "epoch": 0.8319663318280958, "grad_norm": 0.34496402740478516, "learning_rate": 1.2636628106175196e-05, "loss": 0.4909, "step": 27083 }, { "epoch": 0.8319970509630449, "grad_norm": 0.40438175201416016, "learning_rate": 1.2636161926572491e-05, "loss": 0.6482, "step": 27084 }, { "epoch": 0.832027770097994, "grad_norm": 0.39961132407188416, "learning_rate": 1.263569574081285e-05, "loss": 0.5, "step": 27085 }, { "epoch": 0.8320584892329432, "grad_norm": 0.37350043654441833, "learning_rate": 1.2635229548897358e-05, "loss": 0.6366, "step": 27086 }, { "epoch": 0.8320892083678924, "grad_norm": 0.3785265386104584, "learning_rate": 1.26347633508271e-05, "loss": 0.6599, "step": 27087 }, { "epoch": 0.8321199275028415, "grad_norm": 0.35079658031463623, "learning_rate": 1.2634297146603172e-05, "loss": 0.5993, "step": 27088 }, { "epoch": 0.8321506466377907, "grad_norm": 0.4071947932243347, "learning_rate": 1.2633830936226656e-05, "loss": 0.5722, "step": 27089 }, { "epoch": 0.8321813657727398, "grad_norm": 0.34931713342666626, "learning_rate": 1.2633364719698647e-05, "loss": 0.5477, "step": 27090 }, { "epoch": 0.832212084907689, "grad_norm": 0.377828449010849, "learning_rate": 1.2632898497020228e-05, "loss": 0.526, "step": 27091 }, { "epoch": 0.8322428040426382, "grad_norm": 0.3506288528442383, "learning_rate": 1.2632432268192493e-05, "loss": 0.5377, "step": 27092 }, { "epoch": 0.8322735231775873, "grad_norm": 0.40263330936431885, "learning_rate": 1.2631966033216526e-05, "loss": 0.5154, "step": 27093 }, { "epoch": 0.8323042423125365, "grad_norm": 0.33708661794662476, "learning_rate": 1.263149979209342e-05, "loss": 0.541, "step": 27094 }, { "epoch": 0.8323349614474856, "grad_norm": 0.36350345611572266, "learning_rate": 1.2631033544824265e-05, "loss": 0.5147, "step": 27095 }, { "epoch": 0.8323656805824348, "grad_norm": 0.3678392171859741, "learning_rate": 1.2630567291410143e-05, "loss": 0.5044, "step": 27096 }, { "epoch": 0.832396399717384, "grad_norm": 0.40007472038269043, "learning_rate": 1.2630101031852151e-05, "loss": 0.5781, "step": 27097 }, { "epoch": 0.8324271188523331, "grad_norm": 0.3887214660644531, "learning_rate": 1.262963476615137e-05, "loss": 0.5019, "step": 27098 }, { "epoch": 0.8324578379872822, "grad_norm": 0.39974507689476013, "learning_rate": 1.2629168494308897e-05, "loss": 0.5706, "step": 27099 }, { "epoch": 0.8324885571222315, "grad_norm": 0.3820039927959442, "learning_rate": 1.2628702216325815e-05, "loss": 0.5867, "step": 27100 }, { "epoch": 0.8325192762571806, "grad_norm": 0.3511575162410736, "learning_rate": 1.262823593220322e-05, "loss": 0.5766, "step": 27101 }, { "epoch": 0.8325499953921297, "grad_norm": 0.3556513488292694, "learning_rate": 1.262776964194219e-05, "loss": 0.5901, "step": 27102 }, { "epoch": 0.8325807145270789, "grad_norm": 0.5763691663742065, "learning_rate": 1.2627303345543821e-05, "loss": 0.5477, "step": 27103 }, { "epoch": 0.832611433662028, "grad_norm": 0.3491918444633484, "learning_rate": 1.2626837043009206e-05, "loss": 0.5221, "step": 27104 }, { "epoch": 0.8326421527969773, "grad_norm": 0.37159398198127747, "learning_rate": 1.2626370734339426e-05, "loss": 0.5179, "step": 27105 }, { "epoch": 0.8326728719319264, "grad_norm": 0.40930408239364624, "learning_rate": 1.2625904419535578e-05, "loss": 0.5474, "step": 27106 }, { "epoch": 0.8327035910668755, "grad_norm": 0.4810718595981598, "learning_rate": 1.2625438098598743e-05, "loss": 0.5035, "step": 27107 }, { "epoch": 0.8327343102018248, "grad_norm": 0.37804627418518066, "learning_rate": 1.2624971771530015e-05, "loss": 0.542, "step": 27108 }, { "epoch": 0.8327650293367739, "grad_norm": 0.3892856240272522, "learning_rate": 1.2624505438330483e-05, "loss": 0.5759, "step": 27109 }, { "epoch": 0.832795748471723, "grad_norm": 0.39277002215385437, "learning_rate": 1.2624039099001233e-05, "loss": 0.5953, "step": 27110 }, { "epoch": 0.8328264676066722, "grad_norm": 0.4093296527862549, "learning_rate": 1.262357275354336e-05, "loss": 0.5489, "step": 27111 }, { "epoch": 0.8328571867416213, "grad_norm": 0.34326648712158203, "learning_rate": 1.2623106401957944e-05, "loss": 0.6046, "step": 27112 }, { "epoch": 0.8328879058765705, "grad_norm": 0.38786593079566956, "learning_rate": 1.2622640044246087e-05, "loss": 0.5319, "step": 27113 }, { "epoch": 0.8329186250115197, "grad_norm": 0.3663075566291809, "learning_rate": 1.2622173680408866e-05, "loss": 0.4859, "step": 27114 }, { "epoch": 0.8329493441464688, "grad_norm": 0.35774466395378113, "learning_rate": 1.2621707310447378e-05, "loss": 0.62, "step": 27115 }, { "epoch": 0.832980063281418, "grad_norm": 0.3664122223854065, "learning_rate": 1.2621240934362706e-05, "loss": 0.5744, "step": 27116 }, { "epoch": 0.8330107824163672, "grad_norm": 4.290198802947998, "learning_rate": 1.2620774552155948e-05, "loss": 0.6331, "step": 27117 }, { "epoch": 0.8330415015513163, "grad_norm": 0.36765918135643005, "learning_rate": 1.2620308163828183e-05, "loss": 0.522, "step": 27118 }, { "epoch": 0.8330722206862655, "grad_norm": 0.4206209182739258, "learning_rate": 1.2619841769380507e-05, "loss": 0.4776, "step": 27119 }, { "epoch": 0.8331029398212146, "grad_norm": 0.3859998881816864, "learning_rate": 1.2619375368814007e-05, "loss": 0.5331, "step": 27120 }, { "epoch": 0.8331336589561638, "grad_norm": 0.41310861706733704, "learning_rate": 1.2618908962129772e-05, "loss": 0.5296, "step": 27121 }, { "epoch": 0.833164378091113, "grad_norm": 0.32796263694763184, "learning_rate": 1.2618442549328896e-05, "loss": 0.5386, "step": 27122 }, { "epoch": 0.8331950972260621, "grad_norm": 0.3701478838920593, "learning_rate": 1.2617976130412462e-05, "loss": 0.5554, "step": 27123 }, { "epoch": 0.8332258163610112, "grad_norm": 0.3700091540813446, "learning_rate": 1.2617509705381563e-05, "loss": 0.5021, "step": 27124 }, { "epoch": 0.8332565354959605, "grad_norm": 0.46360135078430176, "learning_rate": 1.2617043274237283e-05, "loss": 0.6205, "step": 27125 }, { "epoch": 0.8332872546309096, "grad_norm": 0.3756280541419983, "learning_rate": 1.2616576836980721e-05, "loss": 0.5202, "step": 27126 }, { "epoch": 0.8333179737658587, "grad_norm": 0.4168359339237213, "learning_rate": 1.2616110393612961e-05, "loss": 0.5967, "step": 27127 }, { "epoch": 0.8333486929008079, "grad_norm": 0.6348683834075928, "learning_rate": 1.261564394413509e-05, "loss": 0.5627, "step": 27128 }, { "epoch": 0.833379412035757, "grad_norm": 0.37496253848075867, "learning_rate": 1.26151774885482e-05, "loss": 0.5323, "step": 27129 }, { "epoch": 0.8334101311707063, "grad_norm": 0.3558742105960846, "learning_rate": 1.2614711026853382e-05, "loss": 0.5375, "step": 27130 }, { "epoch": 0.8334408503056554, "grad_norm": 0.3294356167316437, "learning_rate": 1.2614244559051722e-05, "loss": 0.6085, "step": 27131 }, { "epoch": 0.8334715694406045, "grad_norm": 0.36664435267448425, "learning_rate": 1.2613778085144312e-05, "loss": 0.6222, "step": 27132 }, { "epoch": 0.8335022885755538, "grad_norm": 0.345175176858902, "learning_rate": 1.2613311605132242e-05, "loss": 0.4373, "step": 27133 }, { "epoch": 0.8335330077105029, "grad_norm": 0.39413002133369446, "learning_rate": 1.2612845119016596e-05, "loss": 0.5378, "step": 27134 }, { "epoch": 0.833563726845452, "grad_norm": 0.34135720133781433, "learning_rate": 1.2612378626798473e-05, "loss": 0.552, "step": 27135 }, { "epoch": 0.8335944459804012, "grad_norm": 0.32896044850349426, "learning_rate": 1.2611912128478953e-05, "loss": 0.5106, "step": 27136 }, { "epoch": 0.8336251651153503, "grad_norm": 0.3446107506752014, "learning_rate": 1.2611445624059133e-05, "loss": 0.5259, "step": 27137 }, { "epoch": 0.8336558842502995, "grad_norm": 0.38430890440940857, "learning_rate": 1.26109791135401e-05, "loss": 0.4631, "step": 27138 }, { "epoch": 0.8336866033852487, "grad_norm": 0.3615051209926605, "learning_rate": 1.2610512596922937e-05, "loss": 0.5059, "step": 27139 }, { "epoch": 0.8337173225201978, "grad_norm": 0.35369184613227844, "learning_rate": 1.2610046074208747e-05, "loss": 0.5044, "step": 27140 }, { "epoch": 0.833748041655147, "grad_norm": 0.35535696148872375, "learning_rate": 1.2609579545398607e-05, "loss": 0.5387, "step": 27141 }, { "epoch": 0.8337787607900962, "grad_norm": 0.365693062543869, "learning_rate": 1.2609113010493615e-05, "loss": 0.4936, "step": 27142 }, { "epoch": 0.8338094799250453, "grad_norm": 0.36691272258758545, "learning_rate": 1.2608646469494854e-05, "loss": 0.5632, "step": 27143 }, { "epoch": 0.8338401990599945, "grad_norm": 0.38679590821266174, "learning_rate": 1.2608179922403418e-05, "loss": 0.6109, "step": 27144 }, { "epoch": 0.8338709181949436, "grad_norm": 0.3771839439868927, "learning_rate": 1.2607713369220398e-05, "loss": 0.6022, "step": 27145 }, { "epoch": 0.8339016373298928, "grad_norm": 0.36002257466316223, "learning_rate": 1.260724680994688e-05, "loss": 0.5277, "step": 27146 }, { "epoch": 0.833932356464842, "grad_norm": 0.3957432508468628, "learning_rate": 1.2606780244583953e-05, "loss": 0.5901, "step": 27147 }, { "epoch": 0.8339630755997911, "grad_norm": 0.38463887572288513, "learning_rate": 1.2606313673132711e-05, "loss": 0.5822, "step": 27148 }, { "epoch": 0.8339937947347402, "grad_norm": 0.37445613741874695, "learning_rate": 1.2605847095594244e-05, "loss": 0.5577, "step": 27149 }, { "epoch": 0.8340245138696895, "grad_norm": 0.35003194212913513, "learning_rate": 1.2605380511969634e-05, "loss": 0.5155, "step": 27150 }, { "epoch": 0.8340552330046386, "grad_norm": 0.3339172303676605, "learning_rate": 1.2604913922259979e-05, "loss": 0.4764, "step": 27151 }, { "epoch": 0.8340859521395877, "grad_norm": 0.35432130098342896, "learning_rate": 1.2604447326466362e-05, "loss": 0.5585, "step": 27152 }, { "epoch": 0.8341166712745369, "grad_norm": 0.3885953724384308, "learning_rate": 1.260398072458988e-05, "loss": 0.5477, "step": 27153 }, { "epoch": 0.834147390409486, "grad_norm": 0.35195234417915344, "learning_rate": 1.2603514116631616e-05, "loss": 0.4757, "step": 27154 }, { "epoch": 0.8341781095444353, "grad_norm": 0.9055678844451904, "learning_rate": 1.2603047502592667e-05, "loss": 0.5027, "step": 27155 }, { "epoch": 0.8342088286793844, "grad_norm": 0.3394427001476288, "learning_rate": 1.2602580882474116e-05, "loss": 0.5135, "step": 27156 }, { "epoch": 0.8342395478143335, "grad_norm": 0.34642505645751953, "learning_rate": 1.2602114256277054e-05, "loss": 0.587, "step": 27157 }, { "epoch": 0.8342702669492827, "grad_norm": 0.4406232237815857, "learning_rate": 1.2601647624002576e-05, "loss": 0.5509, "step": 27158 }, { "epoch": 0.8343009860842319, "grad_norm": 0.42881518602371216, "learning_rate": 1.2601180985651764e-05, "loss": 0.5236, "step": 27159 }, { "epoch": 0.834331705219181, "grad_norm": 0.35070428252220154, "learning_rate": 1.2600714341225718e-05, "loss": 0.4504, "step": 27160 }, { "epoch": 0.8343624243541302, "grad_norm": 0.36276328563690186, "learning_rate": 1.2600247690725518e-05, "loss": 0.5395, "step": 27161 }, { "epoch": 0.8343931434890793, "grad_norm": 0.3570937514305115, "learning_rate": 1.259978103415226e-05, "loss": 0.5883, "step": 27162 }, { "epoch": 0.8344238626240285, "grad_norm": 0.3457213342189789, "learning_rate": 1.2599314371507033e-05, "loss": 0.5012, "step": 27163 }, { "epoch": 0.8344545817589777, "grad_norm": 0.3574290871620178, "learning_rate": 1.2598847702790921e-05, "loss": 0.5632, "step": 27164 }, { "epoch": 0.8344853008939268, "grad_norm": 0.37608206272125244, "learning_rate": 1.2598381028005023e-05, "loss": 0.5045, "step": 27165 }, { "epoch": 0.834516020028876, "grad_norm": 0.41430267691612244, "learning_rate": 1.2597914347150421e-05, "loss": 0.5425, "step": 27166 }, { "epoch": 0.8345467391638252, "grad_norm": 0.39588865637779236, "learning_rate": 1.2597447660228212e-05, "loss": 0.6171, "step": 27167 }, { "epoch": 0.8345774582987743, "grad_norm": 0.3423202335834503, "learning_rate": 1.2596980967239479e-05, "loss": 0.5171, "step": 27168 }, { "epoch": 0.8346081774337235, "grad_norm": 0.3583185076713562, "learning_rate": 1.259651426818532e-05, "loss": 0.5068, "step": 27169 }, { "epoch": 0.8346388965686726, "grad_norm": 0.4065319895744324, "learning_rate": 1.2596047563066818e-05, "loss": 0.537, "step": 27170 }, { "epoch": 0.8346696157036217, "grad_norm": 0.3794134557247162, "learning_rate": 1.2595580851885066e-05, "loss": 0.5299, "step": 27171 }, { "epoch": 0.834700334838571, "grad_norm": 0.39265239238739014, "learning_rate": 1.2595114134641151e-05, "loss": 0.5511, "step": 27172 }, { "epoch": 0.8347310539735201, "grad_norm": 0.3983846604824066, "learning_rate": 1.259464741133617e-05, "loss": 0.5187, "step": 27173 }, { "epoch": 0.8347617731084692, "grad_norm": 0.37223607301712036, "learning_rate": 1.2594180681971209e-05, "loss": 0.5409, "step": 27174 }, { "epoch": 0.8347924922434184, "grad_norm": 0.34981220960617065, "learning_rate": 1.2593713946547352e-05, "loss": 0.5385, "step": 27175 }, { "epoch": 0.8348232113783676, "grad_norm": 0.3948376178741455, "learning_rate": 1.25932472050657e-05, "loss": 0.5359, "step": 27176 }, { "epoch": 0.8348539305133168, "grad_norm": 0.3573882281780243, "learning_rate": 1.2592780457527334e-05, "loss": 0.5739, "step": 27177 }, { "epoch": 0.8348846496482659, "grad_norm": 0.3786199688911438, "learning_rate": 1.2592313703933355e-05, "loss": 0.5752, "step": 27178 }, { "epoch": 0.834915368783215, "grad_norm": 0.41349610686302185, "learning_rate": 1.259184694428484e-05, "loss": 0.6043, "step": 27179 }, { "epoch": 0.8349460879181643, "grad_norm": 0.35898005962371826, "learning_rate": 1.2591380178582888e-05, "loss": 0.4856, "step": 27180 }, { "epoch": 0.8349768070531134, "grad_norm": 0.4040217101573944, "learning_rate": 1.2590913406828587e-05, "loss": 0.599, "step": 27181 }, { "epoch": 0.8350075261880625, "grad_norm": 0.36364856362342834, "learning_rate": 1.2590446629023026e-05, "loss": 0.6408, "step": 27182 }, { "epoch": 0.8350382453230117, "grad_norm": 0.3400316536426544, "learning_rate": 1.2589979845167297e-05, "loss": 0.5294, "step": 27183 }, { "epoch": 0.8350689644579609, "grad_norm": 0.387386292219162, "learning_rate": 1.2589513055262485e-05, "loss": 0.526, "step": 27184 }, { "epoch": 0.83509968359291, "grad_norm": 0.32111939787864685, "learning_rate": 1.2589046259309689e-05, "loss": 0.5507, "step": 27185 }, { "epoch": 0.8351304027278592, "grad_norm": 0.37117239832878113, "learning_rate": 1.2588579457309992e-05, "loss": 0.585, "step": 27186 }, { "epoch": 0.8351611218628083, "grad_norm": 0.40193602442741394, "learning_rate": 1.258811264926449e-05, "loss": 0.5938, "step": 27187 }, { "epoch": 0.8351918409977575, "grad_norm": 0.367811918258667, "learning_rate": 1.2587645835174267e-05, "loss": 0.587, "step": 27188 }, { "epoch": 0.8352225601327067, "grad_norm": 0.33812907338142395, "learning_rate": 1.258717901504042e-05, "loss": 0.4802, "step": 27189 }, { "epoch": 0.8352532792676558, "grad_norm": 0.3662867248058319, "learning_rate": 1.2586712188864034e-05, "loss": 0.517, "step": 27190 }, { "epoch": 0.835283998402605, "grad_norm": 0.3785248398780823, "learning_rate": 1.25862453566462e-05, "loss": 0.5511, "step": 27191 }, { "epoch": 0.8353147175375542, "grad_norm": 0.3953835666179657, "learning_rate": 1.2585778518388011e-05, "loss": 0.4806, "step": 27192 }, { "epoch": 0.8353454366725033, "grad_norm": 0.3635611832141876, "learning_rate": 1.2585311674090552e-05, "loss": 0.5604, "step": 27193 }, { "epoch": 0.8353761558074525, "grad_norm": 0.3480069935321808, "learning_rate": 1.258484482375492e-05, "loss": 0.5535, "step": 27194 }, { "epoch": 0.8354068749424016, "grad_norm": 0.37136948108673096, "learning_rate": 1.2584377967382202e-05, "loss": 0.5737, "step": 27195 }, { "epoch": 0.8354375940773507, "grad_norm": 0.38654592633247375, "learning_rate": 1.2583911104973488e-05, "loss": 0.5577, "step": 27196 }, { "epoch": 0.8354683132123, "grad_norm": 0.37401053309440613, "learning_rate": 1.2583444236529872e-05, "loss": 0.5535, "step": 27197 }, { "epoch": 0.8354990323472491, "grad_norm": 0.3488219678401947, "learning_rate": 1.2582977362052438e-05, "loss": 0.532, "step": 27198 }, { "epoch": 0.8355297514821982, "grad_norm": 0.32970908284187317, "learning_rate": 1.2582510481542282e-05, "loss": 0.5795, "step": 27199 }, { "epoch": 0.8355604706171474, "grad_norm": 0.35204964876174927, "learning_rate": 1.2582043595000491e-05, "loss": 0.5623, "step": 27200 }, { "epoch": 0.8355911897520966, "grad_norm": 0.3457474410533905, "learning_rate": 1.258157670242816e-05, "loss": 0.5489, "step": 27201 }, { "epoch": 0.8356219088870458, "grad_norm": 0.3735818564891815, "learning_rate": 1.2581109803826373e-05, "loss": 0.5485, "step": 27202 }, { "epoch": 0.8356526280219949, "grad_norm": 0.37982177734375, "learning_rate": 1.2580642899196226e-05, "loss": 0.5195, "step": 27203 }, { "epoch": 0.835683347156944, "grad_norm": 0.42487820982933044, "learning_rate": 1.2580175988538805e-05, "loss": 0.5141, "step": 27204 }, { "epoch": 0.8357140662918933, "grad_norm": 0.332711398601532, "learning_rate": 1.2579709071855205e-05, "loss": 0.4978, "step": 27205 }, { "epoch": 0.8357447854268424, "grad_norm": 0.3599708676338196, "learning_rate": 1.2579242149146511e-05, "loss": 0.5756, "step": 27206 }, { "epoch": 0.8357755045617915, "grad_norm": 0.5837527513504028, "learning_rate": 1.257877522041382e-05, "loss": 0.4422, "step": 27207 }, { "epoch": 0.8358062236967407, "grad_norm": 0.33339303731918335, "learning_rate": 1.257830828565822e-05, "loss": 0.4924, "step": 27208 }, { "epoch": 0.8358369428316899, "grad_norm": 0.3718223571777344, "learning_rate": 1.2577841344880798e-05, "loss": 0.4919, "step": 27209 }, { "epoch": 0.835867661966639, "grad_norm": 0.5367028117179871, "learning_rate": 1.257737439808265e-05, "loss": 0.4941, "step": 27210 }, { "epoch": 0.8358983811015882, "grad_norm": 0.3740280866622925, "learning_rate": 1.2576907445264862e-05, "loss": 0.5383, "step": 27211 }, { "epoch": 0.8359291002365373, "grad_norm": 0.3890063464641571, "learning_rate": 1.2576440486428524e-05, "loss": 0.6129, "step": 27212 }, { "epoch": 0.8359598193714864, "grad_norm": 0.3525165617465973, "learning_rate": 1.2575973521574734e-05, "loss": 0.5635, "step": 27213 }, { "epoch": 0.8359905385064357, "grad_norm": 0.3547230362892151, "learning_rate": 1.2575506550704578e-05, "loss": 0.5419, "step": 27214 }, { "epoch": 0.8360212576413848, "grad_norm": 0.3752461075782776, "learning_rate": 1.2575039573819145e-05, "loss": 0.6018, "step": 27215 }, { "epoch": 0.836051976776334, "grad_norm": 0.3400513231754303, "learning_rate": 1.2574572590919529e-05, "loss": 0.5319, "step": 27216 }, { "epoch": 0.8360826959112831, "grad_norm": 0.3762591481208801, "learning_rate": 1.2574105602006816e-05, "loss": 0.4963, "step": 27217 }, { "epoch": 0.8361134150462323, "grad_norm": 0.3747873306274414, "learning_rate": 1.2573638607082101e-05, "loss": 0.516, "step": 27218 }, { "epoch": 0.8361441341811815, "grad_norm": 0.3683248460292816, "learning_rate": 1.2573171606146475e-05, "loss": 0.5309, "step": 27219 }, { "epoch": 0.8361748533161306, "grad_norm": 0.3562697172164917, "learning_rate": 1.2572704599201024e-05, "loss": 0.532, "step": 27220 }, { "epoch": 0.8362055724510797, "grad_norm": 0.36692631244659424, "learning_rate": 1.2572237586246845e-05, "loss": 0.544, "step": 27221 }, { "epoch": 0.836236291586029, "grad_norm": 0.35064005851745605, "learning_rate": 1.2571770567285022e-05, "loss": 0.5773, "step": 27222 }, { "epoch": 0.8362670107209781, "grad_norm": 0.32848867774009705, "learning_rate": 1.2571303542316654e-05, "loss": 0.5242, "step": 27223 }, { "epoch": 0.8362977298559272, "grad_norm": 0.3873637914657593, "learning_rate": 1.2570836511342821e-05, "loss": 0.4989, "step": 27224 }, { "epoch": 0.8363284489908764, "grad_norm": 0.4216521382331848, "learning_rate": 1.2570369474364625e-05, "loss": 0.5718, "step": 27225 }, { "epoch": 0.8363591681258256, "grad_norm": 0.3381814658641815, "learning_rate": 1.256990243138315e-05, "loss": 0.5864, "step": 27226 }, { "epoch": 0.8363898872607748, "grad_norm": 0.4319554269313812, "learning_rate": 1.2569435382399486e-05, "loss": 0.5909, "step": 27227 }, { "epoch": 0.8364206063957239, "grad_norm": 0.3411705493927002, "learning_rate": 1.256896832741473e-05, "loss": 0.5046, "step": 27228 }, { "epoch": 0.836451325530673, "grad_norm": 0.35334405303001404, "learning_rate": 1.2568501266429964e-05, "loss": 0.5675, "step": 27229 }, { "epoch": 0.8364820446656223, "grad_norm": 0.36316514015197754, "learning_rate": 1.2568034199446286e-05, "loss": 0.5484, "step": 27230 }, { "epoch": 0.8365127638005714, "grad_norm": 0.39818838238716125, "learning_rate": 1.2567567126464788e-05, "loss": 0.5355, "step": 27231 }, { "epoch": 0.8365434829355205, "grad_norm": 0.37285131216049194, "learning_rate": 1.2567100047486553e-05, "loss": 0.6089, "step": 27232 }, { "epoch": 0.8365742020704697, "grad_norm": 0.38640958070755005, "learning_rate": 1.2566632962512681e-05, "loss": 0.4674, "step": 27233 }, { "epoch": 0.8366049212054188, "grad_norm": 0.3862704634666443, "learning_rate": 1.2566165871544254e-05, "loss": 0.609, "step": 27234 }, { "epoch": 0.836635640340368, "grad_norm": 0.37572693824768066, "learning_rate": 1.256569877458237e-05, "loss": 0.5039, "step": 27235 }, { "epoch": 0.8366663594753172, "grad_norm": 0.35194095969200134, "learning_rate": 1.2565231671628117e-05, "loss": 0.5512, "step": 27236 }, { "epoch": 0.8366970786102663, "grad_norm": 0.4492259919643402, "learning_rate": 1.2564764562682588e-05, "loss": 0.5446, "step": 27237 }, { "epoch": 0.8367277977452154, "grad_norm": 0.31157299876213074, "learning_rate": 1.2564297447746869e-05, "loss": 0.5181, "step": 27238 }, { "epoch": 0.8367585168801647, "grad_norm": 0.3606933355331421, "learning_rate": 1.2563830326822057e-05, "loss": 0.5806, "step": 27239 }, { "epoch": 0.8367892360151138, "grad_norm": 0.34865203499794006, "learning_rate": 1.2563363199909238e-05, "loss": 0.5989, "step": 27240 }, { "epoch": 0.836819955150063, "grad_norm": 0.3872257471084595, "learning_rate": 1.2562896067009507e-05, "loss": 0.6404, "step": 27241 }, { "epoch": 0.8368506742850121, "grad_norm": 0.34300297498703003, "learning_rate": 1.2562428928123953e-05, "loss": 0.5233, "step": 27242 }, { "epoch": 0.8368813934199613, "grad_norm": 0.3609107434749603, "learning_rate": 1.2561961783253666e-05, "loss": 0.5151, "step": 27243 }, { "epoch": 0.8369121125549105, "grad_norm": 0.3554883599281311, "learning_rate": 1.256149463239974e-05, "loss": 0.5541, "step": 27244 }, { "epoch": 0.8369428316898596, "grad_norm": 0.4559686481952667, "learning_rate": 1.2561027475563263e-05, "loss": 0.52, "step": 27245 }, { "epoch": 0.8369735508248087, "grad_norm": 0.3353393077850342, "learning_rate": 1.2560560312745329e-05, "loss": 0.6174, "step": 27246 }, { "epoch": 0.837004269959758, "grad_norm": 0.3315705358982086, "learning_rate": 1.2560093143947025e-05, "loss": 0.501, "step": 27247 }, { "epoch": 0.8370349890947071, "grad_norm": 0.35347384214401245, "learning_rate": 1.2559625969169444e-05, "loss": 0.5362, "step": 27248 }, { "epoch": 0.8370657082296562, "grad_norm": 0.39874184131622314, "learning_rate": 1.2559158788413682e-05, "loss": 0.6031, "step": 27249 }, { "epoch": 0.8370964273646054, "grad_norm": 0.34841418266296387, "learning_rate": 1.2558691601680824e-05, "loss": 0.494, "step": 27250 }, { "epoch": 0.8371271464995546, "grad_norm": 0.36195993423461914, "learning_rate": 1.2558224408971964e-05, "loss": 0.4887, "step": 27251 }, { "epoch": 0.8371578656345038, "grad_norm": 0.36107882857322693, "learning_rate": 1.2557757210288188e-05, "loss": 0.5306, "step": 27252 }, { "epoch": 0.8371885847694529, "grad_norm": 0.42881321907043457, "learning_rate": 1.2557290005630596e-05, "loss": 0.5621, "step": 27253 }, { "epoch": 0.837219303904402, "grad_norm": 0.39872342348098755, "learning_rate": 1.2556822795000272e-05, "loss": 0.5227, "step": 27254 }, { "epoch": 0.8372500230393513, "grad_norm": 0.411713570356369, "learning_rate": 1.2556355578398312e-05, "loss": 0.59, "step": 27255 }, { "epoch": 0.8372807421743004, "grad_norm": 0.37604600191116333, "learning_rate": 1.2555888355825803e-05, "loss": 0.5874, "step": 27256 }, { "epoch": 0.8373114613092495, "grad_norm": 0.41675612330436707, "learning_rate": 1.255542112728384e-05, "loss": 0.5329, "step": 27257 }, { "epoch": 0.8373421804441987, "grad_norm": 0.3506227731704712, "learning_rate": 1.2554953892773511e-05, "loss": 0.5755, "step": 27258 }, { "epoch": 0.8373728995791478, "grad_norm": 0.413656085729599, "learning_rate": 1.255448665229591e-05, "loss": 0.6234, "step": 27259 }, { "epoch": 0.837403618714097, "grad_norm": 0.36648890376091003, "learning_rate": 1.2554019405852129e-05, "loss": 0.5403, "step": 27260 }, { "epoch": 0.8374343378490462, "grad_norm": 0.3683713972568512, "learning_rate": 1.2553552153443254e-05, "loss": 0.5302, "step": 27261 }, { "epoch": 0.8374650569839953, "grad_norm": 0.35544779896736145, "learning_rate": 1.255308489507038e-05, "loss": 0.4855, "step": 27262 }, { "epoch": 0.8374957761189445, "grad_norm": 0.3506776988506317, "learning_rate": 1.2552617630734597e-05, "loss": 0.5792, "step": 27263 }, { "epoch": 0.8375264952538937, "grad_norm": 0.42464935779571533, "learning_rate": 1.2552150360437001e-05, "loss": 0.6403, "step": 27264 }, { "epoch": 0.8375572143888428, "grad_norm": 0.3401305079460144, "learning_rate": 1.2551683084178677e-05, "loss": 0.5161, "step": 27265 }, { "epoch": 0.837587933523792, "grad_norm": 0.37425580620765686, "learning_rate": 1.2551215801960717e-05, "loss": 0.5923, "step": 27266 }, { "epoch": 0.8376186526587411, "grad_norm": 0.3486095666885376, "learning_rate": 1.2550748513784217e-05, "loss": 0.6057, "step": 27267 }, { "epoch": 0.8376493717936903, "grad_norm": 0.39410898089408875, "learning_rate": 1.2550281219650265e-05, "loss": 0.5536, "step": 27268 }, { "epoch": 0.8376800909286395, "grad_norm": 0.38461628556251526, "learning_rate": 1.2549813919559952e-05, "loss": 0.5555, "step": 27269 }, { "epoch": 0.8377108100635886, "grad_norm": 0.37204709649086, "learning_rate": 1.2549346613514372e-05, "loss": 0.6098, "step": 27270 }, { "epoch": 0.8377415291985377, "grad_norm": 0.33690574765205383, "learning_rate": 1.2548879301514615e-05, "loss": 0.5359, "step": 27271 }, { "epoch": 0.837772248333487, "grad_norm": 0.34674566984176636, "learning_rate": 1.2548411983561772e-05, "loss": 0.517, "step": 27272 }, { "epoch": 0.8378029674684361, "grad_norm": 0.35266607999801636, "learning_rate": 1.2547944659656936e-05, "loss": 0.532, "step": 27273 }, { "epoch": 0.8378336866033852, "grad_norm": 0.36461493372917175, "learning_rate": 1.2547477329801194e-05, "loss": 0.525, "step": 27274 }, { "epoch": 0.8378644057383344, "grad_norm": 0.4328823387622833, "learning_rate": 1.2547009993995645e-05, "loss": 0.5665, "step": 27275 }, { "epoch": 0.8378951248732835, "grad_norm": 0.3448951840400696, "learning_rate": 1.2546542652241374e-05, "loss": 0.482, "step": 27276 }, { "epoch": 0.8379258440082328, "grad_norm": 0.43821653723716736, "learning_rate": 1.2546075304539476e-05, "loss": 0.5772, "step": 27277 }, { "epoch": 0.8379565631431819, "grad_norm": 0.3973292410373688, "learning_rate": 1.2545607950891041e-05, "loss": 0.5127, "step": 27278 }, { "epoch": 0.837987282278131, "grad_norm": 0.6576707363128662, "learning_rate": 1.2545140591297159e-05, "loss": 0.6096, "step": 27279 }, { "epoch": 0.8380180014130802, "grad_norm": 0.40060287714004517, "learning_rate": 1.2544673225758926e-05, "loss": 0.5575, "step": 27280 }, { "epoch": 0.8380487205480294, "grad_norm": 0.39765220880508423, "learning_rate": 1.2544205854277427e-05, "loss": 0.521, "step": 27281 }, { "epoch": 0.8380794396829785, "grad_norm": 0.3791797161102295, "learning_rate": 1.2543738476853761e-05, "loss": 0.5163, "step": 27282 }, { "epoch": 0.8381101588179277, "grad_norm": 0.4119715988636017, "learning_rate": 1.2543271093489015e-05, "loss": 0.5958, "step": 27283 }, { "epoch": 0.8381408779528768, "grad_norm": 0.34992966055870056, "learning_rate": 1.2542803704184282e-05, "loss": 0.5296, "step": 27284 }, { "epoch": 0.838171597087826, "grad_norm": 0.32244953513145447, "learning_rate": 1.2542336308940653e-05, "loss": 0.484, "step": 27285 }, { "epoch": 0.8382023162227752, "grad_norm": 0.3373901844024658, "learning_rate": 1.2541868907759218e-05, "loss": 0.4698, "step": 27286 }, { "epoch": 0.8382330353577243, "grad_norm": 0.37629565596580505, "learning_rate": 1.2541401500641074e-05, "loss": 0.5838, "step": 27287 }, { "epoch": 0.8382637544926735, "grad_norm": 0.33786267042160034, "learning_rate": 1.2540934087587308e-05, "loss": 0.5574, "step": 27288 }, { "epoch": 0.8382944736276227, "grad_norm": 0.3478632867336273, "learning_rate": 1.2540466668599014e-05, "loss": 0.5861, "step": 27289 }, { "epoch": 0.8383251927625718, "grad_norm": 0.38321202993392944, "learning_rate": 1.2539999243677282e-05, "loss": 0.5556, "step": 27290 }, { "epoch": 0.838355911897521, "grad_norm": 0.3476204574108124, "learning_rate": 1.2539531812823205e-05, "loss": 0.5372, "step": 27291 }, { "epoch": 0.8383866310324701, "grad_norm": 0.4056239128112793, "learning_rate": 1.2539064376037873e-05, "loss": 0.5995, "step": 27292 }, { "epoch": 0.8384173501674193, "grad_norm": 0.3640434443950653, "learning_rate": 1.253859693332238e-05, "loss": 0.5696, "step": 27293 }, { "epoch": 0.8384480693023685, "grad_norm": 0.3955352008342743, "learning_rate": 1.2538129484677816e-05, "loss": 0.5244, "step": 27294 }, { "epoch": 0.8384787884373176, "grad_norm": 0.36415204405784607, "learning_rate": 1.2537662030105273e-05, "loss": 0.478, "step": 27295 }, { "epoch": 0.8385095075722667, "grad_norm": 0.36376306414604187, "learning_rate": 1.2537194569605845e-05, "loss": 0.6201, "step": 27296 }, { "epoch": 0.838540226707216, "grad_norm": 0.3941848576068878, "learning_rate": 1.2536727103180617e-05, "loss": 0.6175, "step": 27297 }, { "epoch": 0.8385709458421651, "grad_norm": 0.5055290460586548, "learning_rate": 1.253625963083069e-05, "loss": 0.5875, "step": 27298 }, { "epoch": 0.8386016649771142, "grad_norm": 0.37365850806236267, "learning_rate": 1.2535792152557148e-05, "loss": 0.6422, "step": 27299 }, { "epoch": 0.8386323841120634, "grad_norm": 0.35252895951271057, "learning_rate": 1.253532466836109e-05, "loss": 0.5295, "step": 27300 }, { "epoch": 0.8386631032470125, "grad_norm": 0.34480997920036316, "learning_rate": 1.2534857178243603e-05, "loss": 0.5755, "step": 27301 }, { "epoch": 0.8386938223819618, "grad_norm": 0.3659254014492035, "learning_rate": 1.253438968220578e-05, "loss": 0.5531, "step": 27302 }, { "epoch": 0.8387245415169109, "grad_norm": 0.3705558776855469, "learning_rate": 1.2533922180248712e-05, "loss": 0.5273, "step": 27303 }, { "epoch": 0.83875526065186, "grad_norm": 0.34082502126693726, "learning_rate": 1.2533454672373494e-05, "loss": 0.6321, "step": 27304 }, { "epoch": 0.8387859797868092, "grad_norm": 0.35829079151153564, "learning_rate": 1.2532987158581213e-05, "loss": 0.5544, "step": 27305 }, { "epoch": 0.8388166989217584, "grad_norm": 0.4071875810623169, "learning_rate": 1.2532519638872965e-05, "loss": 0.5885, "step": 27306 }, { "epoch": 0.8388474180567075, "grad_norm": 0.35953786969184875, "learning_rate": 1.253205211324984e-05, "loss": 0.4634, "step": 27307 }, { "epoch": 0.8388781371916567, "grad_norm": 0.41640591621398926, "learning_rate": 1.253158458171293e-05, "loss": 0.5271, "step": 27308 }, { "epoch": 0.8389088563266058, "grad_norm": 0.35818150639533997, "learning_rate": 1.253111704426333e-05, "loss": 0.5257, "step": 27309 }, { "epoch": 0.838939575461555, "grad_norm": 0.3595513105392456, "learning_rate": 1.2530649500902126e-05, "loss": 0.6597, "step": 27310 }, { "epoch": 0.8389702945965042, "grad_norm": 0.3898240029811859, "learning_rate": 1.2530181951630415e-05, "loss": 0.5742, "step": 27311 }, { "epoch": 0.8390010137314533, "grad_norm": 0.3992443382740021, "learning_rate": 1.2529714396449288e-05, "loss": 0.6045, "step": 27312 }, { "epoch": 0.8390317328664025, "grad_norm": 0.37491995096206665, "learning_rate": 1.2529246835359837e-05, "loss": 0.6088, "step": 27313 }, { "epoch": 0.8390624520013517, "grad_norm": 0.37493255734443665, "learning_rate": 1.2528779268363152e-05, "loss": 0.5324, "step": 27314 }, { "epoch": 0.8390931711363008, "grad_norm": 0.3594975471496582, "learning_rate": 1.2528311695460325e-05, "loss": 0.5587, "step": 27315 }, { "epoch": 0.83912389027125, "grad_norm": 0.3621596395969391, "learning_rate": 1.2527844116652452e-05, "loss": 0.5697, "step": 27316 }, { "epoch": 0.8391546094061991, "grad_norm": 0.3881145417690277, "learning_rate": 1.2527376531940618e-05, "loss": 0.517, "step": 27317 }, { "epoch": 0.8391853285411482, "grad_norm": 0.3757256865501404, "learning_rate": 1.2526908941325926e-05, "loss": 0.5236, "step": 27318 }, { "epoch": 0.8392160476760975, "grad_norm": 0.3946109116077423, "learning_rate": 1.2526441344809457e-05, "loss": 0.6921, "step": 27319 }, { "epoch": 0.8392467668110466, "grad_norm": 0.35905736684799194, "learning_rate": 1.252597374239231e-05, "loss": 0.4885, "step": 27320 }, { "epoch": 0.8392774859459957, "grad_norm": 0.39490798115730286, "learning_rate": 1.2525506134075573e-05, "loss": 0.6803, "step": 27321 }, { "epoch": 0.839308205080945, "grad_norm": 0.39202913641929626, "learning_rate": 1.2525038519860342e-05, "loss": 0.5878, "step": 27322 }, { "epoch": 0.8393389242158941, "grad_norm": 0.3626631796360016, "learning_rate": 1.2524570899747706e-05, "loss": 0.5796, "step": 27323 }, { "epoch": 0.8393696433508432, "grad_norm": 0.35929930210113525, "learning_rate": 1.2524103273738759e-05, "loss": 0.572, "step": 27324 }, { "epoch": 0.8394003624857924, "grad_norm": 0.40476471185684204, "learning_rate": 1.2523635641834593e-05, "loss": 0.5265, "step": 27325 }, { "epoch": 0.8394310816207415, "grad_norm": 0.3491535484790802, "learning_rate": 1.2523168004036296e-05, "loss": 0.5673, "step": 27326 }, { "epoch": 0.8394618007556908, "grad_norm": 0.35581350326538086, "learning_rate": 1.2522700360344971e-05, "loss": 0.5335, "step": 27327 }, { "epoch": 0.8394925198906399, "grad_norm": 0.4850175380706787, "learning_rate": 1.2522232710761697e-05, "loss": 0.5953, "step": 27328 }, { "epoch": 0.839523239025589, "grad_norm": 0.3805260956287384, "learning_rate": 1.2521765055287575e-05, "loss": 0.5872, "step": 27329 }, { "epoch": 0.8395539581605382, "grad_norm": 0.3627995550632477, "learning_rate": 1.2521297393923697e-05, "loss": 0.5267, "step": 27330 }, { "epoch": 0.8395846772954874, "grad_norm": 0.35500285029411316, "learning_rate": 1.2520829726671146e-05, "loss": 0.4588, "step": 27331 }, { "epoch": 0.8396153964304365, "grad_norm": 0.40164029598236084, "learning_rate": 1.2520362053531028e-05, "loss": 0.5688, "step": 27332 }, { "epoch": 0.8396461155653857, "grad_norm": 0.36194875836372375, "learning_rate": 1.2519894374504424e-05, "loss": 0.5836, "step": 27333 }, { "epoch": 0.8396768347003348, "grad_norm": 0.35290655493736267, "learning_rate": 1.2519426689592433e-05, "loss": 0.4711, "step": 27334 }, { "epoch": 0.839707553835284, "grad_norm": 0.3932843804359436, "learning_rate": 1.2518958998796145e-05, "loss": 0.6345, "step": 27335 }, { "epoch": 0.8397382729702332, "grad_norm": 0.371915340423584, "learning_rate": 1.2518491302116651e-05, "loss": 0.6174, "step": 27336 }, { "epoch": 0.8397689921051823, "grad_norm": 0.36271932721138, "learning_rate": 1.2518023599555045e-05, "loss": 0.5443, "step": 27337 }, { "epoch": 0.8397997112401315, "grad_norm": 0.3493892550468445, "learning_rate": 1.251755589111242e-05, "loss": 0.4735, "step": 27338 }, { "epoch": 0.8398304303750806, "grad_norm": 0.34140512347221375, "learning_rate": 1.2517088176789867e-05, "loss": 0.4272, "step": 27339 }, { "epoch": 0.8398611495100298, "grad_norm": 0.41195449233055115, "learning_rate": 1.2516620456588475e-05, "loss": 0.4455, "step": 27340 }, { "epoch": 0.839891868644979, "grad_norm": 0.37157997488975525, "learning_rate": 1.2516152730509346e-05, "loss": 0.5284, "step": 27341 }, { "epoch": 0.8399225877799281, "grad_norm": 0.37312525510787964, "learning_rate": 1.2515684998553561e-05, "loss": 0.611, "step": 27342 }, { "epoch": 0.8399533069148772, "grad_norm": 0.3409443497657776, "learning_rate": 1.2515217260722223e-05, "loss": 0.5522, "step": 27343 }, { "epoch": 0.8399840260498265, "grad_norm": 0.3357234001159668, "learning_rate": 1.2514749517016419e-05, "loss": 0.6027, "step": 27344 }, { "epoch": 0.8400147451847756, "grad_norm": 0.3447968363761902, "learning_rate": 1.251428176743724e-05, "loss": 0.4346, "step": 27345 }, { "epoch": 0.8400454643197247, "grad_norm": 0.33905208110809326, "learning_rate": 1.251381401198578e-05, "loss": 0.5759, "step": 27346 }, { "epoch": 0.8400761834546739, "grad_norm": 0.3761582374572754, "learning_rate": 1.2513346250663134e-05, "loss": 0.5133, "step": 27347 }, { "epoch": 0.8401069025896231, "grad_norm": 0.3467869162559509, "learning_rate": 1.251287848347039e-05, "loss": 0.536, "step": 27348 }, { "epoch": 0.8401376217245722, "grad_norm": 0.3745793402194977, "learning_rate": 1.2512410710408642e-05, "loss": 0.533, "step": 27349 }, { "epoch": 0.8401683408595214, "grad_norm": 0.3932659327983856, "learning_rate": 1.2511942931478989e-05, "loss": 0.5799, "step": 27350 }, { "epoch": 0.8401990599944705, "grad_norm": 0.3524722158908844, "learning_rate": 1.2511475146682512e-05, "loss": 0.5155, "step": 27351 }, { "epoch": 0.8402297791294198, "grad_norm": 0.36737334728240967, "learning_rate": 1.2511007356020311e-05, "loss": 0.5256, "step": 27352 }, { "epoch": 0.8402604982643689, "grad_norm": 0.36616435647010803, "learning_rate": 1.2510539559493478e-05, "loss": 0.5644, "step": 27353 }, { "epoch": 0.840291217399318, "grad_norm": 0.3424672484397888, "learning_rate": 1.2510071757103103e-05, "loss": 0.6152, "step": 27354 }, { "epoch": 0.8403219365342672, "grad_norm": 0.5065821409225464, "learning_rate": 1.250960394885028e-05, "loss": 0.5329, "step": 27355 }, { "epoch": 0.8403526556692164, "grad_norm": 0.3999069035053253, "learning_rate": 1.2509136134736103e-05, "loss": 0.5879, "step": 27356 }, { "epoch": 0.8403833748041655, "grad_norm": 0.39688003063201904, "learning_rate": 1.2508668314761665e-05, "loss": 0.5411, "step": 27357 }, { "epoch": 0.8404140939391147, "grad_norm": 0.3372783064842224, "learning_rate": 1.250820048892805e-05, "loss": 0.4546, "step": 27358 }, { "epoch": 0.8404448130740638, "grad_norm": 0.3482044041156769, "learning_rate": 1.2507732657236364e-05, "loss": 0.5884, "step": 27359 }, { "epoch": 0.8404755322090129, "grad_norm": 0.400973379611969, "learning_rate": 1.2507264819687693e-05, "loss": 0.6469, "step": 27360 }, { "epoch": 0.8405062513439622, "grad_norm": 0.3507072329521179, "learning_rate": 1.2506796976283129e-05, "loss": 0.5573, "step": 27361 }, { "epoch": 0.8405369704789113, "grad_norm": 0.3534459173679352, "learning_rate": 1.2506329127023765e-05, "loss": 0.525, "step": 27362 }, { "epoch": 0.8405676896138605, "grad_norm": 0.48441699147224426, "learning_rate": 1.2505861271910695e-05, "loss": 0.636, "step": 27363 }, { "epoch": 0.8405984087488096, "grad_norm": 0.3570595979690552, "learning_rate": 1.250539341094501e-05, "loss": 0.5145, "step": 27364 }, { "epoch": 0.8406291278837588, "grad_norm": 0.3805569112300873, "learning_rate": 1.2504925544127804e-05, "loss": 0.5052, "step": 27365 }, { "epoch": 0.840659847018708, "grad_norm": 0.4003191292285919, "learning_rate": 1.250445767146017e-05, "loss": 0.5518, "step": 27366 }, { "epoch": 0.8406905661536571, "grad_norm": 0.38037845492362976, "learning_rate": 1.25039897929432e-05, "loss": 0.5273, "step": 27367 }, { "epoch": 0.8407212852886062, "grad_norm": 0.3821370601654053, "learning_rate": 1.2503521908577987e-05, "loss": 0.5639, "step": 27368 }, { "epoch": 0.8407520044235555, "grad_norm": 0.35012802481651306, "learning_rate": 1.2503054018365624e-05, "loss": 0.6222, "step": 27369 }, { "epoch": 0.8407827235585046, "grad_norm": 0.3252747058868408, "learning_rate": 1.2502586122307202e-05, "loss": 0.501, "step": 27370 }, { "epoch": 0.8408134426934537, "grad_norm": 0.3398558795452118, "learning_rate": 1.2502118220403817e-05, "loss": 0.5629, "step": 27371 }, { "epoch": 0.8408441618284029, "grad_norm": 0.3861139416694641, "learning_rate": 1.2501650312656558e-05, "loss": 0.6155, "step": 27372 }, { "epoch": 0.8408748809633521, "grad_norm": 0.3546919524669647, "learning_rate": 1.2501182399066521e-05, "loss": 0.5837, "step": 27373 }, { "epoch": 0.8409056000983013, "grad_norm": 0.34214919805526733, "learning_rate": 1.2500714479634799e-05, "loss": 0.5734, "step": 27374 }, { "epoch": 0.8409363192332504, "grad_norm": 0.3709850013256073, "learning_rate": 1.2500246554362485e-05, "loss": 0.5346, "step": 27375 }, { "epoch": 0.8409670383681995, "grad_norm": 0.4044996500015259, "learning_rate": 1.2499778623250664e-05, "loss": 0.5733, "step": 27376 }, { "epoch": 0.8409977575031488, "grad_norm": 0.4813549220561981, "learning_rate": 1.2499310686300443e-05, "loss": 0.5387, "step": 27377 }, { "epoch": 0.8410284766380979, "grad_norm": 0.3627794682979584, "learning_rate": 1.2498842743512902e-05, "loss": 0.4665, "step": 27378 }, { "epoch": 0.841059195773047, "grad_norm": 0.430976003408432, "learning_rate": 1.2498374794889143e-05, "loss": 0.5637, "step": 27379 }, { "epoch": 0.8410899149079962, "grad_norm": 0.5340626835823059, "learning_rate": 1.2497906840430253e-05, "loss": 0.5926, "step": 27380 }, { "epoch": 0.8411206340429453, "grad_norm": 0.39707258343696594, "learning_rate": 1.249743888013733e-05, "loss": 0.5544, "step": 27381 }, { "epoch": 0.8411513531778945, "grad_norm": 0.3835461735725403, "learning_rate": 1.2496970914011461e-05, "loss": 0.5724, "step": 27382 }, { "epoch": 0.8411820723128437, "grad_norm": 0.3666451573371887, "learning_rate": 1.2496502942053744e-05, "loss": 0.5926, "step": 27383 }, { "epoch": 0.8412127914477928, "grad_norm": 0.3810656666755676, "learning_rate": 1.2496034964265268e-05, "loss": 0.6066, "step": 27384 }, { "epoch": 0.8412435105827419, "grad_norm": 0.37072497606277466, "learning_rate": 1.249556698064713e-05, "loss": 0.5902, "step": 27385 }, { "epoch": 0.8412742297176912, "grad_norm": 0.6239602565765381, "learning_rate": 1.2495098991200421e-05, "loss": 0.5686, "step": 27386 }, { "epoch": 0.8413049488526403, "grad_norm": 0.33208611607551575, "learning_rate": 1.2494630995926231e-05, "loss": 0.5523, "step": 27387 }, { "epoch": 0.8413356679875895, "grad_norm": 0.35338446497917175, "learning_rate": 1.249416299482566e-05, "loss": 0.5659, "step": 27388 }, { "epoch": 0.8413663871225386, "grad_norm": 0.3607088029384613, "learning_rate": 1.2493694987899794e-05, "loss": 0.5779, "step": 27389 }, { "epoch": 0.8413971062574878, "grad_norm": 0.3727528154850006, "learning_rate": 1.2493226975149732e-05, "loss": 0.4683, "step": 27390 }, { "epoch": 0.841427825392437, "grad_norm": 0.3338002562522888, "learning_rate": 1.249275895657656e-05, "loss": 0.5685, "step": 27391 }, { "epoch": 0.8414585445273861, "grad_norm": 0.355518102645874, "learning_rate": 1.2492290932181381e-05, "loss": 0.5636, "step": 27392 }, { "epoch": 0.8414892636623352, "grad_norm": 0.37375548481941223, "learning_rate": 1.249182290196528e-05, "loss": 0.5666, "step": 27393 }, { "epoch": 0.8415199827972845, "grad_norm": 0.32521119713783264, "learning_rate": 1.2491354865929349e-05, "loss": 0.5823, "step": 27394 }, { "epoch": 0.8415507019322336, "grad_norm": 0.375021368265152, "learning_rate": 1.2490886824074688e-05, "loss": 0.5164, "step": 27395 }, { "epoch": 0.8415814210671827, "grad_norm": 0.38915857672691345, "learning_rate": 1.2490418776402386e-05, "loss": 0.5349, "step": 27396 }, { "epoch": 0.8416121402021319, "grad_norm": 0.33684274554252625, "learning_rate": 1.2489950722913539e-05, "loss": 0.4608, "step": 27397 }, { "epoch": 0.841642859337081, "grad_norm": 0.3802452087402344, "learning_rate": 1.248948266360924e-05, "loss": 0.5946, "step": 27398 }, { "epoch": 0.8416735784720303, "grad_norm": 0.4298010468482971, "learning_rate": 1.2489014598490577e-05, "loss": 0.5988, "step": 27399 }, { "epoch": 0.8417042976069794, "grad_norm": 0.36444807052612305, "learning_rate": 1.2488546527558647e-05, "loss": 0.4773, "step": 27400 }, { "epoch": 0.8417350167419285, "grad_norm": 0.33031296730041504, "learning_rate": 1.2488078450814541e-05, "loss": 0.5425, "step": 27401 }, { "epoch": 0.8417657358768778, "grad_norm": 0.36067917943000793, "learning_rate": 1.2487610368259357e-05, "loss": 0.4461, "step": 27402 }, { "epoch": 0.8417964550118269, "grad_norm": 0.44623494148254395, "learning_rate": 1.2487142279894181e-05, "loss": 0.5321, "step": 27403 }, { "epoch": 0.841827174146776, "grad_norm": 0.3596116304397583, "learning_rate": 1.2486674185720116e-05, "loss": 0.4865, "step": 27404 }, { "epoch": 0.8418578932817252, "grad_norm": 0.38318848609924316, "learning_rate": 1.2486206085738246e-05, "loss": 0.554, "step": 27405 }, { "epoch": 0.8418886124166743, "grad_norm": 0.33712226152420044, "learning_rate": 1.248573797994967e-05, "loss": 0.542, "step": 27406 }, { "epoch": 0.8419193315516235, "grad_norm": 0.36827778816223145, "learning_rate": 1.2485269868355478e-05, "loss": 0.6046, "step": 27407 }, { "epoch": 0.8419500506865727, "grad_norm": 0.32063066959381104, "learning_rate": 1.2484801750956767e-05, "loss": 0.477, "step": 27408 }, { "epoch": 0.8419807698215218, "grad_norm": 0.4022870361804962, "learning_rate": 1.2484333627754625e-05, "loss": 0.5679, "step": 27409 }, { "epoch": 0.8420114889564709, "grad_norm": 0.3763059079647064, "learning_rate": 1.2483865498750152e-05, "loss": 0.5568, "step": 27410 }, { "epoch": 0.8420422080914202, "grad_norm": 0.39209863543510437, "learning_rate": 1.2483397363944436e-05, "loss": 0.5311, "step": 27411 }, { "epoch": 0.8420729272263693, "grad_norm": 0.3428773880004883, "learning_rate": 1.2482929223338566e-05, "loss": 0.6133, "step": 27412 }, { "epoch": 0.8421036463613185, "grad_norm": 0.39146748185157776, "learning_rate": 1.2482461076933649e-05, "loss": 0.5413, "step": 27413 }, { "epoch": 0.8421343654962676, "grad_norm": 0.33835369348526, "learning_rate": 1.2481992924730768e-05, "loss": 0.5309, "step": 27414 }, { "epoch": 0.8421650846312168, "grad_norm": 0.3661634624004364, "learning_rate": 1.2481524766731022e-05, "loss": 0.5791, "step": 27415 }, { "epoch": 0.842195803766166, "grad_norm": 0.3841603696346283, "learning_rate": 1.24810566029355e-05, "loss": 0.5794, "step": 27416 }, { "epoch": 0.8422265229011151, "grad_norm": 0.3765762448310852, "learning_rate": 1.2480588433345296e-05, "loss": 0.5676, "step": 27417 }, { "epoch": 0.8422572420360642, "grad_norm": 0.4025719165802002, "learning_rate": 1.2480120257961503e-05, "loss": 0.5263, "step": 27418 }, { "epoch": 0.8422879611710135, "grad_norm": 0.3541122078895569, "learning_rate": 1.2479652076785218e-05, "loss": 0.5902, "step": 27419 }, { "epoch": 0.8423186803059626, "grad_norm": 0.34577128291130066, "learning_rate": 1.2479183889817534e-05, "loss": 0.5236, "step": 27420 }, { "epoch": 0.8423493994409117, "grad_norm": 0.3452979028224945, "learning_rate": 1.247871569705954e-05, "loss": 0.4874, "step": 27421 }, { "epoch": 0.8423801185758609, "grad_norm": 0.34384456276893616, "learning_rate": 1.2478247498512332e-05, "loss": 0.5005, "step": 27422 }, { "epoch": 0.84241083771081, "grad_norm": 0.35666245222091675, "learning_rate": 1.2477779294177006e-05, "loss": 0.5452, "step": 27423 }, { "epoch": 0.8424415568457593, "grad_norm": 0.962787926197052, "learning_rate": 1.2477311084054652e-05, "loss": 0.5507, "step": 27424 }, { "epoch": 0.8424722759807084, "grad_norm": 0.35569605231285095, "learning_rate": 1.2476842868146365e-05, "loss": 0.5778, "step": 27425 }, { "epoch": 0.8425029951156575, "grad_norm": 0.4006465971469879, "learning_rate": 1.2476374646453242e-05, "loss": 0.5493, "step": 27426 }, { "epoch": 0.8425337142506067, "grad_norm": 0.36328232288360596, "learning_rate": 1.2475906418976372e-05, "loss": 0.527, "step": 27427 }, { "epoch": 0.8425644333855559, "grad_norm": 0.37749743461608887, "learning_rate": 1.2475438185716846e-05, "loss": 0.5454, "step": 27428 }, { "epoch": 0.842595152520505, "grad_norm": 0.39355728030204773, "learning_rate": 1.2474969946675764e-05, "loss": 0.6359, "step": 27429 }, { "epoch": 0.8426258716554542, "grad_norm": 0.37623897194862366, "learning_rate": 1.2474501701854214e-05, "loss": 0.5565, "step": 27430 }, { "epoch": 0.8426565907904033, "grad_norm": 0.6305463314056396, "learning_rate": 1.2474033451253294e-05, "loss": 0.5848, "step": 27431 }, { "epoch": 0.8426873099253525, "grad_norm": 0.3767709732055664, "learning_rate": 1.2473565194874095e-05, "loss": 0.5534, "step": 27432 }, { "epoch": 0.8427180290603017, "grad_norm": 0.36122873425483704, "learning_rate": 1.2473096932717714e-05, "loss": 0.5762, "step": 27433 }, { "epoch": 0.8427487481952508, "grad_norm": 0.36045101284980774, "learning_rate": 1.2472628664785243e-05, "loss": 0.591, "step": 27434 }, { "epoch": 0.8427794673301999, "grad_norm": 0.35751375555992126, "learning_rate": 1.2472160391077773e-05, "loss": 0.4889, "step": 27435 }, { "epoch": 0.8428101864651492, "grad_norm": 0.3631209433078766, "learning_rate": 1.24716921115964e-05, "loss": 0.598, "step": 27436 }, { "epoch": 0.8428409056000983, "grad_norm": 0.40210917592048645, "learning_rate": 1.2471223826342217e-05, "loss": 0.5913, "step": 27437 }, { "epoch": 0.8428716247350475, "grad_norm": 0.36513596773147583, "learning_rate": 1.2470755535316321e-05, "loss": 0.4833, "step": 27438 }, { "epoch": 0.8429023438699966, "grad_norm": 0.35902804136276245, "learning_rate": 1.24702872385198e-05, "loss": 0.544, "step": 27439 }, { "epoch": 0.8429330630049457, "grad_norm": 0.3843648433685303, "learning_rate": 1.2469818935953753e-05, "loss": 0.5446, "step": 27440 }, { "epoch": 0.842963782139895, "grad_norm": 0.3491418957710266, "learning_rate": 1.2469350627619268e-05, "loss": 0.4775, "step": 27441 }, { "epoch": 0.8429945012748441, "grad_norm": 0.3477470874786377, "learning_rate": 1.2468882313517446e-05, "loss": 0.5165, "step": 27442 }, { "epoch": 0.8430252204097932, "grad_norm": 0.3816833794116974, "learning_rate": 1.2468413993649373e-05, "loss": 0.5472, "step": 27443 }, { "epoch": 0.8430559395447424, "grad_norm": 0.3773796558380127, "learning_rate": 1.246794566801615e-05, "loss": 0.512, "step": 27444 }, { "epoch": 0.8430866586796916, "grad_norm": 0.3752632141113281, "learning_rate": 1.2467477336618868e-05, "loss": 0.5166, "step": 27445 }, { "epoch": 0.8431173778146407, "grad_norm": 0.3504323661327362, "learning_rate": 1.2467008999458616e-05, "loss": 0.5573, "step": 27446 }, { "epoch": 0.8431480969495899, "grad_norm": 0.36194926500320435, "learning_rate": 1.2466540656536497e-05, "loss": 0.5513, "step": 27447 }, { "epoch": 0.843178816084539, "grad_norm": 0.3479464650154114, "learning_rate": 1.2466072307853595e-05, "loss": 0.5835, "step": 27448 }, { "epoch": 0.8432095352194883, "grad_norm": 0.40857839584350586, "learning_rate": 1.2465603953411012e-05, "loss": 0.5406, "step": 27449 }, { "epoch": 0.8432402543544374, "grad_norm": 0.406244695186615, "learning_rate": 1.2465135593209839e-05, "loss": 0.5562, "step": 27450 }, { "epoch": 0.8432709734893865, "grad_norm": 0.3406674861907959, "learning_rate": 1.2464667227251168e-05, "loss": 0.447, "step": 27451 }, { "epoch": 0.8433016926243357, "grad_norm": 0.3878394365310669, "learning_rate": 1.2464198855536099e-05, "loss": 0.4099, "step": 27452 }, { "epoch": 0.8433324117592849, "grad_norm": 0.5348444581031799, "learning_rate": 1.2463730478065717e-05, "loss": 0.5598, "step": 27453 }, { "epoch": 0.843363130894234, "grad_norm": 0.3873843848705292, "learning_rate": 1.2463262094841123e-05, "loss": 0.4979, "step": 27454 }, { "epoch": 0.8433938500291832, "grad_norm": 0.3943963050842285, "learning_rate": 1.2462793705863405e-05, "loss": 0.6055, "step": 27455 }, { "epoch": 0.8434245691641323, "grad_norm": 0.37207284569740295, "learning_rate": 1.2462325311133663e-05, "loss": 0.5498, "step": 27456 }, { "epoch": 0.8434552882990815, "grad_norm": 0.38212722539901733, "learning_rate": 1.2461856910652985e-05, "loss": 0.4573, "step": 27457 }, { "epoch": 0.8434860074340307, "grad_norm": 0.34409472346305847, "learning_rate": 1.2461388504422473e-05, "loss": 0.6071, "step": 27458 }, { "epoch": 0.8435167265689798, "grad_norm": 0.36049747467041016, "learning_rate": 1.2460920092443213e-05, "loss": 0.5962, "step": 27459 }, { "epoch": 0.8435474457039289, "grad_norm": 0.33845147490501404, "learning_rate": 1.2460451674716305e-05, "loss": 0.5955, "step": 27460 }, { "epoch": 0.8435781648388782, "grad_norm": 0.36615025997161865, "learning_rate": 1.2459983251242838e-05, "loss": 0.5233, "step": 27461 }, { "epoch": 0.8436088839738273, "grad_norm": 0.35513490438461304, "learning_rate": 1.2459514822023908e-05, "loss": 0.5157, "step": 27462 }, { "epoch": 0.8436396031087765, "grad_norm": 0.39335811138153076, "learning_rate": 1.2459046387060612e-05, "loss": 0.5459, "step": 27463 }, { "epoch": 0.8436703222437256, "grad_norm": 0.408810555934906, "learning_rate": 1.2458577946354038e-05, "loss": 0.5074, "step": 27464 }, { "epoch": 0.8437010413786747, "grad_norm": 0.3937597870826721, "learning_rate": 1.2458109499905286e-05, "loss": 0.5336, "step": 27465 }, { "epoch": 0.843731760513624, "grad_norm": 0.41812509298324585, "learning_rate": 1.2457641047715448e-05, "loss": 0.5243, "step": 27466 }, { "epoch": 0.8437624796485731, "grad_norm": 0.3814831078052521, "learning_rate": 1.2457172589785614e-05, "loss": 0.5572, "step": 27467 }, { "epoch": 0.8437931987835222, "grad_norm": 0.4409952759742737, "learning_rate": 1.2456704126116885e-05, "loss": 0.4851, "step": 27468 }, { "epoch": 0.8438239179184714, "grad_norm": 0.40220651030540466, "learning_rate": 1.2456235656710351e-05, "loss": 0.5336, "step": 27469 }, { "epoch": 0.8438546370534206, "grad_norm": 0.4967983067035675, "learning_rate": 1.245576718156711e-05, "loss": 0.5606, "step": 27470 }, { "epoch": 0.8438853561883697, "grad_norm": 0.3889867663383484, "learning_rate": 1.2455298700688248e-05, "loss": 0.6071, "step": 27471 }, { "epoch": 0.8439160753233189, "grad_norm": 0.4117208421230316, "learning_rate": 1.2454830214074867e-05, "loss": 0.6247, "step": 27472 }, { "epoch": 0.843946794458268, "grad_norm": 0.35783442854881287, "learning_rate": 1.2454361721728057e-05, "loss": 0.5623, "step": 27473 }, { "epoch": 0.8439775135932173, "grad_norm": 0.3555428385734558, "learning_rate": 1.2453893223648916e-05, "loss": 0.5628, "step": 27474 }, { "epoch": 0.8440082327281664, "grad_norm": 0.3803429901599884, "learning_rate": 1.2453424719838537e-05, "loss": 0.5293, "step": 27475 }, { "epoch": 0.8440389518631155, "grad_norm": 0.34978097677230835, "learning_rate": 1.245295621029801e-05, "loss": 0.5623, "step": 27476 }, { "epoch": 0.8440696709980647, "grad_norm": 0.3381558060646057, "learning_rate": 1.2452487695028434e-05, "loss": 0.5503, "step": 27477 }, { "epoch": 0.8441003901330139, "grad_norm": 0.35216203331947327, "learning_rate": 1.2452019174030902e-05, "loss": 0.5005, "step": 27478 }, { "epoch": 0.844131109267963, "grad_norm": 0.34353363513946533, "learning_rate": 1.245155064730651e-05, "loss": 0.4739, "step": 27479 }, { "epoch": 0.8441618284029122, "grad_norm": 0.4045220911502838, "learning_rate": 1.2451082114856345e-05, "loss": 0.5298, "step": 27480 }, { "epoch": 0.8441925475378613, "grad_norm": 0.3818540573120117, "learning_rate": 1.2450613576681513e-05, "loss": 0.5466, "step": 27481 }, { "epoch": 0.8442232666728104, "grad_norm": 0.37245962023735046, "learning_rate": 1.2450145032783097e-05, "loss": 0.5169, "step": 27482 }, { "epoch": 0.8442539858077597, "grad_norm": 0.36826032400131226, "learning_rate": 1.2449676483162196e-05, "loss": 0.5286, "step": 27483 }, { "epoch": 0.8442847049427088, "grad_norm": 0.3319023549556732, "learning_rate": 1.2449207927819906e-05, "loss": 0.5382, "step": 27484 }, { "epoch": 0.844315424077658, "grad_norm": 0.3656357526779175, "learning_rate": 1.244873936675732e-05, "loss": 0.4829, "step": 27485 }, { "epoch": 0.8443461432126071, "grad_norm": 0.3522183895111084, "learning_rate": 1.2448270799975532e-05, "loss": 0.5703, "step": 27486 }, { "epoch": 0.8443768623475563, "grad_norm": 0.3889095187187195, "learning_rate": 1.2447802227475636e-05, "loss": 0.5907, "step": 27487 }, { "epoch": 0.8444075814825055, "grad_norm": 0.417457640171051, "learning_rate": 1.2447333649258729e-05, "loss": 0.5654, "step": 27488 }, { "epoch": 0.8444383006174546, "grad_norm": 0.3740599453449249, "learning_rate": 1.24468650653259e-05, "loss": 0.6197, "step": 27489 }, { "epoch": 0.8444690197524037, "grad_norm": 0.3461305499076843, "learning_rate": 1.2446396475678249e-05, "loss": 0.5453, "step": 27490 }, { "epoch": 0.844499738887353, "grad_norm": 0.4766981899738312, "learning_rate": 1.2445927880316866e-05, "loss": 0.6333, "step": 27491 }, { "epoch": 0.8445304580223021, "grad_norm": 0.3580690026283264, "learning_rate": 1.244545927924285e-05, "loss": 0.4778, "step": 27492 }, { "epoch": 0.8445611771572512, "grad_norm": 0.3485373854637146, "learning_rate": 1.2444990672457293e-05, "loss": 0.5565, "step": 27493 }, { "epoch": 0.8445918962922004, "grad_norm": 0.3937159776687622, "learning_rate": 1.244452205996129e-05, "loss": 0.5676, "step": 27494 }, { "epoch": 0.8446226154271496, "grad_norm": 0.3483898341655731, "learning_rate": 1.2444053441755932e-05, "loss": 0.5715, "step": 27495 }, { "epoch": 0.8446533345620987, "grad_norm": 0.36250749230384827, "learning_rate": 1.244358481784232e-05, "loss": 0.5816, "step": 27496 }, { "epoch": 0.8446840536970479, "grad_norm": 0.46397534012794495, "learning_rate": 1.2443116188221544e-05, "loss": 0.5647, "step": 27497 }, { "epoch": 0.844714772831997, "grad_norm": 0.3613952398300171, "learning_rate": 1.2442647552894697e-05, "loss": 0.5563, "step": 27498 }, { "epoch": 0.8447454919669463, "grad_norm": 0.39790454506874084, "learning_rate": 1.2442178911862881e-05, "loss": 0.5708, "step": 27499 }, { "epoch": 0.8447762111018954, "grad_norm": 0.34501633048057556, "learning_rate": 1.244171026512718e-05, "loss": 0.6243, "step": 27500 }, { "epoch": 0.8448069302368445, "grad_norm": 0.36811622977256775, "learning_rate": 1.24412416126887e-05, "loss": 0.5027, "step": 27501 }, { "epoch": 0.8448376493717937, "grad_norm": 0.36631274223327637, "learning_rate": 1.2440772954548526e-05, "loss": 0.4092, "step": 27502 }, { "epoch": 0.8448683685067429, "grad_norm": 0.3737470805644989, "learning_rate": 1.2440304290707756e-05, "loss": 0.528, "step": 27503 }, { "epoch": 0.844899087641692, "grad_norm": 0.35294249653816223, "learning_rate": 1.2439835621167485e-05, "loss": 0.5674, "step": 27504 }, { "epoch": 0.8449298067766412, "grad_norm": 0.32488858699798584, "learning_rate": 1.2439366945928808e-05, "loss": 0.5054, "step": 27505 }, { "epoch": 0.8449605259115903, "grad_norm": 0.3372909128665924, "learning_rate": 1.2438898264992821e-05, "loss": 0.5236, "step": 27506 }, { "epoch": 0.8449912450465394, "grad_norm": 0.370376318693161, "learning_rate": 1.243842957836061e-05, "loss": 0.4734, "step": 27507 }, { "epoch": 0.8450219641814887, "grad_norm": 0.32630041241645813, "learning_rate": 1.2437960886033284e-05, "loss": 0.5533, "step": 27508 }, { "epoch": 0.8450526833164378, "grad_norm": 0.34646859765052795, "learning_rate": 1.2437492188011928e-05, "loss": 0.5514, "step": 27509 }, { "epoch": 0.845083402451387, "grad_norm": 0.3916209936141968, "learning_rate": 1.243702348429764e-05, "loss": 0.551, "step": 27510 }, { "epoch": 0.8451141215863361, "grad_norm": 0.3674609363079071, "learning_rate": 1.243655477489151e-05, "loss": 0.5221, "step": 27511 }, { "epoch": 0.8451448407212853, "grad_norm": 0.37774842977523804, "learning_rate": 1.2436086059794639e-05, "loss": 0.5602, "step": 27512 }, { "epoch": 0.8451755598562345, "grad_norm": 0.37175115942955017, "learning_rate": 1.2435617339008117e-05, "loss": 0.5174, "step": 27513 }, { "epoch": 0.8452062789911836, "grad_norm": 0.3656691610813141, "learning_rate": 1.2435148612533042e-05, "loss": 0.5709, "step": 27514 }, { "epoch": 0.8452369981261327, "grad_norm": 0.39803406596183777, "learning_rate": 1.2434679880370508e-05, "loss": 0.5414, "step": 27515 }, { "epoch": 0.845267717261082, "grad_norm": 0.3508002758026123, "learning_rate": 1.2434211142521607e-05, "loss": 0.532, "step": 27516 }, { "epoch": 0.8452984363960311, "grad_norm": 0.3391759693622589, "learning_rate": 1.2433742398987439e-05, "loss": 0.5063, "step": 27517 }, { "epoch": 0.8453291555309802, "grad_norm": 0.3664117753505707, "learning_rate": 1.243327364976909e-05, "loss": 0.5723, "step": 27518 }, { "epoch": 0.8453598746659294, "grad_norm": 0.3381238877773285, "learning_rate": 1.2432804894867667e-05, "loss": 0.5431, "step": 27519 }, { "epoch": 0.8453905938008786, "grad_norm": 0.38711369037628174, "learning_rate": 1.2432336134284253e-05, "loss": 0.4919, "step": 27520 }, { "epoch": 0.8454213129358277, "grad_norm": 0.4541545808315277, "learning_rate": 1.2431867368019952e-05, "loss": 0.5279, "step": 27521 }, { "epoch": 0.8454520320707769, "grad_norm": 0.37916433811187744, "learning_rate": 1.2431398596075852e-05, "loss": 0.5499, "step": 27522 }, { "epoch": 0.845482751205726, "grad_norm": 0.3459722399711609, "learning_rate": 1.243092981845305e-05, "loss": 0.5752, "step": 27523 }, { "epoch": 0.8455134703406753, "grad_norm": 0.3798540532588959, "learning_rate": 1.2430461035152646e-05, "loss": 0.5212, "step": 27524 }, { "epoch": 0.8455441894756244, "grad_norm": 0.38089612126350403, "learning_rate": 1.2429992246175726e-05, "loss": 0.5352, "step": 27525 }, { "epoch": 0.8455749086105735, "grad_norm": 0.3884556293487549, "learning_rate": 1.2429523451523392e-05, "loss": 0.5921, "step": 27526 }, { "epoch": 0.8456056277455227, "grad_norm": 0.3622110188007355, "learning_rate": 1.2429054651196734e-05, "loss": 0.4896, "step": 27527 }, { "epoch": 0.8456363468804718, "grad_norm": 0.356887549161911, "learning_rate": 1.2428585845196851e-05, "loss": 0.5377, "step": 27528 }, { "epoch": 0.845667066015421, "grad_norm": 0.3562752902507782, "learning_rate": 1.2428117033524838e-05, "loss": 0.5897, "step": 27529 }, { "epoch": 0.8456977851503702, "grad_norm": 0.34182778000831604, "learning_rate": 1.2427648216181787e-05, "loss": 0.5323, "step": 27530 }, { "epoch": 0.8457285042853193, "grad_norm": 0.35560935735702515, "learning_rate": 1.2427179393168794e-05, "loss": 0.5798, "step": 27531 }, { "epoch": 0.8457592234202684, "grad_norm": 0.3527083694934845, "learning_rate": 1.2426710564486952e-05, "loss": 0.5745, "step": 27532 }, { "epoch": 0.8457899425552177, "grad_norm": 0.38120928406715393, "learning_rate": 1.2426241730137361e-05, "loss": 0.5783, "step": 27533 }, { "epoch": 0.8458206616901668, "grad_norm": 0.4197694957256317, "learning_rate": 1.242577289012111e-05, "loss": 0.5684, "step": 27534 }, { "epoch": 0.845851380825116, "grad_norm": 0.37842699885368347, "learning_rate": 1.24253040444393e-05, "loss": 0.5808, "step": 27535 }, { "epoch": 0.8458820999600651, "grad_norm": 0.36563238501548767, "learning_rate": 1.242483519309302e-05, "loss": 0.5197, "step": 27536 }, { "epoch": 0.8459128190950143, "grad_norm": 0.34175485372543335, "learning_rate": 1.2424366336083369e-05, "loss": 0.468, "step": 27537 }, { "epoch": 0.8459435382299635, "grad_norm": 0.35126546025276184, "learning_rate": 1.2423897473411442e-05, "loss": 0.5393, "step": 27538 }, { "epoch": 0.8459742573649126, "grad_norm": 0.33626681566238403, "learning_rate": 1.2423428605078333e-05, "loss": 0.4872, "step": 27539 }, { "epoch": 0.8460049764998617, "grad_norm": 0.34489545226097107, "learning_rate": 1.2422959731085135e-05, "loss": 0.4927, "step": 27540 }, { "epoch": 0.846035695634811, "grad_norm": 0.32902905344963074, "learning_rate": 1.2422490851432947e-05, "loss": 0.5538, "step": 27541 }, { "epoch": 0.8460664147697601, "grad_norm": 0.39556071162223816, "learning_rate": 1.2422021966122864e-05, "loss": 0.6169, "step": 27542 }, { "epoch": 0.8460971339047092, "grad_norm": 0.36458417773246765, "learning_rate": 1.2421553075155976e-05, "loss": 0.5284, "step": 27543 }, { "epoch": 0.8461278530396584, "grad_norm": 0.3372204005718231, "learning_rate": 1.2421084178533385e-05, "loss": 0.5552, "step": 27544 }, { "epoch": 0.8461585721746075, "grad_norm": 0.4863251745700836, "learning_rate": 1.2420615276256181e-05, "loss": 0.6552, "step": 27545 }, { "epoch": 0.8461892913095567, "grad_norm": 0.35853105783462524, "learning_rate": 1.2420146368325463e-05, "loss": 0.5374, "step": 27546 }, { "epoch": 0.8462200104445059, "grad_norm": 0.3419025242328644, "learning_rate": 1.241967745474232e-05, "loss": 0.5207, "step": 27547 }, { "epoch": 0.846250729579455, "grad_norm": 0.38604798913002014, "learning_rate": 1.2419208535507856e-05, "loss": 0.4841, "step": 27548 }, { "epoch": 0.8462814487144042, "grad_norm": 0.9277123212814331, "learning_rate": 1.241873961062316e-05, "loss": 0.5742, "step": 27549 }, { "epoch": 0.8463121678493534, "grad_norm": 0.4297885298728943, "learning_rate": 1.2418270680089329e-05, "loss": 0.6459, "step": 27550 }, { "epoch": 0.8463428869843025, "grad_norm": 0.3648771345615387, "learning_rate": 1.2417801743907458e-05, "loss": 0.54, "step": 27551 }, { "epoch": 0.8463736061192517, "grad_norm": 0.34363052248954773, "learning_rate": 1.2417332802078642e-05, "loss": 0.5059, "step": 27552 }, { "epoch": 0.8464043252542008, "grad_norm": 0.3713448941707611, "learning_rate": 1.2416863854603975e-05, "loss": 0.5647, "step": 27553 }, { "epoch": 0.84643504438915, "grad_norm": 0.35090014338493347, "learning_rate": 1.2416394901484554e-05, "loss": 0.5637, "step": 27554 }, { "epoch": 0.8464657635240992, "grad_norm": 0.3549133539199829, "learning_rate": 1.2415925942721476e-05, "loss": 0.4734, "step": 27555 }, { "epoch": 0.8464964826590483, "grad_norm": 0.35566067695617676, "learning_rate": 1.2415456978315832e-05, "loss": 0.6177, "step": 27556 }, { "epoch": 0.8465272017939974, "grad_norm": 0.39036643505096436, "learning_rate": 1.241498800826872e-05, "loss": 0.5315, "step": 27557 }, { "epoch": 0.8465579209289467, "grad_norm": 0.3655991554260254, "learning_rate": 1.2414519032581234e-05, "loss": 0.6207, "step": 27558 }, { "epoch": 0.8465886400638958, "grad_norm": 0.3891712725162506, "learning_rate": 1.2414050051254472e-05, "loss": 0.6197, "step": 27559 }, { "epoch": 0.846619359198845, "grad_norm": 0.36192891001701355, "learning_rate": 1.2413581064289527e-05, "loss": 0.5645, "step": 27560 }, { "epoch": 0.8466500783337941, "grad_norm": 0.42353636026382446, "learning_rate": 1.2413112071687493e-05, "loss": 0.6113, "step": 27561 }, { "epoch": 0.8466807974687433, "grad_norm": 0.34134212136268616, "learning_rate": 1.241264307344947e-05, "loss": 0.596, "step": 27562 }, { "epoch": 0.8467115166036925, "grad_norm": 0.35790976881980896, "learning_rate": 1.2412174069576548e-05, "loss": 0.5215, "step": 27563 }, { "epoch": 0.8467422357386416, "grad_norm": 0.4133078455924988, "learning_rate": 1.2411705060069827e-05, "loss": 0.566, "step": 27564 }, { "epoch": 0.8467729548735907, "grad_norm": 0.34150758385658264, "learning_rate": 1.24112360449304e-05, "loss": 0.519, "step": 27565 }, { "epoch": 0.84680367400854, "grad_norm": 0.3692472577095032, "learning_rate": 1.2410767024159363e-05, "loss": 0.5399, "step": 27566 }, { "epoch": 0.8468343931434891, "grad_norm": 0.3617804944515228, "learning_rate": 1.2410297997757812e-05, "loss": 0.5082, "step": 27567 }, { "epoch": 0.8468651122784382, "grad_norm": 0.3893692195415497, "learning_rate": 1.240982896572684e-05, "loss": 0.546, "step": 27568 }, { "epoch": 0.8468958314133874, "grad_norm": 0.4071747958660126, "learning_rate": 1.2409359928067545e-05, "loss": 0.516, "step": 27569 }, { "epoch": 0.8469265505483365, "grad_norm": 0.4233867824077606, "learning_rate": 1.2408890884781021e-05, "loss": 0.6066, "step": 27570 }, { "epoch": 0.8469572696832858, "grad_norm": 0.37697258591651917, "learning_rate": 1.2408421835868366e-05, "loss": 0.479, "step": 27571 }, { "epoch": 0.8469879888182349, "grad_norm": 0.3660478889942169, "learning_rate": 1.2407952781330669e-05, "loss": 0.5091, "step": 27572 }, { "epoch": 0.847018707953184, "grad_norm": 0.4180964529514313, "learning_rate": 1.2407483721169036e-05, "loss": 0.6228, "step": 27573 }, { "epoch": 0.8470494270881332, "grad_norm": 0.36409011483192444, "learning_rate": 1.2407014655384553e-05, "loss": 0.5917, "step": 27574 }, { "epoch": 0.8470801462230824, "grad_norm": 0.3454005718231201, "learning_rate": 1.240654558397832e-05, "loss": 0.6072, "step": 27575 }, { "epoch": 0.8471108653580315, "grad_norm": 0.35828661918640137, "learning_rate": 1.2406076506951432e-05, "loss": 0.5921, "step": 27576 }, { "epoch": 0.8471415844929807, "grad_norm": 0.3394390940666199, "learning_rate": 1.2405607424304981e-05, "loss": 0.5259, "step": 27577 }, { "epoch": 0.8471723036279298, "grad_norm": 0.4342328906059265, "learning_rate": 1.2405138336040072e-05, "loss": 0.5931, "step": 27578 }, { "epoch": 0.847203022762879, "grad_norm": 0.4988875091075897, "learning_rate": 1.240466924215779e-05, "loss": 0.6536, "step": 27579 }, { "epoch": 0.8472337418978282, "grad_norm": 0.45602115988731384, "learning_rate": 1.2404200142659238e-05, "loss": 0.5935, "step": 27580 }, { "epoch": 0.8472644610327773, "grad_norm": 0.3758673071861267, "learning_rate": 1.2403731037545505e-05, "loss": 0.5508, "step": 27581 }, { "epoch": 0.8472951801677264, "grad_norm": 0.341767281293869, "learning_rate": 1.2403261926817693e-05, "loss": 0.504, "step": 27582 }, { "epoch": 0.8473258993026757, "grad_norm": 0.34443292021751404, "learning_rate": 1.2402792810476897e-05, "loss": 0.5203, "step": 27583 }, { "epoch": 0.8473566184376248, "grad_norm": 0.3747936189174652, "learning_rate": 1.2402323688524205e-05, "loss": 0.6031, "step": 27584 }, { "epoch": 0.847387337572574, "grad_norm": 0.36414021253585815, "learning_rate": 1.2401854560960722e-05, "loss": 0.509, "step": 27585 }, { "epoch": 0.8474180567075231, "grad_norm": 0.36967065930366516, "learning_rate": 1.2401385427787538e-05, "loss": 0.5198, "step": 27586 }, { "epoch": 0.8474487758424722, "grad_norm": 0.3400532007217407, "learning_rate": 1.2400916289005753e-05, "loss": 0.495, "step": 27587 }, { "epoch": 0.8474794949774215, "grad_norm": 0.3551408350467682, "learning_rate": 1.2400447144616457e-05, "loss": 0.6143, "step": 27588 }, { "epoch": 0.8475102141123706, "grad_norm": 0.3659326136112213, "learning_rate": 1.2399977994620753e-05, "loss": 0.5256, "step": 27589 }, { "epoch": 0.8475409332473197, "grad_norm": 0.3556739389896393, "learning_rate": 1.239950883901973e-05, "loss": 0.53, "step": 27590 }, { "epoch": 0.847571652382269, "grad_norm": 0.3547216057777405, "learning_rate": 1.2399039677814488e-05, "loss": 0.5626, "step": 27591 }, { "epoch": 0.8476023715172181, "grad_norm": 0.3815220892429352, "learning_rate": 1.239857051100612e-05, "loss": 0.5298, "step": 27592 }, { "epoch": 0.8476330906521672, "grad_norm": 0.33802327513694763, "learning_rate": 1.2398101338595725e-05, "loss": 0.5299, "step": 27593 }, { "epoch": 0.8476638097871164, "grad_norm": 0.3804583251476288, "learning_rate": 1.2397632160584396e-05, "loss": 0.5206, "step": 27594 }, { "epoch": 0.8476945289220655, "grad_norm": 0.3531932830810547, "learning_rate": 1.2397162976973226e-05, "loss": 0.4489, "step": 27595 }, { "epoch": 0.8477252480570148, "grad_norm": 0.3475799262523651, "learning_rate": 1.2396693787763318e-05, "loss": 0.5297, "step": 27596 }, { "epoch": 0.8477559671919639, "grad_norm": 0.434799462556839, "learning_rate": 1.2396224592955763e-05, "loss": 0.6215, "step": 27597 }, { "epoch": 0.847786686326913, "grad_norm": 0.37432989478111267, "learning_rate": 1.239575539255166e-05, "loss": 0.5969, "step": 27598 }, { "epoch": 0.8478174054618622, "grad_norm": 0.3698047995567322, "learning_rate": 1.23952861865521e-05, "loss": 0.6239, "step": 27599 }, { "epoch": 0.8478481245968114, "grad_norm": 0.4090425670146942, "learning_rate": 1.2394816974958186e-05, "loss": 0.6218, "step": 27600 }, { "epoch": 0.8478788437317605, "grad_norm": 0.3428356945514679, "learning_rate": 1.2394347757771009e-05, "loss": 0.6455, "step": 27601 }, { "epoch": 0.8479095628667097, "grad_norm": 0.4947218596935272, "learning_rate": 1.239387853499166e-05, "loss": 0.5012, "step": 27602 }, { "epoch": 0.8479402820016588, "grad_norm": 0.3196181654930115, "learning_rate": 1.2393409306621246e-05, "loss": 0.5038, "step": 27603 }, { "epoch": 0.847971001136608, "grad_norm": 0.37893858551979065, "learning_rate": 1.2392940072660857e-05, "loss": 0.565, "step": 27604 }, { "epoch": 0.8480017202715572, "grad_norm": 0.4031427204608917, "learning_rate": 1.239247083311159e-05, "loss": 0.5866, "step": 27605 }, { "epoch": 0.8480324394065063, "grad_norm": 0.3469350039958954, "learning_rate": 1.2392001587974536e-05, "loss": 0.4709, "step": 27606 }, { "epoch": 0.8480631585414554, "grad_norm": 0.40895912051200867, "learning_rate": 1.23915323372508e-05, "loss": 0.535, "step": 27607 }, { "epoch": 0.8480938776764047, "grad_norm": 0.3670434355735779, "learning_rate": 1.239106308094147e-05, "loss": 0.4464, "step": 27608 }, { "epoch": 0.8481245968113538, "grad_norm": 0.38032007217407227, "learning_rate": 1.2390593819047647e-05, "loss": 0.5194, "step": 27609 }, { "epoch": 0.848155315946303, "grad_norm": 0.3661014139652252, "learning_rate": 1.239012455157042e-05, "loss": 0.4715, "step": 27610 }, { "epoch": 0.8481860350812521, "grad_norm": 0.4126776158809662, "learning_rate": 1.2389655278510899e-05, "loss": 0.5417, "step": 27611 }, { "epoch": 0.8482167542162012, "grad_norm": 0.37472566962242126, "learning_rate": 1.2389185999870168e-05, "loss": 0.5441, "step": 27612 }, { "epoch": 0.8482474733511505, "grad_norm": 0.4004959762096405, "learning_rate": 1.238871671564932e-05, "loss": 0.5539, "step": 27613 }, { "epoch": 0.8482781924860996, "grad_norm": 0.3484770357608795, "learning_rate": 1.2388247425849465e-05, "loss": 0.5705, "step": 27614 }, { "epoch": 0.8483089116210487, "grad_norm": 0.33629119396209717, "learning_rate": 1.2387778130471687e-05, "loss": 0.5067, "step": 27615 }, { "epoch": 0.8483396307559979, "grad_norm": 0.3756142556667328, "learning_rate": 1.2387308829517088e-05, "loss": 0.5346, "step": 27616 }, { "epoch": 0.8483703498909471, "grad_norm": 0.3652289807796478, "learning_rate": 1.2386839522986763e-05, "loss": 0.5617, "step": 27617 }, { "epoch": 0.8484010690258962, "grad_norm": 0.37556180357933044, "learning_rate": 1.2386370210881809e-05, "loss": 0.5169, "step": 27618 }, { "epoch": 0.8484317881608454, "grad_norm": 0.42423388361930847, "learning_rate": 1.2385900893203317e-05, "loss": 0.6148, "step": 27619 }, { "epoch": 0.8484625072957945, "grad_norm": 0.3416650891304016, "learning_rate": 1.2385431569952388e-05, "loss": 0.5309, "step": 27620 }, { "epoch": 0.8484932264307438, "grad_norm": 0.36803749203681946, "learning_rate": 1.2384962241130121e-05, "loss": 0.546, "step": 27621 }, { "epoch": 0.8485239455656929, "grad_norm": 0.39428457617759705, "learning_rate": 1.2384492906737602e-05, "loss": 0.5962, "step": 27622 }, { "epoch": 0.848554664700642, "grad_norm": 0.5533764362335205, "learning_rate": 1.2384023566775939e-05, "loss": 0.5379, "step": 27623 }, { "epoch": 0.8485853838355912, "grad_norm": 0.38032206892967224, "learning_rate": 1.2383554221246217e-05, "loss": 0.5974, "step": 27624 }, { "epoch": 0.8486161029705404, "grad_norm": 0.36803096532821655, "learning_rate": 1.2383084870149542e-05, "loss": 0.618, "step": 27625 }, { "epoch": 0.8486468221054895, "grad_norm": 0.3871247470378876, "learning_rate": 1.2382615513487002e-05, "loss": 0.5456, "step": 27626 }, { "epoch": 0.8486775412404387, "grad_norm": 0.35111314058303833, "learning_rate": 1.23821461512597e-05, "loss": 0.4787, "step": 27627 }, { "epoch": 0.8487082603753878, "grad_norm": 0.32581064105033875, "learning_rate": 1.2381676783468728e-05, "loss": 0.473, "step": 27628 }, { "epoch": 0.8487389795103369, "grad_norm": 0.38832929730415344, "learning_rate": 1.2381207410115184e-05, "loss": 0.599, "step": 27629 }, { "epoch": 0.8487696986452862, "grad_norm": 0.34616926312446594, "learning_rate": 1.2380738031200166e-05, "loss": 0.4384, "step": 27630 }, { "epoch": 0.8488004177802353, "grad_norm": 0.45111897587776184, "learning_rate": 1.2380268646724763e-05, "loss": 0.5393, "step": 27631 }, { "epoch": 0.8488311369151844, "grad_norm": 0.34794455766677856, "learning_rate": 1.2379799256690079e-05, "loss": 0.5699, "step": 27632 }, { "epoch": 0.8488618560501336, "grad_norm": 0.3860713541507721, "learning_rate": 1.2379329861097207e-05, "loss": 0.612, "step": 27633 }, { "epoch": 0.8488925751850828, "grad_norm": 0.41103070974349976, "learning_rate": 1.2378860459947245e-05, "loss": 0.5743, "step": 27634 }, { "epoch": 0.848923294320032, "grad_norm": 0.35143613815307617, "learning_rate": 1.237839105324129e-05, "loss": 0.5397, "step": 27635 }, { "epoch": 0.8489540134549811, "grad_norm": 0.3847537636756897, "learning_rate": 1.2377921640980432e-05, "loss": 0.5129, "step": 27636 }, { "epoch": 0.8489847325899302, "grad_norm": 0.36957573890686035, "learning_rate": 1.2377452223165775e-05, "loss": 0.5632, "step": 27637 }, { "epoch": 0.8490154517248795, "grad_norm": 0.522733211517334, "learning_rate": 1.2376982799798412e-05, "loss": 0.5043, "step": 27638 }, { "epoch": 0.8490461708598286, "grad_norm": 0.3464639186859131, "learning_rate": 1.2376513370879442e-05, "loss": 0.5688, "step": 27639 }, { "epoch": 0.8490768899947777, "grad_norm": 0.38365262746810913, "learning_rate": 1.2376043936409954e-05, "loss": 0.5402, "step": 27640 }, { "epoch": 0.8491076091297269, "grad_norm": 0.35751473903656006, "learning_rate": 1.2375574496391054e-05, "loss": 0.596, "step": 27641 }, { "epoch": 0.8491383282646761, "grad_norm": 0.5369989275932312, "learning_rate": 1.237510505082383e-05, "loss": 0.648, "step": 27642 }, { "epoch": 0.8491690473996252, "grad_norm": 0.38200005888938904, "learning_rate": 1.2374635599709385e-05, "loss": 0.5663, "step": 27643 }, { "epoch": 0.8491997665345744, "grad_norm": 0.36229240894317627, "learning_rate": 1.2374166143048811e-05, "loss": 0.599, "step": 27644 }, { "epoch": 0.8492304856695235, "grad_norm": 0.34747445583343506, "learning_rate": 1.2373696680843208e-05, "loss": 0.5364, "step": 27645 }, { "epoch": 0.8492612048044728, "grad_norm": 0.36600613594055176, "learning_rate": 1.237322721309367e-05, "loss": 0.5333, "step": 27646 }, { "epoch": 0.8492919239394219, "grad_norm": 0.371778666973114, "learning_rate": 1.2372757739801294e-05, "loss": 0.603, "step": 27647 }, { "epoch": 0.849322643074371, "grad_norm": 0.3720551133155823, "learning_rate": 1.2372288260967176e-05, "loss": 0.5352, "step": 27648 }, { "epoch": 0.8493533622093202, "grad_norm": 0.34599435329437256, "learning_rate": 1.2371818776592412e-05, "loss": 0.6118, "step": 27649 }, { "epoch": 0.8493840813442693, "grad_norm": 0.3657319247722626, "learning_rate": 1.23713492866781e-05, "loss": 0.5604, "step": 27650 }, { "epoch": 0.8494148004792185, "grad_norm": 0.3356226682662964, "learning_rate": 1.2370879791225336e-05, "loss": 0.5055, "step": 27651 }, { "epoch": 0.8494455196141677, "grad_norm": 0.4043309688568115, "learning_rate": 1.237041029023522e-05, "loss": 0.5665, "step": 27652 }, { "epoch": 0.8494762387491168, "grad_norm": 0.3583422303199768, "learning_rate": 1.2369940783708843e-05, "loss": 0.5632, "step": 27653 }, { "epoch": 0.8495069578840659, "grad_norm": 0.3308202922344208, "learning_rate": 1.2369471271647301e-05, "loss": 0.5708, "step": 27654 }, { "epoch": 0.8495376770190152, "grad_norm": 0.3720800578594208, "learning_rate": 1.2369001754051699e-05, "loss": 0.5684, "step": 27655 }, { "epoch": 0.8495683961539643, "grad_norm": 0.36900806427001953, "learning_rate": 1.2368532230923124e-05, "loss": 0.5838, "step": 27656 }, { "epoch": 0.8495991152889134, "grad_norm": 0.3496271073818207, "learning_rate": 1.2368062702262679e-05, "loss": 0.4829, "step": 27657 }, { "epoch": 0.8496298344238626, "grad_norm": 0.3807847499847412, "learning_rate": 1.2367593168071456e-05, "loss": 0.5398, "step": 27658 }, { "epoch": 0.8496605535588118, "grad_norm": 0.39862456917762756, "learning_rate": 1.2367123628350555e-05, "loss": 0.6229, "step": 27659 }, { "epoch": 0.849691272693761, "grad_norm": 0.40468892455101013, "learning_rate": 1.2366654083101071e-05, "loss": 0.5431, "step": 27660 }, { "epoch": 0.8497219918287101, "grad_norm": 0.35003674030303955, "learning_rate": 1.2366184532324102e-05, "loss": 0.5963, "step": 27661 }, { "epoch": 0.8497527109636592, "grad_norm": 0.36983147263526917, "learning_rate": 1.2365714976020741e-05, "loss": 0.4921, "step": 27662 }, { "epoch": 0.8497834300986085, "grad_norm": 0.4098007380962372, "learning_rate": 1.236524541419209e-05, "loss": 0.4866, "step": 27663 }, { "epoch": 0.8498141492335576, "grad_norm": 0.42959994077682495, "learning_rate": 1.2364775846839243e-05, "loss": 0.4908, "step": 27664 }, { "epoch": 0.8498448683685067, "grad_norm": 0.3514260947704315, "learning_rate": 1.2364306273963296e-05, "loss": 0.527, "step": 27665 }, { "epoch": 0.8498755875034559, "grad_norm": 0.403155118227005, "learning_rate": 1.2363836695565347e-05, "loss": 0.5841, "step": 27666 }, { "epoch": 0.849906306638405, "grad_norm": 0.3411968946456909, "learning_rate": 1.2363367111646494e-05, "loss": 0.525, "step": 27667 }, { "epoch": 0.8499370257733542, "grad_norm": 0.3585547208786011, "learning_rate": 1.2362897522207828e-05, "loss": 0.4805, "step": 27668 }, { "epoch": 0.8499677449083034, "grad_norm": 0.35696566104888916, "learning_rate": 1.2362427927250451e-05, "loss": 0.5332, "step": 27669 }, { "epoch": 0.8499984640432525, "grad_norm": 0.35202375054359436, "learning_rate": 1.2361958326775461e-05, "loss": 0.536, "step": 27670 }, { "epoch": 0.8500291831782018, "grad_norm": 0.378108412027359, "learning_rate": 1.2361488720783953e-05, "loss": 0.5554, "step": 27671 }, { "epoch": 0.8500599023131509, "grad_norm": 0.3698126971721649, "learning_rate": 1.236101910927702e-05, "loss": 0.5573, "step": 27672 }, { "epoch": 0.8500906214481, "grad_norm": 0.3581342101097107, "learning_rate": 1.2360549492255764e-05, "loss": 0.5007, "step": 27673 }, { "epoch": 0.8501213405830492, "grad_norm": 0.34038016200065613, "learning_rate": 1.2360079869721278e-05, "loss": 0.5157, "step": 27674 }, { "epoch": 0.8501520597179983, "grad_norm": 0.34282708168029785, "learning_rate": 1.2359610241674663e-05, "loss": 0.4759, "step": 27675 }, { "epoch": 0.8501827788529475, "grad_norm": 0.3716139495372772, "learning_rate": 1.235914060811701e-05, "loss": 0.5942, "step": 27676 }, { "epoch": 0.8502134979878967, "grad_norm": 0.40348565578460693, "learning_rate": 1.2358670969049423e-05, "loss": 0.5888, "step": 27677 }, { "epoch": 0.8502442171228458, "grad_norm": 0.3560005724430084, "learning_rate": 1.2358201324472993e-05, "loss": 0.5953, "step": 27678 }, { "epoch": 0.8502749362577949, "grad_norm": 0.37286844849586487, "learning_rate": 1.235773167438882e-05, "loss": 0.4911, "step": 27679 }, { "epoch": 0.8503056553927442, "grad_norm": 0.38045474886894226, "learning_rate": 1.2357262018798e-05, "loss": 0.5369, "step": 27680 }, { "epoch": 0.8503363745276933, "grad_norm": 0.3868919610977173, "learning_rate": 1.235679235770163e-05, "loss": 0.5726, "step": 27681 }, { "epoch": 0.8503670936626425, "grad_norm": 0.3813724219799042, "learning_rate": 1.2356322691100808e-05, "loss": 0.6549, "step": 27682 }, { "epoch": 0.8503978127975916, "grad_norm": 0.40410080552101135, "learning_rate": 1.2355853018996626e-05, "loss": 0.4689, "step": 27683 }, { "epoch": 0.8504285319325408, "grad_norm": 0.3851407468318939, "learning_rate": 1.235538334139019e-05, "loss": 0.5534, "step": 27684 }, { "epoch": 0.85045925106749, "grad_norm": 0.373899906873703, "learning_rate": 1.2354913658282588e-05, "loss": 0.5304, "step": 27685 }, { "epoch": 0.8504899702024391, "grad_norm": 0.4116860032081604, "learning_rate": 1.2354443969674921e-05, "loss": 0.6504, "step": 27686 }, { "epoch": 0.8505206893373882, "grad_norm": 0.3494124114513397, "learning_rate": 1.2353974275568283e-05, "loss": 0.5252, "step": 27687 }, { "epoch": 0.8505514084723375, "grad_norm": 0.3701028823852539, "learning_rate": 1.235350457596378e-05, "loss": 0.607, "step": 27688 }, { "epoch": 0.8505821276072866, "grad_norm": 0.38732269406318665, "learning_rate": 1.23530348708625e-05, "loss": 0.5422, "step": 27689 }, { "epoch": 0.8506128467422357, "grad_norm": 0.3389124870300293, "learning_rate": 1.2352565160265542e-05, "loss": 0.6013, "step": 27690 }, { "epoch": 0.8506435658771849, "grad_norm": 0.34501469135284424, "learning_rate": 1.2352095444174004e-05, "loss": 0.5017, "step": 27691 }, { "epoch": 0.850674285012134, "grad_norm": 0.378718763589859, "learning_rate": 1.2351625722588983e-05, "loss": 0.5618, "step": 27692 }, { "epoch": 0.8507050041470832, "grad_norm": 0.38276028633117676, "learning_rate": 1.2351155995511577e-05, "loss": 0.5108, "step": 27693 }, { "epoch": 0.8507357232820324, "grad_norm": 0.3475668132305145, "learning_rate": 1.235068626294288e-05, "loss": 0.5566, "step": 27694 }, { "epoch": 0.8507664424169815, "grad_norm": 0.314266562461853, "learning_rate": 1.235021652488399e-05, "loss": 0.5815, "step": 27695 }, { "epoch": 0.8507971615519307, "grad_norm": 0.34614232182502747, "learning_rate": 1.2349746781336007e-05, "loss": 0.5314, "step": 27696 }, { "epoch": 0.8508278806868799, "grad_norm": 0.36781755089759827, "learning_rate": 1.2349277032300027e-05, "loss": 0.575, "step": 27697 }, { "epoch": 0.850858599821829, "grad_norm": 0.36114880442619324, "learning_rate": 1.2348807277777145e-05, "loss": 0.5868, "step": 27698 }, { "epoch": 0.8508893189567782, "grad_norm": 0.3418674170970917, "learning_rate": 1.2348337517768463e-05, "loss": 0.5001, "step": 27699 }, { "epoch": 0.8509200380917273, "grad_norm": 0.34694817662239075, "learning_rate": 1.234786775227507e-05, "loss": 0.4948, "step": 27700 }, { "epoch": 0.8509507572266765, "grad_norm": 0.3660411238670349, "learning_rate": 1.234739798129807e-05, "loss": 0.492, "step": 27701 }, { "epoch": 0.8509814763616257, "grad_norm": 0.35959649085998535, "learning_rate": 1.2346928204838559e-05, "loss": 0.6639, "step": 27702 }, { "epoch": 0.8510121954965748, "grad_norm": 0.5124613642692566, "learning_rate": 1.2346458422897631e-05, "loss": 0.5289, "step": 27703 }, { "epoch": 0.8510429146315239, "grad_norm": 0.39366415143013, "learning_rate": 1.2345988635476386e-05, "loss": 0.6025, "step": 27704 }, { "epoch": 0.8510736337664732, "grad_norm": 0.3338741958141327, "learning_rate": 1.2345518842575922e-05, "loss": 0.4333, "step": 27705 }, { "epoch": 0.8511043529014223, "grad_norm": 0.39096274971961975, "learning_rate": 1.2345049044197334e-05, "loss": 0.6094, "step": 27706 }, { "epoch": 0.8511350720363715, "grad_norm": 0.36250248551368713, "learning_rate": 1.2344579240341722e-05, "loss": 0.5184, "step": 27707 }, { "epoch": 0.8511657911713206, "grad_norm": 0.4240153133869171, "learning_rate": 1.2344109431010178e-05, "loss": 0.4464, "step": 27708 }, { "epoch": 0.8511965103062697, "grad_norm": 0.425925612449646, "learning_rate": 1.2343639616203806e-05, "loss": 0.545, "step": 27709 }, { "epoch": 0.851227229441219, "grad_norm": 1.0063949823379517, "learning_rate": 1.2343169795923697e-05, "loss": 0.5552, "step": 27710 }, { "epoch": 0.8512579485761681, "grad_norm": 0.34353914856910706, "learning_rate": 1.2342699970170955e-05, "loss": 0.5417, "step": 27711 }, { "epoch": 0.8512886677111172, "grad_norm": 0.34566131234169006, "learning_rate": 1.2342230138946672e-05, "loss": 0.5062, "step": 27712 }, { "epoch": 0.8513193868460665, "grad_norm": 0.4005297124385834, "learning_rate": 1.2341760302251947e-05, "loss": 0.6388, "step": 27713 }, { "epoch": 0.8513501059810156, "grad_norm": 0.357536643743515, "learning_rate": 1.2341290460087876e-05, "loss": 0.552, "step": 27714 }, { "epoch": 0.8513808251159647, "grad_norm": 0.43801993131637573, "learning_rate": 1.234082061245556e-05, "loss": 0.5338, "step": 27715 }, { "epoch": 0.8514115442509139, "grad_norm": 0.36187806725502014, "learning_rate": 1.2340350759356092e-05, "loss": 0.5242, "step": 27716 }, { "epoch": 0.851442263385863, "grad_norm": 0.3771367371082306, "learning_rate": 1.2339880900790571e-05, "loss": 0.5197, "step": 27717 }, { "epoch": 0.8514729825208122, "grad_norm": 0.3532012403011322, "learning_rate": 1.2339411036760096e-05, "loss": 0.5409, "step": 27718 }, { "epoch": 0.8515037016557614, "grad_norm": 0.34963709115982056, "learning_rate": 1.2338941167265761e-05, "loss": 0.584, "step": 27719 }, { "epoch": 0.8515344207907105, "grad_norm": 0.3729380965232849, "learning_rate": 1.2338471292308669e-05, "loss": 0.4935, "step": 27720 }, { "epoch": 0.8515651399256597, "grad_norm": 0.34471115469932556, "learning_rate": 1.233800141188991e-05, "loss": 0.5074, "step": 27721 }, { "epoch": 0.8515958590606089, "grad_norm": 0.360833078622818, "learning_rate": 1.233753152601059e-05, "loss": 0.5015, "step": 27722 }, { "epoch": 0.851626578195558, "grad_norm": 0.37381839752197266, "learning_rate": 1.2337061634671798e-05, "loss": 0.4978, "step": 27723 }, { "epoch": 0.8516572973305072, "grad_norm": 0.34398362040519714, "learning_rate": 1.2336591737874637e-05, "loss": 0.5329, "step": 27724 }, { "epoch": 0.8516880164654563, "grad_norm": 0.3794763684272766, "learning_rate": 1.2336121835620203e-05, "loss": 0.5148, "step": 27725 }, { "epoch": 0.8517187356004055, "grad_norm": 0.3366122543811798, "learning_rate": 1.2335651927909592e-05, "loss": 0.5405, "step": 27726 }, { "epoch": 0.8517494547353547, "grad_norm": 0.36852559447288513, "learning_rate": 1.2335182014743905e-05, "loss": 0.5925, "step": 27727 }, { "epoch": 0.8517801738703038, "grad_norm": 0.386248379945755, "learning_rate": 1.2334712096124235e-05, "loss": 0.5496, "step": 27728 }, { "epoch": 0.8518108930052529, "grad_norm": 0.373401403427124, "learning_rate": 1.2334242172051683e-05, "loss": 0.5177, "step": 27729 }, { "epoch": 0.8518416121402022, "grad_norm": 0.3655671775341034, "learning_rate": 1.2333772242527344e-05, "loss": 0.5748, "step": 27730 }, { "epoch": 0.8518723312751513, "grad_norm": 0.368206262588501, "learning_rate": 1.233330230755232e-05, "loss": 0.5012, "step": 27731 }, { "epoch": 0.8519030504101005, "grad_norm": 0.31548717617988586, "learning_rate": 1.2332832367127702e-05, "loss": 0.4881, "step": 27732 }, { "epoch": 0.8519337695450496, "grad_norm": 0.3904719948768616, "learning_rate": 1.2332362421254594e-05, "loss": 0.5306, "step": 27733 }, { "epoch": 0.8519644886799987, "grad_norm": 0.3459623157978058, "learning_rate": 1.233189246993409e-05, "loss": 0.5501, "step": 27734 }, { "epoch": 0.851995207814948, "grad_norm": 0.7190314531326294, "learning_rate": 1.2331422513167288e-05, "loss": 0.577, "step": 27735 }, { "epoch": 0.8520259269498971, "grad_norm": 0.3783929944038391, "learning_rate": 1.2330952550955286e-05, "loss": 0.5136, "step": 27736 }, { "epoch": 0.8520566460848462, "grad_norm": 0.3707468509674072, "learning_rate": 1.2330482583299179e-05, "loss": 0.5991, "step": 27737 }, { "epoch": 0.8520873652197954, "grad_norm": 0.42767050862312317, "learning_rate": 1.233001261020007e-05, "loss": 0.55, "step": 27738 }, { "epoch": 0.8521180843547446, "grad_norm": 0.36012694239616394, "learning_rate": 1.2329542631659053e-05, "loss": 0.5503, "step": 27739 }, { "epoch": 0.8521488034896937, "grad_norm": 0.3739129900932312, "learning_rate": 1.232907264767723e-05, "loss": 0.5916, "step": 27740 }, { "epoch": 0.8521795226246429, "grad_norm": 0.32727551460266113, "learning_rate": 1.2328602658255688e-05, "loss": 0.4608, "step": 27741 }, { "epoch": 0.852210241759592, "grad_norm": 0.37308982014656067, "learning_rate": 1.2328132663395536e-05, "loss": 0.6117, "step": 27742 }, { "epoch": 0.8522409608945412, "grad_norm": 0.38677358627319336, "learning_rate": 1.2327662663097867e-05, "loss": 0.4487, "step": 27743 }, { "epoch": 0.8522716800294904, "grad_norm": 0.3356735110282898, "learning_rate": 1.232719265736378e-05, "loss": 0.5458, "step": 27744 }, { "epoch": 0.8523023991644395, "grad_norm": 0.344997763633728, "learning_rate": 1.2326722646194371e-05, "loss": 0.5609, "step": 27745 }, { "epoch": 0.8523331182993887, "grad_norm": 0.40413641929626465, "learning_rate": 1.2326252629590738e-05, "loss": 0.545, "step": 27746 }, { "epoch": 0.8523638374343379, "grad_norm": 0.36955761909484863, "learning_rate": 1.2325782607553982e-05, "loss": 0.5402, "step": 27747 }, { "epoch": 0.852394556569287, "grad_norm": 0.37410488724708557, "learning_rate": 1.2325312580085198e-05, "loss": 0.6137, "step": 27748 }, { "epoch": 0.8524252757042362, "grad_norm": 0.33513057231903076, "learning_rate": 1.2324842547185482e-05, "loss": 0.5609, "step": 27749 }, { "epoch": 0.8524559948391853, "grad_norm": 0.3680737614631653, "learning_rate": 1.2324372508855934e-05, "loss": 0.541, "step": 27750 }, { "epoch": 0.8524867139741344, "grad_norm": 0.34349825978279114, "learning_rate": 1.2323902465097654e-05, "loss": 0.5677, "step": 27751 }, { "epoch": 0.8525174331090837, "grad_norm": 0.3588765859603882, "learning_rate": 1.2323432415911738e-05, "loss": 0.5802, "step": 27752 }, { "epoch": 0.8525481522440328, "grad_norm": 0.3366919457912445, "learning_rate": 1.232296236129928e-05, "loss": 0.5701, "step": 27753 }, { "epoch": 0.8525788713789819, "grad_norm": 0.40314435958862305, "learning_rate": 1.2322492301261383e-05, "loss": 0.6076, "step": 27754 }, { "epoch": 0.8526095905139311, "grad_norm": 0.372598797082901, "learning_rate": 1.2322022235799143e-05, "loss": 0.5555, "step": 27755 }, { "epoch": 0.8526403096488803, "grad_norm": 0.34022241830825806, "learning_rate": 1.232155216491366e-05, "loss": 0.5413, "step": 27756 }, { "epoch": 0.8526710287838295, "grad_norm": 0.32416266202926636, "learning_rate": 1.2321082088606025e-05, "loss": 0.5545, "step": 27757 }, { "epoch": 0.8527017479187786, "grad_norm": 0.4185382127761841, "learning_rate": 1.2320612006877343e-05, "loss": 0.6124, "step": 27758 }, { "epoch": 0.8527324670537277, "grad_norm": 0.36082401871681213, "learning_rate": 1.232014191972871e-05, "loss": 0.5648, "step": 27759 }, { "epoch": 0.852763186188677, "grad_norm": 0.3383876383304596, "learning_rate": 1.2319671827161221e-05, "loss": 0.5654, "step": 27760 }, { "epoch": 0.8527939053236261, "grad_norm": 0.4308532476425171, "learning_rate": 1.231920172917598e-05, "loss": 0.6767, "step": 27761 }, { "epoch": 0.8528246244585752, "grad_norm": 0.3607148230075836, "learning_rate": 1.2318731625774082e-05, "loss": 0.4601, "step": 27762 }, { "epoch": 0.8528553435935244, "grad_norm": 0.36847370862960815, "learning_rate": 1.2318261516956622e-05, "loss": 0.598, "step": 27763 }, { "epoch": 0.8528860627284736, "grad_norm": 0.36047089099884033, "learning_rate": 1.23177914027247e-05, "loss": 0.5176, "step": 27764 }, { "epoch": 0.8529167818634227, "grad_norm": 0.39380884170532227, "learning_rate": 1.2317321283079416e-05, "loss": 0.6192, "step": 27765 }, { "epoch": 0.8529475009983719, "grad_norm": 0.3488715589046478, "learning_rate": 1.2316851158021863e-05, "loss": 0.5345, "step": 27766 }, { "epoch": 0.852978220133321, "grad_norm": 0.3662119209766388, "learning_rate": 1.2316381027553146e-05, "loss": 0.5942, "step": 27767 }, { "epoch": 0.8530089392682703, "grad_norm": 0.38663047552108765, "learning_rate": 1.231591089167436e-05, "loss": 0.5565, "step": 27768 }, { "epoch": 0.8530396584032194, "grad_norm": 0.3316810429096222, "learning_rate": 1.2315440750386598e-05, "loss": 0.4756, "step": 27769 }, { "epoch": 0.8530703775381685, "grad_norm": 0.36014866828918457, "learning_rate": 1.2314970603690965e-05, "loss": 0.5781, "step": 27770 }, { "epoch": 0.8531010966731177, "grad_norm": 0.4250815212726593, "learning_rate": 1.2314500451588555e-05, "loss": 0.5308, "step": 27771 }, { "epoch": 0.8531318158080669, "grad_norm": 0.46146920323371887, "learning_rate": 1.231403029408047e-05, "loss": 0.4949, "step": 27772 }, { "epoch": 0.853162534943016, "grad_norm": 0.4032500386238098, "learning_rate": 1.2313560131167806e-05, "loss": 0.5139, "step": 27773 }, { "epoch": 0.8531932540779652, "grad_norm": 0.4079582989215851, "learning_rate": 1.2313089962851659e-05, "loss": 0.6213, "step": 27774 }, { "epoch": 0.8532239732129143, "grad_norm": 0.5043154954910278, "learning_rate": 1.2312619789133127e-05, "loss": 0.5095, "step": 27775 }, { "epoch": 0.8532546923478634, "grad_norm": 0.4260423481464386, "learning_rate": 1.2312149610013312e-05, "loss": 0.5136, "step": 27776 }, { "epoch": 0.8532854114828127, "grad_norm": 0.3751973807811737, "learning_rate": 1.231167942549331e-05, "loss": 0.5682, "step": 27777 }, { "epoch": 0.8533161306177618, "grad_norm": 0.40562862157821655, "learning_rate": 1.231120923557422e-05, "loss": 0.551, "step": 27778 }, { "epoch": 0.8533468497527109, "grad_norm": 0.39072665572166443, "learning_rate": 1.2310739040257139e-05, "loss": 0.5345, "step": 27779 }, { "epoch": 0.8533775688876601, "grad_norm": 0.3881576657295227, "learning_rate": 1.2310268839543164e-05, "loss": 0.4981, "step": 27780 }, { "epoch": 0.8534082880226093, "grad_norm": 0.3784579634666443, "learning_rate": 1.2309798633433396e-05, "loss": 0.5301, "step": 27781 }, { "epoch": 0.8534390071575585, "grad_norm": 0.6237891316413879, "learning_rate": 1.2309328421928931e-05, "loss": 0.5178, "step": 27782 }, { "epoch": 0.8534697262925076, "grad_norm": 0.4022156894207001, "learning_rate": 1.2308858205030871e-05, "loss": 0.6111, "step": 27783 }, { "epoch": 0.8535004454274567, "grad_norm": 0.39802563190460205, "learning_rate": 1.2308387982740309e-05, "loss": 0.5348, "step": 27784 }, { "epoch": 0.853531164562406, "grad_norm": 0.3966676890850067, "learning_rate": 1.2307917755058347e-05, "loss": 0.5242, "step": 27785 }, { "epoch": 0.8535618836973551, "grad_norm": 0.42816200852394104, "learning_rate": 1.2307447521986082e-05, "loss": 0.5178, "step": 27786 }, { "epoch": 0.8535926028323042, "grad_norm": 0.4654396176338196, "learning_rate": 1.2306977283524612e-05, "loss": 0.5665, "step": 27787 }, { "epoch": 0.8536233219672534, "grad_norm": 0.399015873670578, "learning_rate": 1.2306507039675036e-05, "loss": 0.5136, "step": 27788 }, { "epoch": 0.8536540411022026, "grad_norm": 0.37021368741989136, "learning_rate": 1.230603679043845e-05, "loss": 0.6099, "step": 27789 }, { "epoch": 0.8536847602371517, "grad_norm": 0.4140093922615051, "learning_rate": 1.2305566535815954e-05, "loss": 0.5048, "step": 27790 }, { "epoch": 0.8537154793721009, "grad_norm": 0.36630529165267944, "learning_rate": 1.2305096275808645e-05, "loss": 0.5378, "step": 27791 }, { "epoch": 0.85374619850705, "grad_norm": 0.3642320930957794, "learning_rate": 1.2304626010417626e-05, "loss": 0.5665, "step": 27792 }, { "epoch": 0.8537769176419993, "grad_norm": 0.3684598207473755, "learning_rate": 1.2304155739643991e-05, "loss": 0.485, "step": 27793 }, { "epoch": 0.8538076367769484, "grad_norm": 0.3651782274246216, "learning_rate": 1.2303685463488839e-05, "loss": 0.5298, "step": 27794 }, { "epoch": 0.8538383559118975, "grad_norm": 0.42666348814964294, "learning_rate": 1.2303215181953267e-05, "loss": 0.552, "step": 27795 }, { "epoch": 0.8538690750468467, "grad_norm": 0.39943987131118774, "learning_rate": 1.2302744895038377e-05, "loss": 0.6292, "step": 27796 }, { "epoch": 0.8538997941817958, "grad_norm": 0.442704975605011, "learning_rate": 1.2302274602745265e-05, "loss": 0.5349, "step": 27797 }, { "epoch": 0.853930513316745, "grad_norm": 0.37488821148872375, "learning_rate": 1.230180430507503e-05, "loss": 0.492, "step": 27798 }, { "epoch": 0.8539612324516942, "grad_norm": 0.3903057277202606, "learning_rate": 1.2301334002028771e-05, "loss": 0.5616, "step": 27799 }, { "epoch": 0.8539919515866433, "grad_norm": 0.3553638160228729, "learning_rate": 1.2300863693607586e-05, "loss": 0.5172, "step": 27800 }, { "epoch": 0.8540226707215924, "grad_norm": 0.33283498883247375, "learning_rate": 1.2300393379812571e-05, "loss": 0.5435, "step": 27801 }, { "epoch": 0.8540533898565417, "grad_norm": 0.4019143283367157, "learning_rate": 1.2299923060644827e-05, "loss": 0.6189, "step": 27802 }, { "epoch": 0.8540841089914908, "grad_norm": 0.37968674302101135, "learning_rate": 1.2299452736105455e-05, "loss": 0.5634, "step": 27803 }, { "epoch": 0.8541148281264399, "grad_norm": 0.388873815536499, "learning_rate": 1.229898240619555e-05, "loss": 0.493, "step": 27804 }, { "epoch": 0.8541455472613891, "grad_norm": 0.3610157072544098, "learning_rate": 1.229851207091621e-05, "loss": 0.6035, "step": 27805 }, { "epoch": 0.8541762663963383, "grad_norm": 0.345295786857605, "learning_rate": 1.2298041730268533e-05, "loss": 0.5426, "step": 27806 }, { "epoch": 0.8542069855312875, "grad_norm": 0.3503148555755615, "learning_rate": 1.229757138425362e-05, "loss": 0.5854, "step": 27807 }, { "epoch": 0.8542377046662366, "grad_norm": 0.36464110016822815, "learning_rate": 1.229710103287257e-05, "loss": 0.5208, "step": 27808 }, { "epoch": 0.8542684238011857, "grad_norm": 0.4120032787322998, "learning_rate": 1.2296630676126478e-05, "loss": 0.5718, "step": 27809 }, { "epoch": 0.854299142936135, "grad_norm": 0.3595506250858307, "learning_rate": 1.2296160314016445e-05, "loss": 0.5601, "step": 27810 }, { "epoch": 0.8543298620710841, "grad_norm": 0.36997929215431213, "learning_rate": 1.2295689946543567e-05, "loss": 0.4692, "step": 27811 }, { "epoch": 0.8543605812060332, "grad_norm": 0.39327314496040344, "learning_rate": 1.229521957370895e-05, "loss": 0.5105, "step": 27812 }, { "epoch": 0.8543913003409824, "grad_norm": 0.4274047315120697, "learning_rate": 1.2294749195513685e-05, "loss": 0.5957, "step": 27813 }, { "epoch": 0.8544220194759315, "grad_norm": 0.3482913076877594, "learning_rate": 1.2294278811958871e-05, "loss": 0.5767, "step": 27814 }, { "epoch": 0.8544527386108807, "grad_norm": 0.43638572096824646, "learning_rate": 1.2293808423045612e-05, "loss": 0.5512, "step": 27815 }, { "epoch": 0.8544834577458299, "grad_norm": 0.32190075516700745, "learning_rate": 1.2293338028774999e-05, "loss": 0.4885, "step": 27816 }, { "epoch": 0.854514176880779, "grad_norm": 0.35543328523635864, "learning_rate": 1.229286762914814e-05, "loss": 0.4968, "step": 27817 }, { "epoch": 0.8545448960157283, "grad_norm": 0.3791341185569763, "learning_rate": 1.2292397224166124e-05, "loss": 0.5649, "step": 27818 }, { "epoch": 0.8545756151506774, "grad_norm": 0.3394736349582672, "learning_rate": 1.2291926813830058e-05, "loss": 0.5504, "step": 27819 }, { "epoch": 0.8546063342856265, "grad_norm": 0.3511335849761963, "learning_rate": 1.2291456398141036e-05, "loss": 0.5212, "step": 27820 }, { "epoch": 0.8546370534205757, "grad_norm": 0.372125506401062, "learning_rate": 1.2290985977100156e-05, "loss": 0.579, "step": 27821 }, { "epoch": 0.8546677725555248, "grad_norm": 0.3706806004047394, "learning_rate": 1.229051555070852e-05, "loss": 0.5377, "step": 27822 }, { "epoch": 0.854698491690474, "grad_norm": 0.404598206281662, "learning_rate": 1.2290045118967221e-05, "loss": 0.5706, "step": 27823 }, { "epoch": 0.8547292108254232, "grad_norm": 0.3375270664691925, "learning_rate": 1.2289574681877368e-05, "loss": 0.4948, "step": 27824 }, { "epoch": 0.8547599299603723, "grad_norm": 0.39136001467704773, "learning_rate": 1.2289104239440048e-05, "loss": 0.5936, "step": 27825 }, { "epoch": 0.8547906490953214, "grad_norm": 0.3466721475124359, "learning_rate": 1.2288633791656367e-05, "loss": 0.5055, "step": 27826 }, { "epoch": 0.8548213682302707, "grad_norm": 0.35018718242645264, "learning_rate": 1.2288163338527421e-05, "loss": 0.5322, "step": 27827 }, { "epoch": 0.8548520873652198, "grad_norm": 0.3432736098766327, "learning_rate": 1.2287692880054314e-05, "loss": 0.6055, "step": 27828 }, { "epoch": 0.8548828065001689, "grad_norm": 0.3957948684692383, "learning_rate": 1.2287222416238136e-05, "loss": 0.5149, "step": 27829 }, { "epoch": 0.8549135256351181, "grad_norm": 0.3605062961578369, "learning_rate": 1.2286751947079991e-05, "loss": 0.5113, "step": 27830 }, { "epoch": 0.8549442447700673, "grad_norm": 0.3949517011642456, "learning_rate": 1.2286281472580978e-05, "loss": 0.5691, "step": 27831 }, { "epoch": 0.8549749639050165, "grad_norm": 0.33140885829925537, "learning_rate": 1.2285810992742193e-05, "loss": 0.5169, "step": 27832 }, { "epoch": 0.8550056830399656, "grad_norm": 0.3551516532897949, "learning_rate": 1.2285340507564741e-05, "loss": 0.5147, "step": 27833 }, { "epoch": 0.8550364021749147, "grad_norm": 0.3956296443939209, "learning_rate": 1.2284870017049712e-05, "loss": 0.5483, "step": 27834 }, { "epoch": 0.855067121309864, "grad_norm": 0.3451474606990814, "learning_rate": 1.2284399521198212e-05, "loss": 0.5479, "step": 27835 }, { "epoch": 0.8550978404448131, "grad_norm": 0.346734881401062, "learning_rate": 1.2283929020011337e-05, "loss": 0.578, "step": 27836 }, { "epoch": 0.8551285595797622, "grad_norm": 0.36936208605766296, "learning_rate": 1.2283458513490187e-05, "loss": 0.5796, "step": 27837 }, { "epoch": 0.8551592787147114, "grad_norm": 0.3712770640850067, "learning_rate": 1.228298800163586e-05, "loss": 0.4932, "step": 27838 }, { "epoch": 0.8551899978496605, "grad_norm": 0.36375266313552856, "learning_rate": 1.2282517484449455e-05, "loss": 0.6329, "step": 27839 }, { "epoch": 0.8552207169846097, "grad_norm": 0.34024080634117126, "learning_rate": 1.228204696193207e-05, "loss": 0.4901, "step": 27840 }, { "epoch": 0.8552514361195589, "grad_norm": 0.41130444407463074, "learning_rate": 1.2281576434084806e-05, "loss": 0.5344, "step": 27841 }, { "epoch": 0.855282155254508, "grad_norm": 0.3750464916229248, "learning_rate": 1.2281105900908761e-05, "loss": 0.5607, "step": 27842 }, { "epoch": 0.8553128743894572, "grad_norm": 0.3560344874858856, "learning_rate": 1.2280635362405032e-05, "loss": 0.5335, "step": 27843 }, { "epoch": 0.8553435935244064, "grad_norm": 0.3350215554237366, "learning_rate": 1.2280164818574722e-05, "loss": 0.5569, "step": 27844 }, { "epoch": 0.8553743126593555, "grad_norm": 0.3714674115180969, "learning_rate": 1.2279694269418924e-05, "loss": 0.5271, "step": 27845 }, { "epoch": 0.8554050317943047, "grad_norm": 0.44601598381996155, "learning_rate": 1.2279223714938746e-05, "loss": 0.5395, "step": 27846 }, { "epoch": 0.8554357509292538, "grad_norm": 0.3672606647014618, "learning_rate": 1.2278753155135278e-05, "loss": 0.5501, "step": 27847 }, { "epoch": 0.855466470064203, "grad_norm": 0.341852605342865, "learning_rate": 1.2278282590009627e-05, "loss": 0.6347, "step": 27848 }, { "epoch": 0.8554971891991522, "grad_norm": 0.35295847058296204, "learning_rate": 1.2277812019562884e-05, "loss": 0.5372, "step": 27849 }, { "epoch": 0.8555279083341013, "grad_norm": 0.37700098752975464, "learning_rate": 1.227734144379615e-05, "loss": 0.5241, "step": 27850 }, { "epoch": 0.8555586274690504, "grad_norm": 0.34076255559921265, "learning_rate": 1.2276870862710533e-05, "loss": 0.5105, "step": 27851 }, { "epoch": 0.8555893466039997, "grad_norm": 0.3701516091823578, "learning_rate": 1.227640027630712e-05, "loss": 0.5092, "step": 27852 }, { "epoch": 0.8556200657389488, "grad_norm": 0.3716222643852234, "learning_rate": 1.2275929684587019e-05, "loss": 0.5559, "step": 27853 }, { "epoch": 0.8556507848738979, "grad_norm": 0.32700347900390625, "learning_rate": 1.227545908755132e-05, "loss": 0.5209, "step": 27854 }, { "epoch": 0.8556815040088471, "grad_norm": 0.3878425061702728, "learning_rate": 1.2274988485201132e-05, "loss": 0.5729, "step": 27855 }, { "epoch": 0.8557122231437962, "grad_norm": 0.4009822905063629, "learning_rate": 1.2274517877537549e-05, "loss": 0.5058, "step": 27856 }, { "epoch": 0.8557429422787455, "grad_norm": 0.3958221971988678, "learning_rate": 1.2274047264561666e-05, "loss": 0.5365, "step": 27857 }, { "epoch": 0.8557736614136946, "grad_norm": 0.33384448289871216, "learning_rate": 1.2273576646274592e-05, "loss": 0.507, "step": 27858 }, { "epoch": 0.8558043805486437, "grad_norm": 0.5830596685409546, "learning_rate": 1.2273106022677417e-05, "loss": 0.5061, "step": 27859 }, { "epoch": 0.855835099683593, "grad_norm": 0.36213529109954834, "learning_rate": 1.2272635393771249e-05, "loss": 0.5408, "step": 27860 }, { "epoch": 0.8558658188185421, "grad_norm": 0.37394943833351135, "learning_rate": 1.2272164759557177e-05, "loss": 0.4831, "step": 27861 }, { "epoch": 0.8558965379534912, "grad_norm": 0.3218478262424469, "learning_rate": 1.227169412003631e-05, "loss": 0.4135, "step": 27862 }, { "epoch": 0.8559272570884404, "grad_norm": 0.34499725699424744, "learning_rate": 1.2271223475209739e-05, "loss": 0.6461, "step": 27863 }, { "epoch": 0.8559579762233895, "grad_norm": 0.3890269994735718, "learning_rate": 1.2270752825078571e-05, "loss": 0.5507, "step": 27864 }, { "epoch": 0.8559886953583387, "grad_norm": 0.4235478937625885, "learning_rate": 1.2270282169643897e-05, "loss": 0.5173, "step": 27865 }, { "epoch": 0.8560194144932879, "grad_norm": 0.37710338830947876, "learning_rate": 1.2269811508906823e-05, "loss": 0.5996, "step": 27866 }, { "epoch": 0.856050133628237, "grad_norm": 0.3799552321434021, "learning_rate": 1.2269340842868446e-05, "loss": 0.5656, "step": 27867 }, { "epoch": 0.8560808527631862, "grad_norm": 0.3807094693183899, "learning_rate": 1.2268870171529864e-05, "loss": 0.5656, "step": 27868 }, { "epoch": 0.8561115718981354, "grad_norm": 0.36839520931243896, "learning_rate": 1.2268399494892176e-05, "loss": 0.5281, "step": 27869 }, { "epoch": 0.8561422910330845, "grad_norm": 0.5964272618293762, "learning_rate": 1.2267928812956483e-05, "loss": 0.5519, "step": 27870 }, { "epoch": 0.8561730101680337, "grad_norm": 0.3601151406764984, "learning_rate": 1.2267458125723885e-05, "loss": 0.5134, "step": 27871 }, { "epoch": 0.8562037293029828, "grad_norm": 0.3634551167488098, "learning_rate": 1.2266987433195481e-05, "loss": 0.5755, "step": 27872 }, { "epoch": 0.856234448437932, "grad_norm": 0.3347277343273163, "learning_rate": 1.2266516735372367e-05, "loss": 0.4538, "step": 27873 }, { "epoch": 0.8562651675728812, "grad_norm": 0.348099946975708, "learning_rate": 1.2266046032255648e-05, "loss": 0.5283, "step": 27874 }, { "epoch": 0.8562958867078303, "grad_norm": 0.3729408085346222, "learning_rate": 1.2265575323846417e-05, "loss": 0.5531, "step": 27875 }, { "epoch": 0.8563266058427794, "grad_norm": 0.3797382116317749, "learning_rate": 1.2265104610145779e-05, "loss": 0.601, "step": 27876 }, { "epoch": 0.8563573249777287, "grad_norm": 0.3526412844657898, "learning_rate": 1.2264633891154829e-05, "loss": 0.5156, "step": 27877 }, { "epoch": 0.8563880441126778, "grad_norm": 0.3546144366264343, "learning_rate": 1.2264163166874671e-05, "loss": 0.5451, "step": 27878 }, { "epoch": 0.856418763247627, "grad_norm": 0.5102583765983582, "learning_rate": 1.2263692437306401e-05, "loss": 0.5301, "step": 27879 }, { "epoch": 0.8564494823825761, "grad_norm": 0.3690704107284546, "learning_rate": 1.2263221702451119e-05, "loss": 0.6893, "step": 27880 }, { "epoch": 0.8564802015175252, "grad_norm": 0.403723806142807, "learning_rate": 1.2262750962309924e-05, "loss": 0.5576, "step": 27881 }, { "epoch": 0.8565109206524745, "grad_norm": 0.3293156623840332, "learning_rate": 1.2262280216883916e-05, "loss": 0.5383, "step": 27882 }, { "epoch": 0.8565416397874236, "grad_norm": 0.34353455901145935, "learning_rate": 1.2261809466174194e-05, "loss": 0.6149, "step": 27883 }, { "epoch": 0.8565723589223727, "grad_norm": 0.47761696577072144, "learning_rate": 1.226133871018186e-05, "loss": 0.5723, "step": 27884 }, { "epoch": 0.8566030780573219, "grad_norm": 0.3396124541759491, "learning_rate": 1.226086794890801e-05, "loss": 0.5305, "step": 27885 }, { "epoch": 0.8566337971922711, "grad_norm": 0.3618820011615753, "learning_rate": 1.2260397182353744e-05, "loss": 0.5342, "step": 27886 }, { "epoch": 0.8566645163272202, "grad_norm": 0.38744086027145386, "learning_rate": 1.2259926410520165e-05, "loss": 0.5058, "step": 27887 }, { "epoch": 0.8566952354621694, "grad_norm": 0.39134925603866577, "learning_rate": 1.2259455633408369e-05, "loss": 0.6257, "step": 27888 }, { "epoch": 0.8567259545971185, "grad_norm": 0.32813358306884766, "learning_rate": 1.2258984851019456e-05, "loss": 0.5447, "step": 27889 }, { "epoch": 0.8567566737320677, "grad_norm": 0.3744778037071228, "learning_rate": 1.2258514063354527e-05, "loss": 0.6544, "step": 27890 }, { "epoch": 0.8567873928670169, "grad_norm": 0.35688358545303345, "learning_rate": 1.2258043270414678e-05, "loss": 0.5318, "step": 27891 }, { "epoch": 0.856818112001966, "grad_norm": 0.32990866899490356, "learning_rate": 1.2257572472201016e-05, "loss": 0.498, "step": 27892 }, { "epoch": 0.8568488311369152, "grad_norm": 0.3887975215911865, "learning_rate": 1.2257101668714632e-05, "loss": 0.5348, "step": 27893 }, { "epoch": 0.8568795502718644, "grad_norm": 0.38444772362709045, "learning_rate": 1.2256630859956633e-05, "loss": 0.5978, "step": 27894 }, { "epoch": 0.8569102694068135, "grad_norm": 0.3942812383174896, "learning_rate": 1.2256160045928112e-05, "loss": 0.5806, "step": 27895 }, { "epoch": 0.8569409885417627, "grad_norm": 0.34272146224975586, "learning_rate": 1.2255689226630172e-05, "loss": 0.4926, "step": 27896 }, { "epoch": 0.8569717076767118, "grad_norm": 0.3873318135738373, "learning_rate": 1.2255218402063912e-05, "loss": 0.5142, "step": 27897 }, { "epoch": 0.857002426811661, "grad_norm": 0.40973109006881714, "learning_rate": 1.2254747572230434e-05, "loss": 0.5845, "step": 27898 }, { "epoch": 0.8570331459466102, "grad_norm": 0.40104782581329346, "learning_rate": 1.2254276737130832e-05, "loss": 0.5243, "step": 27899 }, { "epoch": 0.8570638650815593, "grad_norm": 0.35483506321907043, "learning_rate": 1.2253805896766214e-05, "loss": 0.5798, "step": 27900 }, { "epoch": 0.8570945842165084, "grad_norm": 0.4269712567329407, "learning_rate": 1.2253335051137674e-05, "loss": 0.6137, "step": 27901 }, { "epoch": 0.8571253033514576, "grad_norm": 0.3881089687347412, "learning_rate": 1.2252864200246308e-05, "loss": 0.5414, "step": 27902 }, { "epoch": 0.8571560224864068, "grad_norm": 0.393797904253006, "learning_rate": 1.2252393344093225e-05, "loss": 0.5811, "step": 27903 }, { "epoch": 0.857186741621356, "grad_norm": 0.38085031509399414, "learning_rate": 1.225192248267952e-05, "loss": 0.5437, "step": 27904 }, { "epoch": 0.8572174607563051, "grad_norm": 0.4187985360622406, "learning_rate": 1.2251451616006289e-05, "loss": 0.4604, "step": 27905 }, { "epoch": 0.8572481798912542, "grad_norm": 0.412358820438385, "learning_rate": 1.2250980744074636e-05, "loss": 0.4973, "step": 27906 }, { "epoch": 0.8572788990262035, "grad_norm": 0.404795378446579, "learning_rate": 1.2250509866885662e-05, "loss": 0.4827, "step": 27907 }, { "epoch": 0.8573096181611526, "grad_norm": 0.34944605827331543, "learning_rate": 1.2250038984440467e-05, "loss": 0.5181, "step": 27908 }, { "epoch": 0.8573403372961017, "grad_norm": 0.33997491002082825, "learning_rate": 1.2249568096740146e-05, "loss": 0.5385, "step": 27909 }, { "epoch": 0.8573710564310509, "grad_norm": 0.33933472633361816, "learning_rate": 1.2249097203785803e-05, "loss": 0.5962, "step": 27910 }, { "epoch": 0.8574017755660001, "grad_norm": 0.32410404086112976, "learning_rate": 1.2248626305578534e-05, "loss": 0.5404, "step": 27911 }, { "epoch": 0.8574324947009492, "grad_norm": 0.32519298791885376, "learning_rate": 1.2248155402119443e-05, "loss": 0.4691, "step": 27912 }, { "epoch": 0.8574632138358984, "grad_norm": 0.3327036499977112, "learning_rate": 1.2247684493409629e-05, "loss": 0.494, "step": 27913 }, { "epoch": 0.8574939329708475, "grad_norm": 0.37759509682655334, "learning_rate": 1.224721357945019e-05, "loss": 0.5363, "step": 27914 }, { "epoch": 0.8575246521057966, "grad_norm": 0.41273561120033264, "learning_rate": 1.2246742660242225e-05, "loss": 0.4648, "step": 27915 }, { "epoch": 0.8575553712407459, "grad_norm": 0.3620213568210602, "learning_rate": 1.2246271735786839e-05, "loss": 0.5126, "step": 27916 }, { "epoch": 0.857586090375695, "grad_norm": 0.35247454047203064, "learning_rate": 1.2245800806085127e-05, "loss": 0.5585, "step": 27917 }, { "epoch": 0.8576168095106442, "grad_norm": 0.3273746073246002, "learning_rate": 1.2245329871138189e-05, "loss": 0.4977, "step": 27918 }, { "epoch": 0.8576475286455933, "grad_norm": 0.35373029112815857, "learning_rate": 1.224485893094713e-05, "loss": 0.6133, "step": 27919 }, { "epoch": 0.8576782477805425, "grad_norm": 0.41784608364105225, "learning_rate": 1.2244387985513043e-05, "loss": 0.5616, "step": 27920 }, { "epoch": 0.8577089669154917, "grad_norm": 0.3863956034183502, "learning_rate": 1.2243917034837032e-05, "loss": 0.5243, "step": 27921 }, { "epoch": 0.8577396860504408, "grad_norm": 0.34137463569641113, "learning_rate": 1.2243446078920194e-05, "loss": 0.5675, "step": 27922 }, { "epoch": 0.8577704051853899, "grad_norm": 0.3958633542060852, "learning_rate": 1.2242975117763632e-05, "loss": 0.6099, "step": 27923 }, { "epoch": 0.8578011243203392, "grad_norm": 0.3947158753871918, "learning_rate": 1.2242504151368446e-05, "loss": 0.6171, "step": 27924 }, { "epoch": 0.8578318434552883, "grad_norm": 0.39517998695373535, "learning_rate": 1.2242033179735735e-05, "loss": 0.5642, "step": 27925 }, { "epoch": 0.8578625625902374, "grad_norm": 0.3649339973926544, "learning_rate": 1.2241562202866599e-05, "loss": 0.5556, "step": 27926 }, { "epoch": 0.8578932817251866, "grad_norm": 0.451049268245697, "learning_rate": 1.2241091220762139e-05, "loss": 0.6157, "step": 27927 }, { "epoch": 0.8579240008601358, "grad_norm": 0.38279497623443604, "learning_rate": 1.2240620233423453e-05, "loss": 0.4818, "step": 27928 }, { "epoch": 0.857954719995085, "grad_norm": 0.38797488808631897, "learning_rate": 1.224014924085164e-05, "loss": 0.5686, "step": 27929 }, { "epoch": 0.8579854391300341, "grad_norm": 0.33632248640060425, "learning_rate": 1.2239678243047804e-05, "loss": 0.5562, "step": 27930 }, { "epoch": 0.8580161582649832, "grad_norm": 0.34845733642578125, "learning_rate": 1.223920724001304e-05, "loss": 0.6635, "step": 27931 }, { "epoch": 0.8580468773999325, "grad_norm": 0.3959728479385376, "learning_rate": 1.2238736231748455e-05, "loss": 0.5471, "step": 27932 }, { "epoch": 0.8580775965348816, "grad_norm": 0.413150817155838, "learning_rate": 1.2238265218255142e-05, "loss": 0.573, "step": 27933 }, { "epoch": 0.8581083156698307, "grad_norm": 0.35602498054504395, "learning_rate": 1.2237794199534208e-05, "loss": 0.5961, "step": 27934 }, { "epoch": 0.8581390348047799, "grad_norm": 0.39103561639785767, "learning_rate": 1.2237323175586749e-05, "loss": 0.4826, "step": 27935 }, { "epoch": 0.858169753939729, "grad_norm": 0.36219123005867004, "learning_rate": 1.2236852146413862e-05, "loss": 0.5934, "step": 27936 }, { "epoch": 0.8582004730746782, "grad_norm": 0.39069902896881104, "learning_rate": 1.2236381112016653e-05, "loss": 0.4924, "step": 27937 }, { "epoch": 0.8582311922096274, "grad_norm": 0.39814186096191406, "learning_rate": 1.223591007239622e-05, "loss": 0.5667, "step": 27938 }, { "epoch": 0.8582619113445765, "grad_norm": 0.3504239022731781, "learning_rate": 1.2235439027553661e-05, "loss": 0.5937, "step": 27939 }, { "epoch": 0.8582926304795256, "grad_norm": 0.7812365293502808, "learning_rate": 1.223496797749008e-05, "loss": 0.4471, "step": 27940 }, { "epoch": 0.8583233496144749, "grad_norm": 0.36257025599479675, "learning_rate": 1.2234496922206573e-05, "loss": 0.5999, "step": 27941 }, { "epoch": 0.858354068749424, "grad_norm": 0.372210294008255, "learning_rate": 1.2234025861704244e-05, "loss": 0.564, "step": 27942 }, { "epoch": 0.8583847878843732, "grad_norm": 0.442782461643219, "learning_rate": 1.2233554795984191e-05, "loss": 0.5115, "step": 27943 }, { "epoch": 0.8584155070193223, "grad_norm": 0.4189632833003998, "learning_rate": 1.2233083725047514e-05, "loss": 0.5374, "step": 27944 }, { "epoch": 0.8584462261542715, "grad_norm": 0.3877957761287689, "learning_rate": 1.2232612648895315e-05, "loss": 0.5462, "step": 27945 }, { "epoch": 0.8584769452892207, "grad_norm": 0.3999117612838745, "learning_rate": 1.2232141567528695e-05, "loss": 0.5661, "step": 27946 }, { "epoch": 0.8585076644241698, "grad_norm": 0.35847410559654236, "learning_rate": 1.223167048094875e-05, "loss": 0.53, "step": 27947 }, { "epoch": 0.8585383835591189, "grad_norm": 0.3902151584625244, "learning_rate": 1.2231199389156583e-05, "loss": 0.5019, "step": 27948 }, { "epoch": 0.8585691026940682, "grad_norm": 0.3498285710811615, "learning_rate": 1.2230728292153293e-05, "loss": 0.5351, "step": 27949 }, { "epoch": 0.8585998218290173, "grad_norm": 0.3648121654987335, "learning_rate": 1.2230257189939984e-05, "loss": 0.5943, "step": 27950 }, { "epoch": 0.8586305409639664, "grad_norm": 0.4056466519832611, "learning_rate": 1.2229786082517751e-05, "loss": 0.5972, "step": 27951 }, { "epoch": 0.8586612600989156, "grad_norm": 0.41696879267692566, "learning_rate": 1.22293149698877e-05, "loss": 0.5354, "step": 27952 }, { "epoch": 0.8586919792338648, "grad_norm": 0.3761141896247864, "learning_rate": 1.222884385205093e-05, "loss": 0.6291, "step": 27953 }, { "epoch": 0.858722698368814, "grad_norm": 0.3443341553211212, "learning_rate": 1.2228372729008533e-05, "loss": 0.5483, "step": 27954 }, { "epoch": 0.8587534175037631, "grad_norm": 0.4087982475757599, "learning_rate": 1.222790160076162e-05, "loss": 0.7017, "step": 27955 }, { "epoch": 0.8587841366387122, "grad_norm": 0.3447273075580597, "learning_rate": 1.2227430467311285e-05, "loss": 0.5573, "step": 27956 }, { "epoch": 0.8588148557736615, "grad_norm": 0.36068195104599, "learning_rate": 1.2226959328658632e-05, "loss": 0.5267, "step": 27957 }, { "epoch": 0.8588455749086106, "grad_norm": 0.3581807315349579, "learning_rate": 1.2226488184804758e-05, "loss": 0.5032, "step": 27958 }, { "epoch": 0.8588762940435597, "grad_norm": 0.3625902831554413, "learning_rate": 1.2226017035750768e-05, "loss": 0.6677, "step": 27959 }, { "epoch": 0.8589070131785089, "grad_norm": 0.37308594584465027, "learning_rate": 1.2225545881497757e-05, "loss": 0.496, "step": 27960 }, { "epoch": 0.858937732313458, "grad_norm": 0.36009013652801514, "learning_rate": 1.222507472204683e-05, "loss": 0.5815, "step": 27961 }, { "epoch": 0.8589684514484072, "grad_norm": 0.35388270020484924, "learning_rate": 1.2224603557399085e-05, "loss": 0.5845, "step": 27962 }, { "epoch": 0.8589991705833564, "grad_norm": 0.37619394063949585, "learning_rate": 1.2224132387555623e-05, "loss": 0.4899, "step": 27963 }, { "epoch": 0.8590298897183055, "grad_norm": 0.36960557103157043, "learning_rate": 1.2223661212517545e-05, "loss": 0.5469, "step": 27964 }, { "epoch": 0.8590606088532546, "grad_norm": 0.36737051606178284, "learning_rate": 1.2223190032285953e-05, "loss": 0.511, "step": 27965 }, { "epoch": 0.8590913279882039, "grad_norm": 0.3796035945415497, "learning_rate": 1.2222718846861943e-05, "loss": 0.59, "step": 27966 }, { "epoch": 0.859122047123153, "grad_norm": 0.4013253152370453, "learning_rate": 1.2222247656246618e-05, "loss": 0.58, "step": 27967 }, { "epoch": 0.8591527662581022, "grad_norm": 0.41957637667655945, "learning_rate": 1.2221776460441079e-05, "loss": 0.5213, "step": 27968 }, { "epoch": 0.8591834853930513, "grad_norm": 0.42187145352363586, "learning_rate": 1.2221305259446424e-05, "loss": 0.5311, "step": 27969 }, { "epoch": 0.8592142045280005, "grad_norm": 0.34909820556640625, "learning_rate": 1.2220834053263757e-05, "loss": 0.5519, "step": 27970 }, { "epoch": 0.8592449236629497, "grad_norm": 0.37643730640411377, "learning_rate": 1.2220362841894178e-05, "loss": 0.4934, "step": 27971 }, { "epoch": 0.8592756427978988, "grad_norm": 0.36117348074913025, "learning_rate": 1.2219891625338784e-05, "loss": 0.5568, "step": 27972 }, { "epoch": 0.8593063619328479, "grad_norm": 0.3675304651260376, "learning_rate": 1.221942040359868e-05, "loss": 0.5106, "step": 27973 }, { "epoch": 0.8593370810677972, "grad_norm": 0.397242933511734, "learning_rate": 1.2218949176674964e-05, "loss": 0.5616, "step": 27974 }, { "epoch": 0.8593678002027463, "grad_norm": 0.37104880809783936, "learning_rate": 1.2218477944568739e-05, "loss": 0.5527, "step": 27975 }, { "epoch": 0.8593985193376954, "grad_norm": 0.42052608728408813, "learning_rate": 1.22180067072811e-05, "loss": 0.4485, "step": 27976 }, { "epoch": 0.8594292384726446, "grad_norm": 0.31958386301994324, "learning_rate": 1.2217535464813155e-05, "loss": 0.4369, "step": 27977 }, { "epoch": 0.8594599576075937, "grad_norm": 0.34904977679252625, "learning_rate": 1.2217064217165999e-05, "loss": 0.5733, "step": 27978 }, { "epoch": 0.859490676742543, "grad_norm": 0.4194101095199585, "learning_rate": 1.2216592964340735e-05, "loss": 0.5457, "step": 27979 }, { "epoch": 0.8595213958774921, "grad_norm": 0.3757416903972626, "learning_rate": 1.2216121706338465e-05, "loss": 0.5978, "step": 27980 }, { "epoch": 0.8595521150124412, "grad_norm": 0.34763193130493164, "learning_rate": 1.2215650443160285e-05, "loss": 0.5432, "step": 27981 }, { "epoch": 0.8595828341473905, "grad_norm": 0.3776974081993103, "learning_rate": 1.22151791748073e-05, "loss": 0.5494, "step": 27982 }, { "epoch": 0.8596135532823396, "grad_norm": 0.40915268659591675, "learning_rate": 1.221470790128061e-05, "loss": 0.5588, "step": 27983 }, { "epoch": 0.8596442724172887, "grad_norm": 0.35812312364578247, "learning_rate": 1.2214236622581314e-05, "loss": 0.5745, "step": 27984 }, { "epoch": 0.8596749915522379, "grad_norm": 0.3787415027618408, "learning_rate": 1.2213765338710514e-05, "loss": 0.5167, "step": 27985 }, { "epoch": 0.859705710687187, "grad_norm": 0.3241567611694336, "learning_rate": 1.2213294049669311e-05, "loss": 0.4633, "step": 27986 }, { "epoch": 0.8597364298221362, "grad_norm": 0.3747265338897705, "learning_rate": 1.2212822755458804e-05, "loss": 0.5578, "step": 27987 }, { "epoch": 0.8597671489570854, "grad_norm": 0.3536657392978668, "learning_rate": 1.2212351456080097e-05, "loss": 0.5783, "step": 27988 }, { "epoch": 0.8597978680920345, "grad_norm": 0.42396798729896545, "learning_rate": 1.221188015153429e-05, "loss": 0.5142, "step": 27989 }, { "epoch": 0.8598285872269837, "grad_norm": 0.355142742395401, "learning_rate": 1.2211408841822478e-05, "loss": 0.5203, "step": 27990 }, { "epoch": 0.8598593063619329, "grad_norm": 0.3781788647174835, "learning_rate": 1.221093752694577e-05, "loss": 0.6159, "step": 27991 }, { "epoch": 0.859890025496882, "grad_norm": 0.3543121814727783, "learning_rate": 1.2210466206905261e-05, "loss": 0.5724, "step": 27992 }, { "epoch": 0.8599207446318312, "grad_norm": 0.3874433934688568, "learning_rate": 1.2209994881702053e-05, "loss": 0.5358, "step": 27993 }, { "epoch": 0.8599514637667803, "grad_norm": 0.3175719976425171, "learning_rate": 1.2209523551337247e-05, "loss": 0.4835, "step": 27994 }, { "epoch": 0.8599821829017295, "grad_norm": 0.3467021584510803, "learning_rate": 1.220905221581195e-05, "loss": 0.532, "step": 27995 }, { "epoch": 0.8600129020366787, "grad_norm": 0.38211074471473694, "learning_rate": 1.220858087512725e-05, "loss": 0.5779, "step": 27996 }, { "epoch": 0.8600436211716278, "grad_norm": 0.4006567895412445, "learning_rate": 1.2208109529284259e-05, "loss": 0.4905, "step": 27997 }, { "epoch": 0.8600743403065769, "grad_norm": 0.35316476225852966, "learning_rate": 1.2207638178284073e-05, "loss": 0.556, "step": 27998 }, { "epoch": 0.8601050594415262, "grad_norm": 0.3345332145690918, "learning_rate": 1.2207166822127794e-05, "loss": 0.4824, "step": 27999 }, { "epoch": 0.8601357785764753, "grad_norm": 0.34061044454574585, "learning_rate": 1.2206695460816524e-05, "loss": 0.5256, "step": 28000 }, { "epoch": 0.8601664977114244, "grad_norm": 0.3784792423248291, "learning_rate": 1.220622409435136e-05, "loss": 0.498, "step": 28001 }, { "epoch": 0.8601972168463736, "grad_norm": 0.35379546880722046, "learning_rate": 1.2205752722733408e-05, "loss": 0.5888, "step": 28002 }, { "epoch": 0.8602279359813227, "grad_norm": 0.7960163950920105, "learning_rate": 1.2205281345963765e-05, "loss": 0.5664, "step": 28003 }, { "epoch": 0.860258655116272, "grad_norm": 0.34865856170654297, "learning_rate": 1.2204809964043536e-05, "loss": 0.5238, "step": 28004 }, { "epoch": 0.8602893742512211, "grad_norm": 0.4017929136753082, "learning_rate": 1.2204338576973817e-05, "loss": 0.5594, "step": 28005 }, { "epoch": 0.8603200933861702, "grad_norm": 0.3641531467437744, "learning_rate": 1.220386718475571e-05, "loss": 0.5724, "step": 28006 }, { "epoch": 0.8603508125211194, "grad_norm": 0.41688087582588196, "learning_rate": 1.2203395787390322e-05, "loss": 0.5192, "step": 28007 }, { "epoch": 0.8603815316560686, "grad_norm": 0.3567323088645935, "learning_rate": 1.2202924384878744e-05, "loss": 0.4584, "step": 28008 }, { "epoch": 0.8604122507910177, "grad_norm": 0.34079280495643616, "learning_rate": 1.2202452977222085e-05, "loss": 0.493, "step": 28009 }, { "epoch": 0.8604429699259669, "grad_norm": 0.3836486339569092, "learning_rate": 1.2201981564421442e-05, "loss": 0.538, "step": 28010 }, { "epoch": 0.860473689060916, "grad_norm": 0.3707331120967865, "learning_rate": 1.220151014647792e-05, "loss": 0.5459, "step": 28011 }, { "epoch": 0.8605044081958652, "grad_norm": 0.3664844036102295, "learning_rate": 1.2201038723392613e-05, "loss": 0.5235, "step": 28012 }, { "epoch": 0.8605351273308144, "grad_norm": 0.44347500801086426, "learning_rate": 1.220056729516663e-05, "loss": 0.5397, "step": 28013 }, { "epoch": 0.8605658464657635, "grad_norm": 0.36191031336784363, "learning_rate": 1.2200095861801067e-05, "loss": 0.4753, "step": 28014 }, { "epoch": 0.8605965656007127, "grad_norm": 0.3497775197029114, "learning_rate": 1.2199624423297026e-05, "loss": 0.4831, "step": 28015 }, { "epoch": 0.8606272847356619, "grad_norm": 0.36825910210609436, "learning_rate": 1.2199152979655608e-05, "loss": 0.5432, "step": 28016 }, { "epoch": 0.860658003870611, "grad_norm": 0.3586590588092804, "learning_rate": 1.2198681530877915e-05, "loss": 0.5235, "step": 28017 }, { "epoch": 0.8606887230055602, "grad_norm": 0.3927917182445526, "learning_rate": 1.219821007696505e-05, "loss": 0.5544, "step": 28018 }, { "epoch": 0.8607194421405093, "grad_norm": 0.3653677999973297, "learning_rate": 1.2197738617918108e-05, "loss": 0.6235, "step": 28019 }, { "epoch": 0.8607501612754584, "grad_norm": 0.343046098947525, "learning_rate": 1.21972671537382e-05, "loss": 0.5807, "step": 28020 }, { "epoch": 0.8607808804104077, "grad_norm": 0.34243491291999817, "learning_rate": 1.2196795684426414e-05, "loss": 0.5099, "step": 28021 }, { "epoch": 0.8608115995453568, "grad_norm": 0.3594750463962555, "learning_rate": 1.2196324209983863e-05, "loss": 0.5507, "step": 28022 }, { "epoch": 0.8608423186803059, "grad_norm": 0.4017080068588257, "learning_rate": 1.2195852730411643e-05, "loss": 0.5866, "step": 28023 }, { "epoch": 0.8608730378152551, "grad_norm": 0.361447811126709, "learning_rate": 1.2195381245710853e-05, "loss": 0.5489, "step": 28024 }, { "epoch": 0.8609037569502043, "grad_norm": 0.36967912316322327, "learning_rate": 1.21949097558826e-05, "loss": 0.6163, "step": 28025 }, { "epoch": 0.8609344760851534, "grad_norm": 0.3634413480758667, "learning_rate": 1.219443826092798e-05, "loss": 0.5799, "step": 28026 }, { "epoch": 0.8609651952201026, "grad_norm": 0.3552924394607544, "learning_rate": 1.2193966760848098e-05, "loss": 0.5727, "step": 28027 }, { "epoch": 0.8609959143550517, "grad_norm": 0.38254085183143616, "learning_rate": 1.219349525564405e-05, "loss": 0.5925, "step": 28028 }, { "epoch": 0.861026633490001, "grad_norm": 0.4352259933948517, "learning_rate": 1.2193023745316943e-05, "loss": 0.5169, "step": 28029 }, { "epoch": 0.8610573526249501, "grad_norm": 0.3332619369029999, "learning_rate": 1.2192552229867875e-05, "loss": 0.5275, "step": 28030 }, { "epoch": 0.8610880717598992, "grad_norm": 0.36076605319976807, "learning_rate": 1.2192080709297948e-05, "loss": 0.5487, "step": 28031 }, { "epoch": 0.8611187908948484, "grad_norm": 0.3765714466571808, "learning_rate": 1.2191609183608264e-05, "loss": 0.5579, "step": 28032 }, { "epoch": 0.8611495100297976, "grad_norm": 0.3927370607852936, "learning_rate": 1.2191137652799923e-05, "loss": 0.6194, "step": 28033 }, { "epoch": 0.8611802291647467, "grad_norm": 0.3865217864513397, "learning_rate": 1.2190666116874028e-05, "loss": 0.5037, "step": 28034 }, { "epoch": 0.8612109482996959, "grad_norm": 0.3662228286266327, "learning_rate": 1.2190194575831677e-05, "loss": 0.5495, "step": 28035 }, { "epoch": 0.861241667434645, "grad_norm": 0.4278234839439392, "learning_rate": 1.2189723029673979e-05, "loss": 0.5964, "step": 28036 }, { "epoch": 0.8612723865695942, "grad_norm": 0.34770849347114563, "learning_rate": 1.2189251478402025e-05, "loss": 0.542, "step": 28037 }, { "epoch": 0.8613031057045434, "grad_norm": 0.4631699323654175, "learning_rate": 1.2188779922016925e-05, "loss": 0.5582, "step": 28038 }, { "epoch": 0.8613338248394925, "grad_norm": 0.3800567388534546, "learning_rate": 1.2188308360519772e-05, "loss": 0.5246, "step": 28039 }, { "epoch": 0.8613645439744417, "grad_norm": 0.4557029604911804, "learning_rate": 1.2187836793911676e-05, "loss": 0.5291, "step": 28040 }, { "epoch": 0.8613952631093909, "grad_norm": 0.3758394718170166, "learning_rate": 1.2187365222193732e-05, "loss": 0.5675, "step": 28041 }, { "epoch": 0.86142598224434, "grad_norm": 0.41473332047462463, "learning_rate": 1.2186893645367045e-05, "loss": 0.5355, "step": 28042 }, { "epoch": 0.8614567013792892, "grad_norm": 0.43191036581993103, "learning_rate": 1.2186422063432714e-05, "loss": 0.5478, "step": 28043 }, { "epoch": 0.8614874205142383, "grad_norm": 0.4053933620452881, "learning_rate": 1.2185950476391843e-05, "loss": 0.5849, "step": 28044 }, { "epoch": 0.8615181396491874, "grad_norm": 0.4632743000984192, "learning_rate": 1.2185478884245533e-05, "loss": 0.515, "step": 28045 }, { "epoch": 0.8615488587841367, "grad_norm": 0.3841489851474762, "learning_rate": 1.2185007286994881e-05, "loss": 0.5745, "step": 28046 }, { "epoch": 0.8615795779190858, "grad_norm": 0.3628014028072357, "learning_rate": 1.2184535684640994e-05, "loss": 0.5517, "step": 28047 }, { "epoch": 0.8616102970540349, "grad_norm": 0.376005083322525, "learning_rate": 1.218406407718497e-05, "loss": 0.549, "step": 28048 }, { "epoch": 0.8616410161889841, "grad_norm": 0.38568130135536194, "learning_rate": 1.2183592464627913e-05, "loss": 0.5398, "step": 28049 }, { "epoch": 0.8616717353239333, "grad_norm": 0.3827309310436249, "learning_rate": 1.2183120846970922e-05, "loss": 0.6082, "step": 28050 }, { "epoch": 0.8617024544588824, "grad_norm": 0.3787614405155182, "learning_rate": 1.21826492242151e-05, "loss": 0.4791, "step": 28051 }, { "epoch": 0.8617331735938316, "grad_norm": 0.4054242968559265, "learning_rate": 1.2182177596361551e-05, "loss": 0.4208, "step": 28052 }, { "epoch": 0.8617638927287807, "grad_norm": 0.34180521965026855, "learning_rate": 1.218170596341137e-05, "loss": 0.5298, "step": 28053 }, { "epoch": 0.86179461186373, "grad_norm": 0.44166794419288635, "learning_rate": 1.2181234325365664e-05, "loss": 0.546, "step": 28054 }, { "epoch": 0.8618253309986791, "grad_norm": 0.3754299283027649, "learning_rate": 1.2180762682225533e-05, "loss": 0.5318, "step": 28055 }, { "epoch": 0.8618560501336282, "grad_norm": 0.3665773272514343, "learning_rate": 1.218029103399208e-05, "loss": 0.5359, "step": 28056 }, { "epoch": 0.8618867692685774, "grad_norm": 0.3770677447319031, "learning_rate": 1.2179819380666405e-05, "loss": 0.5338, "step": 28057 }, { "epoch": 0.8619174884035266, "grad_norm": 0.3606318533420563, "learning_rate": 1.2179347722249608e-05, "loss": 0.5031, "step": 28058 }, { "epoch": 0.8619482075384757, "grad_norm": 0.46428433060646057, "learning_rate": 1.2178876058742793e-05, "loss": 0.5369, "step": 28059 }, { "epoch": 0.8619789266734249, "grad_norm": 0.3641799986362457, "learning_rate": 1.2178404390147058e-05, "loss": 0.5343, "step": 28060 }, { "epoch": 0.862009645808374, "grad_norm": 0.36584043502807617, "learning_rate": 1.2177932716463511e-05, "loss": 0.6252, "step": 28061 }, { "epoch": 0.8620403649433231, "grad_norm": 0.4056076407432556, "learning_rate": 1.2177461037693247e-05, "loss": 0.5435, "step": 28062 }, { "epoch": 0.8620710840782724, "grad_norm": 0.37391215562820435, "learning_rate": 1.2176989353837377e-05, "loss": 0.5668, "step": 28063 }, { "epoch": 0.8621018032132215, "grad_norm": 0.3537333607673645, "learning_rate": 1.217651766489699e-05, "loss": 0.5535, "step": 28064 }, { "epoch": 0.8621325223481707, "grad_norm": 0.4635092616081238, "learning_rate": 1.2176045970873196e-05, "loss": 0.5638, "step": 28065 }, { "epoch": 0.8621632414831198, "grad_norm": 0.38743308186531067, "learning_rate": 1.2175574271767092e-05, "loss": 0.5738, "step": 28066 }, { "epoch": 0.862193960618069, "grad_norm": 0.36777323484420776, "learning_rate": 1.2175102567579787e-05, "loss": 0.6163, "step": 28067 }, { "epoch": 0.8622246797530182, "grad_norm": 0.3574392795562744, "learning_rate": 1.2174630858312377e-05, "loss": 0.5281, "step": 28068 }, { "epoch": 0.8622553988879673, "grad_norm": 0.39983922243118286, "learning_rate": 1.217415914396596e-05, "loss": 0.5794, "step": 28069 }, { "epoch": 0.8622861180229164, "grad_norm": 0.3251422941684723, "learning_rate": 1.2173687424541647e-05, "loss": 0.521, "step": 28070 }, { "epoch": 0.8623168371578657, "grad_norm": 0.3484809398651123, "learning_rate": 1.2173215700040533e-05, "loss": 0.5765, "step": 28071 }, { "epoch": 0.8623475562928148, "grad_norm": 0.38626793026924133, "learning_rate": 1.2172743970463726e-05, "loss": 0.5176, "step": 28072 }, { "epoch": 0.8623782754277639, "grad_norm": 0.38249725103378296, "learning_rate": 1.2172272235812316e-05, "loss": 0.5444, "step": 28073 }, { "epoch": 0.8624089945627131, "grad_norm": 0.3514723479747772, "learning_rate": 1.217180049608742e-05, "loss": 0.6164, "step": 28074 }, { "epoch": 0.8624397136976623, "grad_norm": 0.4843306243419647, "learning_rate": 1.217132875129013e-05, "loss": 0.6117, "step": 28075 }, { "epoch": 0.8624704328326115, "grad_norm": 2.010173797607422, "learning_rate": 1.217085700142155e-05, "loss": 0.6137, "step": 28076 }, { "epoch": 0.8625011519675606, "grad_norm": 0.3792082965373993, "learning_rate": 1.2170385246482783e-05, "loss": 0.6292, "step": 28077 }, { "epoch": 0.8625318711025097, "grad_norm": 0.4027450382709503, "learning_rate": 1.2169913486474927e-05, "loss": 0.5262, "step": 28078 }, { "epoch": 0.862562590237459, "grad_norm": 0.38133400678634644, "learning_rate": 1.2169441721399089e-05, "loss": 0.5655, "step": 28079 }, { "epoch": 0.8625933093724081, "grad_norm": 0.5428820848464966, "learning_rate": 1.2168969951256364e-05, "loss": 0.5636, "step": 28080 }, { "epoch": 0.8626240285073572, "grad_norm": 0.35512107610702515, "learning_rate": 1.2168498176047863e-05, "loss": 0.5575, "step": 28081 }, { "epoch": 0.8626547476423064, "grad_norm": 0.40253180265426636, "learning_rate": 1.2168026395774681e-05, "loss": 0.5187, "step": 28082 }, { "epoch": 0.8626854667772555, "grad_norm": 0.33607593178749084, "learning_rate": 1.2167554610437923e-05, "loss": 0.5183, "step": 28083 }, { "epoch": 0.8627161859122047, "grad_norm": 0.49827149510383606, "learning_rate": 1.2167082820038686e-05, "loss": 0.6427, "step": 28084 }, { "epoch": 0.8627469050471539, "grad_norm": 0.4200965166091919, "learning_rate": 1.216661102457808e-05, "loss": 0.4714, "step": 28085 }, { "epoch": 0.862777624182103, "grad_norm": 0.37073957920074463, "learning_rate": 1.2166139224057205e-05, "loss": 0.5007, "step": 28086 }, { "epoch": 0.8628083433170521, "grad_norm": 0.41035038232803345, "learning_rate": 1.2165667418477157e-05, "loss": 0.5994, "step": 28087 }, { "epoch": 0.8628390624520014, "grad_norm": 0.3992158770561218, "learning_rate": 1.2165195607839037e-05, "loss": 0.5531, "step": 28088 }, { "epoch": 0.8628697815869505, "grad_norm": 0.4670623242855072, "learning_rate": 1.2164723792143957e-05, "loss": 0.5311, "step": 28089 }, { "epoch": 0.8629005007218997, "grad_norm": 0.35759907960891724, "learning_rate": 1.2164251971393012e-05, "loss": 0.5606, "step": 28090 }, { "epoch": 0.8629312198568488, "grad_norm": 0.38381102681159973, "learning_rate": 1.2163780145587305e-05, "loss": 0.5281, "step": 28091 }, { "epoch": 0.862961938991798, "grad_norm": 0.32145896553993225, "learning_rate": 1.2163308314727942e-05, "loss": 0.4815, "step": 28092 }, { "epoch": 0.8629926581267472, "grad_norm": 0.3913833796977997, "learning_rate": 1.2162836478816017e-05, "loss": 0.5616, "step": 28093 }, { "epoch": 0.8630233772616963, "grad_norm": 0.32410645484924316, "learning_rate": 1.2162364637852637e-05, "loss": 0.5184, "step": 28094 }, { "epoch": 0.8630540963966454, "grad_norm": 0.36208102107048035, "learning_rate": 1.2161892791838908e-05, "loss": 0.6006, "step": 28095 }, { "epoch": 0.8630848155315947, "grad_norm": 0.3400789201259613, "learning_rate": 1.2161420940775921e-05, "loss": 0.5446, "step": 28096 }, { "epoch": 0.8631155346665438, "grad_norm": 0.399844229221344, "learning_rate": 1.2160949084664788e-05, "loss": 0.4971, "step": 28097 }, { "epoch": 0.8631462538014929, "grad_norm": 0.41257476806640625, "learning_rate": 1.2160477223506606e-05, "loss": 0.558, "step": 28098 }, { "epoch": 0.8631769729364421, "grad_norm": 0.3950716257095337, "learning_rate": 1.2160005357302481e-05, "loss": 0.4394, "step": 28099 }, { "epoch": 0.8632076920713913, "grad_norm": 0.35571110248565674, "learning_rate": 1.2159533486053507e-05, "loss": 0.5398, "step": 28100 }, { "epoch": 0.8632384112063405, "grad_norm": 0.35678136348724365, "learning_rate": 1.2159061609760798e-05, "loss": 0.505, "step": 28101 }, { "epoch": 0.8632691303412896, "grad_norm": 0.43138155341148376, "learning_rate": 1.2158589728425445e-05, "loss": 0.5115, "step": 28102 }, { "epoch": 0.8632998494762387, "grad_norm": 0.34540635347366333, "learning_rate": 1.2158117842048558e-05, "loss": 0.4986, "step": 28103 }, { "epoch": 0.863330568611188, "grad_norm": 0.3270249366760254, "learning_rate": 1.2157645950631238e-05, "loss": 0.5263, "step": 28104 }, { "epoch": 0.8633612877461371, "grad_norm": 0.37044599652290344, "learning_rate": 1.215717405417458e-05, "loss": 0.4709, "step": 28105 }, { "epoch": 0.8633920068810862, "grad_norm": 0.4247455298900604, "learning_rate": 1.2156702152679693e-05, "loss": 0.5749, "step": 28106 }, { "epoch": 0.8634227260160354, "grad_norm": 0.39144396781921387, "learning_rate": 1.2156230246147677e-05, "loss": 0.588, "step": 28107 }, { "epoch": 0.8634534451509845, "grad_norm": 0.3646618127822876, "learning_rate": 1.2155758334579637e-05, "loss": 0.5727, "step": 28108 }, { "epoch": 0.8634841642859337, "grad_norm": 0.38119617104530334, "learning_rate": 1.2155286417976674e-05, "loss": 0.5767, "step": 28109 }, { "epoch": 0.8635148834208829, "grad_norm": 0.4091588854789734, "learning_rate": 1.2154814496339885e-05, "loss": 0.563, "step": 28110 }, { "epoch": 0.863545602555832, "grad_norm": 0.4038296639919281, "learning_rate": 1.2154342569670379e-05, "loss": 0.5656, "step": 28111 }, { "epoch": 0.8635763216907811, "grad_norm": 0.4639067053794861, "learning_rate": 1.2153870637969255e-05, "loss": 0.6155, "step": 28112 }, { "epoch": 0.8636070408257304, "grad_norm": 0.38309165835380554, "learning_rate": 1.2153398701237615e-05, "loss": 0.54, "step": 28113 }, { "epoch": 0.8636377599606795, "grad_norm": 0.3727457523345947, "learning_rate": 1.215292675947656e-05, "loss": 0.6255, "step": 28114 }, { "epoch": 0.8636684790956287, "grad_norm": 0.3973926603794098, "learning_rate": 1.2152454812687198e-05, "loss": 0.5877, "step": 28115 }, { "epoch": 0.8636991982305778, "grad_norm": 0.38559359312057495, "learning_rate": 1.2151982860870625e-05, "loss": 0.4995, "step": 28116 }, { "epoch": 0.863729917365527, "grad_norm": 0.34020867943763733, "learning_rate": 1.2151510904027947e-05, "loss": 0.5379, "step": 28117 }, { "epoch": 0.8637606365004762, "grad_norm": 0.34396201372146606, "learning_rate": 1.2151038942160265e-05, "loss": 0.5886, "step": 28118 }, { "epoch": 0.8637913556354253, "grad_norm": 0.39776915311813354, "learning_rate": 1.2150566975268681e-05, "loss": 0.5362, "step": 28119 }, { "epoch": 0.8638220747703744, "grad_norm": 0.3742005527019501, "learning_rate": 1.2150095003354298e-05, "loss": 0.5152, "step": 28120 }, { "epoch": 0.8638527939053237, "grad_norm": 0.3780343234539032, "learning_rate": 1.2149623026418218e-05, "loss": 0.5121, "step": 28121 }, { "epoch": 0.8638835130402728, "grad_norm": 0.3984128534793854, "learning_rate": 1.2149151044461545e-05, "loss": 0.6005, "step": 28122 }, { "epoch": 0.8639142321752219, "grad_norm": 0.36082884669303894, "learning_rate": 1.2148679057485375e-05, "loss": 0.5964, "step": 28123 }, { "epoch": 0.8639449513101711, "grad_norm": 0.3778296709060669, "learning_rate": 1.2148207065490816e-05, "loss": 0.5371, "step": 28124 }, { "epoch": 0.8639756704451202, "grad_norm": 0.4449632167816162, "learning_rate": 1.214773506847897e-05, "loss": 0.5696, "step": 28125 }, { "epoch": 0.8640063895800695, "grad_norm": 0.3624083697795868, "learning_rate": 1.2147263066450941e-05, "loss": 0.5225, "step": 28126 }, { "epoch": 0.8640371087150186, "grad_norm": 0.37673258781433105, "learning_rate": 1.2146791059407827e-05, "loss": 0.5558, "step": 28127 }, { "epoch": 0.8640678278499677, "grad_norm": 0.3459489047527313, "learning_rate": 1.2146319047350734e-05, "loss": 0.5358, "step": 28128 }, { "epoch": 0.864098546984917, "grad_norm": 0.3747577965259552, "learning_rate": 1.2145847030280762e-05, "loss": 0.5974, "step": 28129 }, { "epoch": 0.8641292661198661, "grad_norm": 0.43273988366127014, "learning_rate": 1.2145375008199014e-05, "loss": 0.504, "step": 28130 }, { "epoch": 0.8641599852548152, "grad_norm": 0.35696280002593994, "learning_rate": 1.2144902981106594e-05, "loss": 0.445, "step": 28131 }, { "epoch": 0.8641907043897644, "grad_norm": 0.344871461391449, "learning_rate": 1.2144430949004602e-05, "loss": 0.5989, "step": 28132 }, { "epoch": 0.8642214235247135, "grad_norm": 0.3495834767818451, "learning_rate": 1.2143958911894146e-05, "loss": 0.5119, "step": 28133 }, { "epoch": 0.8642521426596627, "grad_norm": 0.39951929450035095, "learning_rate": 1.2143486869776317e-05, "loss": 0.4998, "step": 28134 }, { "epoch": 0.8642828617946119, "grad_norm": 0.3212926685810089, "learning_rate": 1.214301482265223e-05, "loss": 0.5369, "step": 28135 }, { "epoch": 0.864313580929561, "grad_norm": 0.4089685082435608, "learning_rate": 1.2142542770522981e-05, "loss": 0.6067, "step": 28136 }, { "epoch": 0.8643443000645101, "grad_norm": 0.2873234450817108, "learning_rate": 1.2142070713389673e-05, "loss": 0.4419, "step": 28137 }, { "epoch": 0.8643750191994594, "grad_norm": 0.4441477060317993, "learning_rate": 1.2141598651253413e-05, "loss": 0.5425, "step": 28138 }, { "epoch": 0.8644057383344085, "grad_norm": 0.38240692019462585, "learning_rate": 1.2141126584115295e-05, "loss": 0.4906, "step": 28139 }, { "epoch": 0.8644364574693577, "grad_norm": 0.34498000144958496, "learning_rate": 1.2140654511976427e-05, "loss": 0.526, "step": 28140 }, { "epoch": 0.8644671766043068, "grad_norm": 0.340224951505661, "learning_rate": 1.2140182434837911e-05, "loss": 0.4583, "step": 28141 }, { "epoch": 0.864497895739256, "grad_norm": 0.38648393750190735, "learning_rate": 1.2139710352700849e-05, "loss": 0.5613, "step": 28142 }, { "epoch": 0.8645286148742052, "grad_norm": 0.3300640881061554, "learning_rate": 1.2139238265566345e-05, "loss": 0.5189, "step": 28143 }, { "epoch": 0.8645593340091543, "grad_norm": 0.3620241582393646, "learning_rate": 1.2138766173435502e-05, "loss": 0.5092, "step": 28144 }, { "epoch": 0.8645900531441034, "grad_norm": 0.32719746232032776, "learning_rate": 1.213829407630942e-05, "loss": 0.5714, "step": 28145 }, { "epoch": 0.8646207722790527, "grad_norm": 0.5992541313171387, "learning_rate": 1.2137821974189201e-05, "loss": 0.5263, "step": 28146 }, { "epoch": 0.8646514914140018, "grad_norm": 0.3603494167327881, "learning_rate": 1.2137349867075953e-05, "loss": 0.6178, "step": 28147 }, { "epoch": 0.8646822105489509, "grad_norm": 0.3603261411190033, "learning_rate": 1.2136877754970774e-05, "loss": 0.5451, "step": 28148 }, { "epoch": 0.8647129296839001, "grad_norm": 0.3820466995239258, "learning_rate": 1.2136405637874766e-05, "loss": 0.5131, "step": 28149 }, { "epoch": 0.8647436488188492, "grad_norm": 0.36099669337272644, "learning_rate": 1.2135933515789033e-05, "loss": 0.565, "step": 28150 }, { "epoch": 0.8647743679537985, "grad_norm": 0.38349997997283936, "learning_rate": 1.2135461388714683e-05, "loss": 0.54, "step": 28151 }, { "epoch": 0.8648050870887476, "grad_norm": 0.3675289750099182, "learning_rate": 1.2134989256652809e-05, "loss": 0.542, "step": 28152 }, { "epoch": 0.8648358062236967, "grad_norm": 0.35500192642211914, "learning_rate": 1.213451711960452e-05, "loss": 0.554, "step": 28153 }, { "epoch": 0.8648665253586459, "grad_norm": 0.36297667026519775, "learning_rate": 1.2134044977570917e-05, "loss": 0.6046, "step": 28154 }, { "epoch": 0.8648972444935951, "grad_norm": 0.38055089116096497, "learning_rate": 1.2133572830553105e-05, "loss": 0.5401, "step": 28155 }, { "epoch": 0.8649279636285442, "grad_norm": 0.39451348781585693, "learning_rate": 1.2133100678552183e-05, "loss": 0.546, "step": 28156 }, { "epoch": 0.8649586827634934, "grad_norm": 0.40430977940559387, "learning_rate": 1.2132628521569255e-05, "loss": 0.5747, "step": 28157 }, { "epoch": 0.8649894018984425, "grad_norm": 0.3661837875843048, "learning_rate": 1.2132156359605426e-05, "loss": 0.5562, "step": 28158 }, { "epoch": 0.8650201210333917, "grad_norm": 0.3513790965080261, "learning_rate": 1.2131684192661792e-05, "loss": 0.5735, "step": 28159 }, { "epoch": 0.8650508401683409, "grad_norm": 0.3407869040966034, "learning_rate": 1.2131212020739464e-05, "loss": 0.5741, "step": 28160 }, { "epoch": 0.86508155930329, "grad_norm": 0.3437838554382324, "learning_rate": 1.213073984383954e-05, "loss": 0.5559, "step": 28161 }, { "epoch": 0.8651122784382391, "grad_norm": 0.34281402826309204, "learning_rate": 1.2130267661963127e-05, "loss": 0.4584, "step": 28162 }, { "epoch": 0.8651429975731884, "grad_norm": 0.35963648557662964, "learning_rate": 1.2129795475111324e-05, "loss": 0.5323, "step": 28163 }, { "epoch": 0.8651737167081375, "grad_norm": 0.36112332344055176, "learning_rate": 1.2129323283285235e-05, "loss": 0.4547, "step": 28164 }, { "epoch": 0.8652044358430867, "grad_norm": 0.30556243658065796, "learning_rate": 1.2128851086485961e-05, "loss": 0.5128, "step": 28165 }, { "epoch": 0.8652351549780358, "grad_norm": 0.3551376163959503, "learning_rate": 1.212837888471461e-05, "loss": 0.5367, "step": 28166 }, { "epoch": 0.865265874112985, "grad_norm": 0.39591744542121887, "learning_rate": 1.2127906677972279e-05, "loss": 0.585, "step": 28167 }, { "epoch": 0.8652965932479342, "grad_norm": 0.4505634009838104, "learning_rate": 1.2127434466260073e-05, "loss": 0.6077, "step": 28168 }, { "epoch": 0.8653273123828833, "grad_norm": 0.38131365180015564, "learning_rate": 1.2126962249579097e-05, "loss": 0.6281, "step": 28169 }, { "epoch": 0.8653580315178324, "grad_norm": 0.4329361021518707, "learning_rate": 1.2126490027930451e-05, "loss": 0.5693, "step": 28170 }, { "epoch": 0.8653887506527816, "grad_norm": 0.4369283616542816, "learning_rate": 1.212601780131524e-05, "loss": 0.4657, "step": 28171 }, { "epoch": 0.8654194697877308, "grad_norm": 0.3591020703315735, "learning_rate": 1.2125545569734565e-05, "loss": 0.5578, "step": 28172 }, { "epoch": 0.8654501889226799, "grad_norm": 0.37670448422431946, "learning_rate": 1.212507333318953e-05, "loss": 0.5907, "step": 28173 }, { "epoch": 0.8654809080576291, "grad_norm": 0.351582795381546, "learning_rate": 1.2124601091681239e-05, "loss": 0.5656, "step": 28174 }, { "epoch": 0.8655116271925782, "grad_norm": 0.3309556841850281, "learning_rate": 1.2124128845210793e-05, "loss": 0.5382, "step": 28175 }, { "epoch": 0.8655423463275275, "grad_norm": 0.37537258863449097, "learning_rate": 1.2123656593779293e-05, "loss": 0.556, "step": 28176 }, { "epoch": 0.8655730654624766, "grad_norm": 0.37674087285995483, "learning_rate": 1.2123184337387848e-05, "loss": 0.7234, "step": 28177 }, { "epoch": 0.8656037845974257, "grad_norm": 0.38517293334007263, "learning_rate": 1.2122712076037558e-05, "loss": 0.5206, "step": 28178 }, { "epoch": 0.8656345037323749, "grad_norm": 0.3957965075969696, "learning_rate": 1.2122239809729524e-05, "loss": 0.5508, "step": 28179 }, { "epoch": 0.8656652228673241, "grad_norm": 0.401192843914032, "learning_rate": 1.2121767538464848e-05, "loss": 0.6003, "step": 28180 }, { "epoch": 0.8656959420022732, "grad_norm": 0.3732680082321167, "learning_rate": 1.2121295262244641e-05, "loss": 0.5671, "step": 28181 }, { "epoch": 0.8657266611372224, "grad_norm": 0.3510998785495758, "learning_rate": 1.2120822981069997e-05, "loss": 0.538, "step": 28182 }, { "epoch": 0.8657573802721715, "grad_norm": 0.36242640018463135, "learning_rate": 1.2120350694942027e-05, "loss": 0.5137, "step": 28183 }, { "epoch": 0.8657880994071206, "grad_norm": 0.36082300543785095, "learning_rate": 1.2119878403861827e-05, "loss": 0.4536, "step": 28184 }, { "epoch": 0.8658188185420699, "grad_norm": 0.3587583601474762, "learning_rate": 1.2119406107830502e-05, "loss": 0.5166, "step": 28185 }, { "epoch": 0.865849537677019, "grad_norm": 0.3663063943386078, "learning_rate": 1.2118933806849157e-05, "loss": 0.5451, "step": 28186 }, { "epoch": 0.8658802568119682, "grad_norm": 0.513342559337616, "learning_rate": 1.2118461500918896e-05, "loss": 0.6542, "step": 28187 }, { "epoch": 0.8659109759469173, "grad_norm": 0.40796080231666565, "learning_rate": 1.2117989190040818e-05, "loss": 0.5355, "step": 28188 }, { "epoch": 0.8659416950818665, "grad_norm": 0.46942299604415894, "learning_rate": 1.211751687421603e-05, "loss": 0.4732, "step": 28189 }, { "epoch": 0.8659724142168157, "grad_norm": 0.3810262382030487, "learning_rate": 1.2117044553445634e-05, "loss": 0.6102, "step": 28190 }, { "epoch": 0.8660031333517648, "grad_norm": 0.40272873640060425, "learning_rate": 1.2116572227730729e-05, "loss": 0.4945, "step": 28191 }, { "epoch": 0.8660338524867139, "grad_norm": 0.36627355217933655, "learning_rate": 1.2116099897072425e-05, "loss": 0.6349, "step": 28192 }, { "epoch": 0.8660645716216632, "grad_norm": 0.3637773394584656, "learning_rate": 1.2115627561471819e-05, "loss": 0.5344, "step": 28193 }, { "epoch": 0.8660952907566123, "grad_norm": 0.36690056324005127, "learning_rate": 1.2115155220930022e-05, "loss": 0.5946, "step": 28194 }, { "epoch": 0.8661260098915614, "grad_norm": 0.35724788904190063, "learning_rate": 1.2114682875448128e-05, "loss": 0.5952, "step": 28195 }, { "epoch": 0.8661567290265106, "grad_norm": 0.3874708414077759, "learning_rate": 1.2114210525027246e-05, "loss": 0.5253, "step": 28196 }, { "epoch": 0.8661874481614598, "grad_norm": 0.3616374433040619, "learning_rate": 1.2113738169668475e-05, "loss": 0.5728, "step": 28197 }, { "epoch": 0.8662181672964089, "grad_norm": 0.3944650888442993, "learning_rate": 1.2113265809372923e-05, "loss": 0.5142, "step": 28198 }, { "epoch": 0.8662488864313581, "grad_norm": 0.325636088848114, "learning_rate": 1.211279344414169e-05, "loss": 0.5362, "step": 28199 }, { "epoch": 0.8662796055663072, "grad_norm": 0.3875260353088379, "learning_rate": 1.2112321073975882e-05, "loss": 0.6357, "step": 28200 }, { "epoch": 0.8663103247012565, "grad_norm": 0.3384411931037903, "learning_rate": 1.2111848698876601e-05, "loss": 0.5341, "step": 28201 }, { "epoch": 0.8663410438362056, "grad_norm": 0.362048476934433, "learning_rate": 1.2111376318844948e-05, "loss": 0.4442, "step": 28202 }, { "epoch": 0.8663717629711547, "grad_norm": 0.34969377517700195, "learning_rate": 1.211090393388203e-05, "loss": 0.5058, "step": 28203 }, { "epoch": 0.8664024821061039, "grad_norm": 0.5487659573554993, "learning_rate": 1.2110431543988948e-05, "loss": 0.4626, "step": 28204 }, { "epoch": 0.866433201241053, "grad_norm": 0.4410102963447571, "learning_rate": 1.2109959149166805e-05, "loss": 0.4928, "step": 28205 }, { "epoch": 0.8664639203760022, "grad_norm": 0.37166082859039307, "learning_rate": 1.2109486749416703e-05, "loss": 0.5342, "step": 28206 }, { "epoch": 0.8664946395109514, "grad_norm": 0.3706166744232178, "learning_rate": 1.2109014344739752e-05, "loss": 0.5943, "step": 28207 }, { "epoch": 0.8665253586459005, "grad_norm": 0.3770751953125, "learning_rate": 1.210854193513705e-05, "loss": 0.5153, "step": 28208 }, { "epoch": 0.8665560777808496, "grad_norm": 0.39443373680114746, "learning_rate": 1.2108069520609697e-05, "loss": 0.4624, "step": 28209 }, { "epoch": 0.8665867969157989, "grad_norm": 0.3620499074459076, "learning_rate": 1.2107597101158806e-05, "loss": 0.6424, "step": 28210 }, { "epoch": 0.866617516050748, "grad_norm": 0.37124839425086975, "learning_rate": 1.210712467678547e-05, "loss": 0.5201, "step": 28211 }, { "epoch": 0.8666482351856972, "grad_norm": 0.4100263714790344, "learning_rate": 1.21066522474908e-05, "loss": 0.5309, "step": 28212 }, { "epoch": 0.8666789543206463, "grad_norm": 0.36346039175987244, "learning_rate": 1.2106179813275893e-05, "loss": 0.5231, "step": 28213 }, { "epoch": 0.8667096734555955, "grad_norm": 0.3434258699417114, "learning_rate": 1.210570737414186e-05, "loss": 0.5033, "step": 28214 }, { "epoch": 0.8667403925905447, "grad_norm": 0.3670867085456848, "learning_rate": 1.2105234930089798e-05, "loss": 0.6535, "step": 28215 }, { "epoch": 0.8667711117254938, "grad_norm": 0.3647504150867462, "learning_rate": 1.210476248112081e-05, "loss": 0.5047, "step": 28216 }, { "epoch": 0.8668018308604429, "grad_norm": 0.35337746143341064, "learning_rate": 1.210429002723601e-05, "loss": 0.5336, "step": 28217 }, { "epoch": 0.8668325499953922, "grad_norm": 0.38123470544815063, "learning_rate": 1.2103817568436486e-05, "loss": 0.5719, "step": 28218 }, { "epoch": 0.8668632691303413, "grad_norm": 0.3549462854862213, "learning_rate": 1.2103345104723353e-05, "loss": 0.524, "step": 28219 }, { "epoch": 0.8668939882652904, "grad_norm": 0.43360161781311035, "learning_rate": 1.210287263609771e-05, "loss": 0.5549, "step": 28220 }, { "epoch": 0.8669247074002396, "grad_norm": 0.3634427785873413, "learning_rate": 1.2102400162560664e-05, "loss": 0.4821, "step": 28221 }, { "epoch": 0.8669554265351888, "grad_norm": 0.34963181614875793, "learning_rate": 1.2101927684113311e-05, "loss": 0.5119, "step": 28222 }, { "epoch": 0.8669861456701379, "grad_norm": 0.4105188548564911, "learning_rate": 1.2101455200756762e-05, "loss": 0.5195, "step": 28223 }, { "epoch": 0.8670168648050871, "grad_norm": 0.36009135842323303, "learning_rate": 1.2100982712492115e-05, "loss": 0.5621, "step": 28224 }, { "epoch": 0.8670475839400362, "grad_norm": 0.585655152797699, "learning_rate": 1.2100510219320479e-05, "loss": 0.506, "step": 28225 }, { "epoch": 0.8670783030749855, "grad_norm": 0.40325987339019775, "learning_rate": 1.2100037721242955e-05, "loss": 0.5373, "step": 28226 }, { "epoch": 0.8671090222099346, "grad_norm": 0.41445568203926086, "learning_rate": 1.2099565218260642e-05, "loss": 0.6247, "step": 28227 }, { "epoch": 0.8671397413448837, "grad_norm": 0.3949780762195587, "learning_rate": 1.2099092710374653e-05, "loss": 0.6176, "step": 28228 }, { "epoch": 0.8671704604798329, "grad_norm": 0.3419690430164337, "learning_rate": 1.2098620197586082e-05, "loss": 0.4383, "step": 28229 }, { "epoch": 0.867201179614782, "grad_norm": 0.3402341604232788, "learning_rate": 1.2098147679896039e-05, "loss": 0.5766, "step": 28230 }, { "epoch": 0.8672318987497312, "grad_norm": 0.38239824771881104, "learning_rate": 1.2097675157305625e-05, "loss": 0.5906, "step": 28231 }, { "epoch": 0.8672626178846804, "grad_norm": 0.47236600518226624, "learning_rate": 1.2097202629815944e-05, "loss": 0.5411, "step": 28232 }, { "epoch": 0.8672933370196295, "grad_norm": 0.3779805302619934, "learning_rate": 1.2096730097428102e-05, "loss": 0.5749, "step": 28233 }, { "epoch": 0.8673240561545786, "grad_norm": 0.4310336410999298, "learning_rate": 1.2096257560143197e-05, "loss": 0.5907, "step": 28234 }, { "epoch": 0.8673547752895279, "grad_norm": 0.35946735739707947, "learning_rate": 1.2095785017962337e-05, "loss": 0.5659, "step": 28235 }, { "epoch": 0.867385494424477, "grad_norm": 0.35657989978790283, "learning_rate": 1.2095312470886627e-05, "loss": 0.6309, "step": 28236 }, { "epoch": 0.8674162135594262, "grad_norm": 0.41304346919059753, "learning_rate": 1.2094839918917167e-05, "loss": 0.5346, "step": 28237 }, { "epoch": 0.8674469326943753, "grad_norm": 0.38084983825683594, "learning_rate": 1.2094367362055062e-05, "loss": 0.6162, "step": 28238 }, { "epoch": 0.8674776518293245, "grad_norm": 0.3936210870742798, "learning_rate": 1.2093894800301416e-05, "loss": 0.5426, "step": 28239 }, { "epoch": 0.8675083709642737, "grad_norm": 0.3637482821941376, "learning_rate": 1.209342223365733e-05, "loss": 0.5667, "step": 28240 }, { "epoch": 0.8675390900992228, "grad_norm": 0.3544303774833679, "learning_rate": 1.2092949662123915e-05, "loss": 0.5215, "step": 28241 }, { "epoch": 0.8675698092341719, "grad_norm": 0.3575209379196167, "learning_rate": 1.2092477085702267e-05, "loss": 0.5964, "step": 28242 }, { "epoch": 0.8676005283691212, "grad_norm": 0.3794827461242676, "learning_rate": 1.2092004504393493e-05, "loss": 0.4866, "step": 28243 }, { "epoch": 0.8676312475040703, "grad_norm": 0.3641577661037445, "learning_rate": 1.2091531918198696e-05, "loss": 0.6006, "step": 28244 }, { "epoch": 0.8676619666390194, "grad_norm": 0.35853537917137146, "learning_rate": 1.2091059327118979e-05, "loss": 0.605, "step": 28245 }, { "epoch": 0.8676926857739686, "grad_norm": 0.37069225311279297, "learning_rate": 1.209058673115545e-05, "loss": 0.6016, "step": 28246 }, { "epoch": 0.8677234049089178, "grad_norm": 0.3637998700141907, "learning_rate": 1.2090114130309207e-05, "loss": 0.5399, "step": 28247 }, { "epoch": 0.8677541240438669, "grad_norm": 0.39712899923324585, "learning_rate": 1.2089641524581359e-05, "loss": 0.5731, "step": 28248 }, { "epoch": 0.8677848431788161, "grad_norm": 0.34125983715057373, "learning_rate": 1.2089168913973004e-05, "loss": 0.4865, "step": 28249 }, { "epoch": 0.8678155623137652, "grad_norm": 0.3918803036212921, "learning_rate": 1.2088696298485252e-05, "loss": 0.52, "step": 28250 }, { "epoch": 0.8678462814487145, "grad_norm": 0.3776468336582184, "learning_rate": 1.2088223678119202e-05, "loss": 0.5726, "step": 28251 }, { "epoch": 0.8678770005836636, "grad_norm": 0.42034825682640076, "learning_rate": 1.208775105287596e-05, "loss": 0.5934, "step": 28252 }, { "epoch": 0.8679077197186127, "grad_norm": 0.3591286838054657, "learning_rate": 1.208727842275663e-05, "loss": 0.5678, "step": 28253 }, { "epoch": 0.8679384388535619, "grad_norm": 0.35185706615448, "learning_rate": 1.2086805787762314e-05, "loss": 0.508, "step": 28254 }, { "epoch": 0.867969157988511, "grad_norm": 0.36784687638282776, "learning_rate": 1.2086333147894121e-05, "loss": 0.5318, "step": 28255 }, { "epoch": 0.8679998771234602, "grad_norm": 0.3960963189601898, "learning_rate": 1.2085860503153149e-05, "loss": 0.588, "step": 28256 }, { "epoch": 0.8680305962584094, "grad_norm": 0.3797062933444977, "learning_rate": 1.2085387853540504e-05, "loss": 0.5202, "step": 28257 }, { "epoch": 0.8680613153933585, "grad_norm": 0.3768194615840912, "learning_rate": 1.208491519905729e-05, "loss": 0.5477, "step": 28258 }, { "epoch": 0.8680920345283076, "grad_norm": 0.38469552993774414, "learning_rate": 1.2084442539704612e-05, "loss": 0.5771, "step": 28259 }, { "epoch": 0.8681227536632569, "grad_norm": 0.34327077865600586, "learning_rate": 1.2083969875483573e-05, "loss": 0.5329, "step": 28260 }, { "epoch": 0.868153472798206, "grad_norm": 0.39796653389930725, "learning_rate": 1.2083497206395274e-05, "loss": 0.5203, "step": 28261 }, { "epoch": 0.8681841919331552, "grad_norm": 0.382515549659729, "learning_rate": 1.2083024532440826e-05, "loss": 0.5415, "step": 28262 }, { "epoch": 0.8682149110681043, "grad_norm": 0.3330744802951813, "learning_rate": 1.2082551853621325e-05, "loss": 0.5788, "step": 28263 }, { "epoch": 0.8682456302030535, "grad_norm": 0.35241952538490295, "learning_rate": 1.208207916993788e-05, "loss": 0.5498, "step": 28264 }, { "epoch": 0.8682763493380027, "grad_norm": 0.33570146560668945, "learning_rate": 1.2081606481391595e-05, "loss": 0.5153, "step": 28265 }, { "epoch": 0.8683070684729518, "grad_norm": 0.3952699899673462, "learning_rate": 1.2081133787983572e-05, "loss": 0.5389, "step": 28266 }, { "epoch": 0.8683377876079009, "grad_norm": 0.34955304861068726, "learning_rate": 1.2080661089714914e-05, "loss": 0.543, "step": 28267 }, { "epoch": 0.8683685067428502, "grad_norm": 0.35482311248779297, "learning_rate": 1.2080188386586729e-05, "loss": 0.6308, "step": 28268 }, { "epoch": 0.8683992258777993, "grad_norm": 0.4179548919200897, "learning_rate": 1.2079715678600117e-05, "loss": 0.5908, "step": 28269 }, { "epoch": 0.8684299450127484, "grad_norm": 0.38749513030052185, "learning_rate": 1.2079242965756181e-05, "loss": 0.54, "step": 28270 }, { "epoch": 0.8684606641476976, "grad_norm": 0.3896263539791107, "learning_rate": 1.2078770248056033e-05, "loss": 0.5186, "step": 28271 }, { "epoch": 0.8684913832826467, "grad_norm": 0.46835511922836304, "learning_rate": 1.2078297525500771e-05, "loss": 0.5535, "step": 28272 }, { "epoch": 0.868522102417596, "grad_norm": 0.49034619331359863, "learning_rate": 1.20778247980915e-05, "loss": 0.615, "step": 28273 }, { "epoch": 0.8685528215525451, "grad_norm": 0.3779667615890503, "learning_rate": 1.2077352065829321e-05, "loss": 0.4924, "step": 28274 }, { "epoch": 0.8685835406874942, "grad_norm": 0.40355467796325684, "learning_rate": 1.2076879328715345e-05, "loss": 0.5616, "step": 28275 }, { "epoch": 0.8686142598224434, "grad_norm": 0.43120065331459045, "learning_rate": 1.2076406586750668e-05, "loss": 0.4883, "step": 28276 }, { "epoch": 0.8686449789573926, "grad_norm": 0.37144190073013306, "learning_rate": 1.2075933839936403e-05, "loss": 0.5316, "step": 28277 }, { "epoch": 0.8686756980923417, "grad_norm": 0.4047151207923889, "learning_rate": 1.2075461088273649e-05, "loss": 0.6156, "step": 28278 }, { "epoch": 0.8687064172272909, "grad_norm": 0.41164830327033997, "learning_rate": 1.207498833176351e-05, "loss": 0.5835, "step": 28279 }, { "epoch": 0.86873713636224, "grad_norm": 0.3746277987957001, "learning_rate": 1.207451557040709e-05, "loss": 0.5735, "step": 28280 }, { "epoch": 0.8687678554971892, "grad_norm": 0.3428497910499573, "learning_rate": 1.2074042804205493e-05, "loss": 0.5535, "step": 28281 }, { "epoch": 0.8687985746321384, "grad_norm": 0.38990241289138794, "learning_rate": 1.2073570033159826e-05, "loss": 0.546, "step": 28282 }, { "epoch": 0.8688292937670875, "grad_norm": 0.37238162755966187, "learning_rate": 1.2073097257271189e-05, "loss": 0.5352, "step": 28283 }, { "epoch": 0.8688600129020366, "grad_norm": 1.2515003681182861, "learning_rate": 1.2072624476540691e-05, "loss": 0.5469, "step": 28284 }, { "epoch": 0.8688907320369859, "grad_norm": 0.40762174129486084, "learning_rate": 1.2072151690969434e-05, "loss": 0.5332, "step": 28285 }, { "epoch": 0.868921451171935, "grad_norm": 0.37631866335868835, "learning_rate": 1.2071678900558521e-05, "loss": 0.5537, "step": 28286 }, { "epoch": 0.8689521703068842, "grad_norm": 0.33911219239234924, "learning_rate": 1.2071206105309057e-05, "loss": 0.5451, "step": 28287 }, { "epoch": 0.8689828894418333, "grad_norm": 0.6600348949432373, "learning_rate": 1.2070733305222144e-05, "loss": 0.5846, "step": 28288 }, { "epoch": 0.8690136085767824, "grad_norm": 0.3795805871486664, "learning_rate": 1.2070260500298893e-05, "loss": 0.6143, "step": 28289 }, { "epoch": 0.8690443277117317, "grad_norm": 0.40648725628852844, "learning_rate": 1.2069787690540402e-05, "loss": 0.557, "step": 28290 }, { "epoch": 0.8690750468466808, "grad_norm": 0.4348684847354889, "learning_rate": 1.206931487594778e-05, "loss": 0.505, "step": 28291 }, { "epoch": 0.8691057659816299, "grad_norm": 0.43361029028892517, "learning_rate": 1.2068842056522123e-05, "loss": 0.5501, "step": 28292 }, { "epoch": 0.8691364851165791, "grad_norm": 0.35881322622299194, "learning_rate": 1.2068369232264547e-05, "loss": 0.5264, "step": 28293 }, { "epoch": 0.8691672042515283, "grad_norm": 0.3718612492084503, "learning_rate": 1.2067896403176147e-05, "loss": 0.496, "step": 28294 }, { "epoch": 0.8691979233864774, "grad_norm": 0.3262152075767517, "learning_rate": 1.206742356925803e-05, "loss": 0.5447, "step": 28295 }, { "epoch": 0.8692286425214266, "grad_norm": 0.36109989881515503, "learning_rate": 1.2066950730511303e-05, "loss": 0.5342, "step": 28296 }, { "epoch": 0.8692593616563757, "grad_norm": 0.3652712404727936, "learning_rate": 1.2066477886937067e-05, "loss": 0.5749, "step": 28297 }, { "epoch": 0.869290080791325, "grad_norm": 0.3252395987510681, "learning_rate": 1.2066005038536428e-05, "loss": 0.4636, "step": 28298 }, { "epoch": 0.8693207999262741, "grad_norm": 0.35671788454055786, "learning_rate": 1.206553218531049e-05, "loss": 0.4892, "step": 28299 }, { "epoch": 0.8693515190612232, "grad_norm": 0.3569547235965729, "learning_rate": 1.2065059327260358e-05, "loss": 0.4943, "step": 28300 }, { "epoch": 0.8693822381961724, "grad_norm": 0.5488170981407166, "learning_rate": 1.2064586464387134e-05, "loss": 0.5814, "step": 28301 }, { "epoch": 0.8694129573311216, "grad_norm": 0.3410843312740326, "learning_rate": 1.2064113596691923e-05, "loss": 0.5201, "step": 28302 }, { "epoch": 0.8694436764660707, "grad_norm": 0.39925825595855713, "learning_rate": 1.2063640724175833e-05, "loss": 0.5637, "step": 28303 }, { "epoch": 0.8694743956010199, "grad_norm": 0.3406887650489807, "learning_rate": 1.2063167846839967e-05, "loss": 0.5505, "step": 28304 }, { "epoch": 0.869505114735969, "grad_norm": 0.3735995888710022, "learning_rate": 1.2062694964685426e-05, "loss": 0.5459, "step": 28305 }, { "epoch": 0.8695358338709182, "grad_norm": 0.3855251967906952, "learning_rate": 1.2062222077713318e-05, "loss": 0.6123, "step": 28306 }, { "epoch": 0.8695665530058674, "grad_norm": 0.3665768802165985, "learning_rate": 1.2061749185924744e-05, "loss": 0.4494, "step": 28307 }, { "epoch": 0.8695972721408165, "grad_norm": 0.37892502546310425, "learning_rate": 1.2061276289320812e-05, "loss": 0.5711, "step": 28308 }, { "epoch": 0.8696279912757656, "grad_norm": 0.3508387804031372, "learning_rate": 1.2060803387902626e-05, "loss": 0.5015, "step": 28309 }, { "epoch": 0.8696587104107149, "grad_norm": 0.3673098683357239, "learning_rate": 1.206033048167129e-05, "loss": 0.5804, "step": 28310 }, { "epoch": 0.869689429545664, "grad_norm": 0.3834904730319977, "learning_rate": 1.205985757062791e-05, "loss": 0.5469, "step": 28311 }, { "epoch": 0.8697201486806132, "grad_norm": 0.36852291226387024, "learning_rate": 1.2059384654773586e-05, "loss": 0.5327, "step": 28312 }, { "epoch": 0.8697508678155623, "grad_norm": 0.4282296895980835, "learning_rate": 1.2058911734109426e-05, "loss": 0.5142, "step": 28313 }, { "epoch": 0.8697815869505114, "grad_norm": 0.34131157398223877, "learning_rate": 1.2058438808636534e-05, "loss": 0.5079, "step": 28314 }, { "epoch": 0.8698123060854607, "grad_norm": 0.5953589677810669, "learning_rate": 1.2057965878356015e-05, "loss": 0.5468, "step": 28315 }, { "epoch": 0.8698430252204098, "grad_norm": 0.3637564480304718, "learning_rate": 1.205749294326897e-05, "loss": 0.618, "step": 28316 }, { "epoch": 0.8698737443553589, "grad_norm": 0.34850266575813293, "learning_rate": 1.205702000337651e-05, "loss": 0.5441, "step": 28317 }, { "epoch": 0.8699044634903081, "grad_norm": 0.3577592968940735, "learning_rate": 1.2056547058679735e-05, "loss": 0.6174, "step": 28318 }, { "epoch": 0.8699351826252573, "grad_norm": 0.3557618260383606, "learning_rate": 1.2056074109179752e-05, "loss": 0.6154, "step": 28319 }, { "epoch": 0.8699659017602064, "grad_norm": 0.3887292742729187, "learning_rate": 1.2055601154877663e-05, "loss": 0.6225, "step": 28320 }, { "epoch": 0.8699966208951556, "grad_norm": 0.37766939401626587, "learning_rate": 1.2055128195774572e-05, "loss": 0.5358, "step": 28321 }, { "epoch": 0.8700273400301047, "grad_norm": 0.35930126905441284, "learning_rate": 1.2054655231871588e-05, "loss": 0.4598, "step": 28322 }, { "epoch": 0.870058059165054, "grad_norm": 0.3371865153312683, "learning_rate": 1.2054182263169816e-05, "loss": 0.5605, "step": 28323 }, { "epoch": 0.8700887783000031, "grad_norm": 0.3411305546760559, "learning_rate": 1.2053709289670352e-05, "loss": 0.5706, "step": 28324 }, { "epoch": 0.8701194974349522, "grad_norm": 0.43060052394866943, "learning_rate": 1.2053236311374307e-05, "loss": 0.5278, "step": 28325 }, { "epoch": 0.8701502165699014, "grad_norm": 0.3841065764427185, "learning_rate": 1.2052763328282789e-05, "loss": 0.4938, "step": 28326 }, { "epoch": 0.8701809357048506, "grad_norm": 0.35268688201904297, "learning_rate": 1.2052290340396898e-05, "loss": 0.497, "step": 28327 }, { "epoch": 0.8702116548397997, "grad_norm": 0.3373374044895172, "learning_rate": 1.2051817347717738e-05, "loss": 0.5758, "step": 28328 }, { "epoch": 0.8702423739747489, "grad_norm": 0.32163485884666443, "learning_rate": 1.2051344350246417e-05, "loss": 0.4543, "step": 28329 }, { "epoch": 0.870273093109698, "grad_norm": 0.3770144581794739, "learning_rate": 1.2050871347984036e-05, "loss": 0.5007, "step": 28330 }, { "epoch": 0.8703038122446471, "grad_norm": 0.404241144657135, "learning_rate": 1.2050398340931703e-05, "loss": 0.5732, "step": 28331 }, { "epoch": 0.8703345313795964, "grad_norm": 0.34240633249282837, "learning_rate": 1.2049925329090521e-05, "loss": 0.6023, "step": 28332 }, { "epoch": 0.8703652505145455, "grad_norm": 0.32017794251441956, "learning_rate": 1.2049452312461595e-05, "loss": 0.52, "step": 28333 }, { "epoch": 0.8703959696494946, "grad_norm": 0.34968438744544983, "learning_rate": 1.2048979291046032e-05, "loss": 0.5789, "step": 28334 }, { "epoch": 0.8704266887844438, "grad_norm": 0.5014244914054871, "learning_rate": 1.2048506264844932e-05, "loss": 0.5229, "step": 28335 }, { "epoch": 0.870457407919393, "grad_norm": 0.3599821925163269, "learning_rate": 1.2048033233859404e-05, "loss": 0.5853, "step": 28336 }, { "epoch": 0.8704881270543422, "grad_norm": 0.35950425267219543, "learning_rate": 1.204756019809055e-05, "loss": 0.5337, "step": 28337 }, { "epoch": 0.8705188461892913, "grad_norm": 0.3515561521053314, "learning_rate": 1.2047087157539478e-05, "loss": 0.542, "step": 28338 }, { "epoch": 0.8705495653242404, "grad_norm": 0.3710479140281677, "learning_rate": 1.204661411220729e-05, "loss": 0.541, "step": 28339 }, { "epoch": 0.8705802844591897, "grad_norm": 0.38754239678382874, "learning_rate": 1.2046141062095093e-05, "loss": 0.5415, "step": 28340 }, { "epoch": 0.8706110035941388, "grad_norm": 0.3851338028907776, "learning_rate": 1.2045668007203993e-05, "loss": 0.5068, "step": 28341 }, { "epoch": 0.8706417227290879, "grad_norm": 0.37634167075157166, "learning_rate": 1.2045194947535088e-05, "loss": 0.4997, "step": 28342 }, { "epoch": 0.8706724418640371, "grad_norm": 1.1174964904785156, "learning_rate": 1.2044721883089487e-05, "loss": 0.5526, "step": 28343 }, { "epoch": 0.8707031609989863, "grad_norm": 0.3972259759902954, "learning_rate": 1.2044248813868296e-05, "loss": 0.5519, "step": 28344 }, { "epoch": 0.8707338801339354, "grad_norm": 0.37555593252182007, "learning_rate": 1.204377573987262e-05, "loss": 0.5572, "step": 28345 }, { "epoch": 0.8707645992688846, "grad_norm": 0.34312689304351807, "learning_rate": 1.2043302661103565e-05, "loss": 0.5655, "step": 28346 }, { "epoch": 0.8707953184038337, "grad_norm": 0.3722642660140991, "learning_rate": 1.204282957756223e-05, "loss": 0.4981, "step": 28347 }, { "epoch": 0.870826037538783, "grad_norm": 0.3831174671649933, "learning_rate": 1.2042356489249727e-05, "loss": 0.5383, "step": 28348 }, { "epoch": 0.8708567566737321, "grad_norm": 0.34630683064460754, "learning_rate": 1.2041883396167156e-05, "loss": 0.5848, "step": 28349 }, { "epoch": 0.8708874758086812, "grad_norm": 0.37555786967277527, "learning_rate": 1.2041410298315626e-05, "loss": 0.6067, "step": 28350 }, { "epoch": 0.8709181949436304, "grad_norm": 0.5176382064819336, "learning_rate": 1.2040937195696235e-05, "loss": 0.5641, "step": 28351 }, { "epoch": 0.8709489140785796, "grad_norm": 0.34975409507751465, "learning_rate": 1.2040464088310097e-05, "loss": 0.5102, "step": 28352 }, { "epoch": 0.8709796332135287, "grad_norm": 0.3589489459991455, "learning_rate": 1.2039990976158312e-05, "loss": 0.5214, "step": 28353 }, { "epoch": 0.8710103523484779, "grad_norm": 0.3910689949989319, "learning_rate": 1.2039517859241986e-05, "loss": 0.6606, "step": 28354 }, { "epoch": 0.871041071483427, "grad_norm": 0.3577881157398224, "learning_rate": 1.203904473756222e-05, "loss": 0.4905, "step": 28355 }, { "epoch": 0.8710717906183761, "grad_norm": 0.3914414048194885, "learning_rate": 1.2038571611120127e-05, "loss": 0.5442, "step": 28356 }, { "epoch": 0.8711025097533254, "grad_norm": 0.3504895269870758, "learning_rate": 1.2038098479916807e-05, "loss": 0.6555, "step": 28357 }, { "epoch": 0.8711332288882745, "grad_norm": 0.445613294839859, "learning_rate": 1.2037625343953363e-05, "loss": 0.5092, "step": 28358 }, { "epoch": 0.8711639480232236, "grad_norm": 0.37776419520378113, "learning_rate": 1.2037152203230905e-05, "loss": 0.6011, "step": 28359 }, { "epoch": 0.8711946671581728, "grad_norm": 0.4132393002510071, "learning_rate": 1.2036679057750534e-05, "loss": 0.5574, "step": 28360 }, { "epoch": 0.871225386293122, "grad_norm": 0.38311588764190674, "learning_rate": 1.2036205907513357e-05, "loss": 0.5174, "step": 28361 }, { "epoch": 0.8712561054280712, "grad_norm": 0.3761301040649414, "learning_rate": 1.2035732752520477e-05, "loss": 0.534, "step": 28362 }, { "epoch": 0.8712868245630203, "grad_norm": 0.3402872383594513, "learning_rate": 1.2035259592773005e-05, "loss": 0.5838, "step": 28363 }, { "epoch": 0.8713175436979694, "grad_norm": 0.36089766025543213, "learning_rate": 1.203478642827204e-05, "loss": 0.5951, "step": 28364 }, { "epoch": 0.8713482628329187, "grad_norm": 0.3616520166397095, "learning_rate": 1.203431325901869e-05, "loss": 0.5889, "step": 28365 }, { "epoch": 0.8713789819678678, "grad_norm": 0.40467703342437744, "learning_rate": 1.2033840085014062e-05, "loss": 0.5989, "step": 28366 }, { "epoch": 0.8714097011028169, "grad_norm": 0.37168195843696594, "learning_rate": 1.2033366906259252e-05, "loss": 0.5681, "step": 28367 }, { "epoch": 0.8714404202377661, "grad_norm": 0.3765406012535095, "learning_rate": 1.2032893722755376e-05, "loss": 0.5182, "step": 28368 }, { "epoch": 0.8714711393727153, "grad_norm": 0.4047154188156128, "learning_rate": 1.2032420534503533e-05, "loss": 0.5291, "step": 28369 }, { "epoch": 0.8715018585076644, "grad_norm": 0.3733317255973816, "learning_rate": 1.2031947341504832e-05, "loss": 0.474, "step": 28370 }, { "epoch": 0.8715325776426136, "grad_norm": 0.36944565176963806, "learning_rate": 1.2031474143760374e-05, "loss": 0.5445, "step": 28371 }, { "epoch": 0.8715632967775627, "grad_norm": 0.36369311809539795, "learning_rate": 1.2031000941271269e-05, "loss": 0.5393, "step": 28372 }, { "epoch": 0.871594015912512, "grad_norm": 0.43815261125564575, "learning_rate": 1.2030527734038615e-05, "loss": 0.5848, "step": 28373 }, { "epoch": 0.8716247350474611, "grad_norm": 0.3505791425704956, "learning_rate": 1.2030054522063525e-05, "loss": 0.4138, "step": 28374 }, { "epoch": 0.8716554541824102, "grad_norm": 0.3540589511394501, "learning_rate": 1.2029581305347102e-05, "loss": 0.5124, "step": 28375 }, { "epoch": 0.8716861733173594, "grad_norm": 0.5005012154579163, "learning_rate": 1.2029108083890448e-05, "loss": 0.4989, "step": 28376 }, { "epoch": 0.8717168924523085, "grad_norm": 0.3566738963127136, "learning_rate": 1.202863485769467e-05, "loss": 0.4498, "step": 28377 }, { "epoch": 0.8717476115872577, "grad_norm": 0.3552400767803192, "learning_rate": 1.2028161626760875e-05, "loss": 0.5022, "step": 28378 }, { "epoch": 0.8717783307222069, "grad_norm": 0.38504570722579956, "learning_rate": 1.2027688391090166e-05, "loss": 0.5223, "step": 28379 }, { "epoch": 0.871809049857156, "grad_norm": 0.34923699498176575, "learning_rate": 1.2027215150683646e-05, "loss": 0.5355, "step": 28380 }, { "epoch": 0.8718397689921051, "grad_norm": 0.3514001667499542, "learning_rate": 1.2026741905542427e-05, "loss": 0.5752, "step": 28381 }, { "epoch": 0.8718704881270544, "grad_norm": 0.5052415728569031, "learning_rate": 1.2026268655667613e-05, "loss": 0.5219, "step": 28382 }, { "epoch": 0.8719012072620035, "grad_norm": 0.36378154158592224, "learning_rate": 1.2025795401060303e-05, "loss": 0.5471, "step": 28383 }, { "epoch": 0.8719319263969527, "grad_norm": 0.4040933847427368, "learning_rate": 1.2025322141721607e-05, "loss": 0.5368, "step": 28384 }, { "epoch": 0.8719626455319018, "grad_norm": 0.38887929916381836, "learning_rate": 1.2024848877652631e-05, "loss": 0.5652, "step": 28385 }, { "epoch": 0.871993364666851, "grad_norm": 0.41604888439178467, "learning_rate": 1.202437560885448e-05, "loss": 0.5937, "step": 28386 }, { "epoch": 0.8720240838018002, "grad_norm": 0.38224557042121887, "learning_rate": 1.2023902335328254e-05, "loss": 0.525, "step": 28387 }, { "epoch": 0.8720548029367493, "grad_norm": 0.39212989807128906, "learning_rate": 1.2023429057075067e-05, "loss": 0.5639, "step": 28388 }, { "epoch": 0.8720855220716984, "grad_norm": 0.37059760093688965, "learning_rate": 1.202295577409602e-05, "loss": 0.6008, "step": 28389 }, { "epoch": 0.8721162412066477, "grad_norm": 0.3652953803539276, "learning_rate": 1.2022482486392217e-05, "loss": 0.5646, "step": 28390 }, { "epoch": 0.8721469603415968, "grad_norm": 0.34840190410614014, "learning_rate": 1.2022009193964764e-05, "loss": 0.5352, "step": 28391 }, { "epoch": 0.8721776794765459, "grad_norm": 0.43520575761795044, "learning_rate": 1.2021535896814768e-05, "loss": 0.5448, "step": 28392 }, { "epoch": 0.8722083986114951, "grad_norm": 0.37797415256500244, "learning_rate": 1.2021062594943335e-05, "loss": 0.546, "step": 28393 }, { "epoch": 0.8722391177464442, "grad_norm": 0.3550953269004822, "learning_rate": 1.202058928835157e-05, "loss": 0.5336, "step": 28394 }, { "epoch": 0.8722698368813934, "grad_norm": 0.3618764579296112, "learning_rate": 1.2020115977040576e-05, "loss": 0.5322, "step": 28395 }, { "epoch": 0.8723005560163426, "grad_norm": 0.37103471159935, "learning_rate": 1.2019642661011458e-05, "loss": 0.5581, "step": 28396 }, { "epoch": 0.8723312751512917, "grad_norm": 0.35329002141952515, "learning_rate": 1.2019169340265329e-05, "loss": 0.482, "step": 28397 }, { "epoch": 0.872361994286241, "grad_norm": 0.35215526819229126, "learning_rate": 1.2018696014803284e-05, "loss": 0.5222, "step": 28398 }, { "epoch": 0.8723927134211901, "grad_norm": 0.5158136487007141, "learning_rate": 1.2018222684626433e-05, "loss": 0.5852, "step": 28399 }, { "epoch": 0.8724234325561392, "grad_norm": 0.3559378683567047, "learning_rate": 1.2017749349735885e-05, "loss": 0.4943, "step": 28400 }, { "epoch": 0.8724541516910884, "grad_norm": 0.42552292346954346, "learning_rate": 1.2017276010132739e-05, "loss": 0.5666, "step": 28401 }, { "epoch": 0.8724848708260375, "grad_norm": 0.3351263403892517, "learning_rate": 1.2016802665818107e-05, "loss": 0.514, "step": 28402 }, { "epoch": 0.8725155899609867, "grad_norm": 0.3869855999946594, "learning_rate": 1.2016329316793092e-05, "loss": 0.5801, "step": 28403 }, { "epoch": 0.8725463090959359, "grad_norm": 0.44248974323272705, "learning_rate": 1.2015855963058799e-05, "loss": 0.5102, "step": 28404 }, { "epoch": 0.872577028230885, "grad_norm": 0.36946240067481995, "learning_rate": 1.201538260461633e-05, "loss": 0.5402, "step": 28405 }, { "epoch": 0.8726077473658341, "grad_norm": 0.3372649550437927, "learning_rate": 1.20149092414668e-05, "loss": 0.6167, "step": 28406 }, { "epoch": 0.8726384665007834, "grad_norm": 0.3475021421909332, "learning_rate": 1.2014435873611303e-05, "loss": 0.581, "step": 28407 }, { "epoch": 0.8726691856357325, "grad_norm": 0.3618728220462799, "learning_rate": 1.2013962501050956e-05, "loss": 0.601, "step": 28408 }, { "epoch": 0.8726999047706817, "grad_norm": 0.33615124225616455, "learning_rate": 1.2013489123786855e-05, "loss": 0.5719, "step": 28409 }, { "epoch": 0.8727306239056308, "grad_norm": 0.35050782561302185, "learning_rate": 1.201301574182011e-05, "loss": 0.5212, "step": 28410 }, { "epoch": 0.87276134304058, "grad_norm": 0.5041343569755554, "learning_rate": 1.2012542355151827e-05, "loss": 0.5489, "step": 28411 }, { "epoch": 0.8727920621755292, "grad_norm": 0.39476048946380615, "learning_rate": 1.201206896378311e-05, "loss": 0.5501, "step": 28412 }, { "epoch": 0.8728227813104783, "grad_norm": 0.40119466185569763, "learning_rate": 1.2011595567715066e-05, "loss": 0.588, "step": 28413 }, { "epoch": 0.8728535004454274, "grad_norm": 0.3947545289993286, "learning_rate": 1.2011122166948799e-05, "loss": 0.4643, "step": 28414 }, { "epoch": 0.8728842195803767, "grad_norm": 0.366443395614624, "learning_rate": 1.2010648761485418e-05, "loss": 0.5407, "step": 28415 }, { "epoch": 0.8729149387153258, "grad_norm": 0.35504522919654846, "learning_rate": 1.2010175351326023e-05, "loss": 0.4415, "step": 28416 }, { "epoch": 0.8729456578502749, "grad_norm": 0.3694327771663666, "learning_rate": 1.2009701936471724e-05, "loss": 0.5456, "step": 28417 }, { "epoch": 0.8729763769852241, "grad_norm": 0.3424679636955261, "learning_rate": 1.2009228516923628e-05, "loss": 0.5432, "step": 28418 }, { "epoch": 0.8730070961201732, "grad_norm": 0.362690269947052, "learning_rate": 1.2008755092682836e-05, "loss": 0.5338, "step": 28419 }, { "epoch": 0.8730378152551224, "grad_norm": 0.39937320351600647, "learning_rate": 1.2008281663750458e-05, "loss": 0.6598, "step": 28420 }, { "epoch": 0.8730685343900716, "grad_norm": 0.3452723026275635, "learning_rate": 1.2007808230127595e-05, "loss": 0.5612, "step": 28421 }, { "epoch": 0.8730992535250207, "grad_norm": 0.4428025782108307, "learning_rate": 1.200733479181536e-05, "loss": 0.4835, "step": 28422 }, { "epoch": 0.8731299726599699, "grad_norm": 0.3314797282218933, "learning_rate": 1.2006861348814851e-05, "loss": 0.4244, "step": 28423 }, { "epoch": 0.8731606917949191, "grad_norm": 0.3312529921531677, "learning_rate": 1.200638790112718e-05, "loss": 0.5258, "step": 28424 }, { "epoch": 0.8731914109298682, "grad_norm": 0.3261760473251343, "learning_rate": 1.2005914448753449e-05, "loss": 0.4752, "step": 28425 }, { "epoch": 0.8732221300648174, "grad_norm": 0.38583502173423767, "learning_rate": 1.2005440991694763e-05, "loss": 0.5055, "step": 28426 }, { "epoch": 0.8732528491997665, "grad_norm": 0.3740696907043457, "learning_rate": 1.2004967529952232e-05, "loss": 0.5636, "step": 28427 }, { "epoch": 0.8732835683347157, "grad_norm": 0.422904372215271, "learning_rate": 1.2004494063526956e-05, "loss": 0.6054, "step": 28428 }, { "epoch": 0.8733142874696649, "grad_norm": 0.33249735832214355, "learning_rate": 1.200402059242005e-05, "loss": 0.4509, "step": 28429 }, { "epoch": 0.873345006604614, "grad_norm": 0.41646090149879456, "learning_rate": 1.2003547116632608e-05, "loss": 0.5981, "step": 28430 }, { "epoch": 0.8733757257395631, "grad_norm": 0.4529435336589813, "learning_rate": 1.2003073636165745e-05, "loss": 0.5739, "step": 28431 }, { "epoch": 0.8734064448745124, "grad_norm": 0.3781065344810486, "learning_rate": 1.200260015102056e-05, "loss": 0.5844, "step": 28432 }, { "epoch": 0.8734371640094615, "grad_norm": 0.4346970021724701, "learning_rate": 1.2002126661198168e-05, "loss": 0.6099, "step": 28433 }, { "epoch": 0.8734678831444107, "grad_norm": 0.3558316230773926, "learning_rate": 1.2001653166699666e-05, "loss": 0.5097, "step": 28434 }, { "epoch": 0.8734986022793598, "grad_norm": 0.3581884205341339, "learning_rate": 1.200117966752616e-05, "loss": 0.5732, "step": 28435 }, { "epoch": 0.873529321414309, "grad_norm": 1.1512809991836548, "learning_rate": 1.2000706163678763e-05, "loss": 0.5112, "step": 28436 }, { "epoch": 0.8735600405492582, "grad_norm": 0.34583595395088196, "learning_rate": 1.2000232655158577e-05, "loss": 0.5539, "step": 28437 }, { "epoch": 0.8735907596842073, "grad_norm": 0.37214091420173645, "learning_rate": 1.199975914196671e-05, "loss": 0.4647, "step": 28438 }, { "epoch": 0.8736214788191564, "grad_norm": 0.37176012992858887, "learning_rate": 1.1999285624104263e-05, "loss": 0.5516, "step": 28439 }, { "epoch": 0.8736521979541056, "grad_norm": 0.34277960658073425, "learning_rate": 1.1998812101572344e-05, "loss": 0.5603, "step": 28440 }, { "epoch": 0.8736829170890548, "grad_norm": 0.38454344868659973, "learning_rate": 1.199833857437206e-05, "loss": 0.5059, "step": 28441 }, { "epoch": 0.8737136362240039, "grad_norm": 0.36601483821868896, "learning_rate": 1.199786504250452e-05, "loss": 0.5257, "step": 28442 }, { "epoch": 0.8737443553589531, "grad_norm": 0.3531837463378906, "learning_rate": 1.1997391505970821e-05, "loss": 0.514, "step": 28443 }, { "epoch": 0.8737750744939022, "grad_norm": 0.40768250823020935, "learning_rate": 1.199691796477208e-05, "loss": 0.6043, "step": 28444 }, { "epoch": 0.8738057936288514, "grad_norm": 0.4027094542980194, "learning_rate": 1.1996444418909397e-05, "loss": 0.5302, "step": 28445 }, { "epoch": 0.8738365127638006, "grad_norm": 0.37989288568496704, "learning_rate": 1.1995970868383878e-05, "loss": 0.5779, "step": 28446 }, { "epoch": 0.8738672318987497, "grad_norm": 0.3598441481590271, "learning_rate": 1.1995497313196628e-05, "loss": 0.5262, "step": 28447 }, { "epoch": 0.8738979510336989, "grad_norm": 0.34312736988067627, "learning_rate": 1.1995023753348756e-05, "loss": 0.5935, "step": 28448 }, { "epoch": 0.8739286701686481, "grad_norm": 0.35345765948295593, "learning_rate": 1.1994550188841366e-05, "loss": 0.4661, "step": 28449 }, { "epoch": 0.8739593893035972, "grad_norm": 0.3622262179851532, "learning_rate": 1.1994076619675564e-05, "loss": 0.6362, "step": 28450 }, { "epoch": 0.8739901084385464, "grad_norm": 0.3771741986274719, "learning_rate": 1.199360304585246e-05, "loss": 0.4532, "step": 28451 }, { "epoch": 0.8740208275734955, "grad_norm": 0.3380659222602844, "learning_rate": 1.1993129467373153e-05, "loss": 0.4989, "step": 28452 }, { "epoch": 0.8740515467084446, "grad_norm": 0.36526191234588623, "learning_rate": 1.1992655884238752e-05, "loss": 0.592, "step": 28453 }, { "epoch": 0.8740822658433939, "grad_norm": 0.3690098524093628, "learning_rate": 1.199218229645037e-05, "loss": 0.5066, "step": 28454 }, { "epoch": 0.874112984978343, "grad_norm": 0.3638240396976471, "learning_rate": 1.1991708704009102e-05, "loss": 0.5629, "step": 28455 }, { "epoch": 0.8741437041132921, "grad_norm": 0.35513952374458313, "learning_rate": 1.1991235106916062e-05, "loss": 0.5547, "step": 28456 }, { "epoch": 0.8741744232482414, "grad_norm": 0.4401882588863373, "learning_rate": 1.1990761505172351e-05, "loss": 0.656, "step": 28457 }, { "epoch": 0.8742051423831905, "grad_norm": 0.38038066029548645, "learning_rate": 1.1990287898779078e-05, "loss": 0.5544, "step": 28458 }, { "epoch": 0.8742358615181397, "grad_norm": 0.3848641514778137, "learning_rate": 1.198981428773735e-05, "loss": 0.5209, "step": 28459 }, { "epoch": 0.8742665806530888, "grad_norm": 0.3674757480621338, "learning_rate": 1.1989340672048271e-05, "loss": 0.498, "step": 28460 }, { "epoch": 0.8742972997880379, "grad_norm": 0.37454643845558167, "learning_rate": 1.1988867051712952e-05, "loss": 0.5377, "step": 28461 }, { "epoch": 0.8743280189229872, "grad_norm": 0.4118833839893341, "learning_rate": 1.1988393426732488e-05, "loss": 0.5715, "step": 28462 }, { "epoch": 0.8743587380579363, "grad_norm": 0.31500008702278137, "learning_rate": 1.1987919797107996e-05, "loss": 0.4664, "step": 28463 }, { "epoch": 0.8743894571928854, "grad_norm": 0.3179040253162384, "learning_rate": 1.1987446162840577e-05, "loss": 0.4353, "step": 28464 }, { "epoch": 0.8744201763278346, "grad_norm": 0.34795668721199036, "learning_rate": 1.1986972523931343e-05, "loss": 0.6149, "step": 28465 }, { "epoch": 0.8744508954627838, "grad_norm": 0.42334648966789246, "learning_rate": 1.1986498880381389e-05, "loss": 0.4578, "step": 28466 }, { "epoch": 0.8744816145977329, "grad_norm": 0.4124751389026642, "learning_rate": 1.1986025232191833e-05, "loss": 0.5754, "step": 28467 }, { "epoch": 0.8745123337326821, "grad_norm": 0.3709756135940552, "learning_rate": 1.1985551579363775e-05, "loss": 0.5432, "step": 28468 }, { "epoch": 0.8745430528676312, "grad_norm": 0.35963988304138184, "learning_rate": 1.1985077921898322e-05, "loss": 0.5947, "step": 28469 }, { "epoch": 0.8745737720025804, "grad_norm": 0.32027468085289, "learning_rate": 1.1984604259796582e-05, "loss": 0.4564, "step": 28470 }, { "epoch": 0.8746044911375296, "grad_norm": 0.35123881697654724, "learning_rate": 1.1984130593059657e-05, "loss": 0.4812, "step": 28471 }, { "epoch": 0.8746352102724787, "grad_norm": 0.35165804624557495, "learning_rate": 1.1983656921688662e-05, "loss": 0.5712, "step": 28472 }, { "epoch": 0.8746659294074279, "grad_norm": 0.33970072865486145, "learning_rate": 1.1983183245684692e-05, "loss": 0.5265, "step": 28473 }, { "epoch": 0.874696648542377, "grad_norm": 0.35628536343574524, "learning_rate": 1.1982709565048865e-05, "loss": 0.5585, "step": 28474 }, { "epoch": 0.8747273676773262, "grad_norm": 0.39367762207984924, "learning_rate": 1.1982235879782276e-05, "loss": 0.549, "step": 28475 }, { "epoch": 0.8747580868122754, "grad_norm": 0.3781222701072693, "learning_rate": 1.1981762189886039e-05, "loss": 0.554, "step": 28476 }, { "epoch": 0.8747888059472245, "grad_norm": 0.3650170862674713, "learning_rate": 1.1981288495361256e-05, "loss": 0.6579, "step": 28477 }, { "epoch": 0.8748195250821736, "grad_norm": 0.3629835247993469, "learning_rate": 1.198081479620904e-05, "loss": 0.5871, "step": 28478 }, { "epoch": 0.8748502442171229, "grad_norm": 0.31506964564323425, "learning_rate": 1.198034109243049e-05, "loss": 0.5439, "step": 28479 }, { "epoch": 0.874880963352072, "grad_norm": 0.39303264021873474, "learning_rate": 1.1979867384026713e-05, "loss": 0.4821, "step": 28480 }, { "epoch": 0.8749116824870211, "grad_norm": 0.3680780529975891, "learning_rate": 1.1979393670998823e-05, "loss": 0.5754, "step": 28481 }, { "epoch": 0.8749424016219703, "grad_norm": 0.40052494406700134, "learning_rate": 1.1978919953347913e-05, "loss": 0.4647, "step": 28482 }, { "epoch": 0.8749731207569195, "grad_norm": 0.3474857807159424, "learning_rate": 1.1978446231075106e-05, "loss": 0.5724, "step": 28483 }, { "epoch": 0.8750038398918687, "grad_norm": 0.37930673360824585, "learning_rate": 1.1977972504181493e-05, "loss": 0.6296, "step": 28484 }, { "epoch": 0.8750345590268178, "grad_norm": 0.3816988468170166, "learning_rate": 1.197749877266819e-05, "loss": 0.5506, "step": 28485 }, { "epoch": 0.8750652781617669, "grad_norm": 0.3843477666378021, "learning_rate": 1.1977025036536298e-05, "loss": 0.51, "step": 28486 }, { "epoch": 0.8750959972967162, "grad_norm": 0.3932378590106964, "learning_rate": 1.1976551295786928e-05, "loss": 0.4969, "step": 28487 }, { "epoch": 0.8751267164316653, "grad_norm": 0.6251181364059448, "learning_rate": 1.1976077550421183e-05, "loss": 0.5236, "step": 28488 }, { "epoch": 0.8751574355666144, "grad_norm": 0.39725425839424133, "learning_rate": 1.1975603800440171e-05, "loss": 0.545, "step": 28489 }, { "epoch": 0.8751881547015636, "grad_norm": 0.33884525299072266, "learning_rate": 1.1975130045845003e-05, "loss": 0.5348, "step": 28490 }, { "epoch": 0.8752188738365128, "grad_norm": 0.38952916860580444, "learning_rate": 1.1974656286636775e-05, "loss": 0.581, "step": 28491 }, { "epoch": 0.8752495929714619, "grad_norm": 0.3561926782131195, "learning_rate": 1.1974182522816601e-05, "loss": 0.5248, "step": 28492 }, { "epoch": 0.8752803121064111, "grad_norm": 0.38090750575065613, "learning_rate": 1.1973708754385587e-05, "loss": 0.5169, "step": 28493 }, { "epoch": 0.8753110312413602, "grad_norm": 0.4078444242477417, "learning_rate": 1.197323498134484e-05, "loss": 0.5043, "step": 28494 }, { "epoch": 0.8753417503763095, "grad_norm": 0.3465564250946045, "learning_rate": 1.197276120369546e-05, "loss": 0.5336, "step": 28495 }, { "epoch": 0.8753724695112586, "grad_norm": 0.35461142659187317, "learning_rate": 1.1972287421438563e-05, "loss": 0.5787, "step": 28496 }, { "epoch": 0.8754031886462077, "grad_norm": 0.3657251298427582, "learning_rate": 1.197181363457525e-05, "loss": 0.5132, "step": 28497 }, { "epoch": 0.8754339077811569, "grad_norm": 0.3772505819797516, "learning_rate": 1.1971339843106626e-05, "loss": 0.509, "step": 28498 }, { "epoch": 0.875464626916106, "grad_norm": 0.3799249529838562, "learning_rate": 1.1970866047033803e-05, "loss": 0.6318, "step": 28499 }, { "epoch": 0.8754953460510552, "grad_norm": 0.4004979133605957, "learning_rate": 1.197039224635788e-05, "loss": 0.6919, "step": 28500 }, { "epoch": 0.8755260651860044, "grad_norm": 0.3383437991142273, "learning_rate": 1.1969918441079976e-05, "loss": 0.5144, "step": 28501 }, { "epoch": 0.8755567843209535, "grad_norm": 0.3761340081691742, "learning_rate": 1.1969444631201183e-05, "loss": 0.5923, "step": 28502 }, { "epoch": 0.8755875034559026, "grad_norm": 0.35938945412635803, "learning_rate": 1.1968970816722618e-05, "loss": 0.567, "step": 28503 }, { "epoch": 0.8756182225908519, "grad_norm": 0.3502582907676697, "learning_rate": 1.1968496997645383e-05, "loss": 0.4865, "step": 28504 }, { "epoch": 0.875648941725801, "grad_norm": 0.3545056879520416, "learning_rate": 1.1968023173970587e-05, "loss": 0.5974, "step": 28505 }, { "epoch": 0.8756796608607501, "grad_norm": 0.3534618318080902, "learning_rate": 1.1967549345699332e-05, "loss": 0.5035, "step": 28506 }, { "epoch": 0.8757103799956993, "grad_norm": 0.38224393129348755, "learning_rate": 1.1967075512832731e-05, "loss": 0.5166, "step": 28507 }, { "epoch": 0.8757410991306485, "grad_norm": 0.4031940698623657, "learning_rate": 1.1966601675371887e-05, "loss": 0.5356, "step": 28508 }, { "epoch": 0.8757718182655977, "grad_norm": 0.4032827615737915, "learning_rate": 1.1966127833317906e-05, "loss": 0.6813, "step": 28509 }, { "epoch": 0.8758025374005468, "grad_norm": 0.38926735520362854, "learning_rate": 1.1965653986671901e-05, "loss": 0.5601, "step": 28510 }, { "epoch": 0.8758332565354959, "grad_norm": 0.3711499571800232, "learning_rate": 1.1965180135434968e-05, "loss": 0.5965, "step": 28511 }, { "epoch": 0.8758639756704452, "grad_norm": 0.3383844196796417, "learning_rate": 1.1964706279608224e-05, "loss": 0.494, "step": 28512 }, { "epoch": 0.8758946948053943, "grad_norm": 0.3685321807861328, "learning_rate": 1.1964232419192769e-05, "loss": 0.6024, "step": 28513 }, { "epoch": 0.8759254139403434, "grad_norm": 0.4037533402442932, "learning_rate": 1.1963758554189712e-05, "loss": 0.488, "step": 28514 }, { "epoch": 0.8759561330752926, "grad_norm": 0.4837189316749573, "learning_rate": 1.1963284684600162e-05, "loss": 0.5113, "step": 28515 }, { "epoch": 0.8759868522102418, "grad_norm": 0.36115920543670654, "learning_rate": 1.196281081042522e-05, "loss": 0.5508, "step": 28516 }, { "epoch": 0.8760175713451909, "grad_norm": 0.3209586441516876, "learning_rate": 1.1962336931666002e-05, "loss": 0.5337, "step": 28517 }, { "epoch": 0.8760482904801401, "grad_norm": 0.4126362204551697, "learning_rate": 1.1961863048323603e-05, "loss": 0.5662, "step": 28518 }, { "epoch": 0.8760790096150892, "grad_norm": 0.3545790910720825, "learning_rate": 1.1961389160399138e-05, "loss": 0.5298, "step": 28519 }, { "epoch": 0.8761097287500385, "grad_norm": 0.4286617636680603, "learning_rate": 1.1960915267893711e-05, "loss": 0.575, "step": 28520 }, { "epoch": 0.8761404478849876, "grad_norm": 0.3668558895587921, "learning_rate": 1.1960441370808431e-05, "loss": 0.5611, "step": 28521 }, { "epoch": 0.8761711670199367, "grad_norm": 0.3677925765514374, "learning_rate": 1.1959967469144402e-05, "loss": 0.561, "step": 28522 }, { "epoch": 0.8762018861548859, "grad_norm": 0.34738266468048096, "learning_rate": 1.1959493562902734e-05, "loss": 0.6089, "step": 28523 }, { "epoch": 0.876232605289835, "grad_norm": 0.32403188943862915, "learning_rate": 1.1959019652084527e-05, "loss": 0.4787, "step": 28524 }, { "epoch": 0.8762633244247842, "grad_norm": 0.3634187877178192, "learning_rate": 1.19585457366909e-05, "loss": 0.5615, "step": 28525 }, { "epoch": 0.8762940435597334, "grad_norm": 0.35820427536964417, "learning_rate": 1.1958071816722944e-05, "loss": 0.4748, "step": 28526 }, { "epoch": 0.8763247626946825, "grad_norm": 0.36405035853385925, "learning_rate": 1.1957597892181781e-05, "loss": 0.5684, "step": 28527 }, { "epoch": 0.8763554818296316, "grad_norm": 0.37877750396728516, "learning_rate": 1.1957123963068508e-05, "loss": 0.5658, "step": 28528 }, { "epoch": 0.8763862009645809, "grad_norm": 0.438912570476532, "learning_rate": 1.1956650029384239e-05, "loss": 0.615, "step": 28529 }, { "epoch": 0.87641692009953, "grad_norm": 0.37476322054862976, "learning_rate": 1.1956176091130074e-05, "loss": 0.5545, "step": 28530 }, { "epoch": 0.8764476392344791, "grad_norm": 0.401238352060318, "learning_rate": 1.1955702148307126e-05, "loss": 0.5535, "step": 28531 }, { "epoch": 0.8764783583694283, "grad_norm": 0.3393048644065857, "learning_rate": 1.1955228200916495e-05, "loss": 0.4938, "step": 28532 }, { "epoch": 0.8765090775043775, "grad_norm": 0.37326276302337646, "learning_rate": 1.1954754248959297e-05, "loss": 0.5221, "step": 28533 }, { "epoch": 0.8765397966393267, "grad_norm": 0.35535162687301636, "learning_rate": 1.1954280292436631e-05, "loss": 0.5764, "step": 28534 }, { "epoch": 0.8765705157742758, "grad_norm": 0.4396457076072693, "learning_rate": 1.1953806331349606e-05, "loss": 0.5983, "step": 28535 }, { "epoch": 0.8766012349092249, "grad_norm": 0.3452571630477905, "learning_rate": 1.195333236569933e-05, "loss": 0.511, "step": 28536 }, { "epoch": 0.8766319540441742, "grad_norm": 0.3852604329586029, "learning_rate": 1.1952858395486912e-05, "loss": 0.5789, "step": 28537 }, { "epoch": 0.8766626731791233, "grad_norm": 0.40779659152030945, "learning_rate": 1.1952384420713456e-05, "loss": 0.5492, "step": 28538 }, { "epoch": 0.8766933923140724, "grad_norm": 0.35132572054862976, "learning_rate": 1.195191044138007e-05, "loss": 0.5182, "step": 28539 }, { "epoch": 0.8767241114490216, "grad_norm": 0.4511793851852417, "learning_rate": 1.1951436457487859e-05, "loss": 0.5849, "step": 28540 }, { "epoch": 0.8767548305839707, "grad_norm": 0.36128172278404236, "learning_rate": 1.1950962469037934e-05, "loss": 0.5858, "step": 28541 }, { "epoch": 0.8767855497189199, "grad_norm": 0.3583977222442627, "learning_rate": 1.19504884760314e-05, "loss": 0.5635, "step": 28542 }, { "epoch": 0.8768162688538691, "grad_norm": 0.46043965220451355, "learning_rate": 1.1950014478469363e-05, "loss": 0.6164, "step": 28543 }, { "epoch": 0.8768469879888182, "grad_norm": 0.384075403213501, "learning_rate": 1.194954047635293e-05, "loss": 0.5177, "step": 28544 }, { "epoch": 0.8768777071237674, "grad_norm": 0.4157235622406006, "learning_rate": 1.1949066469683207e-05, "loss": 0.6756, "step": 28545 }, { "epoch": 0.8769084262587166, "grad_norm": 0.3424513339996338, "learning_rate": 1.194859245846131e-05, "loss": 0.5368, "step": 28546 }, { "epoch": 0.8769391453936657, "grad_norm": 0.3807108402252197, "learning_rate": 1.1948118442688335e-05, "loss": 0.5447, "step": 28547 }, { "epoch": 0.8769698645286149, "grad_norm": 0.3656023144721985, "learning_rate": 1.1947644422365394e-05, "loss": 0.5476, "step": 28548 }, { "epoch": 0.877000583663564, "grad_norm": 0.3773270547389984, "learning_rate": 1.1947170397493595e-05, "loss": 0.5879, "step": 28549 }, { "epoch": 0.8770313027985132, "grad_norm": 0.3797477185726166, "learning_rate": 1.194669636807404e-05, "loss": 0.5461, "step": 28550 }, { "epoch": 0.8770620219334624, "grad_norm": 0.4022761881351471, "learning_rate": 1.1946222334107843e-05, "loss": 0.5027, "step": 28551 }, { "epoch": 0.8770927410684115, "grad_norm": 0.38314321637153625, "learning_rate": 1.1945748295596104e-05, "loss": 0.5714, "step": 28552 }, { "epoch": 0.8771234602033606, "grad_norm": 0.3317645788192749, "learning_rate": 1.1945274252539939e-05, "loss": 0.5222, "step": 28553 }, { "epoch": 0.8771541793383099, "grad_norm": 0.37899336218833923, "learning_rate": 1.1944800204940446e-05, "loss": 0.5465, "step": 28554 }, { "epoch": 0.877184898473259, "grad_norm": 0.35870981216430664, "learning_rate": 1.1944326152798738e-05, "loss": 0.5966, "step": 28555 }, { "epoch": 0.8772156176082081, "grad_norm": 0.35689061880111694, "learning_rate": 1.194385209611592e-05, "loss": 0.5643, "step": 28556 }, { "epoch": 0.8772463367431573, "grad_norm": 0.381534218788147, "learning_rate": 1.19433780348931e-05, "loss": 0.5538, "step": 28557 }, { "epoch": 0.8772770558781064, "grad_norm": 0.4329849183559418, "learning_rate": 1.1942903969131383e-05, "loss": 0.5573, "step": 28558 }, { "epoch": 0.8773077750130557, "grad_norm": 0.3629775047302246, "learning_rate": 1.1942429898831882e-05, "loss": 0.5248, "step": 28559 }, { "epoch": 0.8773384941480048, "grad_norm": 0.3795928359031677, "learning_rate": 1.19419558239957e-05, "loss": 0.5388, "step": 28560 }, { "epoch": 0.8773692132829539, "grad_norm": 0.3473522663116455, "learning_rate": 1.194148174462394e-05, "loss": 0.5217, "step": 28561 }, { "epoch": 0.8773999324179032, "grad_norm": 0.3514872193336487, "learning_rate": 1.1941007660717716e-05, "loss": 0.5116, "step": 28562 }, { "epoch": 0.8774306515528523, "grad_norm": 0.37163352966308594, "learning_rate": 1.1940533572278133e-05, "loss": 0.5081, "step": 28563 }, { "epoch": 0.8774613706878014, "grad_norm": 0.3957999050617218, "learning_rate": 1.19400594793063e-05, "loss": 0.5194, "step": 28564 }, { "epoch": 0.8774920898227506, "grad_norm": 0.45667317509651184, "learning_rate": 1.193958538180332e-05, "loss": 0.5765, "step": 28565 }, { "epoch": 0.8775228089576997, "grad_norm": 0.31795796751976013, "learning_rate": 1.1939111279770306e-05, "loss": 0.498, "step": 28566 }, { "epoch": 0.8775535280926489, "grad_norm": 0.3534555733203888, "learning_rate": 1.193863717320836e-05, "loss": 0.5734, "step": 28567 }, { "epoch": 0.8775842472275981, "grad_norm": 0.35697636008262634, "learning_rate": 1.193816306211859e-05, "loss": 0.5093, "step": 28568 }, { "epoch": 0.8776149663625472, "grad_norm": 0.3742774724960327, "learning_rate": 1.1937688946502108e-05, "loss": 0.5307, "step": 28569 }, { "epoch": 0.8776456854974964, "grad_norm": 0.37554365396499634, "learning_rate": 1.1937214826360017e-05, "loss": 0.5607, "step": 28570 }, { "epoch": 0.8776764046324456, "grad_norm": 0.3330083191394806, "learning_rate": 1.1936740701693425e-05, "loss": 0.534, "step": 28571 }, { "epoch": 0.8777071237673947, "grad_norm": 0.4284761846065521, "learning_rate": 1.1936266572503441e-05, "loss": 0.4816, "step": 28572 }, { "epoch": 0.8777378429023439, "grad_norm": 0.3812798857688904, "learning_rate": 1.1935792438791171e-05, "loss": 0.599, "step": 28573 }, { "epoch": 0.877768562037293, "grad_norm": 0.3909289836883545, "learning_rate": 1.1935318300557722e-05, "loss": 0.5578, "step": 28574 }, { "epoch": 0.8777992811722422, "grad_norm": 0.38519397377967834, "learning_rate": 1.1934844157804202e-05, "loss": 0.608, "step": 28575 }, { "epoch": 0.8778300003071914, "grad_norm": 0.3625744879245758, "learning_rate": 1.1934370010531718e-05, "loss": 0.5256, "step": 28576 }, { "epoch": 0.8778607194421405, "grad_norm": 0.3755088448524475, "learning_rate": 1.1933895858741378e-05, "loss": 0.5265, "step": 28577 }, { "epoch": 0.8778914385770896, "grad_norm": 0.3729977011680603, "learning_rate": 1.1933421702434292e-05, "loss": 0.4795, "step": 28578 }, { "epoch": 0.8779221577120389, "grad_norm": 0.3635970652103424, "learning_rate": 1.193294754161156e-05, "loss": 0.522, "step": 28579 }, { "epoch": 0.877952876846988, "grad_norm": 0.44820940494537354, "learning_rate": 1.1932473376274294e-05, "loss": 0.5378, "step": 28580 }, { "epoch": 0.8779835959819372, "grad_norm": 0.36792418360710144, "learning_rate": 1.19319992064236e-05, "loss": 0.5052, "step": 28581 }, { "epoch": 0.8780143151168863, "grad_norm": 0.3944654166698456, "learning_rate": 1.1931525032060593e-05, "loss": 0.5779, "step": 28582 }, { "epoch": 0.8780450342518354, "grad_norm": 0.3996855616569519, "learning_rate": 1.1931050853186372e-05, "loss": 0.5606, "step": 28583 }, { "epoch": 0.8780757533867847, "grad_norm": 0.34706252813339233, "learning_rate": 1.1930576669802045e-05, "loss": 0.5052, "step": 28584 }, { "epoch": 0.8781064725217338, "grad_norm": 0.3386700749397278, "learning_rate": 1.1930102481908724e-05, "loss": 0.5663, "step": 28585 }, { "epoch": 0.8781371916566829, "grad_norm": 0.3904951214790344, "learning_rate": 1.1929628289507513e-05, "loss": 0.6165, "step": 28586 }, { "epoch": 0.8781679107916321, "grad_norm": 0.33483460545539856, "learning_rate": 1.1929154092599519e-05, "loss": 0.5028, "step": 28587 }, { "epoch": 0.8781986299265813, "grad_norm": 0.3866231441497803, "learning_rate": 1.1928679891185853e-05, "loss": 0.5031, "step": 28588 }, { "epoch": 0.8782293490615304, "grad_norm": 0.3592565059661865, "learning_rate": 1.1928205685267619e-05, "loss": 0.5163, "step": 28589 }, { "epoch": 0.8782600681964796, "grad_norm": 0.3535676896572113, "learning_rate": 1.1927731474845927e-05, "loss": 0.5618, "step": 28590 }, { "epoch": 0.8782907873314287, "grad_norm": 0.3651736378669739, "learning_rate": 1.1927257259921883e-05, "loss": 0.4933, "step": 28591 }, { "epoch": 0.8783215064663779, "grad_norm": 0.3568408489227295, "learning_rate": 1.1926783040496595e-05, "loss": 0.5976, "step": 28592 }, { "epoch": 0.8783522256013271, "grad_norm": 0.35250362753868103, "learning_rate": 1.192630881657117e-05, "loss": 0.526, "step": 28593 }, { "epoch": 0.8783829447362762, "grad_norm": 0.32746878266334534, "learning_rate": 1.1925834588146719e-05, "loss": 0.506, "step": 28594 }, { "epoch": 0.8784136638712254, "grad_norm": 0.37152794003486633, "learning_rate": 1.1925360355224342e-05, "loss": 0.5515, "step": 28595 }, { "epoch": 0.8784443830061746, "grad_norm": 0.370459645986557, "learning_rate": 1.1924886117805156e-05, "loss": 0.6041, "step": 28596 }, { "epoch": 0.8784751021411237, "grad_norm": 0.4606618285179138, "learning_rate": 1.1924411875890261e-05, "loss": 0.521, "step": 28597 }, { "epoch": 0.8785058212760729, "grad_norm": 0.37495335936546326, "learning_rate": 1.1923937629480769e-05, "loss": 0.5727, "step": 28598 }, { "epoch": 0.878536540411022, "grad_norm": 0.38151052594184875, "learning_rate": 1.1923463378577783e-05, "loss": 0.539, "step": 28599 }, { "epoch": 0.8785672595459711, "grad_norm": 0.35542571544647217, "learning_rate": 1.1922989123182418e-05, "loss": 0.5684, "step": 28600 }, { "epoch": 0.8785979786809204, "grad_norm": 0.42323094606399536, "learning_rate": 1.1922514863295778e-05, "loss": 0.5562, "step": 28601 }, { "epoch": 0.8786286978158695, "grad_norm": 0.32956644892692566, "learning_rate": 1.1922040598918967e-05, "loss": 0.4754, "step": 28602 }, { "epoch": 0.8786594169508186, "grad_norm": 0.38022640347480774, "learning_rate": 1.19215663300531e-05, "loss": 0.4774, "step": 28603 }, { "epoch": 0.8786901360857678, "grad_norm": 0.3761645555496216, "learning_rate": 1.1921092056699277e-05, "loss": 0.5376, "step": 28604 }, { "epoch": 0.878720855220717, "grad_norm": 0.34249207377433777, "learning_rate": 1.1920617778858612e-05, "loss": 0.47, "step": 28605 }, { "epoch": 0.8787515743556662, "grad_norm": 0.4622829854488373, "learning_rate": 1.1920143496532207e-05, "loss": 0.4745, "step": 28606 }, { "epoch": 0.8787822934906153, "grad_norm": 0.3649110496044159, "learning_rate": 1.1919669209721176e-05, "loss": 0.5858, "step": 28607 }, { "epoch": 0.8788130126255644, "grad_norm": 0.3742687404155731, "learning_rate": 1.191919491842662e-05, "loss": 0.5207, "step": 28608 }, { "epoch": 0.8788437317605137, "grad_norm": 0.3561270833015442, "learning_rate": 1.1918720622649653e-05, "loss": 0.6356, "step": 28609 }, { "epoch": 0.8788744508954628, "grad_norm": 0.3974371552467346, "learning_rate": 1.191824632239138e-05, "loss": 0.4933, "step": 28610 }, { "epoch": 0.8789051700304119, "grad_norm": 0.37987181544303894, "learning_rate": 1.1917772017652908e-05, "loss": 0.5556, "step": 28611 }, { "epoch": 0.8789358891653611, "grad_norm": 0.32581794261932373, "learning_rate": 1.1917297708435347e-05, "loss": 0.5206, "step": 28612 }, { "epoch": 0.8789666083003103, "grad_norm": 0.3868493437767029, "learning_rate": 1.19168233947398e-05, "loss": 0.5035, "step": 28613 }, { "epoch": 0.8789973274352594, "grad_norm": 0.4203509986400604, "learning_rate": 1.1916349076567381e-05, "loss": 0.4833, "step": 28614 }, { "epoch": 0.8790280465702086, "grad_norm": 0.5030935406684875, "learning_rate": 1.1915874753919194e-05, "loss": 0.5325, "step": 28615 }, { "epoch": 0.8790587657051577, "grad_norm": 0.36044424772262573, "learning_rate": 1.1915400426796349e-05, "loss": 0.5424, "step": 28616 }, { "epoch": 0.8790894848401068, "grad_norm": 0.3541775345802307, "learning_rate": 1.1914926095199945e-05, "loss": 0.5101, "step": 28617 }, { "epoch": 0.8791202039750561, "grad_norm": 0.34893181920051575, "learning_rate": 1.1914451759131107e-05, "loss": 0.5187, "step": 28618 }, { "epoch": 0.8791509231100052, "grad_norm": 0.3500135838985443, "learning_rate": 1.191397741859093e-05, "loss": 0.5539, "step": 28619 }, { "epoch": 0.8791816422449544, "grad_norm": 0.34665754437446594, "learning_rate": 1.1913503073580524e-05, "loss": 0.5584, "step": 28620 }, { "epoch": 0.8792123613799036, "grad_norm": 0.39776986837387085, "learning_rate": 1.1913028724100998e-05, "loss": 0.5757, "step": 28621 }, { "epoch": 0.8792430805148527, "grad_norm": 0.3833310008049011, "learning_rate": 1.191255437015346e-05, "loss": 0.5239, "step": 28622 }, { "epoch": 0.8792737996498019, "grad_norm": 0.3801657259464264, "learning_rate": 1.191208001173902e-05, "loss": 0.5395, "step": 28623 }, { "epoch": 0.879304518784751, "grad_norm": 0.4133453667163849, "learning_rate": 1.191160564885878e-05, "loss": 0.6014, "step": 28624 }, { "epoch": 0.8793352379197001, "grad_norm": 0.34215885400772095, "learning_rate": 1.1911131281513855e-05, "loss": 0.4963, "step": 28625 }, { "epoch": 0.8793659570546494, "grad_norm": 0.3751656115055084, "learning_rate": 1.1910656909705348e-05, "loss": 0.574, "step": 28626 }, { "epoch": 0.8793966761895985, "grad_norm": 0.4515533745288849, "learning_rate": 1.1910182533434366e-05, "loss": 0.5333, "step": 28627 }, { "epoch": 0.8794273953245476, "grad_norm": 0.3326597511768341, "learning_rate": 1.1909708152702021e-05, "loss": 0.4882, "step": 28628 }, { "epoch": 0.8794581144594968, "grad_norm": 0.38516512513160706, "learning_rate": 1.1909233767509421e-05, "loss": 0.5114, "step": 28629 }, { "epoch": 0.879488833594446, "grad_norm": 0.3187645673751831, "learning_rate": 1.190875937785767e-05, "loss": 0.4843, "step": 28630 }, { "epoch": 0.8795195527293952, "grad_norm": 0.4774008095264435, "learning_rate": 1.190828498374788e-05, "loss": 0.5731, "step": 28631 }, { "epoch": 0.8795502718643443, "grad_norm": 0.35603654384613037, "learning_rate": 1.1907810585181157e-05, "loss": 0.5551, "step": 28632 }, { "epoch": 0.8795809909992934, "grad_norm": 0.3325956165790558, "learning_rate": 1.1907336182158607e-05, "loss": 0.5545, "step": 28633 }, { "epoch": 0.8796117101342427, "grad_norm": 0.41112515330314636, "learning_rate": 1.1906861774681342e-05, "loss": 0.5704, "step": 28634 }, { "epoch": 0.8796424292691918, "grad_norm": 0.33738914132118225, "learning_rate": 1.1906387362750467e-05, "loss": 0.434, "step": 28635 }, { "epoch": 0.8796731484041409, "grad_norm": 0.37980222702026367, "learning_rate": 1.1905912946367088e-05, "loss": 0.5184, "step": 28636 }, { "epoch": 0.8797038675390901, "grad_norm": 0.36841580271720886, "learning_rate": 1.1905438525532322e-05, "loss": 0.5335, "step": 28637 }, { "epoch": 0.8797345866740393, "grad_norm": 0.34749457240104675, "learning_rate": 1.190496410024727e-05, "loss": 0.4204, "step": 28638 }, { "epoch": 0.8797653058089884, "grad_norm": 0.367552250623703, "learning_rate": 1.190448967051304e-05, "loss": 0.5153, "step": 28639 }, { "epoch": 0.8797960249439376, "grad_norm": 0.4074769616127014, "learning_rate": 1.1904015236330741e-05, "loss": 0.5809, "step": 28640 }, { "epoch": 0.8798267440788867, "grad_norm": 0.35253727436065674, "learning_rate": 1.1903540797701482e-05, "loss": 0.5552, "step": 28641 }, { "epoch": 0.8798574632138358, "grad_norm": 0.39211541414260864, "learning_rate": 1.190306635462637e-05, "loss": 0.5388, "step": 28642 }, { "epoch": 0.8798881823487851, "grad_norm": 0.34538722038269043, "learning_rate": 1.1902591907106515e-05, "loss": 0.5566, "step": 28643 }, { "epoch": 0.8799189014837342, "grad_norm": 0.39662429690361023, "learning_rate": 1.1902117455143023e-05, "loss": 0.5029, "step": 28644 }, { "epoch": 0.8799496206186834, "grad_norm": 0.3844943940639496, "learning_rate": 1.1901642998737003e-05, "loss": 0.5498, "step": 28645 }, { "epoch": 0.8799803397536325, "grad_norm": 0.3470093309879303, "learning_rate": 1.1901168537889565e-05, "loss": 0.5363, "step": 28646 }, { "epoch": 0.8800110588885817, "grad_norm": 0.5061672925949097, "learning_rate": 1.1900694072601813e-05, "loss": 0.6027, "step": 28647 }, { "epoch": 0.8800417780235309, "grad_norm": 0.39393261075019836, "learning_rate": 1.1900219602874859e-05, "loss": 0.5247, "step": 28648 }, { "epoch": 0.88007249715848, "grad_norm": 0.3467022180557251, "learning_rate": 1.1899745128709806e-05, "loss": 0.4802, "step": 28649 }, { "epoch": 0.8801032162934291, "grad_norm": 0.32742562890052795, "learning_rate": 1.1899270650107768e-05, "loss": 0.4966, "step": 28650 }, { "epoch": 0.8801339354283784, "grad_norm": 0.39101552963256836, "learning_rate": 1.1898796167069849e-05, "loss": 0.5488, "step": 28651 }, { "epoch": 0.8801646545633275, "grad_norm": 0.3973555266857147, "learning_rate": 1.189832167959716e-05, "loss": 0.6389, "step": 28652 }, { "epoch": 0.8801953736982766, "grad_norm": 0.36989277601242065, "learning_rate": 1.1897847187690807e-05, "loss": 0.5872, "step": 28653 }, { "epoch": 0.8802260928332258, "grad_norm": 0.3676644265651703, "learning_rate": 1.1897372691351899e-05, "loss": 0.5593, "step": 28654 }, { "epoch": 0.880256811968175, "grad_norm": 0.3642939329147339, "learning_rate": 1.1896898190581547e-05, "loss": 0.5475, "step": 28655 }, { "epoch": 0.8802875311031242, "grad_norm": 0.4619405269622803, "learning_rate": 1.1896423685380856e-05, "loss": 0.5512, "step": 28656 }, { "epoch": 0.8803182502380733, "grad_norm": 0.3612806499004364, "learning_rate": 1.1895949175750934e-05, "loss": 0.5785, "step": 28657 }, { "epoch": 0.8803489693730224, "grad_norm": 0.3421967625617981, "learning_rate": 1.1895474661692893e-05, "loss": 0.5073, "step": 28658 }, { "epoch": 0.8803796885079717, "grad_norm": 0.3827309310436249, "learning_rate": 1.1895000143207835e-05, "loss": 0.5513, "step": 28659 }, { "epoch": 0.8804104076429208, "grad_norm": 0.43373745679855347, "learning_rate": 1.1894525620296874e-05, "loss": 0.5464, "step": 28660 }, { "epoch": 0.8804411267778699, "grad_norm": 0.4101219177246094, "learning_rate": 1.1894051092961117e-05, "loss": 0.5772, "step": 28661 }, { "epoch": 0.8804718459128191, "grad_norm": 0.36512136459350586, "learning_rate": 1.1893576561201667e-05, "loss": 0.5333, "step": 28662 }, { "epoch": 0.8805025650477682, "grad_norm": 0.4511406719684601, "learning_rate": 1.189310202501964e-05, "loss": 0.5761, "step": 28663 }, { "epoch": 0.8805332841827174, "grad_norm": 0.36611056327819824, "learning_rate": 1.1892627484416144e-05, "loss": 0.5972, "step": 28664 }, { "epoch": 0.8805640033176666, "grad_norm": 0.36712414026260376, "learning_rate": 1.1892152939392278e-05, "loss": 0.488, "step": 28665 }, { "epoch": 0.8805947224526157, "grad_norm": 0.3500593602657318, "learning_rate": 1.1891678389949163e-05, "loss": 0.5469, "step": 28666 }, { "epoch": 0.8806254415875648, "grad_norm": 0.4563599228858948, "learning_rate": 1.1891203836087895e-05, "loss": 0.6111, "step": 28667 }, { "epoch": 0.8806561607225141, "grad_norm": 0.3901461958885193, "learning_rate": 1.1890729277809592e-05, "loss": 0.554, "step": 28668 }, { "epoch": 0.8806868798574632, "grad_norm": 0.3650174140930176, "learning_rate": 1.1890254715115356e-05, "loss": 0.4978, "step": 28669 }, { "epoch": 0.8807175989924124, "grad_norm": 0.464628666639328, "learning_rate": 1.1889780148006303e-05, "loss": 0.5626, "step": 28670 }, { "epoch": 0.8807483181273615, "grad_norm": 0.3557138442993164, "learning_rate": 1.188930557648353e-05, "loss": 0.5775, "step": 28671 }, { "epoch": 0.8807790372623107, "grad_norm": 0.38642969727516174, "learning_rate": 1.1888831000548156e-05, "loss": 0.4713, "step": 28672 }, { "epoch": 0.8808097563972599, "grad_norm": 0.3766513168811798, "learning_rate": 1.1888356420201284e-05, "loss": 0.48, "step": 28673 }, { "epoch": 0.880840475532209, "grad_norm": 0.3663230240345001, "learning_rate": 1.1887881835444024e-05, "loss": 0.4769, "step": 28674 }, { "epoch": 0.8808711946671581, "grad_norm": 0.35603582859039307, "learning_rate": 1.1887407246277486e-05, "loss": 0.5024, "step": 28675 }, { "epoch": 0.8809019138021074, "grad_norm": 0.34346920251846313, "learning_rate": 1.1886932652702771e-05, "loss": 0.579, "step": 28676 }, { "epoch": 0.8809326329370565, "grad_norm": 0.47976091504096985, "learning_rate": 1.1886458054720999e-05, "loss": 0.5248, "step": 28677 }, { "epoch": 0.8809633520720056, "grad_norm": 0.34490567445755005, "learning_rate": 1.188598345233327e-05, "loss": 0.4772, "step": 28678 }, { "epoch": 0.8809940712069548, "grad_norm": 0.3299933075904846, "learning_rate": 1.1885508845540696e-05, "loss": 0.5792, "step": 28679 }, { "epoch": 0.881024790341904, "grad_norm": 0.40825533866882324, "learning_rate": 1.1885034234344384e-05, "loss": 0.5357, "step": 28680 }, { "epoch": 0.8810555094768532, "grad_norm": 0.3503306210041046, "learning_rate": 1.1884559618745441e-05, "loss": 0.5623, "step": 28681 }, { "epoch": 0.8810862286118023, "grad_norm": 0.38100486993789673, "learning_rate": 1.1884084998744983e-05, "loss": 0.6028, "step": 28682 }, { "epoch": 0.8811169477467514, "grad_norm": 0.3583396077156067, "learning_rate": 1.1883610374344105e-05, "loss": 0.5284, "step": 28683 }, { "epoch": 0.8811476668817007, "grad_norm": 0.3678629994392395, "learning_rate": 1.188313574554393e-05, "loss": 0.541, "step": 28684 }, { "epoch": 0.8811783860166498, "grad_norm": 0.3996458351612091, "learning_rate": 1.1882661112345555e-05, "loss": 0.4984, "step": 28685 }, { "epoch": 0.8812091051515989, "grad_norm": 0.32259243726730347, "learning_rate": 1.1882186474750098e-05, "loss": 0.5577, "step": 28686 }, { "epoch": 0.8812398242865481, "grad_norm": 0.43160003423690796, "learning_rate": 1.188171183275866e-05, "loss": 0.5339, "step": 28687 }, { "epoch": 0.8812705434214972, "grad_norm": 0.37618017196655273, "learning_rate": 1.1881237186372355e-05, "loss": 0.5912, "step": 28688 }, { "epoch": 0.8813012625564464, "grad_norm": 0.3858717679977417, "learning_rate": 1.1880762535592284e-05, "loss": 0.527, "step": 28689 }, { "epoch": 0.8813319816913956, "grad_norm": 0.3916114866733551, "learning_rate": 1.1880287880419564e-05, "loss": 0.5318, "step": 28690 }, { "epoch": 0.8813627008263447, "grad_norm": 0.3604762554168701, "learning_rate": 1.1879813220855303e-05, "loss": 0.5694, "step": 28691 }, { "epoch": 0.881393419961294, "grad_norm": 0.3321112394332886, "learning_rate": 1.1879338556900604e-05, "loss": 0.5324, "step": 28692 }, { "epoch": 0.8814241390962431, "grad_norm": 0.36953675746917725, "learning_rate": 1.1878863888556577e-05, "loss": 0.5384, "step": 28693 }, { "epoch": 0.8814548582311922, "grad_norm": 0.367255836725235, "learning_rate": 1.1878389215824335e-05, "loss": 0.5613, "step": 28694 }, { "epoch": 0.8814855773661414, "grad_norm": 0.39371371269226074, "learning_rate": 1.1877914538704984e-05, "loss": 0.5013, "step": 28695 }, { "epoch": 0.8815162965010905, "grad_norm": 0.3365134596824646, "learning_rate": 1.187743985719963e-05, "loss": 0.5526, "step": 28696 }, { "epoch": 0.8815470156360397, "grad_norm": 0.3779672682285309, "learning_rate": 1.1876965171309387e-05, "loss": 0.4987, "step": 28697 }, { "epoch": 0.8815777347709889, "grad_norm": 0.3641018867492676, "learning_rate": 1.187649048103536e-05, "loss": 0.5794, "step": 28698 }, { "epoch": 0.881608453905938, "grad_norm": 0.3445289134979248, "learning_rate": 1.1876015786378657e-05, "loss": 0.5815, "step": 28699 }, { "epoch": 0.8816391730408871, "grad_norm": 0.3262920677661896, "learning_rate": 1.187554108734039e-05, "loss": 0.4479, "step": 28700 }, { "epoch": 0.8816698921758364, "grad_norm": 0.33516809344291687, "learning_rate": 1.1875066383921662e-05, "loss": 0.519, "step": 28701 }, { "epoch": 0.8817006113107855, "grad_norm": 0.38956326246261597, "learning_rate": 1.1874591676123593e-05, "loss": 0.5486, "step": 28702 }, { "epoch": 0.8817313304457346, "grad_norm": 0.3897840678691864, "learning_rate": 1.1874116963947278e-05, "loss": 0.5348, "step": 28703 }, { "epoch": 0.8817620495806838, "grad_norm": 0.401876837015152, "learning_rate": 1.1873642247393834e-05, "loss": 0.5682, "step": 28704 }, { "epoch": 0.881792768715633, "grad_norm": 0.33791154623031616, "learning_rate": 1.1873167526464365e-05, "loss": 0.5273, "step": 28705 }, { "epoch": 0.8818234878505822, "grad_norm": 0.3597770035266876, "learning_rate": 1.1872692801159986e-05, "loss": 0.5851, "step": 28706 }, { "epoch": 0.8818542069855313, "grad_norm": 0.3895593583583832, "learning_rate": 1.18722180714818e-05, "loss": 0.5382, "step": 28707 }, { "epoch": 0.8818849261204804, "grad_norm": 0.3710252344608307, "learning_rate": 1.1871743337430917e-05, "loss": 0.4693, "step": 28708 }, { "epoch": 0.8819156452554296, "grad_norm": 0.3702550530433655, "learning_rate": 1.187126859900845e-05, "loss": 0.5254, "step": 28709 }, { "epoch": 0.8819463643903788, "grad_norm": 0.38882213830947876, "learning_rate": 1.1870793856215502e-05, "loss": 0.6074, "step": 28710 }, { "epoch": 0.8819770835253279, "grad_norm": 0.3779081702232361, "learning_rate": 1.1870319109053185e-05, "loss": 0.5812, "step": 28711 }, { "epoch": 0.8820078026602771, "grad_norm": 0.3579406440258026, "learning_rate": 1.1869844357522607e-05, "loss": 0.5453, "step": 28712 }, { "epoch": 0.8820385217952262, "grad_norm": 0.36059561371803284, "learning_rate": 1.1869369601624875e-05, "loss": 0.5311, "step": 28713 }, { "epoch": 0.8820692409301754, "grad_norm": 0.3890632390975952, "learning_rate": 1.1868894841361102e-05, "loss": 0.6484, "step": 28714 }, { "epoch": 0.8820999600651246, "grad_norm": 0.39460521936416626, "learning_rate": 1.1868420076732396e-05, "loss": 0.5161, "step": 28715 }, { "epoch": 0.8821306792000737, "grad_norm": 0.8755515813827515, "learning_rate": 1.1867945307739865e-05, "loss": 0.5274, "step": 28716 }, { "epoch": 0.8821613983350229, "grad_norm": 0.4122254550457001, "learning_rate": 1.1867470534384613e-05, "loss": 0.5536, "step": 28717 }, { "epoch": 0.8821921174699721, "grad_norm": 0.4169957935810089, "learning_rate": 1.1866995756667757e-05, "loss": 0.5137, "step": 28718 }, { "epoch": 0.8822228366049212, "grad_norm": 0.3940141499042511, "learning_rate": 1.1866520974590399e-05, "loss": 0.5857, "step": 28719 }, { "epoch": 0.8822535557398704, "grad_norm": 0.3529159724712372, "learning_rate": 1.1866046188153653e-05, "loss": 0.6296, "step": 28720 }, { "epoch": 0.8822842748748195, "grad_norm": 0.3536147177219391, "learning_rate": 1.1865571397358623e-05, "loss": 0.5842, "step": 28721 }, { "epoch": 0.8823149940097686, "grad_norm": 0.3866824805736542, "learning_rate": 1.1865096602206426e-05, "loss": 0.5433, "step": 28722 }, { "epoch": 0.8823457131447179, "grad_norm": 0.40439629554748535, "learning_rate": 1.186462180269816e-05, "loss": 0.5538, "step": 28723 }, { "epoch": 0.882376432279667, "grad_norm": 0.33019500970840454, "learning_rate": 1.1864146998834944e-05, "loss": 0.5544, "step": 28724 }, { "epoch": 0.8824071514146161, "grad_norm": 0.4458649754524231, "learning_rate": 1.1863672190617878e-05, "loss": 0.5669, "step": 28725 }, { "epoch": 0.8824378705495654, "grad_norm": 0.3443959653377533, "learning_rate": 1.1863197378048077e-05, "loss": 0.5731, "step": 28726 }, { "epoch": 0.8824685896845145, "grad_norm": 0.36838939785957336, "learning_rate": 1.1862722561126652e-05, "loss": 0.504, "step": 28727 }, { "epoch": 0.8824993088194636, "grad_norm": 0.3484560549259186, "learning_rate": 1.1862247739854704e-05, "loss": 0.5707, "step": 28728 }, { "epoch": 0.8825300279544128, "grad_norm": 0.36207062005996704, "learning_rate": 1.1861772914233349e-05, "loss": 0.4897, "step": 28729 }, { "epoch": 0.8825607470893619, "grad_norm": 0.362156480550766, "learning_rate": 1.186129808426369e-05, "loss": 0.5217, "step": 28730 }, { "epoch": 0.8825914662243112, "grad_norm": 0.323417991399765, "learning_rate": 1.1860823249946842e-05, "loss": 0.5372, "step": 28731 }, { "epoch": 0.8826221853592603, "grad_norm": 0.36482182145118713, "learning_rate": 1.186034841128391e-05, "loss": 0.5452, "step": 28732 }, { "epoch": 0.8826529044942094, "grad_norm": 0.3593558371067047, "learning_rate": 1.1859873568276008e-05, "loss": 0.4984, "step": 28733 }, { "epoch": 0.8826836236291586, "grad_norm": 0.34719082713127136, "learning_rate": 1.185939872092424e-05, "loss": 0.5455, "step": 28734 }, { "epoch": 0.8827143427641078, "grad_norm": 0.39127615094184875, "learning_rate": 1.1858923869229712e-05, "loss": 0.61, "step": 28735 }, { "epoch": 0.8827450618990569, "grad_norm": 0.35191357135772705, "learning_rate": 1.1858449013193543e-05, "loss": 0.5888, "step": 28736 }, { "epoch": 0.8827757810340061, "grad_norm": 0.33052492141723633, "learning_rate": 1.1857974152816834e-05, "loss": 0.511, "step": 28737 }, { "epoch": 0.8828065001689552, "grad_norm": 0.40897446870803833, "learning_rate": 1.1857499288100698e-05, "loss": 0.5908, "step": 28738 }, { "epoch": 0.8828372193039044, "grad_norm": 0.32562682032585144, "learning_rate": 1.185702441904624e-05, "loss": 0.4617, "step": 28739 }, { "epoch": 0.8828679384388536, "grad_norm": 0.4204212725162506, "learning_rate": 1.1856549545654574e-05, "loss": 0.5623, "step": 28740 }, { "epoch": 0.8828986575738027, "grad_norm": 0.3367350101470947, "learning_rate": 1.1856074667926804e-05, "loss": 0.5117, "step": 28741 }, { "epoch": 0.8829293767087519, "grad_norm": 0.3556893765926361, "learning_rate": 1.1855599785864046e-05, "loss": 0.5141, "step": 28742 }, { "epoch": 0.882960095843701, "grad_norm": 0.33450549840927124, "learning_rate": 1.1855124899467402e-05, "loss": 0.5119, "step": 28743 }, { "epoch": 0.8829908149786502, "grad_norm": 0.373088002204895, "learning_rate": 1.1854650008737987e-05, "loss": 0.5444, "step": 28744 }, { "epoch": 0.8830215341135994, "grad_norm": 0.3712841868400574, "learning_rate": 1.1854175113676904e-05, "loss": 0.4562, "step": 28745 }, { "epoch": 0.8830522532485485, "grad_norm": 0.46100085973739624, "learning_rate": 1.1853700214285266e-05, "loss": 0.5369, "step": 28746 }, { "epoch": 0.8830829723834976, "grad_norm": 0.3749023973941803, "learning_rate": 1.1853225310564185e-05, "loss": 0.5591, "step": 28747 }, { "epoch": 0.8831136915184469, "grad_norm": 0.3326268196105957, "learning_rate": 1.1852750402514764e-05, "loss": 0.4532, "step": 28748 }, { "epoch": 0.883144410653396, "grad_norm": 0.45694220066070557, "learning_rate": 1.1852275490138117e-05, "loss": 0.4776, "step": 28749 }, { "epoch": 0.8831751297883451, "grad_norm": 0.7791348099708557, "learning_rate": 1.1851800573435348e-05, "loss": 0.6184, "step": 28750 }, { "epoch": 0.8832058489232943, "grad_norm": 0.38931572437286377, "learning_rate": 1.1851325652407573e-05, "loss": 0.6274, "step": 28751 }, { "epoch": 0.8832365680582435, "grad_norm": 0.3477408289909363, "learning_rate": 1.1850850727055897e-05, "loss": 0.5642, "step": 28752 }, { "epoch": 0.8832672871931926, "grad_norm": 0.40969350934028625, "learning_rate": 1.1850375797381429e-05, "loss": 0.5655, "step": 28753 }, { "epoch": 0.8832980063281418, "grad_norm": 0.4214032292366028, "learning_rate": 1.1849900863385279e-05, "loss": 0.5081, "step": 28754 }, { "epoch": 0.8833287254630909, "grad_norm": 0.3468281924724579, "learning_rate": 1.1849425925068557e-05, "loss": 0.6204, "step": 28755 }, { "epoch": 0.8833594445980402, "grad_norm": 0.38827022910118103, "learning_rate": 1.1848950982432373e-05, "loss": 0.5467, "step": 28756 }, { "epoch": 0.8833901637329893, "grad_norm": 0.36806589365005493, "learning_rate": 1.1848476035477829e-05, "loss": 0.5659, "step": 28757 }, { "epoch": 0.8834208828679384, "grad_norm": 0.39305374026298523, "learning_rate": 1.1848001084206046e-05, "loss": 0.5919, "step": 28758 }, { "epoch": 0.8834516020028876, "grad_norm": 0.3684200942516327, "learning_rate": 1.1847526128618125e-05, "loss": 0.4748, "step": 28759 }, { "epoch": 0.8834823211378368, "grad_norm": 0.3696395754814148, "learning_rate": 1.184705116871518e-05, "loss": 0.5955, "step": 28760 }, { "epoch": 0.8835130402727859, "grad_norm": 0.413555771112442, "learning_rate": 1.1846576204498314e-05, "loss": 0.471, "step": 28761 }, { "epoch": 0.8835437594077351, "grad_norm": 0.37124308943748474, "learning_rate": 1.1846101235968643e-05, "loss": 0.5942, "step": 28762 }, { "epoch": 0.8835744785426842, "grad_norm": 0.36381593346595764, "learning_rate": 1.1845626263127272e-05, "loss": 0.5295, "step": 28763 }, { "epoch": 0.8836051976776333, "grad_norm": 0.3212294578552246, "learning_rate": 1.1845151285975313e-05, "loss": 0.55, "step": 28764 }, { "epoch": 0.8836359168125826, "grad_norm": 0.35492080450057983, "learning_rate": 1.1844676304513875e-05, "loss": 0.544, "step": 28765 }, { "epoch": 0.8836666359475317, "grad_norm": 0.39948591589927673, "learning_rate": 1.1844201318744067e-05, "loss": 0.6257, "step": 28766 }, { "epoch": 0.8836973550824809, "grad_norm": 0.36700770258903503, "learning_rate": 1.1843726328666996e-05, "loss": 0.5929, "step": 28767 }, { "epoch": 0.88372807421743, "grad_norm": 0.3481409251689911, "learning_rate": 1.1843251334283777e-05, "loss": 0.5464, "step": 28768 }, { "epoch": 0.8837587933523792, "grad_norm": 0.39146941900253296, "learning_rate": 1.1842776335595511e-05, "loss": 0.5628, "step": 28769 }, { "epoch": 0.8837895124873284, "grad_norm": 0.3741774559020996, "learning_rate": 1.1842301332603317e-05, "loss": 0.5903, "step": 28770 }, { "epoch": 0.8838202316222775, "grad_norm": 0.3912888467311859, "learning_rate": 1.1841826325308294e-05, "loss": 0.5943, "step": 28771 }, { "epoch": 0.8838509507572266, "grad_norm": 0.3673592507839203, "learning_rate": 1.1841351313711562e-05, "loss": 0.551, "step": 28772 }, { "epoch": 0.8838816698921759, "grad_norm": 0.39287981390953064, "learning_rate": 1.1840876297814221e-05, "loss": 0.4959, "step": 28773 }, { "epoch": 0.883912389027125, "grad_norm": 0.401985228061676, "learning_rate": 1.184040127761739e-05, "loss": 0.529, "step": 28774 }, { "epoch": 0.8839431081620741, "grad_norm": 0.39204224944114685, "learning_rate": 1.1839926253122169e-05, "loss": 0.5354, "step": 28775 }, { "epoch": 0.8839738272970233, "grad_norm": 0.5625383257865906, "learning_rate": 1.1839451224329675e-05, "loss": 0.673, "step": 28776 }, { "epoch": 0.8840045464319725, "grad_norm": 0.4214898347854614, "learning_rate": 1.1838976191241009e-05, "loss": 0.5701, "step": 28777 }, { "epoch": 0.8840352655669217, "grad_norm": 0.3759685456752777, "learning_rate": 1.1838501153857292e-05, "loss": 0.5224, "step": 28778 }, { "epoch": 0.8840659847018708, "grad_norm": 0.3629912734031677, "learning_rate": 1.1838026112179626e-05, "loss": 0.5447, "step": 28779 }, { "epoch": 0.8840967038368199, "grad_norm": 0.38633570075035095, "learning_rate": 1.1837551066209119e-05, "loss": 0.5548, "step": 28780 }, { "epoch": 0.8841274229717692, "grad_norm": 0.36346718668937683, "learning_rate": 1.1837076015946883e-05, "loss": 0.596, "step": 28781 }, { "epoch": 0.8841581421067183, "grad_norm": 0.35284578800201416, "learning_rate": 1.1836600961394028e-05, "loss": 0.5783, "step": 28782 }, { "epoch": 0.8841888612416674, "grad_norm": 0.38617607951164246, "learning_rate": 1.1836125902551663e-05, "loss": 0.4886, "step": 28783 }, { "epoch": 0.8842195803766166, "grad_norm": 0.38138607144355774, "learning_rate": 1.1835650839420898e-05, "loss": 0.5182, "step": 28784 }, { "epoch": 0.8842502995115658, "grad_norm": 0.3860319256782532, "learning_rate": 1.1835175772002845e-05, "loss": 0.5026, "step": 28785 }, { "epoch": 0.8842810186465149, "grad_norm": 0.391011118888855, "learning_rate": 1.183470070029861e-05, "loss": 0.6034, "step": 28786 }, { "epoch": 0.8843117377814641, "grad_norm": 0.33110031485557556, "learning_rate": 1.18342256243093e-05, "loss": 0.5492, "step": 28787 }, { "epoch": 0.8843424569164132, "grad_norm": 0.3818282186985016, "learning_rate": 1.1833750544036031e-05, "loss": 0.6004, "step": 28788 }, { "epoch": 0.8843731760513623, "grad_norm": 0.3694525957107544, "learning_rate": 1.1833275459479908e-05, "loss": 0.5803, "step": 28789 }, { "epoch": 0.8844038951863116, "grad_norm": 0.37546443939208984, "learning_rate": 1.1832800370642044e-05, "loss": 0.6078, "step": 28790 }, { "epoch": 0.8844346143212607, "grad_norm": 0.3564591407775879, "learning_rate": 1.1832325277523544e-05, "loss": 0.5487, "step": 28791 }, { "epoch": 0.8844653334562099, "grad_norm": 0.3522203266620636, "learning_rate": 1.1831850180125523e-05, "loss": 0.5543, "step": 28792 }, { "epoch": 0.884496052591159, "grad_norm": 0.36349377036094666, "learning_rate": 1.1831375078449086e-05, "loss": 0.5465, "step": 28793 }, { "epoch": 0.8845267717261082, "grad_norm": 0.3836571276187897, "learning_rate": 1.1830899972495347e-05, "loss": 0.4777, "step": 28794 }, { "epoch": 0.8845574908610574, "grad_norm": 0.38532698154449463, "learning_rate": 1.183042486226541e-05, "loss": 0.5432, "step": 28795 }, { "epoch": 0.8845882099960065, "grad_norm": 0.3695721924304962, "learning_rate": 1.182994974776039e-05, "loss": 0.5144, "step": 28796 }, { "epoch": 0.8846189291309556, "grad_norm": 0.41050317883491516, "learning_rate": 1.1829474628981396e-05, "loss": 0.573, "step": 28797 }, { "epoch": 0.8846496482659049, "grad_norm": 0.3702645003795624, "learning_rate": 1.1828999505929532e-05, "loss": 0.5807, "step": 28798 }, { "epoch": 0.884680367400854, "grad_norm": 0.35607168078422546, "learning_rate": 1.1828524378605915e-05, "loss": 0.5477, "step": 28799 }, { "epoch": 0.8847110865358031, "grad_norm": 0.41972827911376953, "learning_rate": 1.1828049247011648e-05, "loss": 0.5455, "step": 28800 }, { "epoch": 0.8847418056707523, "grad_norm": 0.42780107259750366, "learning_rate": 1.182757411114785e-05, "loss": 0.5099, "step": 28801 }, { "epoch": 0.8847725248057015, "grad_norm": 0.3645953834056854, "learning_rate": 1.182709897101562e-05, "loss": 0.5381, "step": 28802 }, { "epoch": 0.8848032439406507, "grad_norm": 0.44355931878089905, "learning_rate": 1.1826623826616078e-05, "loss": 0.516, "step": 28803 }, { "epoch": 0.8848339630755998, "grad_norm": 0.33103883266448975, "learning_rate": 1.1826148677950324e-05, "loss": 0.5217, "step": 28804 }, { "epoch": 0.8848646822105489, "grad_norm": 0.3815517723560333, "learning_rate": 1.1825673525019475e-05, "loss": 0.6246, "step": 28805 }, { "epoch": 0.8848954013454982, "grad_norm": 0.34079280495643616, "learning_rate": 1.1825198367824634e-05, "loss": 0.4922, "step": 28806 }, { "epoch": 0.8849261204804473, "grad_norm": 0.436113178730011, "learning_rate": 1.182472320636692e-05, "loss": 0.5269, "step": 28807 }, { "epoch": 0.8849568396153964, "grad_norm": 0.42847123742103577, "learning_rate": 1.1824248040647435e-05, "loss": 0.526, "step": 28808 }, { "epoch": 0.8849875587503456, "grad_norm": 0.3636466860771179, "learning_rate": 1.182377287066729e-05, "loss": 0.6308, "step": 28809 }, { "epoch": 0.8850182778852947, "grad_norm": 0.7543382048606873, "learning_rate": 1.1823297696427598e-05, "loss": 0.5615, "step": 28810 }, { "epoch": 0.8850489970202439, "grad_norm": 0.4106084108352661, "learning_rate": 1.1822822517929464e-05, "loss": 0.6541, "step": 28811 }, { "epoch": 0.8850797161551931, "grad_norm": 0.38661712408065796, "learning_rate": 1.1822347335174004e-05, "loss": 0.5119, "step": 28812 }, { "epoch": 0.8851104352901422, "grad_norm": 0.35872945189476013, "learning_rate": 1.1821872148162323e-05, "loss": 0.5277, "step": 28813 }, { "epoch": 0.8851411544250913, "grad_norm": 0.4016355276107788, "learning_rate": 1.1821396956895534e-05, "loss": 0.596, "step": 28814 }, { "epoch": 0.8851718735600406, "grad_norm": 0.37546518445014954, "learning_rate": 1.1820921761374746e-05, "loss": 0.4886, "step": 28815 }, { "epoch": 0.8852025926949897, "grad_norm": 0.3714987635612488, "learning_rate": 1.1820446561601064e-05, "loss": 0.5466, "step": 28816 }, { "epoch": 0.8852333118299389, "grad_norm": 0.3532666265964508, "learning_rate": 1.1819971357575605e-05, "loss": 0.5692, "step": 28817 }, { "epoch": 0.885264030964888, "grad_norm": 0.3860808312892914, "learning_rate": 1.1819496149299472e-05, "loss": 0.5457, "step": 28818 }, { "epoch": 0.8852947500998372, "grad_norm": 0.750840961933136, "learning_rate": 1.1819020936773782e-05, "loss": 0.5153, "step": 28819 }, { "epoch": 0.8853254692347864, "grad_norm": 0.35707929730415344, "learning_rate": 1.1818545719999644e-05, "loss": 0.6053, "step": 28820 }, { "epoch": 0.8853561883697355, "grad_norm": 0.33397117257118225, "learning_rate": 1.181807049897816e-05, "loss": 0.5514, "step": 28821 }, { "epoch": 0.8853869075046846, "grad_norm": 0.3286038339138031, "learning_rate": 1.181759527371045e-05, "loss": 0.4947, "step": 28822 }, { "epoch": 0.8854176266396339, "grad_norm": 0.34672755002975464, "learning_rate": 1.1817120044197614e-05, "loss": 0.5543, "step": 28823 }, { "epoch": 0.885448345774583, "grad_norm": 0.40215668082237244, "learning_rate": 1.1816644810440772e-05, "loss": 0.5958, "step": 28824 }, { "epoch": 0.8854790649095321, "grad_norm": 0.37266805768013, "learning_rate": 1.1816169572441028e-05, "loss": 0.6229, "step": 28825 }, { "epoch": 0.8855097840444813, "grad_norm": 0.3461994528770447, "learning_rate": 1.1815694330199492e-05, "loss": 0.5167, "step": 28826 }, { "epoch": 0.8855405031794304, "grad_norm": 0.41044309735298157, "learning_rate": 1.1815219083717276e-05, "loss": 0.5545, "step": 28827 }, { "epoch": 0.8855712223143797, "grad_norm": 0.4606837332248688, "learning_rate": 1.1814743832995489e-05, "loss": 0.5116, "step": 28828 }, { "epoch": 0.8856019414493288, "grad_norm": 0.3953881859779358, "learning_rate": 1.181426857803524e-05, "loss": 0.5922, "step": 28829 }, { "epoch": 0.8856326605842779, "grad_norm": 0.37313157320022583, "learning_rate": 1.1813793318837641e-05, "loss": 0.5682, "step": 28830 }, { "epoch": 0.8856633797192272, "grad_norm": 0.4100241959095001, "learning_rate": 1.1813318055403802e-05, "loss": 0.5701, "step": 28831 }, { "epoch": 0.8856940988541763, "grad_norm": 0.3765318691730499, "learning_rate": 1.181284278773483e-05, "loss": 0.5837, "step": 28832 }, { "epoch": 0.8857248179891254, "grad_norm": 0.34791240096092224, "learning_rate": 1.181236751583184e-05, "loss": 0.5616, "step": 28833 }, { "epoch": 0.8857555371240746, "grad_norm": 0.3547991216182709, "learning_rate": 1.1811892239695934e-05, "loss": 0.5498, "step": 28834 }, { "epoch": 0.8857862562590237, "grad_norm": 0.3860110938549042, "learning_rate": 1.1811416959328232e-05, "loss": 0.5278, "step": 28835 }, { "epoch": 0.8858169753939729, "grad_norm": 0.3757220208644867, "learning_rate": 1.1810941674729835e-05, "loss": 0.5593, "step": 28836 }, { "epoch": 0.8858476945289221, "grad_norm": 0.3734588921070099, "learning_rate": 1.181046638590186e-05, "loss": 0.5274, "step": 28837 }, { "epoch": 0.8858784136638712, "grad_norm": 0.36524927616119385, "learning_rate": 1.1809991092845417e-05, "loss": 0.6405, "step": 28838 }, { "epoch": 0.8859091327988203, "grad_norm": 0.3494725227355957, "learning_rate": 1.1809515795561608e-05, "loss": 0.5825, "step": 28839 }, { "epoch": 0.8859398519337696, "grad_norm": 0.3887060880661011, "learning_rate": 1.1809040494051551e-05, "loss": 0.5155, "step": 28840 }, { "epoch": 0.8859705710687187, "grad_norm": 0.41668763756752014, "learning_rate": 1.1808565188316353e-05, "loss": 0.6821, "step": 28841 }, { "epoch": 0.8860012902036679, "grad_norm": 0.36746782064437866, "learning_rate": 1.1808089878357128e-05, "loss": 0.4987, "step": 28842 }, { "epoch": 0.886032009338617, "grad_norm": 0.35515421628952026, "learning_rate": 1.1807614564174977e-05, "loss": 0.5907, "step": 28843 }, { "epoch": 0.8860627284735662, "grad_norm": 0.45503389835357666, "learning_rate": 1.180713924577102e-05, "loss": 0.493, "step": 28844 }, { "epoch": 0.8860934476085154, "grad_norm": 0.43905001878738403, "learning_rate": 1.180666392314636e-05, "loss": 0.538, "step": 28845 }, { "epoch": 0.8861241667434645, "grad_norm": 0.36361634731292725, "learning_rate": 1.1806188596302116e-05, "loss": 0.5835, "step": 28846 }, { "epoch": 0.8861548858784136, "grad_norm": 0.39485684037208557, "learning_rate": 1.1805713265239384e-05, "loss": 0.6087, "step": 28847 }, { "epoch": 0.8861856050133629, "grad_norm": 0.3865734934806824, "learning_rate": 1.180523792995929e-05, "loss": 0.6201, "step": 28848 }, { "epoch": 0.886216324148312, "grad_norm": 0.3799424469470978, "learning_rate": 1.1804762590462934e-05, "loss": 0.5061, "step": 28849 }, { "epoch": 0.8862470432832611, "grad_norm": 0.34630098938941956, "learning_rate": 1.1804287246751429e-05, "loss": 0.5378, "step": 28850 }, { "epoch": 0.8862777624182103, "grad_norm": 0.38220685720443726, "learning_rate": 1.1803811898825885e-05, "loss": 0.5267, "step": 28851 }, { "epoch": 0.8863084815531594, "grad_norm": 0.38606616854667664, "learning_rate": 1.1803336546687412e-05, "loss": 0.5237, "step": 28852 }, { "epoch": 0.8863392006881087, "grad_norm": 0.3509537875652313, "learning_rate": 1.1802861190337121e-05, "loss": 0.5436, "step": 28853 }, { "epoch": 0.8863699198230578, "grad_norm": 0.3782220482826233, "learning_rate": 1.180238582977612e-05, "loss": 0.6328, "step": 28854 }, { "epoch": 0.8864006389580069, "grad_norm": 0.3503873646259308, "learning_rate": 1.1801910465005522e-05, "loss": 0.5109, "step": 28855 }, { "epoch": 0.8864313580929561, "grad_norm": 0.37204936146736145, "learning_rate": 1.1801435096026439e-05, "loss": 0.5252, "step": 28856 }, { "epoch": 0.8864620772279053, "grad_norm": 0.4351598024368286, "learning_rate": 1.1800959722839976e-05, "loss": 0.649, "step": 28857 }, { "epoch": 0.8864927963628544, "grad_norm": 0.346413254737854, "learning_rate": 1.1800484345447245e-05, "loss": 0.5295, "step": 28858 }, { "epoch": 0.8865235154978036, "grad_norm": 0.37163859605789185, "learning_rate": 1.1800008963849359e-05, "loss": 0.5704, "step": 28859 }, { "epoch": 0.8865542346327527, "grad_norm": 0.38946276903152466, "learning_rate": 1.1799533578047426e-05, "loss": 0.581, "step": 28860 }, { "epoch": 0.8865849537677019, "grad_norm": 0.38705602288246155, "learning_rate": 1.1799058188042556e-05, "loss": 0.6221, "step": 28861 }, { "epoch": 0.8866156729026511, "grad_norm": 0.35668689012527466, "learning_rate": 1.179858279383586e-05, "loss": 0.5479, "step": 28862 }, { "epoch": 0.8866463920376002, "grad_norm": 0.38796642422676086, "learning_rate": 1.1798107395428448e-05, "loss": 0.5316, "step": 28863 }, { "epoch": 0.8866771111725493, "grad_norm": 0.3858502507209778, "learning_rate": 1.1797631992821433e-05, "loss": 0.5441, "step": 28864 }, { "epoch": 0.8867078303074986, "grad_norm": 0.32650160789489746, "learning_rate": 1.1797156586015918e-05, "loss": 0.5813, "step": 28865 }, { "epoch": 0.8867385494424477, "grad_norm": 0.39082103967666626, "learning_rate": 1.1796681175013022e-05, "loss": 0.4115, "step": 28866 }, { "epoch": 0.8867692685773969, "grad_norm": 0.4091886579990387, "learning_rate": 1.1796205759813852e-05, "loss": 0.5397, "step": 28867 }, { "epoch": 0.886799987712346, "grad_norm": 0.35487100481987, "learning_rate": 1.1795730340419518e-05, "loss": 0.4984, "step": 28868 }, { "epoch": 0.8868307068472951, "grad_norm": 0.37216314673423767, "learning_rate": 1.179525491683113e-05, "loss": 0.5791, "step": 28869 }, { "epoch": 0.8868614259822444, "grad_norm": 0.37191838026046753, "learning_rate": 1.1794779489049797e-05, "loss": 0.5489, "step": 28870 }, { "epoch": 0.8868921451171935, "grad_norm": 0.4429958462715149, "learning_rate": 1.1794304057076633e-05, "loss": 0.6287, "step": 28871 }, { "epoch": 0.8869228642521426, "grad_norm": 0.40547290444374084, "learning_rate": 1.1793828620912744e-05, "loss": 0.5487, "step": 28872 }, { "epoch": 0.8869535833870918, "grad_norm": 0.35442492365837097, "learning_rate": 1.1793353180559245e-05, "loss": 0.5822, "step": 28873 }, { "epoch": 0.886984302522041, "grad_norm": 0.33529505133628845, "learning_rate": 1.1792877736017245e-05, "loss": 0.5254, "step": 28874 }, { "epoch": 0.8870150216569901, "grad_norm": 0.36264997720718384, "learning_rate": 1.1792402287287853e-05, "loss": 0.6282, "step": 28875 }, { "epoch": 0.8870457407919393, "grad_norm": 0.39809656143188477, "learning_rate": 1.179192683437218e-05, "loss": 0.5299, "step": 28876 }, { "epoch": 0.8870764599268884, "grad_norm": 0.4114306569099426, "learning_rate": 1.1791451377271334e-05, "loss": 0.607, "step": 28877 }, { "epoch": 0.8871071790618377, "grad_norm": 0.36931073665618896, "learning_rate": 1.1790975915986433e-05, "loss": 0.5135, "step": 28878 }, { "epoch": 0.8871378981967868, "grad_norm": 0.33933210372924805, "learning_rate": 1.1790500450518581e-05, "loss": 0.5566, "step": 28879 }, { "epoch": 0.8871686173317359, "grad_norm": 0.3626555800437927, "learning_rate": 1.179002498086889e-05, "loss": 0.5141, "step": 28880 }, { "epoch": 0.8871993364666851, "grad_norm": 0.3304084241390228, "learning_rate": 1.1789549507038471e-05, "loss": 0.5892, "step": 28881 }, { "epoch": 0.8872300556016343, "grad_norm": 0.38880956172943115, "learning_rate": 1.1789074029028433e-05, "loss": 0.5906, "step": 28882 }, { "epoch": 0.8872607747365834, "grad_norm": 0.3740380108356476, "learning_rate": 1.178859854683989e-05, "loss": 0.5353, "step": 28883 }, { "epoch": 0.8872914938715326, "grad_norm": 0.4566633105278015, "learning_rate": 1.1788123060473949e-05, "loss": 0.572, "step": 28884 }, { "epoch": 0.8873222130064817, "grad_norm": 0.7962703108787537, "learning_rate": 1.1787647569931722e-05, "loss": 0.5455, "step": 28885 }, { "epoch": 0.8873529321414309, "grad_norm": 0.3480527400970459, "learning_rate": 1.1787172075214316e-05, "loss": 0.5507, "step": 28886 }, { "epoch": 0.8873836512763801, "grad_norm": 0.35639697313308716, "learning_rate": 1.178669657632285e-05, "loss": 0.6104, "step": 28887 }, { "epoch": 0.8874143704113292, "grad_norm": 0.34075039625167847, "learning_rate": 1.1786221073258426e-05, "loss": 0.549, "step": 28888 }, { "epoch": 0.8874450895462784, "grad_norm": 0.3772374093532562, "learning_rate": 1.178574556602216e-05, "loss": 0.5467, "step": 28889 }, { "epoch": 0.8874758086812276, "grad_norm": 0.3319837749004364, "learning_rate": 1.1785270054615156e-05, "loss": 0.5323, "step": 28890 }, { "epoch": 0.8875065278161767, "grad_norm": 0.38996919989585876, "learning_rate": 1.1784794539038533e-05, "loss": 0.4993, "step": 28891 }, { "epoch": 0.8875372469511259, "grad_norm": 0.35976722836494446, "learning_rate": 1.1784319019293398e-05, "loss": 0.5901, "step": 28892 }, { "epoch": 0.887567966086075, "grad_norm": 0.32823413610458374, "learning_rate": 1.178384349538086e-05, "loss": 0.5242, "step": 28893 }, { "epoch": 0.8875986852210241, "grad_norm": 0.3826940953731537, "learning_rate": 1.178336796730203e-05, "loss": 0.5721, "step": 28894 }, { "epoch": 0.8876294043559734, "grad_norm": 0.3532222807407379, "learning_rate": 1.1782892435058023e-05, "loss": 0.4878, "step": 28895 }, { "epoch": 0.8876601234909225, "grad_norm": 0.3963314890861511, "learning_rate": 1.1782416898649944e-05, "loss": 0.5617, "step": 28896 }, { "epoch": 0.8876908426258716, "grad_norm": 0.4314756393432617, "learning_rate": 1.1781941358078903e-05, "loss": 0.4903, "step": 28897 }, { "epoch": 0.8877215617608208, "grad_norm": 0.3538123667240143, "learning_rate": 1.1781465813346018e-05, "loss": 0.541, "step": 28898 }, { "epoch": 0.88775228089577, "grad_norm": 0.3498910367488861, "learning_rate": 1.1780990264452394e-05, "loss": 0.5076, "step": 28899 }, { "epoch": 0.8877830000307191, "grad_norm": 0.4682481288909912, "learning_rate": 1.1780514711399142e-05, "loss": 0.5328, "step": 28900 }, { "epoch": 0.8878137191656683, "grad_norm": 0.36090508103370667, "learning_rate": 1.1780039154187377e-05, "loss": 0.4817, "step": 28901 }, { "epoch": 0.8878444383006174, "grad_norm": 0.371182918548584, "learning_rate": 1.1779563592818202e-05, "loss": 0.5826, "step": 28902 }, { "epoch": 0.8878751574355667, "grad_norm": 0.41682666540145874, "learning_rate": 1.1779088027292734e-05, "loss": 0.608, "step": 28903 }, { "epoch": 0.8879058765705158, "grad_norm": 0.34811192750930786, "learning_rate": 1.177861245761208e-05, "loss": 0.5869, "step": 28904 }, { "epoch": 0.8879365957054649, "grad_norm": 0.38269034028053284, "learning_rate": 1.1778136883777353e-05, "loss": 0.5098, "step": 28905 }, { "epoch": 0.8879673148404141, "grad_norm": 0.3593958914279938, "learning_rate": 1.1777661305789662e-05, "loss": 0.505, "step": 28906 }, { "epoch": 0.8879980339753633, "grad_norm": 0.3190249502658844, "learning_rate": 1.1777185723650122e-05, "loss": 0.4731, "step": 28907 }, { "epoch": 0.8880287531103124, "grad_norm": 0.36842408776283264, "learning_rate": 1.1776710137359837e-05, "loss": 0.579, "step": 28908 }, { "epoch": 0.8880594722452616, "grad_norm": 0.3684832751750946, "learning_rate": 1.1776234546919922e-05, "loss": 0.5314, "step": 28909 }, { "epoch": 0.8880901913802107, "grad_norm": 0.3728339374065399, "learning_rate": 1.177575895233149e-05, "loss": 0.5372, "step": 28910 }, { "epoch": 0.8881209105151598, "grad_norm": 0.34268802404403687, "learning_rate": 1.1775283353595648e-05, "loss": 0.5984, "step": 28911 }, { "epoch": 0.8881516296501091, "grad_norm": 0.3476943373680115, "learning_rate": 1.1774807750713506e-05, "loss": 0.5611, "step": 28912 }, { "epoch": 0.8881823487850582, "grad_norm": 0.3426256775856018, "learning_rate": 1.1774332143686177e-05, "loss": 0.5335, "step": 28913 }, { "epoch": 0.8882130679200074, "grad_norm": 0.3668546676635742, "learning_rate": 1.1773856532514773e-05, "loss": 0.5317, "step": 28914 }, { "epoch": 0.8882437870549565, "grad_norm": 0.4626966416835785, "learning_rate": 1.1773380917200401e-05, "loss": 0.4778, "step": 28915 }, { "epoch": 0.8882745061899057, "grad_norm": 0.34655100107192993, "learning_rate": 1.1772905297744177e-05, "loss": 0.5949, "step": 28916 }, { "epoch": 0.8883052253248549, "grad_norm": 0.3679962754249573, "learning_rate": 1.1772429674147206e-05, "loss": 0.5685, "step": 28917 }, { "epoch": 0.888335944459804, "grad_norm": 0.42858412861824036, "learning_rate": 1.1771954046410603e-05, "loss": 0.5292, "step": 28918 }, { "epoch": 0.8883666635947531, "grad_norm": 0.331386536359787, "learning_rate": 1.177147841453548e-05, "loss": 0.6001, "step": 28919 }, { "epoch": 0.8883973827297024, "grad_norm": 0.35045382380485535, "learning_rate": 1.1771002778522942e-05, "loss": 0.5904, "step": 28920 }, { "epoch": 0.8884281018646515, "grad_norm": 0.34320002794265747, "learning_rate": 1.1770527138374103e-05, "loss": 0.5411, "step": 28921 }, { "epoch": 0.8884588209996006, "grad_norm": 0.3626762926578522, "learning_rate": 1.1770051494090075e-05, "loss": 0.5777, "step": 28922 }, { "epoch": 0.8884895401345498, "grad_norm": 0.34664082527160645, "learning_rate": 1.176957584567197e-05, "loss": 0.4991, "step": 28923 }, { "epoch": 0.888520259269499, "grad_norm": 0.37304380536079407, "learning_rate": 1.1769100193120895e-05, "loss": 0.5894, "step": 28924 }, { "epoch": 0.8885509784044481, "grad_norm": 0.3290463387966156, "learning_rate": 1.1768624536437965e-05, "loss": 0.4283, "step": 28925 }, { "epoch": 0.8885816975393973, "grad_norm": 0.3669934570789337, "learning_rate": 1.1768148875624285e-05, "loss": 0.5605, "step": 28926 }, { "epoch": 0.8886124166743464, "grad_norm": 0.3832922875881195, "learning_rate": 1.176767321068097e-05, "loss": 0.5173, "step": 28927 }, { "epoch": 0.8886431358092957, "grad_norm": 0.3878687620162964, "learning_rate": 1.1767197541609134e-05, "loss": 0.5191, "step": 28928 }, { "epoch": 0.8886738549442448, "grad_norm": 0.36315709352493286, "learning_rate": 1.1766721868409883e-05, "loss": 0.5611, "step": 28929 }, { "epoch": 0.8887045740791939, "grad_norm": 0.78389573097229, "learning_rate": 1.1766246191084331e-05, "loss": 0.6934, "step": 28930 }, { "epoch": 0.8887352932141431, "grad_norm": 0.4277898073196411, "learning_rate": 1.1765770509633588e-05, "loss": 0.4793, "step": 28931 }, { "epoch": 0.8887660123490922, "grad_norm": 0.40674006938934326, "learning_rate": 1.1765294824058763e-05, "loss": 0.5501, "step": 28932 }, { "epoch": 0.8887967314840414, "grad_norm": 0.4271043539047241, "learning_rate": 1.176481913436097e-05, "loss": 0.5196, "step": 28933 }, { "epoch": 0.8888274506189906, "grad_norm": 0.41685980558395386, "learning_rate": 1.1764343440541317e-05, "loss": 0.4828, "step": 28934 }, { "epoch": 0.8888581697539397, "grad_norm": 0.3416048288345337, "learning_rate": 1.176386774260092e-05, "loss": 0.5675, "step": 28935 }, { "epoch": 0.8888888888888888, "grad_norm": 0.3934910297393799, "learning_rate": 1.1763392040540882e-05, "loss": 0.5556, "step": 28936 }, { "epoch": 0.8889196080238381, "grad_norm": 0.36152660846710205, "learning_rate": 1.1762916334362322e-05, "loss": 0.5978, "step": 28937 }, { "epoch": 0.8889503271587872, "grad_norm": 0.37392738461494446, "learning_rate": 1.1762440624066347e-05, "loss": 0.6397, "step": 28938 }, { "epoch": 0.8889810462937364, "grad_norm": 0.3691830039024353, "learning_rate": 1.176196490965407e-05, "loss": 0.5085, "step": 28939 }, { "epoch": 0.8890117654286855, "grad_norm": 0.3801521062850952, "learning_rate": 1.1761489191126599e-05, "loss": 0.6063, "step": 28940 }, { "epoch": 0.8890424845636347, "grad_norm": 0.3843648433685303, "learning_rate": 1.1761013468485048e-05, "loss": 0.4818, "step": 28941 }, { "epoch": 0.8890732036985839, "grad_norm": 0.36389580368995667, "learning_rate": 1.1760537741730524e-05, "loss": 0.5738, "step": 28942 }, { "epoch": 0.889103922833533, "grad_norm": 0.3392965495586395, "learning_rate": 1.1760062010864147e-05, "loss": 0.435, "step": 28943 }, { "epoch": 0.8891346419684821, "grad_norm": 0.38295307755470276, "learning_rate": 1.1759586275887017e-05, "loss": 0.5153, "step": 28944 }, { "epoch": 0.8891653611034314, "grad_norm": 0.4161711633205414, "learning_rate": 1.175911053680025e-05, "loss": 0.5691, "step": 28945 }, { "epoch": 0.8891960802383805, "grad_norm": 0.3456330895423889, "learning_rate": 1.1758634793604961e-05, "loss": 0.5458, "step": 28946 }, { "epoch": 0.8892267993733296, "grad_norm": 0.5023092031478882, "learning_rate": 1.1758159046302255e-05, "loss": 0.556, "step": 28947 }, { "epoch": 0.8892575185082788, "grad_norm": 0.35854870080947876, "learning_rate": 1.175768329489325e-05, "loss": 0.472, "step": 28948 }, { "epoch": 0.889288237643228, "grad_norm": 0.42526817321777344, "learning_rate": 1.1757207539379048e-05, "loss": 0.5014, "step": 28949 }, { "epoch": 0.8893189567781771, "grad_norm": 0.3714195489883423, "learning_rate": 1.1756731779760768e-05, "loss": 0.4846, "step": 28950 }, { "epoch": 0.8893496759131263, "grad_norm": 0.37277156114578247, "learning_rate": 1.1756256016039518e-05, "loss": 0.596, "step": 28951 }, { "epoch": 0.8893803950480754, "grad_norm": 0.39023277163505554, "learning_rate": 1.1755780248216409e-05, "loss": 0.5248, "step": 28952 }, { "epoch": 0.8894111141830247, "grad_norm": 0.39622950553894043, "learning_rate": 1.1755304476292552e-05, "loss": 0.5645, "step": 28953 }, { "epoch": 0.8894418333179738, "grad_norm": 0.31875208020210266, "learning_rate": 1.175482870026906e-05, "loss": 0.4709, "step": 28954 }, { "epoch": 0.8894725524529229, "grad_norm": 0.37708935141563416, "learning_rate": 1.1754352920147043e-05, "loss": 0.5408, "step": 28955 }, { "epoch": 0.8895032715878721, "grad_norm": 0.3484902083873749, "learning_rate": 1.1753877135927609e-05, "loss": 0.518, "step": 28956 }, { "epoch": 0.8895339907228212, "grad_norm": 0.4592888355255127, "learning_rate": 1.1753401347611877e-05, "loss": 0.6268, "step": 28957 }, { "epoch": 0.8895647098577704, "grad_norm": 0.4016129672527313, "learning_rate": 1.1752925555200951e-05, "loss": 0.6008, "step": 28958 }, { "epoch": 0.8895954289927196, "grad_norm": 0.3927650451660156, "learning_rate": 1.1752449758695947e-05, "loss": 0.5746, "step": 28959 }, { "epoch": 0.8896261481276687, "grad_norm": 0.405592143535614, "learning_rate": 1.175197395809797e-05, "loss": 0.5565, "step": 28960 }, { "epoch": 0.8896568672626178, "grad_norm": 0.37797752022743225, "learning_rate": 1.175149815340814e-05, "loss": 0.5304, "step": 28961 }, { "epoch": 0.8896875863975671, "grad_norm": 0.38570138812065125, "learning_rate": 1.1751022344627562e-05, "loss": 0.5224, "step": 28962 }, { "epoch": 0.8897183055325162, "grad_norm": 0.3665459156036377, "learning_rate": 1.175054653175735e-05, "loss": 0.4758, "step": 28963 }, { "epoch": 0.8897490246674654, "grad_norm": 0.46825799345970154, "learning_rate": 1.1750070714798611e-05, "loss": 0.5531, "step": 28964 }, { "epoch": 0.8897797438024145, "grad_norm": 0.37258201837539673, "learning_rate": 1.1749594893752461e-05, "loss": 0.5288, "step": 28965 }, { "epoch": 0.8898104629373637, "grad_norm": 0.37475040555000305, "learning_rate": 1.1749119068620014e-05, "loss": 0.5926, "step": 28966 }, { "epoch": 0.8898411820723129, "grad_norm": 0.35719162225723267, "learning_rate": 1.1748643239402372e-05, "loss": 0.584, "step": 28967 }, { "epoch": 0.889871901207262, "grad_norm": 0.3528898358345032, "learning_rate": 1.1748167406100655e-05, "loss": 0.4964, "step": 28968 }, { "epoch": 0.8899026203422111, "grad_norm": 0.37365543842315674, "learning_rate": 1.1747691568715969e-05, "loss": 0.4995, "step": 28969 }, { "epoch": 0.8899333394771604, "grad_norm": 0.3509801924228668, "learning_rate": 1.1747215727249428e-05, "loss": 0.5912, "step": 28970 }, { "epoch": 0.8899640586121095, "grad_norm": 0.3592296242713928, "learning_rate": 1.1746739881702145e-05, "loss": 0.5199, "step": 28971 }, { "epoch": 0.8899947777470586, "grad_norm": 0.35401684045791626, "learning_rate": 1.1746264032075225e-05, "loss": 0.5039, "step": 28972 }, { "epoch": 0.8900254968820078, "grad_norm": 0.35983994603157043, "learning_rate": 1.1745788178369786e-05, "loss": 0.5271, "step": 28973 }, { "epoch": 0.890056216016957, "grad_norm": 0.39763957262039185, "learning_rate": 1.1745312320586934e-05, "loss": 0.4457, "step": 28974 }, { "epoch": 0.8900869351519061, "grad_norm": 0.36594071984291077, "learning_rate": 1.1744836458727786e-05, "loss": 0.5612, "step": 28975 }, { "epoch": 0.8901176542868553, "grad_norm": 0.3287925124168396, "learning_rate": 1.174436059279345e-05, "loss": 0.5391, "step": 28976 }, { "epoch": 0.8901483734218044, "grad_norm": 0.36755433678627014, "learning_rate": 1.1743884722785038e-05, "loss": 0.5778, "step": 28977 }, { "epoch": 0.8901790925567536, "grad_norm": 0.32556578516960144, "learning_rate": 1.1743408848703659e-05, "loss": 0.501, "step": 28978 }, { "epoch": 0.8902098116917028, "grad_norm": 0.34264522790908813, "learning_rate": 1.174293297055043e-05, "loss": 0.5752, "step": 28979 }, { "epoch": 0.8902405308266519, "grad_norm": 0.3422245979309082, "learning_rate": 1.1742457088326458e-05, "loss": 0.5296, "step": 28980 }, { "epoch": 0.8902712499616011, "grad_norm": 0.6367719769477844, "learning_rate": 1.1741981202032857e-05, "loss": 0.5368, "step": 28981 }, { "epoch": 0.8903019690965502, "grad_norm": 0.3849518299102783, "learning_rate": 1.1741505311670736e-05, "loss": 0.5386, "step": 28982 }, { "epoch": 0.8903326882314994, "grad_norm": 0.3385475277900696, "learning_rate": 1.1741029417241206e-05, "loss": 0.5451, "step": 28983 }, { "epoch": 0.8903634073664486, "grad_norm": 0.34317517280578613, "learning_rate": 1.1740553518745383e-05, "loss": 0.587, "step": 28984 }, { "epoch": 0.8903941265013977, "grad_norm": 0.3511945903301239, "learning_rate": 1.1740077616184373e-05, "loss": 0.5703, "step": 28985 }, { "epoch": 0.8904248456363468, "grad_norm": 0.3732929825782776, "learning_rate": 1.1739601709559295e-05, "loss": 0.5365, "step": 28986 }, { "epoch": 0.8904555647712961, "grad_norm": 0.34636974334716797, "learning_rate": 1.1739125798871254e-05, "loss": 0.5672, "step": 28987 }, { "epoch": 0.8904862839062452, "grad_norm": 0.39064469933509827, "learning_rate": 1.1738649884121361e-05, "loss": 0.4997, "step": 28988 }, { "epoch": 0.8905170030411944, "grad_norm": 0.383428692817688, "learning_rate": 1.173817396531073e-05, "loss": 0.5706, "step": 28989 }, { "epoch": 0.8905477221761435, "grad_norm": 0.4068516194820404, "learning_rate": 1.1737698042440473e-05, "loss": 0.5424, "step": 28990 }, { "epoch": 0.8905784413110927, "grad_norm": 0.36948490142822266, "learning_rate": 1.1737222115511701e-05, "loss": 0.5142, "step": 28991 }, { "epoch": 0.8906091604460419, "grad_norm": 0.3474804759025574, "learning_rate": 1.1736746184525524e-05, "loss": 0.553, "step": 28992 }, { "epoch": 0.890639879580991, "grad_norm": 0.35619673132896423, "learning_rate": 1.173627024948306e-05, "loss": 0.5035, "step": 28993 }, { "epoch": 0.8906705987159401, "grad_norm": 0.3490934371948242, "learning_rate": 1.1735794310385409e-05, "loss": 0.5549, "step": 28994 }, { "epoch": 0.8907013178508894, "grad_norm": 0.3745153248310089, "learning_rate": 1.1735318367233694e-05, "loss": 0.5934, "step": 28995 }, { "epoch": 0.8907320369858385, "grad_norm": 0.39144766330718994, "learning_rate": 1.1734842420029019e-05, "loss": 0.6145, "step": 28996 }, { "epoch": 0.8907627561207876, "grad_norm": 0.39825326204299927, "learning_rate": 1.17343664687725e-05, "loss": 0.7566, "step": 28997 }, { "epoch": 0.8907934752557368, "grad_norm": 0.436125785112381, "learning_rate": 1.1733890513465247e-05, "loss": 0.4768, "step": 28998 }, { "epoch": 0.8908241943906859, "grad_norm": 0.40765801072120667, "learning_rate": 1.1733414554108371e-05, "loss": 0.5374, "step": 28999 }, { "epoch": 0.8908549135256352, "grad_norm": 0.35587120056152344, "learning_rate": 1.1732938590702983e-05, "loss": 0.5906, "step": 29000 }, { "epoch": 0.8908856326605843, "grad_norm": 0.33609437942504883, "learning_rate": 1.1732462623250196e-05, "loss": 0.5657, "step": 29001 }, { "epoch": 0.8909163517955334, "grad_norm": 0.41958412528038025, "learning_rate": 1.1731986651751121e-05, "loss": 0.5594, "step": 29002 }, { "epoch": 0.8909470709304826, "grad_norm": 0.35127824544906616, "learning_rate": 1.1731510676206871e-05, "loss": 0.4953, "step": 29003 }, { "epoch": 0.8909777900654318, "grad_norm": 0.4268958270549774, "learning_rate": 1.1731034696618559e-05, "loss": 0.5935, "step": 29004 }, { "epoch": 0.8910085092003809, "grad_norm": 0.34146225452423096, "learning_rate": 1.1730558712987294e-05, "loss": 0.5874, "step": 29005 }, { "epoch": 0.8910392283353301, "grad_norm": 0.36192816495895386, "learning_rate": 1.1730082725314186e-05, "loss": 0.5498, "step": 29006 }, { "epoch": 0.8910699474702792, "grad_norm": 0.4366661608219147, "learning_rate": 1.1729606733600353e-05, "loss": 0.5563, "step": 29007 }, { "epoch": 0.8911006666052284, "grad_norm": 0.3393646776676178, "learning_rate": 1.1729130737846898e-05, "loss": 0.5949, "step": 29008 }, { "epoch": 0.8911313857401776, "grad_norm": 0.3784931004047394, "learning_rate": 1.1728654738054943e-05, "loss": 0.5919, "step": 29009 }, { "epoch": 0.8911621048751267, "grad_norm": 0.367180734872818, "learning_rate": 1.1728178734225588e-05, "loss": 0.5371, "step": 29010 }, { "epoch": 0.8911928240100758, "grad_norm": 0.4239346981048584, "learning_rate": 1.1727702726359955e-05, "loss": 0.5277, "step": 29011 }, { "epoch": 0.891223543145025, "grad_norm": 0.3788200616836548, "learning_rate": 1.172722671445915e-05, "loss": 0.566, "step": 29012 }, { "epoch": 0.8912542622799742, "grad_norm": 0.3487885296344757, "learning_rate": 1.1726750698524288e-05, "loss": 0.552, "step": 29013 }, { "epoch": 0.8912849814149234, "grad_norm": 0.37022462487220764, "learning_rate": 1.1726274678556475e-05, "loss": 0.5497, "step": 29014 }, { "epoch": 0.8913157005498725, "grad_norm": 0.3761101961135864, "learning_rate": 1.1725798654556831e-05, "loss": 0.5352, "step": 29015 }, { "epoch": 0.8913464196848216, "grad_norm": 0.3869961202144623, "learning_rate": 1.1725322626526462e-05, "loss": 0.5061, "step": 29016 }, { "epoch": 0.8913771388197709, "grad_norm": 0.335576593875885, "learning_rate": 1.1724846594466483e-05, "loss": 0.5282, "step": 29017 }, { "epoch": 0.89140785795472, "grad_norm": 0.40271615982055664, "learning_rate": 1.1724370558378002e-05, "loss": 0.4413, "step": 29018 }, { "epoch": 0.8914385770896691, "grad_norm": 0.3632548749446869, "learning_rate": 1.1723894518262133e-05, "loss": 0.5595, "step": 29019 }, { "epoch": 0.8914692962246183, "grad_norm": 0.37168291211128235, "learning_rate": 1.172341847411999e-05, "loss": 0.5607, "step": 29020 }, { "epoch": 0.8915000153595675, "grad_norm": 0.3443134129047394, "learning_rate": 1.1722942425952682e-05, "loss": 0.5228, "step": 29021 }, { "epoch": 0.8915307344945166, "grad_norm": 0.36192044615745544, "learning_rate": 1.1722466373761322e-05, "loss": 0.5612, "step": 29022 }, { "epoch": 0.8915614536294658, "grad_norm": 0.36753925681114197, "learning_rate": 1.1721990317547022e-05, "loss": 0.5561, "step": 29023 }, { "epoch": 0.8915921727644149, "grad_norm": 0.3704235553741455, "learning_rate": 1.172151425731089e-05, "loss": 0.5307, "step": 29024 }, { "epoch": 0.8916228918993642, "grad_norm": 0.39878177642822266, "learning_rate": 1.1721038193054046e-05, "loss": 0.5939, "step": 29025 }, { "epoch": 0.8916536110343133, "grad_norm": 0.4418213367462158, "learning_rate": 1.1720562124777594e-05, "loss": 0.5112, "step": 29026 }, { "epoch": 0.8916843301692624, "grad_norm": 0.4099626839160919, "learning_rate": 1.1720086052482651e-05, "loss": 0.5875, "step": 29027 }, { "epoch": 0.8917150493042116, "grad_norm": 0.3883257806301117, "learning_rate": 1.1719609976170325e-05, "loss": 0.511, "step": 29028 }, { "epoch": 0.8917457684391608, "grad_norm": 0.31214624643325806, "learning_rate": 1.171913389584173e-05, "loss": 0.5281, "step": 29029 }, { "epoch": 0.8917764875741099, "grad_norm": 0.35188019275665283, "learning_rate": 1.1718657811497978e-05, "loss": 0.5175, "step": 29030 }, { "epoch": 0.8918072067090591, "grad_norm": 0.3678131103515625, "learning_rate": 1.1718181723140183e-05, "loss": 0.4925, "step": 29031 }, { "epoch": 0.8918379258440082, "grad_norm": 0.3403291404247284, "learning_rate": 1.171770563076945e-05, "loss": 0.5744, "step": 29032 }, { "epoch": 0.8918686449789573, "grad_norm": 0.37363961338996887, "learning_rate": 1.1717229534386901e-05, "loss": 0.5406, "step": 29033 }, { "epoch": 0.8918993641139066, "grad_norm": 0.34401291608810425, "learning_rate": 1.171675343399364e-05, "loss": 0.5234, "step": 29034 }, { "epoch": 0.8919300832488557, "grad_norm": 0.34269994497299194, "learning_rate": 1.171627732959078e-05, "loss": 0.5094, "step": 29035 }, { "epoch": 0.8919608023838048, "grad_norm": 0.35016605257987976, "learning_rate": 1.1715801221179435e-05, "loss": 0.5363, "step": 29036 }, { "epoch": 0.891991521518754, "grad_norm": 0.38452988862991333, "learning_rate": 1.1715325108760714e-05, "loss": 0.5577, "step": 29037 }, { "epoch": 0.8920222406537032, "grad_norm": 0.3608287572860718, "learning_rate": 1.1714848992335737e-05, "loss": 0.5521, "step": 29038 }, { "epoch": 0.8920529597886524, "grad_norm": 0.39149388670921326, "learning_rate": 1.1714372871905607e-05, "loss": 0.4811, "step": 29039 }, { "epoch": 0.8920836789236015, "grad_norm": 0.3653331995010376, "learning_rate": 1.171389674747144e-05, "loss": 0.627, "step": 29040 }, { "epoch": 0.8921143980585506, "grad_norm": 0.3614308536052704, "learning_rate": 1.1713420619034348e-05, "loss": 0.5783, "step": 29041 }, { "epoch": 0.8921451171934999, "grad_norm": 0.40790632367134094, "learning_rate": 1.171294448659544e-05, "loss": 0.5905, "step": 29042 }, { "epoch": 0.892175836328449, "grad_norm": 0.506161093711853, "learning_rate": 1.1712468350155835e-05, "loss": 0.5411, "step": 29043 }, { "epoch": 0.8922065554633981, "grad_norm": 0.3868597745895386, "learning_rate": 1.1711992209716635e-05, "loss": 0.5956, "step": 29044 }, { "epoch": 0.8922372745983473, "grad_norm": 0.3605025112628937, "learning_rate": 1.1711516065278962e-05, "loss": 0.4919, "step": 29045 }, { "epoch": 0.8922679937332965, "grad_norm": 0.4490320682525635, "learning_rate": 1.1711039916843922e-05, "loss": 0.5362, "step": 29046 }, { "epoch": 0.8922987128682456, "grad_norm": 0.34128111600875854, "learning_rate": 1.1710563764412632e-05, "loss": 0.4967, "step": 29047 }, { "epoch": 0.8923294320031948, "grad_norm": 0.37127599120140076, "learning_rate": 1.1710087607986197e-05, "loss": 0.4854, "step": 29048 }, { "epoch": 0.8923601511381439, "grad_norm": 0.35695257782936096, "learning_rate": 1.1709611447565734e-05, "loss": 0.5915, "step": 29049 }, { "epoch": 0.8923908702730932, "grad_norm": 0.4584716856479645, "learning_rate": 1.1709135283152354e-05, "loss": 0.5645, "step": 29050 }, { "epoch": 0.8924215894080423, "grad_norm": 0.36193153262138367, "learning_rate": 1.170865911474717e-05, "loss": 0.5766, "step": 29051 }, { "epoch": 0.8924523085429914, "grad_norm": 0.3900162875652313, "learning_rate": 1.1708182942351294e-05, "loss": 0.4706, "step": 29052 }, { "epoch": 0.8924830276779406, "grad_norm": 0.3898542523384094, "learning_rate": 1.1707706765965834e-05, "loss": 0.6129, "step": 29053 }, { "epoch": 0.8925137468128898, "grad_norm": 0.36070582270622253, "learning_rate": 1.1707230585591909e-05, "loss": 0.5348, "step": 29054 }, { "epoch": 0.8925444659478389, "grad_norm": 0.3606311082839966, "learning_rate": 1.1706754401230622e-05, "loss": 0.6273, "step": 29055 }, { "epoch": 0.8925751850827881, "grad_norm": 0.33707061409950256, "learning_rate": 1.1706278212883098e-05, "loss": 0.5133, "step": 29056 }, { "epoch": 0.8926059042177372, "grad_norm": 0.3593640625476837, "learning_rate": 1.170580202055044e-05, "loss": 0.6042, "step": 29057 }, { "epoch": 0.8926366233526863, "grad_norm": 0.3751041293144226, "learning_rate": 1.1705325824233759e-05, "loss": 0.6293, "step": 29058 }, { "epoch": 0.8926673424876356, "grad_norm": 0.5558409094810486, "learning_rate": 1.1704849623934175e-05, "loss": 0.5485, "step": 29059 }, { "epoch": 0.8926980616225847, "grad_norm": 0.39583370089530945, "learning_rate": 1.1704373419652793e-05, "loss": 0.4343, "step": 29060 }, { "epoch": 0.8927287807575338, "grad_norm": 0.3611915409564972, "learning_rate": 1.170389721139073e-05, "loss": 0.5909, "step": 29061 }, { "epoch": 0.892759499892483, "grad_norm": 0.35294345021247864, "learning_rate": 1.1703420999149094e-05, "loss": 0.4747, "step": 29062 }, { "epoch": 0.8927902190274322, "grad_norm": 0.35425040125846863, "learning_rate": 1.1702944782929e-05, "loss": 0.5524, "step": 29063 }, { "epoch": 0.8928209381623814, "grad_norm": 0.36997100710868835, "learning_rate": 1.1702468562731557e-05, "loss": 0.5261, "step": 29064 }, { "epoch": 0.8928516572973305, "grad_norm": 0.38002336025238037, "learning_rate": 1.1701992338557884e-05, "loss": 0.5489, "step": 29065 }, { "epoch": 0.8928823764322796, "grad_norm": 0.40502017736434937, "learning_rate": 1.1701516110409086e-05, "loss": 0.5052, "step": 29066 }, { "epoch": 0.8929130955672289, "grad_norm": 0.37337663769721985, "learning_rate": 1.170103987828628e-05, "loss": 0.4856, "step": 29067 }, { "epoch": 0.892943814702178, "grad_norm": 0.33717480301856995, "learning_rate": 1.1700563642190578e-05, "loss": 0.5621, "step": 29068 }, { "epoch": 0.8929745338371271, "grad_norm": 0.38163459300994873, "learning_rate": 1.1700087402123089e-05, "loss": 0.5114, "step": 29069 }, { "epoch": 0.8930052529720763, "grad_norm": 0.3622649013996124, "learning_rate": 1.1699611158084928e-05, "loss": 0.5252, "step": 29070 }, { "epoch": 0.8930359721070255, "grad_norm": 0.3614450693130493, "learning_rate": 1.1699134910077204e-05, "loss": 0.5281, "step": 29071 }, { "epoch": 0.8930666912419746, "grad_norm": 0.4057738184928894, "learning_rate": 1.1698658658101034e-05, "loss": 0.544, "step": 29072 }, { "epoch": 0.8930974103769238, "grad_norm": 0.7699423432350159, "learning_rate": 1.1698182402157523e-05, "loss": 0.5872, "step": 29073 }, { "epoch": 0.8931281295118729, "grad_norm": 0.3690846860408783, "learning_rate": 1.1697706142247794e-05, "loss": 0.5888, "step": 29074 }, { "epoch": 0.8931588486468222, "grad_norm": 0.389926016330719, "learning_rate": 1.1697229878372953e-05, "loss": 0.6304, "step": 29075 }, { "epoch": 0.8931895677817713, "grad_norm": 0.44032153487205505, "learning_rate": 1.1696753610534112e-05, "loss": 0.5862, "step": 29076 }, { "epoch": 0.8932202869167204, "grad_norm": 0.39951300621032715, "learning_rate": 1.1696277338732386e-05, "loss": 0.532, "step": 29077 }, { "epoch": 0.8932510060516696, "grad_norm": 0.4318150281906128, "learning_rate": 1.1695801062968883e-05, "loss": 0.6641, "step": 29078 }, { "epoch": 0.8932817251866187, "grad_norm": 0.5193226933479309, "learning_rate": 1.169532478324472e-05, "loss": 0.6762, "step": 29079 }, { "epoch": 0.8933124443215679, "grad_norm": 0.3964790105819702, "learning_rate": 1.169484849956101e-05, "loss": 0.5693, "step": 29080 }, { "epoch": 0.8933431634565171, "grad_norm": 0.33887723088264465, "learning_rate": 1.169437221191886e-05, "loss": 0.5912, "step": 29081 }, { "epoch": 0.8933738825914662, "grad_norm": 0.3475557863712311, "learning_rate": 1.1693895920319384e-05, "loss": 0.5369, "step": 29082 }, { "epoch": 0.8934046017264153, "grad_norm": 0.35879871249198914, "learning_rate": 1.16934196247637e-05, "loss": 0.5343, "step": 29083 }, { "epoch": 0.8934353208613646, "grad_norm": 0.3953433930873871, "learning_rate": 1.1692943325252912e-05, "loss": 0.593, "step": 29084 }, { "epoch": 0.8934660399963137, "grad_norm": 0.38500505685806274, "learning_rate": 1.169246702178814e-05, "loss": 0.6101, "step": 29085 }, { "epoch": 0.8934967591312629, "grad_norm": 0.3428557217121124, "learning_rate": 1.1691990714370491e-05, "loss": 0.5203, "step": 29086 }, { "epoch": 0.893527478266212, "grad_norm": 0.3394281268119812, "learning_rate": 1.1691514403001081e-05, "loss": 0.5172, "step": 29087 }, { "epoch": 0.8935581974011612, "grad_norm": 0.3379194438457489, "learning_rate": 1.1691038087681021e-05, "loss": 0.5315, "step": 29088 }, { "epoch": 0.8935889165361104, "grad_norm": 0.36904579401016235, "learning_rate": 1.1690561768411421e-05, "loss": 0.4481, "step": 29089 }, { "epoch": 0.8936196356710595, "grad_norm": 0.3740984797477722, "learning_rate": 1.1690085445193398e-05, "loss": 0.4829, "step": 29090 }, { "epoch": 0.8936503548060086, "grad_norm": 0.4366152584552765, "learning_rate": 1.1689609118028058e-05, "loss": 0.5773, "step": 29091 }, { "epoch": 0.8936810739409579, "grad_norm": 0.38222813606262207, "learning_rate": 1.1689132786916524e-05, "loss": 0.6166, "step": 29092 }, { "epoch": 0.893711793075907, "grad_norm": 0.3493430018424988, "learning_rate": 1.16886564518599e-05, "loss": 0.4781, "step": 29093 }, { "epoch": 0.8937425122108561, "grad_norm": 0.35154253244400024, "learning_rate": 1.16881801128593e-05, "loss": 0.5466, "step": 29094 }, { "epoch": 0.8937732313458053, "grad_norm": 0.3608851730823517, "learning_rate": 1.1687703769915842e-05, "loss": 0.6008, "step": 29095 }, { "epoch": 0.8938039504807544, "grad_norm": 0.3733634352684021, "learning_rate": 1.1687227423030627e-05, "loss": 0.6094, "step": 29096 }, { "epoch": 0.8938346696157036, "grad_norm": 0.3546454906463623, "learning_rate": 1.168675107220478e-05, "loss": 0.4632, "step": 29097 }, { "epoch": 0.8938653887506528, "grad_norm": 0.35510751605033875, "learning_rate": 1.1686274717439406e-05, "loss": 0.5968, "step": 29098 }, { "epoch": 0.8938961078856019, "grad_norm": 0.37248674035072327, "learning_rate": 1.1685798358735618e-05, "loss": 0.6275, "step": 29099 }, { "epoch": 0.8939268270205512, "grad_norm": 0.41977736353874207, "learning_rate": 1.1685321996094533e-05, "loss": 0.6133, "step": 29100 }, { "epoch": 0.8939575461555003, "grad_norm": 0.3456113636493683, "learning_rate": 1.1684845629517262e-05, "loss": 0.5838, "step": 29101 }, { "epoch": 0.8939882652904494, "grad_norm": 0.37516549229621887, "learning_rate": 1.1684369259004911e-05, "loss": 0.5559, "step": 29102 }, { "epoch": 0.8940189844253986, "grad_norm": 0.3984665274620056, "learning_rate": 1.16838928845586e-05, "loss": 0.6283, "step": 29103 }, { "epoch": 0.8940497035603477, "grad_norm": 0.39230746030807495, "learning_rate": 1.1683416506179443e-05, "loss": 0.5239, "step": 29104 }, { "epoch": 0.8940804226952969, "grad_norm": 0.4490218758583069, "learning_rate": 1.1682940123868547e-05, "loss": 0.6255, "step": 29105 }, { "epoch": 0.8941111418302461, "grad_norm": 0.3584361970424652, "learning_rate": 1.1682463737627025e-05, "loss": 0.5016, "step": 29106 }, { "epoch": 0.8941418609651952, "grad_norm": 0.3265388011932373, "learning_rate": 1.1681987347455993e-05, "loss": 0.4841, "step": 29107 }, { "epoch": 0.8941725801001443, "grad_norm": 0.32914188504219055, "learning_rate": 1.1681510953356564e-05, "loss": 0.4848, "step": 29108 }, { "epoch": 0.8942032992350936, "grad_norm": 0.3997056484222412, "learning_rate": 1.1681034555329847e-05, "loss": 0.5563, "step": 29109 }, { "epoch": 0.8942340183700427, "grad_norm": 0.39220741391181946, "learning_rate": 1.1680558153376953e-05, "loss": 0.5184, "step": 29110 }, { "epoch": 0.8942647375049919, "grad_norm": 0.3491116166114807, "learning_rate": 1.1680081747499003e-05, "loss": 0.4583, "step": 29111 }, { "epoch": 0.894295456639941, "grad_norm": 0.39146098494529724, "learning_rate": 1.1679605337697102e-05, "loss": 0.5364, "step": 29112 }, { "epoch": 0.8943261757748902, "grad_norm": 0.3897015452384949, "learning_rate": 1.1679128923972366e-05, "loss": 0.5002, "step": 29113 }, { "epoch": 0.8943568949098394, "grad_norm": 0.38518622517585754, "learning_rate": 1.1678652506325907e-05, "loss": 0.5192, "step": 29114 }, { "epoch": 0.8943876140447885, "grad_norm": 0.344930499792099, "learning_rate": 1.1678176084758839e-05, "loss": 0.5515, "step": 29115 }, { "epoch": 0.8944183331797376, "grad_norm": 0.3362020254135132, "learning_rate": 1.1677699659272273e-05, "loss": 0.5512, "step": 29116 }, { "epoch": 0.8944490523146869, "grad_norm": 0.3783765435218811, "learning_rate": 1.1677223229867322e-05, "loss": 0.5902, "step": 29117 }, { "epoch": 0.894479771449636, "grad_norm": 0.3536039888858795, "learning_rate": 1.16767467965451e-05, "loss": 0.523, "step": 29118 }, { "epoch": 0.8945104905845851, "grad_norm": 0.3842537999153137, "learning_rate": 1.167627035930672e-05, "loss": 0.6587, "step": 29119 }, { "epoch": 0.8945412097195343, "grad_norm": 0.40035152435302734, "learning_rate": 1.1675793918153291e-05, "loss": 0.5252, "step": 29120 }, { "epoch": 0.8945719288544834, "grad_norm": 0.3601178824901581, "learning_rate": 1.1675317473085927e-05, "loss": 0.4637, "step": 29121 }, { "epoch": 0.8946026479894326, "grad_norm": 0.35206374526023865, "learning_rate": 1.1674841024105745e-05, "loss": 0.448, "step": 29122 }, { "epoch": 0.8946333671243818, "grad_norm": 0.3530576229095459, "learning_rate": 1.1674364571213852e-05, "loss": 0.5268, "step": 29123 }, { "epoch": 0.8946640862593309, "grad_norm": 0.35276296734809875, "learning_rate": 1.1673888114411367e-05, "loss": 0.49, "step": 29124 }, { "epoch": 0.8946948053942801, "grad_norm": 0.3794379234313965, "learning_rate": 1.1673411653699396e-05, "loss": 0.5235, "step": 29125 }, { "epoch": 0.8947255245292293, "grad_norm": 0.35325440764427185, "learning_rate": 1.1672935189079057e-05, "loss": 0.4334, "step": 29126 }, { "epoch": 0.8947562436641784, "grad_norm": 0.3410915732383728, "learning_rate": 1.167245872055146e-05, "loss": 0.5185, "step": 29127 }, { "epoch": 0.8947869627991276, "grad_norm": 0.36122846603393555, "learning_rate": 1.1671982248117719e-05, "loss": 0.5838, "step": 29128 }, { "epoch": 0.8948176819340767, "grad_norm": 0.3639472424983978, "learning_rate": 1.1671505771778947e-05, "loss": 0.5035, "step": 29129 }, { "epoch": 0.8948484010690259, "grad_norm": 0.3806101381778717, "learning_rate": 1.1671029291536258e-05, "loss": 0.563, "step": 29130 }, { "epoch": 0.8948791202039751, "grad_norm": 0.40977659821510315, "learning_rate": 1.1670552807390765e-05, "loss": 0.5157, "step": 29131 }, { "epoch": 0.8949098393389242, "grad_norm": 0.43634337186813354, "learning_rate": 1.1670076319343572e-05, "loss": 0.5601, "step": 29132 }, { "epoch": 0.8949405584738733, "grad_norm": 0.37837350368499756, "learning_rate": 1.1669599827395808e-05, "loss": 0.5589, "step": 29133 }, { "epoch": 0.8949712776088226, "grad_norm": 0.3434171974658966, "learning_rate": 1.1669123331548571e-05, "loss": 0.5143, "step": 29134 }, { "epoch": 0.8950019967437717, "grad_norm": 0.39081692695617676, "learning_rate": 1.1668646831802981e-05, "loss": 0.5637, "step": 29135 }, { "epoch": 0.8950327158787209, "grad_norm": 0.3740633428096771, "learning_rate": 1.1668170328160151e-05, "loss": 0.5858, "step": 29136 }, { "epoch": 0.89506343501367, "grad_norm": 0.3874022960662842, "learning_rate": 1.1667693820621194e-05, "loss": 0.4729, "step": 29137 }, { "epoch": 0.8950941541486191, "grad_norm": 0.3864888846874237, "learning_rate": 1.166721730918722e-05, "loss": 0.6178, "step": 29138 }, { "epoch": 0.8951248732835684, "grad_norm": 0.36377087235450745, "learning_rate": 1.1666740793859342e-05, "loss": 0.5159, "step": 29139 }, { "epoch": 0.8951555924185175, "grad_norm": 0.35021501779556274, "learning_rate": 1.1666264274638677e-05, "loss": 0.4614, "step": 29140 }, { "epoch": 0.8951863115534666, "grad_norm": 0.35061314702033997, "learning_rate": 1.1665787751526332e-05, "loss": 0.4863, "step": 29141 }, { "epoch": 0.8952170306884158, "grad_norm": 0.389512300491333, "learning_rate": 1.1665311224523425e-05, "loss": 0.4712, "step": 29142 }, { "epoch": 0.895247749823365, "grad_norm": 0.4310374855995178, "learning_rate": 1.166483469363107e-05, "loss": 0.5789, "step": 29143 }, { "epoch": 0.8952784689583141, "grad_norm": 0.3397652804851532, "learning_rate": 1.1664358158850375e-05, "loss": 0.5734, "step": 29144 }, { "epoch": 0.8953091880932633, "grad_norm": 0.39559900760650635, "learning_rate": 1.1663881620182454e-05, "loss": 0.5718, "step": 29145 }, { "epoch": 0.8953399072282124, "grad_norm": 0.3593844473361969, "learning_rate": 1.1663405077628421e-05, "loss": 0.5425, "step": 29146 }, { "epoch": 0.8953706263631616, "grad_norm": 0.3859720528125763, "learning_rate": 1.1662928531189392e-05, "loss": 0.6054, "step": 29147 }, { "epoch": 0.8954013454981108, "grad_norm": 0.3481912910938263, "learning_rate": 1.1662451980866475e-05, "loss": 0.5554, "step": 29148 }, { "epoch": 0.8954320646330599, "grad_norm": 0.4468294680118561, "learning_rate": 1.1661975426660788e-05, "loss": 0.5764, "step": 29149 }, { "epoch": 0.8954627837680091, "grad_norm": 0.38392722606658936, "learning_rate": 1.166149886857344e-05, "loss": 0.5776, "step": 29150 }, { "epoch": 0.8954935029029583, "grad_norm": 0.3704873323440552, "learning_rate": 1.1661022306605545e-05, "loss": 0.5148, "step": 29151 }, { "epoch": 0.8955242220379074, "grad_norm": 0.3437010943889618, "learning_rate": 1.1660545740758216e-05, "loss": 0.5317, "step": 29152 }, { "epoch": 0.8955549411728566, "grad_norm": 0.39671340584754944, "learning_rate": 1.1660069171032568e-05, "loss": 0.5024, "step": 29153 }, { "epoch": 0.8955856603078057, "grad_norm": 0.37411025166511536, "learning_rate": 1.165959259742971e-05, "loss": 0.5652, "step": 29154 }, { "epoch": 0.8956163794427549, "grad_norm": 0.37485265731811523, "learning_rate": 1.165911601995076e-05, "loss": 0.4663, "step": 29155 }, { "epoch": 0.8956470985777041, "grad_norm": 0.3611644208431244, "learning_rate": 1.1658639438596828e-05, "loss": 0.5177, "step": 29156 }, { "epoch": 0.8956778177126532, "grad_norm": 0.37300965189933777, "learning_rate": 1.1658162853369029e-05, "loss": 0.475, "step": 29157 }, { "epoch": 0.8957085368476023, "grad_norm": 0.35765308141708374, "learning_rate": 1.1657686264268473e-05, "loss": 0.4703, "step": 29158 }, { "epoch": 0.8957392559825516, "grad_norm": 0.40342289209365845, "learning_rate": 1.1657209671296274e-05, "loss": 0.511, "step": 29159 }, { "epoch": 0.8957699751175007, "grad_norm": 0.34205538034439087, "learning_rate": 1.1656733074453547e-05, "loss": 0.5676, "step": 29160 }, { "epoch": 0.8958006942524499, "grad_norm": 0.3680044412612915, "learning_rate": 1.1656256473741406e-05, "loss": 0.5527, "step": 29161 }, { "epoch": 0.895831413387399, "grad_norm": 0.4114828109741211, "learning_rate": 1.1655779869160962e-05, "loss": 0.5434, "step": 29162 }, { "epoch": 0.8958621325223481, "grad_norm": 0.3417610228061676, "learning_rate": 1.1655303260713326e-05, "loss": 0.5337, "step": 29163 }, { "epoch": 0.8958928516572974, "grad_norm": 0.3668915331363678, "learning_rate": 1.1654826648399613e-05, "loss": 0.4398, "step": 29164 }, { "epoch": 0.8959235707922465, "grad_norm": 0.3965575695037842, "learning_rate": 1.165435003222094e-05, "loss": 0.4681, "step": 29165 }, { "epoch": 0.8959542899271956, "grad_norm": 0.351157546043396, "learning_rate": 1.1653873412178416e-05, "loss": 0.6024, "step": 29166 }, { "epoch": 0.8959850090621448, "grad_norm": 0.44689276814460754, "learning_rate": 1.1653396788273155e-05, "loss": 0.6095, "step": 29167 }, { "epoch": 0.896015728197094, "grad_norm": 0.34697291254997253, "learning_rate": 1.1652920160506268e-05, "loss": 0.5594, "step": 29168 }, { "epoch": 0.8960464473320431, "grad_norm": 0.38427796959877014, "learning_rate": 1.1652443528878876e-05, "loss": 0.5079, "step": 29169 }, { "epoch": 0.8960771664669923, "grad_norm": 0.4112304747104645, "learning_rate": 1.1651966893392082e-05, "loss": 0.6218, "step": 29170 }, { "epoch": 0.8961078856019414, "grad_norm": 0.47631606459617615, "learning_rate": 1.1651490254047007e-05, "loss": 0.6294, "step": 29171 }, { "epoch": 0.8961386047368906, "grad_norm": 0.3414067327976227, "learning_rate": 1.1651013610844759e-05, "loss": 0.5528, "step": 29172 }, { "epoch": 0.8961693238718398, "grad_norm": 0.35288143157958984, "learning_rate": 1.1650536963786454e-05, "loss": 0.4993, "step": 29173 }, { "epoch": 0.8962000430067889, "grad_norm": 0.3751794397830963, "learning_rate": 1.1650060312873206e-05, "loss": 0.54, "step": 29174 }, { "epoch": 0.8962307621417381, "grad_norm": 0.3695565462112427, "learning_rate": 1.1649583658106126e-05, "loss": 0.4806, "step": 29175 }, { "epoch": 0.8962614812766873, "grad_norm": 0.346417635679245, "learning_rate": 1.1649106999486328e-05, "loss": 0.5572, "step": 29176 }, { "epoch": 0.8962922004116364, "grad_norm": 0.3941897451877594, "learning_rate": 1.1648630337014924e-05, "loss": 0.6011, "step": 29177 }, { "epoch": 0.8963229195465856, "grad_norm": 0.3722604811191559, "learning_rate": 1.164815367069303e-05, "loss": 0.5037, "step": 29178 }, { "epoch": 0.8963536386815347, "grad_norm": 0.331224650144577, "learning_rate": 1.1647677000521758e-05, "loss": 0.5603, "step": 29179 }, { "epoch": 0.8963843578164838, "grad_norm": 0.42777693271636963, "learning_rate": 1.1647200326502222e-05, "loss": 0.5699, "step": 29180 }, { "epoch": 0.8964150769514331, "grad_norm": 0.3496192395687103, "learning_rate": 1.1646723648635531e-05, "loss": 0.5088, "step": 29181 }, { "epoch": 0.8964457960863822, "grad_norm": 0.3685421347618103, "learning_rate": 1.1646246966922807e-05, "loss": 0.5148, "step": 29182 }, { "epoch": 0.8964765152213313, "grad_norm": 0.39778122305870056, "learning_rate": 1.1645770281365154e-05, "loss": 0.5638, "step": 29183 }, { "epoch": 0.8965072343562805, "grad_norm": 0.3787819445133209, "learning_rate": 1.164529359196369e-05, "loss": 0.5687, "step": 29184 }, { "epoch": 0.8965379534912297, "grad_norm": 0.4546995759010315, "learning_rate": 1.164481689871953e-05, "loss": 0.5118, "step": 29185 }, { "epoch": 0.8965686726261789, "grad_norm": 0.3548251688480377, "learning_rate": 1.1644340201633787e-05, "loss": 0.4901, "step": 29186 }, { "epoch": 0.896599391761128, "grad_norm": 0.41278010606765747, "learning_rate": 1.1643863500707568e-05, "loss": 0.54, "step": 29187 }, { "epoch": 0.8966301108960771, "grad_norm": 0.40834540128707886, "learning_rate": 1.1643386795941993e-05, "loss": 0.5447, "step": 29188 }, { "epoch": 0.8966608300310264, "grad_norm": 0.4543491303920746, "learning_rate": 1.1642910087338174e-05, "loss": 0.5362, "step": 29189 }, { "epoch": 0.8966915491659755, "grad_norm": 0.39631810784339905, "learning_rate": 1.1642433374897225e-05, "loss": 0.581, "step": 29190 }, { "epoch": 0.8967222683009246, "grad_norm": 0.4739347994327545, "learning_rate": 1.1641956658620257e-05, "loss": 0.5503, "step": 29191 }, { "epoch": 0.8967529874358738, "grad_norm": 0.3676416873931885, "learning_rate": 1.1641479938508385e-05, "loss": 0.5456, "step": 29192 }, { "epoch": 0.896783706570823, "grad_norm": 0.3718222677707672, "learning_rate": 1.1641003214562721e-05, "loss": 0.5033, "step": 29193 }, { "epoch": 0.8968144257057721, "grad_norm": 0.34875938296318054, "learning_rate": 1.164052648678438e-05, "loss": 0.5052, "step": 29194 }, { "epoch": 0.8968451448407213, "grad_norm": 0.3475865423679352, "learning_rate": 1.1640049755174472e-05, "loss": 0.5638, "step": 29195 }, { "epoch": 0.8968758639756704, "grad_norm": 0.42486128211021423, "learning_rate": 1.1639573019734117e-05, "loss": 0.5926, "step": 29196 }, { "epoch": 0.8969065831106197, "grad_norm": 0.3557370901107788, "learning_rate": 1.163909628046442e-05, "loss": 0.5908, "step": 29197 }, { "epoch": 0.8969373022455688, "grad_norm": 0.36703574657440186, "learning_rate": 1.1638619537366505e-05, "loss": 0.5586, "step": 29198 }, { "epoch": 0.8969680213805179, "grad_norm": 0.3919374644756317, "learning_rate": 1.1638142790441476e-05, "loss": 0.5825, "step": 29199 }, { "epoch": 0.8969987405154671, "grad_norm": 0.5415679216384888, "learning_rate": 1.1637666039690454e-05, "loss": 0.5821, "step": 29200 }, { "epoch": 0.8970294596504162, "grad_norm": 0.376060426235199, "learning_rate": 1.1637189285114544e-05, "loss": 0.5131, "step": 29201 }, { "epoch": 0.8970601787853654, "grad_norm": 0.3900511860847473, "learning_rate": 1.1636712526714864e-05, "loss": 0.5172, "step": 29202 }, { "epoch": 0.8970908979203146, "grad_norm": 0.4064315855503082, "learning_rate": 1.1636235764492532e-05, "loss": 0.5744, "step": 29203 }, { "epoch": 0.8971216170552637, "grad_norm": 0.38979625701904297, "learning_rate": 1.1635758998448652e-05, "loss": 0.5445, "step": 29204 }, { "epoch": 0.8971523361902128, "grad_norm": 0.3462862968444824, "learning_rate": 1.1635282228584346e-05, "loss": 0.5465, "step": 29205 }, { "epoch": 0.8971830553251621, "grad_norm": 0.42674165964126587, "learning_rate": 1.1634805454900721e-05, "loss": 0.5973, "step": 29206 }, { "epoch": 0.8972137744601112, "grad_norm": 0.3664769232273102, "learning_rate": 1.1634328677398899e-05, "loss": 0.4992, "step": 29207 }, { "epoch": 0.8972444935950603, "grad_norm": 0.3814771771430969, "learning_rate": 1.1633851896079987e-05, "loss": 0.5611, "step": 29208 }, { "epoch": 0.8972752127300095, "grad_norm": 0.33953872323036194, "learning_rate": 1.1633375110945095e-05, "loss": 0.5436, "step": 29209 }, { "epoch": 0.8973059318649587, "grad_norm": 0.38440465927124023, "learning_rate": 1.1632898321995344e-05, "loss": 0.5403, "step": 29210 }, { "epoch": 0.8973366509999079, "grad_norm": 0.4070439040660858, "learning_rate": 1.1632421529231845e-05, "loss": 0.5739, "step": 29211 }, { "epoch": 0.897367370134857, "grad_norm": 0.36272865533828735, "learning_rate": 1.1631944732655713e-05, "loss": 0.6034, "step": 29212 }, { "epoch": 0.8973980892698061, "grad_norm": 0.32232633233070374, "learning_rate": 1.1631467932268058e-05, "loss": 0.53, "step": 29213 }, { "epoch": 0.8974288084047554, "grad_norm": 0.3979099690914154, "learning_rate": 1.1630991128069999e-05, "loss": 0.5706, "step": 29214 }, { "epoch": 0.8974595275397045, "grad_norm": 0.3474920094013214, "learning_rate": 1.1630514320062642e-05, "loss": 0.4987, "step": 29215 }, { "epoch": 0.8974902466746536, "grad_norm": 0.4283681809902191, "learning_rate": 1.1630037508247107e-05, "loss": 0.5495, "step": 29216 }, { "epoch": 0.8975209658096028, "grad_norm": 0.372677743434906, "learning_rate": 1.1629560692624506e-05, "loss": 0.6037, "step": 29217 }, { "epoch": 0.897551684944552, "grad_norm": 0.3212263286113739, "learning_rate": 1.1629083873195952e-05, "loss": 0.5029, "step": 29218 }, { "epoch": 0.8975824040795011, "grad_norm": 0.37483924627304077, "learning_rate": 1.1628607049962557e-05, "loss": 0.5964, "step": 29219 }, { "epoch": 0.8976131232144503, "grad_norm": 0.37280532717704773, "learning_rate": 1.1628130222925438e-05, "loss": 0.6279, "step": 29220 }, { "epoch": 0.8976438423493994, "grad_norm": 0.3316991925239563, "learning_rate": 1.1627653392085707e-05, "loss": 0.5171, "step": 29221 }, { "epoch": 0.8976745614843487, "grad_norm": 0.36691421270370483, "learning_rate": 1.1627176557444477e-05, "loss": 0.5732, "step": 29222 }, { "epoch": 0.8977052806192978, "grad_norm": 0.39681878685951233, "learning_rate": 1.1626699719002866e-05, "loss": 0.599, "step": 29223 }, { "epoch": 0.8977359997542469, "grad_norm": 0.347149521112442, "learning_rate": 1.1626222876761982e-05, "loss": 0.5123, "step": 29224 }, { "epoch": 0.8977667188891961, "grad_norm": 0.375559538602829, "learning_rate": 1.1625746030722939e-05, "loss": 0.5742, "step": 29225 }, { "epoch": 0.8977974380241452, "grad_norm": 0.3767823278903961, "learning_rate": 1.1625269180886856e-05, "loss": 0.5279, "step": 29226 }, { "epoch": 0.8978281571590944, "grad_norm": 0.32713228464126587, "learning_rate": 1.1624792327254839e-05, "loss": 0.5294, "step": 29227 }, { "epoch": 0.8978588762940436, "grad_norm": 0.4051297903060913, "learning_rate": 1.162431546982801e-05, "loss": 0.5243, "step": 29228 }, { "epoch": 0.8978895954289927, "grad_norm": 0.39647752046585083, "learning_rate": 1.1623838608607477e-05, "loss": 0.573, "step": 29229 }, { "epoch": 0.8979203145639418, "grad_norm": 0.3469703197479248, "learning_rate": 1.1623361743594357e-05, "loss": 0.5694, "step": 29230 }, { "epoch": 0.8979510336988911, "grad_norm": 0.3807819187641144, "learning_rate": 1.162288487478976e-05, "loss": 0.52, "step": 29231 }, { "epoch": 0.8979817528338402, "grad_norm": 0.3375532925128937, "learning_rate": 1.1622408002194802e-05, "loss": 0.586, "step": 29232 }, { "epoch": 0.8980124719687893, "grad_norm": 0.3919714391231537, "learning_rate": 1.1621931125810598e-05, "loss": 0.5737, "step": 29233 }, { "epoch": 0.8980431911037385, "grad_norm": 0.35372433066368103, "learning_rate": 1.1621454245638262e-05, "loss": 0.6138, "step": 29234 }, { "epoch": 0.8980739102386877, "grad_norm": 0.3728470504283905, "learning_rate": 1.1620977361678904e-05, "loss": 0.5192, "step": 29235 }, { "epoch": 0.8981046293736369, "grad_norm": 0.33900967240333557, "learning_rate": 1.1620500473933643e-05, "loss": 0.5407, "step": 29236 }, { "epoch": 0.898135348508586, "grad_norm": 0.4055013358592987, "learning_rate": 1.1620023582403585e-05, "loss": 0.5673, "step": 29237 }, { "epoch": 0.8981660676435351, "grad_norm": 0.3752363920211792, "learning_rate": 1.161954668708985e-05, "loss": 0.6075, "step": 29238 }, { "epoch": 0.8981967867784844, "grad_norm": 0.3804185390472412, "learning_rate": 1.1619069787993555e-05, "loss": 0.6395, "step": 29239 }, { "epoch": 0.8982275059134335, "grad_norm": 0.3796903192996979, "learning_rate": 1.1618592885115805e-05, "loss": 0.5482, "step": 29240 }, { "epoch": 0.8982582250483826, "grad_norm": 0.4798710346221924, "learning_rate": 1.161811597845772e-05, "loss": 0.6141, "step": 29241 }, { "epoch": 0.8982889441833318, "grad_norm": 0.3611539900302887, "learning_rate": 1.1617639068020414e-05, "loss": 0.556, "step": 29242 }, { "epoch": 0.898319663318281, "grad_norm": 0.448697030544281, "learning_rate": 1.1617162153804997e-05, "loss": 0.6148, "step": 29243 }, { "epoch": 0.8983503824532301, "grad_norm": 0.4395407438278198, "learning_rate": 1.1616685235812586e-05, "loss": 0.5314, "step": 29244 }, { "epoch": 0.8983811015881793, "grad_norm": 0.31136104464530945, "learning_rate": 1.161620831404429e-05, "loss": 0.4122, "step": 29245 }, { "epoch": 0.8984118207231284, "grad_norm": 0.3255364000797272, "learning_rate": 1.1615731388501232e-05, "loss": 0.5423, "step": 29246 }, { "epoch": 0.8984425398580776, "grad_norm": 0.380889356136322, "learning_rate": 1.1615254459184517e-05, "loss": 0.5368, "step": 29247 }, { "epoch": 0.8984732589930268, "grad_norm": 0.38023850321769714, "learning_rate": 1.1614777526095265e-05, "loss": 0.595, "step": 29248 }, { "epoch": 0.8985039781279759, "grad_norm": 0.33138254284858704, "learning_rate": 1.1614300589234583e-05, "loss": 0.4988, "step": 29249 }, { "epoch": 0.8985346972629251, "grad_norm": 0.39108824729919434, "learning_rate": 1.1613823648603592e-05, "loss": 0.6049, "step": 29250 }, { "epoch": 0.8985654163978742, "grad_norm": 0.38546040654182434, "learning_rate": 1.1613346704203403e-05, "loss": 0.6082, "step": 29251 }, { "epoch": 0.8985961355328234, "grad_norm": 0.35764428973197937, "learning_rate": 1.1612869756035133e-05, "loss": 0.5169, "step": 29252 }, { "epoch": 0.8986268546677726, "grad_norm": 0.3549938499927521, "learning_rate": 1.161239280409989e-05, "loss": 0.5468, "step": 29253 }, { "epoch": 0.8986575738027217, "grad_norm": 0.3706185221672058, "learning_rate": 1.1611915848398789e-05, "loss": 0.4605, "step": 29254 }, { "epoch": 0.8986882929376708, "grad_norm": 0.35539355874061584, "learning_rate": 1.1611438888932949e-05, "loss": 0.5229, "step": 29255 }, { "epoch": 0.8987190120726201, "grad_norm": 0.3874891698360443, "learning_rate": 1.1610961925703476e-05, "loss": 0.6223, "step": 29256 }, { "epoch": 0.8987497312075692, "grad_norm": 0.34200021624565125, "learning_rate": 1.1610484958711495e-05, "loss": 0.5392, "step": 29257 }, { "epoch": 0.8987804503425183, "grad_norm": 0.381277471780777, "learning_rate": 1.1610007987958111e-05, "loss": 0.6429, "step": 29258 }, { "epoch": 0.8988111694774675, "grad_norm": 0.33084505796432495, "learning_rate": 1.1609531013444442e-05, "loss": 0.5137, "step": 29259 }, { "epoch": 0.8988418886124167, "grad_norm": 0.41172894835472107, "learning_rate": 1.1609054035171603e-05, "loss": 0.5603, "step": 29260 }, { "epoch": 0.8988726077473659, "grad_norm": 0.48996976017951965, "learning_rate": 1.1608577053140702e-05, "loss": 0.5469, "step": 29261 }, { "epoch": 0.898903326882315, "grad_norm": 0.35952329635620117, "learning_rate": 1.1608100067352858e-05, "loss": 0.5383, "step": 29262 }, { "epoch": 0.8989340460172641, "grad_norm": 0.3835344910621643, "learning_rate": 1.1607623077809185e-05, "loss": 0.5636, "step": 29263 }, { "epoch": 0.8989647651522134, "grad_norm": 0.37628307938575745, "learning_rate": 1.1607146084510797e-05, "loss": 0.5445, "step": 29264 }, { "epoch": 0.8989954842871625, "grad_norm": 0.34644749760627747, "learning_rate": 1.1606669087458803e-05, "loss": 0.5067, "step": 29265 }, { "epoch": 0.8990262034221116, "grad_norm": 0.45269137620925903, "learning_rate": 1.1606192086654327e-05, "loss": 0.5908, "step": 29266 }, { "epoch": 0.8990569225570608, "grad_norm": 0.37236082553863525, "learning_rate": 1.1605715082098473e-05, "loss": 0.5889, "step": 29267 }, { "epoch": 0.8990876416920099, "grad_norm": 0.3351103365421295, "learning_rate": 1.160523807379236e-05, "loss": 0.5541, "step": 29268 }, { "epoch": 0.8991183608269591, "grad_norm": 0.37064898014068604, "learning_rate": 1.16047610617371e-05, "loss": 0.5345, "step": 29269 }, { "epoch": 0.8991490799619083, "grad_norm": 0.40902402997016907, "learning_rate": 1.1604284045933812e-05, "loss": 0.5672, "step": 29270 }, { "epoch": 0.8991797990968574, "grad_norm": 0.3466871976852417, "learning_rate": 1.1603807026383604e-05, "loss": 0.4799, "step": 29271 }, { "epoch": 0.8992105182318066, "grad_norm": 0.3667127788066864, "learning_rate": 1.1603330003087594e-05, "loss": 0.5662, "step": 29272 }, { "epoch": 0.8992412373667558, "grad_norm": 0.4108659327030182, "learning_rate": 1.1602852976046894e-05, "loss": 0.6086, "step": 29273 }, { "epoch": 0.8992719565017049, "grad_norm": 0.36967742443084717, "learning_rate": 1.1602375945262615e-05, "loss": 0.5463, "step": 29274 }, { "epoch": 0.8993026756366541, "grad_norm": 0.3385744094848633, "learning_rate": 1.1601898910735882e-05, "loss": 0.4863, "step": 29275 }, { "epoch": 0.8993333947716032, "grad_norm": 0.4643601179122925, "learning_rate": 1.16014218724678e-05, "loss": 0.5176, "step": 29276 }, { "epoch": 0.8993641139065524, "grad_norm": 0.3586946427822113, "learning_rate": 1.1600944830459484e-05, "loss": 0.5527, "step": 29277 }, { "epoch": 0.8993948330415016, "grad_norm": 0.35994213819503784, "learning_rate": 1.160046778471205e-05, "loss": 0.5379, "step": 29278 }, { "epoch": 0.8994255521764507, "grad_norm": 0.38903114199638367, "learning_rate": 1.1599990735226613e-05, "loss": 0.5453, "step": 29279 }, { "epoch": 0.8994562713113998, "grad_norm": 0.38624700903892517, "learning_rate": 1.1599513682004283e-05, "loss": 0.5819, "step": 29280 }, { "epoch": 0.899486990446349, "grad_norm": 0.37496599555015564, "learning_rate": 1.1599036625046178e-05, "loss": 0.51, "step": 29281 }, { "epoch": 0.8995177095812982, "grad_norm": 0.4372881054878235, "learning_rate": 1.1598559564353415e-05, "loss": 0.525, "step": 29282 }, { "epoch": 0.8995484287162474, "grad_norm": 0.35726574063301086, "learning_rate": 1.1598082499927101e-05, "loss": 0.513, "step": 29283 }, { "epoch": 0.8995791478511965, "grad_norm": 0.43936729431152344, "learning_rate": 1.1597605431768356e-05, "loss": 0.5349, "step": 29284 }, { "epoch": 0.8996098669861456, "grad_norm": 0.3508240282535553, "learning_rate": 1.159712835987829e-05, "loss": 0.5661, "step": 29285 }, { "epoch": 0.8996405861210949, "grad_norm": 0.3656429052352905, "learning_rate": 1.1596651284258017e-05, "loss": 0.5056, "step": 29286 }, { "epoch": 0.899671305256044, "grad_norm": 0.382525771856308, "learning_rate": 1.1596174204908657e-05, "loss": 0.5746, "step": 29287 }, { "epoch": 0.8997020243909931, "grad_norm": 0.40686172246932983, "learning_rate": 1.159569712183132e-05, "loss": 0.5428, "step": 29288 }, { "epoch": 0.8997327435259423, "grad_norm": 0.36497044563293457, "learning_rate": 1.1595220035027121e-05, "loss": 0.57, "step": 29289 }, { "epoch": 0.8997634626608915, "grad_norm": 0.40388062596321106, "learning_rate": 1.1594742944497175e-05, "loss": 0.5697, "step": 29290 }, { "epoch": 0.8997941817958406, "grad_norm": 0.4128093421459198, "learning_rate": 1.1594265850242595e-05, "loss": 0.5618, "step": 29291 }, { "epoch": 0.8998249009307898, "grad_norm": 0.4392506778240204, "learning_rate": 1.159378875226449e-05, "loss": 0.5107, "step": 29292 }, { "epoch": 0.8998556200657389, "grad_norm": 0.32969772815704346, "learning_rate": 1.1593311650563987e-05, "loss": 0.5358, "step": 29293 }, { "epoch": 0.8998863392006881, "grad_norm": 0.38095366954803467, "learning_rate": 1.1592834545142194e-05, "loss": 0.5399, "step": 29294 }, { "epoch": 0.8999170583356373, "grad_norm": 0.3998109996318817, "learning_rate": 1.159235743600022e-05, "loss": 0.5273, "step": 29295 }, { "epoch": 0.8999477774705864, "grad_norm": 0.39055055379867554, "learning_rate": 1.1591880323139187e-05, "loss": 0.6052, "step": 29296 }, { "epoch": 0.8999784966055356, "grad_norm": 0.3822246491909027, "learning_rate": 1.1591403206560203e-05, "loss": 0.5294, "step": 29297 }, { "epoch": 0.9000092157404848, "grad_norm": 0.3779137432575226, "learning_rate": 1.1590926086264389e-05, "loss": 0.5471, "step": 29298 }, { "epoch": 0.9000399348754339, "grad_norm": 0.3434487581253052, "learning_rate": 1.1590448962252857e-05, "loss": 0.5955, "step": 29299 }, { "epoch": 0.9000706540103831, "grad_norm": 0.4829327166080475, "learning_rate": 1.1589971834526719e-05, "loss": 0.484, "step": 29300 }, { "epoch": 0.9001013731453322, "grad_norm": 0.36475369334220886, "learning_rate": 1.1589494703087087e-05, "loss": 0.531, "step": 29301 }, { "epoch": 0.9001320922802813, "grad_norm": 0.3607562184333801, "learning_rate": 1.1589017567935083e-05, "loss": 0.5158, "step": 29302 }, { "epoch": 0.9001628114152306, "grad_norm": 0.38439294695854187, "learning_rate": 1.1588540429071817e-05, "loss": 0.5305, "step": 29303 }, { "epoch": 0.9001935305501797, "grad_norm": 0.3898003399372101, "learning_rate": 1.1588063286498405e-05, "loss": 0.4943, "step": 29304 }, { "epoch": 0.9002242496851288, "grad_norm": 0.33420833945274353, "learning_rate": 1.1587586140215961e-05, "loss": 0.5181, "step": 29305 }, { "epoch": 0.900254968820078, "grad_norm": 0.3654800355434418, "learning_rate": 1.1587108990225595e-05, "loss": 0.5045, "step": 29306 }, { "epoch": 0.9002856879550272, "grad_norm": 0.3554096221923828, "learning_rate": 1.1586631836528427e-05, "loss": 0.5289, "step": 29307 }, { "epoch": 0.9003164070899764, "grad_norm": 0.350625604391098, "learning_rate": 1.1586154679125569e-05, "loss": 0.567, "step": 29308 }, { "epoch": 0.9003471262249255, "grad_norm": 0.3476911783218384, "learning_rate": 1.1585677518018137e-05, "loss": 0.5907, "step": 29309 }, { "epoch": 0.9003778453598746, "grad_norm": 0.3718331754207611, "learning_rate": 1.158520035320724e-05, "loss": 0.5615, "step": 29310 }, { "epoch": 0.9004085644948239, "grad_norm": 0.3689592182636261, "learning_rate": 1.1584723184694002e-05, "loss": 0.5505, "step": 29311 }, { "epoch": 0.900439283629773, "grad_norm": 0.352777361869812, "learning_rate": 1.1584246012479532e-05, "loss": 0.5831, "step": 29312 }, { "epoch": 0.9004700027647221, "grad_norm": 0.40394285321235657, "learning_rate": 1.1583768836564943e-05, "loss": 0.5759, "step": 29313 }, { "epoch": 0.9005007218996713, "grad_norm": 0.3612789511680603, "learning_rate": 1.1583291656951353e-05, "loss": 0.5473, "step": 29314 }, { "epoch": 0.9005314410346205, "grad_norm": 0.400181382894516, "learning_rate": 1.1582814473639874e-05, "loss": 0.5217, "step": 29315 }, { "epoch": 0.9005621601695696, "grad_norm": 0.4082617461681366, "learning_rate": 1.158233728663162e-05, "loss": 0.5818, "step": 29316 }, { "epoch": 0.9005928793045188, "grad_norm": 0.39978116750717163, "learning_rate": 1.1581860095927708e-05, "loss": 0.4939, "step": 29317 }, { "epoch": 0.9006235984394679, "grad_norm": 0.41021767258644104, "learning_rate": 1.1581382901529253e-05, "loss": 0.5646, "step": 29318 }, { "epoch": 0.900654317574417, "grad_norm": 0.35087695717811584, "learning_rate": 1.1580905703437364e-05, "loss": 0.5645, "step": 29319 }, { "epoch": 0.9006850367093663, "grad_norm": 0.3354513347148895, "learning_rate": 1.1580428501653162e-05, "loss": 0.6336, "step": 29320 }, { "epoch": 0.9007157558443154, "grad_norm": 0.37321850657463074, "learning_rate": 1.1579951296177755e-05, "loss": 0.4816, "step": 29321 }, { "epoch": 0.9007464749792646, "grad_norm": 0.4024014174938202, "learning_rate": 1.1579474087012268e-05, "loss": 0.5548, "step": 29322 }, { "epoch": 0.9007771941142138, "grad_norm": 0.3545786440372467, "learning_rate": 1.1578996874157806e-05, "loss": 0.4985, "step": 29323 }, { "epoch": 0.9008079132491629, "grad_norm": 0.37873250246047974, "learning_rate": 1.1578519657615484e-05, "loss": 0.5625, "step": 29324 }, { "epoch": 0.9008386323841121, "grad_norm": 0.38367074728012085, "learning_rate": 1.1578042437386423e-05, "loss": 0.6365, "step": 29325 }, { "epoch": 0.9008693515190612, "grad_norm": 0.4516834020614624, "learning_rate": 1.157756521347173e-05, "loss": 0.5662, "step": 29326 }, { "epoch": 0.9009000706540103, "grad_norm": 0.3553648293018341, "learning_rate": 1.1577087985872526e-05, "loss": 0.5973, "step": 29327 }, { "epoch": 0.9009307897889596, "grad_norm": 0.39309701323509216, "learning_rate": 1.157661075458992e-05, "loss": 0.5436, "step": 29328 }, { "epoch": 0.9009615089239087, "grad_norm": 0.3667060434818268, "learning_rate": 1.1576133519625031e-05, "loss": 0.5153, "step": 29329 }, { "epoch": 0.9009922280588578, "grad_norm": 0.39454346895217896, "learning_rate": 1.1575656280978973e-05, "loss": 0.5816, "step": 29330 }, { "epoch": 0.901022947193807, "grad_norm": 0.3639608323574066, "learning_rate": 1.1575179038652858e-05, "loss": 0.565, "step": 29331 }, { "epoch": 0.9010536663287562, "grad_norm": 0.3885846734046936, "learning_rate": 1.1574701792647805e-05, "loss": 0.5394, "step": 29332 }, { "epoch": 0.9010843854637054, "grad_norm": 0.35227170586586, "learning_rate": 1.1574224542964922e-05, "loss": 0.5845, "step": 29333 }, { "epoch": 0.9011151045986545, "grad_norm": 0.3654109537601471, "learning_rate": 1.1573747289605333e-05, "loss": 0.4724, "step": 29334 }, { "epoch": 0.9011458237336036, "grad_norm": 0.33935102820396423, "learning_rate": 1.1573270032570142e-05, "loss": 0.5255, "step": 29335 }, { "epoch": 0.9011765428685529, "grad_norm": 0.3269764482975006, "learning_rate": 1.1572792771860473e-05, "loss": 0.5017, "step": 29336 }, { "epoch": 0.901207262003502, "grad_norm": 0.39226067066192627, "learning_rate": 1.1572315507477434e-05, "loss": 0.619, "step": 29337 }, { "epoch": 0.9012379811384511, "grad_norm": 0.4187617301940918, "learning_rate": 1.1571838239422144e-05, "loss": 0.493, "step": 29338 }, { "epoch": 0.9012687002734003, "grad_norm": 0.3932759761810303, "learning_rate": 1.1571360967695716e-05, "loss": 0.5405, "step": 29339 }, { "epoch": 0.9012994194083495, "grad_norm": 0.3902454972267151, "learning_rate": 1.1570883692299264e-05, "loss": 0.5132, "step": 29340 }, { "epoch": 0.9013301385432986, "grad_norm": 0.4655286371707916, "learning_rate": 1.1570406413233904e-05, "loss": 0.5428, "step": 29341 }, { "epoch": 0.9013608576782478, "grad_norm": 0.3448968827724457, "learning_rate": 1.1569929130500748e-05, "loss": 0.5232, "step": 29342 }, { "epoch": 0.9013915768131969, "grad_norm": 0.41710254549980164, "learning_rate": 1.1569451844100917e-05, "loss": 0.6466, "step": 29343 }, { "epoch": 0.901422295948146, "grad_norm": 0.38822928071022034, "learning_rate": 1.1568974554035517e-05, "loss": 0.5213, "step": 29344 }, { "epoch": 0.9014530150830953, "grad_norm": 0.438874751329422, "learning_rate": 1.156849726030567e-05, "loss": 0.6552, "step": 29345 }, { "epoch": 0.9014837342180444, "grad_norm": 0.3584957420825958, "learning_rate": 1.1568019962912486e-05, "loss": 0.5814, "step": 29346 }, { "epoch": 0.9015144533529936, "grad_norm": 0.3470343351364136, "learning_rate": 1.1567542661857083e-05, "loss": 0.5395, "step": 29347 }, { "epoch": 0.9015451724879427, "grad_norm": 0.3551245331764221, "learning_rate": 1.1567065357140577e-05, "loss": 0.4768, "step": 29348 }, { "epoch": 0.9015758916228919, "grad_norm": 0.38639217615127563, "learning_rate": 1.1566588048764076e-05, "loss": 0.5956, "step": 29349 }, { "epoch": 0.9016066107578411, "grad_norm": 0.3693387806415558, "learning_rate": 1.1566110736728703e-05, "loss": 0.5252, "step": 29350 }, { "epoch": 0.9016373298927902, "grad_norm": 0.4033450782299042, "learning_rate": 1.1565633421035567e-05, "loss": 0.4851, "step": 29351 }, { "epoch": 0.9016680490277393, "grad_norm": 0.38036200404167175, "learning_rate": 1.1565156101685786e-05, "loss": 0.5792, "step": 29352 }, { "epoch": 0.9016987681626886, "grad_norm": 0.39378657937049866, "learning_rate": 1.1564678778680471e-05, "loss": 0.4818, "step": 29353 }, { "epoch": 0.9017294872976377, "grad_norm": 0.3506257236003876, "learning_rate": 1.1564201452020745e-05, "loss": 0.5307, "step": 29354 }, { "epoch": 0.9017602064325868, "grad_norm": 0.40408027172088623, "learning_rate": 1.1563724121707711e-05, "loss": 0.5547, "step": 29355 }, { "epoch": 0.901790925567536, "grad_norm": 0.394906222820282, "learning_rate": 1.1563246787742495e-05, "loss": 0.6589, "step": 29356 }, { "epoch": 0.9018216447024852, "grad_norm": 0.3898763060569763, "learning_rate": 1.1562769450126206e-05, "loss": 0.6148, "step": 29357 }, { "epoch": 0.9018523638374344, "grad_norm": 0.4264306128025055, "learning_rate": 1.1562292108859955e-05, "loss": 0.5496, "step": 29358 }, { "epoch": 0.9018830829723835, "grad_norm": 0.3806595206260681, "learning_rate": 1.1561814763944868e-05, "loss": 0.5679, "step": 29359 }, { "epoch": 0.9019138021073326, "grad_norm": 0.3467036783695221, "learning_rate": 1.156133741538205e-05, "loss": 0.5562, "step": 29360 }, { "epoch": 0.9019445212422819, "grad_norm": 0.3892757296562195, "learning_rate": 1.156086006317262e-05, "loss": 0.4643, "step": 29361 }, { "epoch": 0.901975240377231, "grad_norm": 0.3952309787273407, "learning_rate": 1.1560382707317693e-05, "loss": 0.5722, "step": 29362 }, { "epoch": 0.9020059595121801, "grad_norm": 0.37006300687789917, "learning_rate": 1.1559905347818384e-05, "loss": 0.4967, "step": 29363 }, { "epoch": 0.9020366786471293, "grad_norm": 0.3575572371482849, "learning_rate": 1.1559427984675803e-05, "loss": 0.5759, "step": 29364 }, { "epoch": 0.9020673977820785, "grad_norm": 0.41613826155662537, "learning_rate": 1.1558950617891074e-05, "loss": 0.5125, "step": 29365 }, { "epoch": 0.9020981169170276, "grad_norm": 0.3676168620586395, "learning_rate": 1.1558473247465304e-05, "loss": 0.5168, "step": 29366 }, { "epoch": 0.9021288360519768, "grad_norm": 0.3656062185764313, "learning_rate": 1.1557995873399612e-05, "loss": 0.5165, "step": 29367 }, { "epoch": 0.9021595551869259, "grad_norm": 0.3746381402015686, "learning_rate": 1.1557518495695112e-05, "loss": 0.5872, "step": 29368 }, { "epoch": 0.902190274321875, "grad_norm": 0.39362943172454834, "learning_rate": 1.1557041114352917e-05, "loss": 0.5509, "step": 29369 }, { "epoch": 0.9022209934568243, "grad_norm": 0.38215115666389465, "learning_rate": 1.1556563729374146e-05, "loss": 0.565, "step": 29370 }, { "epoch": 0.9022517125917734, "grad_norm": 0.379550963640213, "learning_rate": 1.155608634075991e-05, "loss": 0.5627, "step": 29371 }, { "epoch": 0.9022824317267226, "grad_norm": 0.358399897813797, "learning_rate": 1.1555608948511328e-05, "loss": 0.6198, "step": 29372 }, { "epoch": 0.9023131508616717, "grad_norm": 0.3549087941646576, "learning_rate": 1.1555131552629511e-05, "loss": 0.4856, "step": 29373 }, { "epoch": 0.9023438699966209, "grad_norm": 0.3918706178665161, "learning_rate": 1.1554654153115576e-05, "loss": 0.4817, "step": 29374 }, { "epoch": 0.9023745891315701, "grad_norm": 0.33043569326400757, "learning_rate": 1.1554176749970638e-05, "loss": 0.5108, "step": 29375 }, { "epoch": 0.9024053082665192, "grad_norm": 0.3278942108154297, "learning_rate": 1.155369934319581e-05, "loss": 0.4933, "step": 29376 }, { "epoch": 0.9024360274014683, "grad_norm": 0.3822532296180725, "learning_rate": 1.1553221932792211e-05, "loss": 0.6297, "step": 29377 }, { "epoch": 0.9024667465364176, "grad_norm": 0.37743616104125977, "learning_rate": 1.1552744518760952e-05, "loss": 0.5602, "step": 29378 }, { "epoch": 0.9024974656713667, "grad_norm": 0.3373430073261261, "learning_rate": 1.1552267101103151e-05, "loss": 0.5859, "step": 29379 }, { "epoch": 0.9025281848063158, "grad_norm": 0.37383541464805603, "learning_rate": 1.155178967981992e-05, "loss": 0.5499, "step": 29380 }, { "epoch": 0.902558903941265, "grad_norm": 0.37578848004341125, "learning_rate": 1.1551312254912377e-05, "loss": 0.6285, "step": 29381 }, { "epoch": 0.9025896230762142, "grad_norm": 0.4155479073524475, "learning_rate": 1.1550834826381636e-05, "loss": 0.5131, "step": 29382 }, { "epoch": 0.9026203422111634, "grad_norm": 0.35096386075019836, "learning_rate": 1.1550357394228811e-05, "loss": 0.5567, "step": 29383 }, { "epoch": 0.9026510613461125, "grad_norm": 0.39132460951805115, "learning_rate": 1.1549879958455019e-05, "loss": 0.4434, "step": 29384 }, { "epoch": 0.9026817804810616, "grad_norm": 0.3823644518852234, "learning_rate": 1.1549402519061372e-05, "loss": 0.4919, "step": 29385 }, { "epoch": 0.9027124996160109, "grad_norm": 0.3310815095901489, "learning_rate": 1.1548925076048992e-05, "loss": 0.5008, "step": 29386 }, { "epoch": 0.90274321875096, "grad_norm": 0.4212404787540436, "learning_rate": 1.1548447629418983e-05, "loss": 0.614, "step": 29387 }, { "epoch": 0.9027739378859091, "grad_norm": 0.42461031675338745, "learning_rate": 1.1547970179172473e-05, "loss": 0.541, "step": 29388 }, { "epoch": 0.9028046570208583, "grad_norm": 0.36618563532829285, "learning_rate": 1.1547492725310564e-05, "loss": 0.5486, "step": 29389 }, { "epoch": 0.9028353761558074, "grad_norm": 0.40258151292800903, "learning_rate": 1.1547015267834384e-05, "loss": 0.5282, "step": 29390 }, { "epoch": 0.9028660952907566, "grad_norm": 0.3550490140914917, "learning_rate": 1.1546537806745038e-05, "loss": 0.5591, "step": 29391 }, { "epoch": 0.9028968144257058, "grad_norm": 0.3382057249546051, "learning_rate": 1.1546060342043646e-05, "loss": 0.496, "step": 29392 }, { "epoch": 0.9029275335606549, "grad_norm": 0.3465280830860138, "learning_rate": 1.1545582873731323e-05, "loss": 0.5184, "step": 29393 }, { "epoch": 0.9029582526956041, "grad_norm": 0.4036102294921875, "learning_rate": 1.154510540180918e-05, "loss": 0.5114, "step": 29394 }, { "epoch": 0.9029889718305533, "grad_norm": 0.37061965465545654, "learning_rate": 1.1544627926278338e-05, "loss": 0.5574, "step": 29395 }, { "epoch": 0.9030196909655024, "grad_norm": 0.3728731870651245, "learning_rate": 1.1544150447139908e-05, "loss": 0.5979, "step": 29396 }, { "epoch": 0.9030504101004516, "grad_norm": 0.39472001791000366, "learning_rate": 1.154367296439501e-05, "loss": 0.5334, "step": 29397 }, { "epoch": 0.9030811292354007, "grad_norm": 0.39978158473968506, "learning_rate": 1.1543195478044753e-05, "loss": 0.493, "step": 29398 }, { "epoch": 0.9031118483703499, "grad_norm": 0.3879898488521576, "learning_rate": 1.1542717988090257e-05, "loss": 0.5546, "step": 29399 }, { "epoch": 0.9031425675052991, "grad_norm": 0.37038037180900574, "learning_rate": 1.1542240494532633e-05, "loss": 0.5781, "step": 29400 }, { "epoch": 0.9031732866402482, "grad_norm": 0.41499605774879456, "learning_rate": 1.1541762997373e-05, "loss": 0.4702, "step": 29401 }, { "epoch": 0.9032040057751973, "grad_norm": 0.3580656349658966, "learning_rate": 1.154128549661247e-05, "loss": 0.5115, "step": 29402 }, { "epoch": 0.9032347249101466, "grad_norm": 0.37814682722091675, "learning_rate": 1.1540807992252162e-05, "loss": 0.5288, "step": 29403 }, { "epoch": 0.9032654440450957, "grad_norm": 0.4046918749809265, "learning_rate": 1.1540330484293192e-05, "loss": 0.5384, "step": 29404 }, { "epoch": 0.9032961631800448, "grad_norm": 0.34232351183891296, "learning_rate": 1.1539852972736667e-05, "loss": 0.4688, "step": 29405 }, { "epoch": 0.903326882314994, "grad_norm": 0.36112868785858154, "learning_rate": 1.1539375457583712e-05, "loss": 0.5582, "step": 29406 }, { "epoch": 0.9033576014499431, "grad_norm": 0.3345273733139038, "learning_rate": 1.1538897938835436e-05, "loss": 0.5472, "step": 29407 }, { "epoch": 0.9033883205848924, "grad_norm": 0.3998262882232666, "learning_rate": 1.1538420416492959e-05, "loss": 0.6308, "step": 29408 }, { "epoch": 0.9034190397198415, "grad_norm": 0.4476866126060486, "learning_rate": 1.1537942890557391e-05, "loss": 0.5137, "step": 29409 }, { "epoch": 0.9034497588547906, "grad_norm": 0.40178605914115906, "learning_rate": 1.1537465361029852e-05, "loss": 0.648, "step": 29410 }, { "epoch": 0.9034804779897398, "grad_norm": 0.43135130405426025, "learning_rate": 1.1536987827911454e-05, "loss": 0.4734, "step": 29411 }, { "epoch": 0.903511197124689, "grad_norm": 0.3532111346721649, "learning_rate": 1.1536510291203313e-05, "loss": 0.5873, "step": 29412 }, { "epoch": 0.9035419162596381, "grad_norm": 0.3831571042537689, "learning_rate": 1.1536032750906548e-05, "loss": 0.6216, "step": 29413 }, { "epoch": 0.9035726353945873, "grad_norm": 0.3493700623512268, "learning_rate": 1.1535555207022266e-05, "loss": 0.5015, "step": 29414 }, { "epoch": 0.9036033545295364, "grad_norm": 0.3913020193576813, "learning_rate": 1.1535077659551592e-05, "loss": 0.5461, "step": 29415 }, { "epoch": 0.9036340736644856, "grad_norm": 0.3531235456466675, "learning_rate": 1.1534600108495635e-05, "loss": 0.5063, "step": 29416 }, { "epoch": 0.9036647927994348, "grad_norm": 0.3613587021827698, "learning_rate": 1.1534122553855514e-05, "loss": 0.5578, "step": 29417 }, { "epoch": 0.9036955119343839, "grad_norm": 0.4505279064178467, "learning_rate": 1.1533644995632338e-05, "loss": 0.5048, "step": 29418 }, { "epoch": 0.9037262310693331, "grad_norm": 0.3304678797721863, "learning_rate": 1.1533167433827233e-05, "loss": 0.5756, "step": 29419 }, { "epoch": 0.9037569502042823, "grad_norm": 0.37424954771995544, "learning_rate": 1.1532689868441304e-05, "loss": 0.5363, "step": 29420 }, { "epoch": 0.9037876693392314, "grad_norm": 0.3605819344520569, "learning_rate": 1.1532212299475672e-05, "loss": 0.546, "step": 29421 }, { "epoch": 0.9038183884741806, "grad_norm": 0.6737012267112732, "learning_rate": 1.1531734726931453e-05, "loss": 0.5458, "step": 29422 }, { "epoch": 0.9038491076091297, "grad_norm": 0.3869679272174835, "learning_rate": 1.1531257150809756e-05, "loss": 0.5859, "step": 29423 }, { "epoch": 0.9038798267440789, "grad_norm": 0.3604215085506439, "learning_rate": 1.1530779571111706e-05, "loss": 0.545, "step": 29424 }, { "epoch": 0.9039105458790281, "grad_norm": 0.363965779542923, "learning_rate": 1.153030198783841e-05, "loss": 0.6041, "step": 29425 }, { "epoch": 0.9039412650139772, "grad_norm": 0.38997790217399597, "learning_rate": 1.152982440099099e-05, "loss": 0.5582, "step": 29426 }, { "epoch": 0.9039719841489263, "grad_norm": 0.5612696409225464, "learning_rate": 1.1529346810570558e-05, "loss": 0.4717, "step": 29427 }, { "epoch": 0.9040027032838756, "grad_norm": 0.3752344250679016, "learning_rate": 1.1528869216578226e-05, "loss": 0.5106, "step": 29428 }, { "epoch": 0.9040334224188247, "grad_norm": 0.9133099317550659, "learning_rate": 1.1528391619015118e-05, "loss": 0.4717, "step": 29429 }, { "epoch": 0.9040641415537738, "grad_norm": 0.39173755049705505, "learning_rate": 1.152791401788234e-05, "loss": 0.5681, "step": 29430 }, { "epoch": 0.904094860688723, "grad_norm": 0.44839930534362793, "learning_rate": 1.1527436413181018e-05, "loss": 0.5804, "step": 29431 }, { "epoch": 0.9041255798236721, "grad_norm": 0.3425450026988983, "learning_rate": 1.1526958804912254e-05, "loss": 0.4021, "step": 29432 }, { "epoch": 0.9041562989586214, "grad_norm": 0.4432523250579834, "learning_rate": 1.1526481193077178e-05, "loss": 0.5542, "step": 29433 }, { "epoch": 0.9041870180935705, "grad_norm": 0.3860453963279724, "learning_rate": 1.1526003577676895e-05, "loss": 0.5358, "step": 29434 }, { "epoch": 0.9042177372285196, "grad_norm": 0.3541328012943268, "learning_rate": 1.1525525958712525e-05, "loss": 0.5711, "step": 29435 }, { "epoch": 0.9042484563634688, "grad_norm": 0.4939759373664856, "learning_rate": 1.152504833618518e-05, "loss": 0.5032, "step": 29436 }, { "epoch": 0.904279175498418, "grad_norm": 0.37140196561813354, "learning_rate": 1.1524570710095981e-05, "loss": 0.5434, "step": 29437 }, { "epoch": 0.9043098946333671, "grad_norm": 0.39834144711494446, "learning_rate": 1.1524093080446039e-05, "loss": 0.5636, "step": 29438 }, { "epoch": 0.9043406137683163, "grad_norm": 0.38704395294189453, "learning_rate": 1.1523615447236471e-05, "loss": 0.5612, "step": 29439 }, { "epoch": 0.9043713329032654, "grad_norm": 0.44223952293395996, "learning_rate": 1.1523137810468394e-05, "loss": 0.5445, "step": 29440 }, { "epoch": 0.9044020520382146, "grad_norm": 0.3757840394973755, "learning_rate": 1.1522660170142922e-05, "loss": 0.5421, "step": 29441 }, { "epoch": 0.9044327711731638, "grad_norm": 0.36519864201545715, "learning_rate": 1.1522182526261172e-05, "loss": 0.5757, "step": 29442 }, { "epoch": 0.9044634903081129, "grad_norm": 0.4136393070220947, "learning_rate": 1.1521704878824257e-05, "loss": 0.5631, "step": 29443 }, { "epoch": 0.9044942094430621, "grad_norm": 0.3771471679210663, "learning_rate": 1.1521227227833294e-05, "loss": 0.4907, "step": 29444 }, { "epoch": 0.9045249285780113, "grad_norm": 0.3471463620662689, "learning_rate": 1.15207495732894e-05, "loss": 0.5038, "step": 29445 }, { "epoch": 0.9045556477129604, "grad_norm": 0.34786853194236755, "learning_rate": 1.1520271915193686e-05, "loss": 0.5156, "step": 29446 }, { "epoch": 0.9045863668479096, "grad_norm": 0.3503272831439972, "learning_rate": 1.1519794253547274e-05, "loss": 0.562, "step": 29447 }, { "epoch": 0.9046170859828587, "grad_norm": 0.41893458366394043, "learning_rate": 1.1519316588351274e-05, "loss": 0.575, "step": 29448 }, { "epoch": 0.9046478051178078, "grad_norm": 0.3731043040752411, "learning_rate": 1.1518838919606808e-05, "loss": 0.5737, "step": 29449 }, { "epoch": 0.9046785242527571, "grad_norm": 0.37803924083709717, "learning_rate": 1.1518361247314982e-05, "loss": 0.6137, "step": 29450 }, { "epoch": 0.9047092433877062, "grad_norm": 0.3577786087989807, "learning_rate": 1.1517883571476923e-05, "loss": 0.5069, "step": 29451 }, { "epoch": 0.9047399625226553, "grad_norm": 0.3638598620891571, "learning_rate": 1.1517405892093736e-05, "loss": 0.5991, "step": 29452 }, { "epoch": 0.9047706816576045, "grad_norm": 0.3807497024536133, "learning_rate": 1.1516928209166544e-05, "loss": 0.544, "step": 29453 }, { "epoch": 0.9048014007925537, "grad_norm": 0.37895667552948, "learning_rate": 1.1516450522696458e-05, "loss": 0.4971, "step": 29454 }, { "epoch": 0.9048321199275028, "grad_norm": 0.3638170659542084, "learning_rate": 1.1515972832684598e-05, "loss": 0.5596, "step": 29455 }, { "epoch": 0.904862839062452, "grad_norm": 0.42452988028526306, "learning_rate": 1.1515495139132078e-05, "loss": 0.5141, "step": 29456 }, { "epoch": 0.9048935581974011, "grad_norm": 0.3569198250770569, "learning_rate": 1.151501744204001e-05, "loss": 0.4981, "step": 29457 }, { "epoch": 0.9049242773323504, "grad_norm": 0.3679662048816681, "learning_rate": 1.1514539741409516e-05, "loss": 0.4425, "step": 29458 }, { "epoch": 0.9049549964672995, "grad_norm": 0.41320639848709106, "learning_rate": 1.151406203724171e-05, "loss": 0.6383, "step": 29459 }, { "epoch": 0.9049857156022486, "grad_norm": 0.5240267515182495, "learning_rate": 1.1513584329537703e-05, "loss": 0.5207, "step": 29460 }, { "epoch": 0.9050164347371978, "grad_norm": 0.3486120402812958, "learning_rate": 1.1513106618298618e-05, "loss": 0.5762, "step": 29461 }, { "epoch": 0.905047153872147, "grad_norm": 0.41291341185569763, "learning_rate": 1.1512628903525563e-05, "loss": 0.5896, "step": 29462 }, { "epoch": 0.9050778730070961, "grad_norm": 0.35106563568115234, "learning_rate": 1.151215118521966e-05, "loss": 0.5985, "step": 29463 }, { "epoch": 0.9051085921420453, "grad_norm": 0.3914472162723541, "learning_rate": 1.1511673463382018e-05, "loss": 0.5216, "step": 29464 }, { "epoch": 0.9051393112769944, "grad_norm": 0.4021047353744507, "learning_rate": 1.1511195738013763e-05, "loss": 0.5353, "step": 29465 }, { "epoch": 0.9051700304119435, "grad_norm": 0.5029441714286804, "learning_rate": 1.1510718009116e-05, "loss": 0.4772, "step": 29466 }, { "epoch": 0.9052007495468928, "grad_norm": 0.35925212502479553, "learning_rate": 1.1510240276689853e-05, "loss": 0.5767, "step": 29467 }, { "epoch": 0.9052314686818419, "grad_norm": 0.41069847345352173, "learning_rate": 1.150976254073643e-05, "loss": 0.54, "step": 29468 }, { "epoch": 0.9052621878167911, "grad_norm": 0.33626899123191833, "learning_rate": 1.1509284801256857e-05, "loss": 0.6292, "step": 29469 }, { "epoch": 0.9052929069517403, "grad_norm": 0.3639281690120697, "learning_rate": 1.1508807058252241e-05, "loss": 0.5584, "step": 29470 }, { "epoch": 0.9053236260866894, "grad_norm": 0.3964698016643524, "learning_rate": 1.15083293117237e-05, "loss": 0.6177, "step": 29471 }, { "epoch": 0.9053543452216386, "grad_norm": 0.45813262462615967, "learning_rate": 1.1507851561672355e-05, "loss": 0.5604, "step": 29472 }, { "epoch": 0.9053850643565877, "grad_norm": 0.38048255443573, "learning_rate": 1.1507373808099312e-05, "loss": 0.5549, "step": 29473 }, { "epoch": 0.9054157834915368, "grad_norm": 0.5198050141334534, "learning_rate": 1.1506896051005694e-05, "loss": 0.5112, "step": 29474 }, { "epoch": 0.9054465026264861, "grad_norm": 0.3665630519390106, "learning_rate": 1.1506418290392611e-05, "loss": 0.5147, "step": 29475 }, { "epoch": 0.9054772217614352, "grad_norm": 0.3384034335613251, "learning_rate": 1.150594052626119e-05, "loss": 0.5248, "step": 29476 }, { "epoch": 0.9055079408963843, "grad_norm": 0.36701327562332153, "learning_rate": 1.1505462758612533e-05, "loss": 0.5285, "step": 29477 }, { "epoch": 0.9055386600313335, "grad_norm": 0.44184914231300354, "learning_rate": 1.150498498744777e-05, "loss": 0.5466, "step": 29478 }, { "epoch": 0.9055693791662827, "grad_norm": 0.3685563802719116, "learning_rate": 1.1504507212768006e-05, "loss": 0.5824, "step": 29479 }, { "epoch": 0.9056000983012319, "grad_norm": 0.37756574153900146, "learning_rate": 1.1504029434574358e-05, "loss": 0.6342, "step": 29480 }, { "epoch": 0.905630817436181, "grad_norm": 0.3540154695510864, "learning_rate": 1.1503551652867948e-05, "loss": 0.6215, "step": 29481 }, { "epoch": 0.9056615365711301, "grad_norm": 0.3383459746837616, "learning_rate": 1.1503073867649886e-05, "loss": 0.5218, "step": 29482 }, { "epoch": 0.9056922557060794, "grad_norm": 0.38150137662887573, "learning_rate": 1.1502596078921292e-05, "loss": 0.4969, "step": 29483 }, { "epoch": 0.9057229748410285, "grad_norm": 0.34056535363197327, "learning_rate": 1.1502118286683275e-05, "loss": 0.5385, "step": 29484 }, { "epoch": 0.9057536939759776, "grad_norm": 0.36817821860313416, "learning_rate": 1.150164049093696e-05, "loss": 0.5344, "step": 29485 }, { "epoch": 0.9057844131109268, "grad_norm": 0.36714646220207214, "learning_rate": 1.1501162691683456e-05, "loss": 0.5563, "step": 29486 }, { "epoch": 0.905815132245876, "grad_norm": 0.34227830171585083, "learning_rate": 1.1500684888923886e-05, "loss": 0.536, "step": 29487 }, { "epoch": 0.9058458513808251, "grad_norm": 0.383563756942749, "learning_rate": 1.1500207082659358e-05, "loss": 0.5802, "step": 29488 }, { "epoch": 0.9058765705157743, "grad_norm": 0.3417356312274933, "learning_rate": 1.1499729272890994e-05, "loss": 0.4278, "step": 29489 }, { "epoch": 0.9059072896507234, "grad_norm": 0.3877358138561249, "learning_rate": 1.1499251459619907e-05, "loss": 0.5389, "step": 29490 }, { "epoch": 0.9059380087856725, "grad_norm": 0.39652976393699646, "learning_rate": 1.1498773642847212e-05, "loss": 0.5731, "step": 29491 }, { "epoch": 0.9059687279206218, "grad_norm": 0.6378154754638672, "learning_rate": 1.1498295822574028e-05, "loss": 0.5213, "step": 29492 }, { "epoch": 0.9059994470555709, "grad_norm": 0.3806939721107483, "learning_rate": 1.1497817998801467e-05, "loss": 0.5884, "step": 29493 }, { "epoch": 0.9060301661905201, "grad_norm": 0.4073050320148468, "learning_rate": 1.1497340171530649e-05, "loss": 0.5494, "step": 29494 }, { "epoch": 0.9060608853254692, "grad_norm": 0.45103198289871216, "learning_rate": 1.1496862340762687e-05, "loss": 0.5744, "step": 29495 }, { "epoch": 0.9060916044604184, "grad_norm": 0.37117940187454224, "learning_rate": 1.1496384506498701e-05, "loss": 0.5546, "step": 29496 }, { "epoch": 0.9061223235953676, "grad_norm": 0.37641575932502747, "learning_rate": 1.1495906668739804e-05, "loss": 0.5062, "step": 29497 }, { "epoch": 0.9061530427303167, "grad_norm": 0.37278982996940613, "learning_rate": 1.1495428827487111e-05, "loss": 0.515, "step": 29498 }, { "epoch": 0.9061837618652658, "grad_norm": 0.8541435599327087, "learning_rate": 1.1494950982741741e-05, "loss": 0.5457, "step": 29499 }, { "epoch": 0.9062144810002151, "grad_norm": 0.39259347319602966, "learning_rate": 1.149447313450481e-05, "loss": 0.5847, "step": 29500 }, { "epoch": 0.9062452001351642, "grad_norm": 0.3490196764469147, "learning_rate": 1.149399528277743e-05, "loss": 0.5265, "step": 29501 }, { "epoch": 0.9062759192701133, "grad_norm": 0.3564862906932831, "learning_rate": 1.1493517427560721e-05, "loss": 0.5047, "step": 29502 }, { "epoch": 0.9063066384050625, "grad_norm": 0.3381771147251129, "learning_rate": 1.1493039568855797e-05, "loss": 0.5618, "step": 29503 }, { "epoch": 0.9063373575400117, "grad_norm": 0.4198782742023468, "learning_rate": 1.1492561706663774e-05, "loss": 0.5167, "step": 29504 }, { "epoch": 0.9063680766749609, "grad_norm": 0.37168067693710327, "learning_rate": 1.149208384098577e-05, "loss": 0.5311, "step": 29505 }, { "epoch": 0.90639879580991, "grad_norm": 0.381930410861969, "learning_rate": 1.1491605971822898e-05, "loss": 0.4738, "step": 29506 }, { "epoch": 0.9064295149448591, "grad_norm": 0.41776034235954285, "learning_rate": 1.1491128099176281e-05, "loss": 0.6012, "step": 29507 }, { "epoch": 0.9064602340798084, "grad_norm": 0.39596453309059143, "learning_rate": 1.149065022304703e-05, "loss": 0.5461, "step": 29508 }, { "epoch": 0.9064909532147575, "grad_norm": 0.3479979932308197, "learning_rate": 1.1490172343436257e-05, "loss": 0.3906, "step": 29509 }, { "epoch": 0.9065216723497066, "grad_norm": 0.3631843030452728, "learning_rate": 1.1489694460345085e-05, "loss": 0.6432, "step": 29510 }, { "epoch": 0.9065523914846558, "grad_norm": 0.3631741404533386, "learning_rate": 1.1489216573774622e-05, "loss": 0.5822, "step": 29511 }, { "epoch": 0.906583110619605, "grad_norm": 0.40315213799476624, "learning_rate": 1.1488738683725996e-05, "loss": 0.5535, "step": 29512 }, { "epoch": 0.9066138297545541, "grad_norm": 0.48762133717536926, "learning_rate": 1.1488260790200315e-05, "loss": 0.6171, "step": 29513 }, { "epoch": 0.9066445488895033, "grad_norm": 0.42865586280822754, "learning_rate": 1.1487782893198695e-05, "loss": 0.507, "step": 29514 }, { "epoch": 0.9066752680244524, "grad_norm": 0.35933801531791687, "learning_rate": 1.1487304992722259e-05, "loss": 0.5183, "step": 29515 }, { "epoch": 0.9067059871594015, "grad_norm": 0.443646639585495, "learning_rate": 1.1486827088772113e-05, "loss": 0.5339, "step": 29516 }, { "epoch": 0.9067367062943508, "grad_norm": 0.39196091890335083, "learning_rate": 1.1486349181349382e-05, "loss": 0.483, "step": 29517 }, { "epoch": 0.9067674254292999, "grad_norm": 1.86117422580719, "learning_rate": 1.1485871270455177e-05, "loss": 0.5631, "step": 29518 }, { "epoch": 0.9067981445642491, "grad_norm": 0.5529735088348389, "learning_rate": 1.1485393356090618e-05, "loss": 0.6066, "step": 29519 }, { "epoch": 0.9068288636991982, "grad_norm": 0.3888844847679138, "learning_rate": 1.1484915438256816e-05, "loss": 0.5468, "step": 29520 }, { "epoch": 0.9068595828341474, "grad_norm": 0.4145415127277374, "learning_rate": 1.1484437516954893e-05, "loss": 0.5517, "step": 29521 }, { "epoch": 0.9068903019690966, "grad_norm": 0.3572744131088257, "learning_rate": 1.148395959218596e-05, "loss": 0.521, "step": 29522 }, { "epoch": 0.9069210211040457, "grad_norm": 0.4214564859867096, "learning_rate": 1.1483481663951137e-05, "loss": 0.5702, "step": 29523 }, { "epoch": 0.9069517402389948, "grad_norm": 0.41523849964141846, "learning_rate": 1.1483003732251541e-05, "loss": 0.5074, "step": 29524 }, { "epoch": 0.9069824593739441, "grad_norm": 0.4014154374599457, "learning_rate": 1.1482525797088284e-05, "loss": 0.5913, "step": 29525 }, { "epoch": 0.9070131785088932, "grad_norm": 0.3834599256515503, "learning_rate": 1.1482047858462484e-05, "loss": 0.5363, "step": 29526 }, { "epoch": 0.9070438976438423, "grad_norm": 0.37780606746673584, "learning_rate": 1.148156991637526e-05, "loss": 0.5049, "step": 29527 }, { "epoch": 0.9070746167787915, "grad_norm": 0.4346534013748169, "learning_rate": 1.1481091970827725e-05, "loss": 0.5823, "step": 29528 }, { "epoch": 0.9071053359137407, "grad_norm": 0.36096107959747314, "learning_rate": 1.148061402182099e-05, "loss": 0.5793, "step": 29529 }, { "epoch": 0.9071360550486899, "grad_norm": 0.36910533905029297, "learning_rate": 1.1480136069356185e-05, "loss": 0.5961, "step": 29530 }, { "epoch": 0.907166774183639, "grad_norm": 0.3781280517578125, "learning_rate": 1.147965811343442e-05, "loss": 0.5514, "step": 29531 }, { "epoch": 0.9071974933185881, "grad_norm": 0.4799307882785797, "learning_rate": 1.1479180154056806e-05, "loss": 0.5142, "step": 29532 }, { "epoch": 0.9072282124535374, "grad_norm": 0.37554270029067993, "learning_rate": 1.1478702191224465e-05, "loss": 0.4771, "step": 29533 }, { "epoch": 0.9072589315884865, "grad_norm": 0.36259493231773376, "learning_rate": 1.147822422493851e-05, "loss": 0.5648, "step": 29534 }, { "epoch": 0.9072896507234356, "grad_norm": 0.3428749144077301, "learning_rate": 1.1477746255200062e-05, "loss": 0.4366, "step": 29535 }, { "epoch": 0.9073203698583848, "grad_norm": 0.36835846304893494, "learning_rate": 1.1477268282010231e-05, "loss": 0.569, "step": 29536 }, { "epoch": 0.9073510889933339, "grad_norm": 0.3937818109989166, "learning_rate": 1.147679030537014e-05, "loss": 0.4383, "step": 29537 }, { "epoch": 0.9073818081282831, "grad_norm": 0.37227651476860046, "learning_rate": 1.1476312325280901e-05, "loss": 0.6195, "step": 29538 }, { "epoch": 0.9074125272632323, "grad_norm": 0.3720909059047699, "learning_rate": 1.147583434174363e-05, "loss": 0.5492, "step": 29539 }, { "epoch": 0.9074432463981814, "grad_norm": 0.3508339822292328, "learning_rate": 1.1475356354759446e-05, "loss": 0.5459, "step": 29540 }, { "epoch": 0.9074739655331305, "grad_norm": 0.33086341619491577, "learning_rate": 1.1474878364329464e-05, "loss": 0.5566, "step": 29541 }, { "epoch": 0.9075046846680798, "grad_norm": 0.329486221075058, "learning_rate": 1.1474400370454804e-05, "loss": 0.5409, "step": 29542 }, { "epoch": 0.9075354038030289, "grad_norm": 0.36579620838165283, "learning_rate": 1.1473922373136574e-05, "loss": 0.561, "step": 29543 }, { "epoch": 0.9075661229379781, "grad_norm": 0.3904598355293274, "learning_rate": 1.1473444372375899e-05, "loss": 0.5873, "step": 29544 }, { "epoch": 0.9075968420729272, "grad_norm": 0.38605740666389465, "learning_rate": 1.1472966368173887e-05, "loss": 0.5831, "step": 29545 }, { "epoch": 0.9076275612078764, "grad_norm": 0.39480075240135193, "learning_rate": 1.1472488360531664e-05, "loss": 0.5488, "step": 29546 }, { "epoch": 0.9076582803428256, "grad_norm": 0.3931356370449066, "learning_rate": 1.147201034945034e-05, "loss": 0.4858, "step": 29547 }, { "epoch": 0.9076889994777747, "grad_norm": 0.3832562565803528, "learning_rate": 1.147153233493103e-05, "loss": 0.5233, "step": 29548 }, { "epoch": 0.9077197186127238, "grad_norm": 0.3783801794052124, "learning_rate": 1.1471054316974857e-05, "loss": 0.6053, "step": 29549 }, { "epoch": 0.9077504377476731, "grad_norm": 0.41295722126960754, "learning_rate": 1.1470576295582934e-05, "loss": 0.4785, "step": 29550 }, { "epoch": 0.9077811568826222, "grad_norm": 0.3531751036643982, "learning_rate": 1.1470098270756375e-05, "loss": 0.5391, "step": 29551 }, { "epoch": 0.9078118760175713, "grad_norm": 0.3744320273399353, "learning_rate": 1.14696202424963e-05, "loss": 0.6503, "step": 29552 }, { "epoch": 0.9078425951525205, "grad_norm": 0.35439440608024597, "learning_rate": 1.1469142210803825e-05, "loss": 0.5061, "step": 29553 }, { "epoch": 0.9078733142874696, "grad_norm": 0.47770047187805176, "learning_rate": 1.1468664175680067e-05, "loss": 0.5493, "step": 29554 }, { "epoch": 0.9079040334224189, "grad_norm": 0.38669562339782715, "learning_rate": 1.1468186137126138e-05, "loss": 0.5443, "step": 29555 }, { "epoch": 0.907934752557368, "grad_norm": 0.36570292711257935, "learning_rate": 1.146770809514316e-05, "loss": 0.5182, "step": 29556 }, { "epoch": 0.9079654716923171, "grad_norm": 0.34998103976249695, "learning_rate": 1.1467230049732248e-05, "loss": 0.5052, "step": 29557 }, { "epoch": 0.9079961908272663, "grad_norm": 0.3780595660209656, "learning_rate": 1.1466752000894515e-05, "loss": 0.5483, "step": 29558 }, { "epoch": 0.9080269099622155, "grad_norm": 0.3355556130409241, "learning_rate": 1.1466273948631082e-05, "loss": 0.489, "step": 29559 }, { "epoch": 0.9080576290971646, "grad_norm": 0.35726282000541687, "learning_rate": 1.1465795892943064e-05, "loss": 0.5221, "step": 29560 }, { "epoch": 0.9080883482321138, "grad_norm": 0.36109283566474915, "learning_rate": 1.1465317833831575e-05, "loss": 0.5124, "step": 29561 }, { "epoch": 0.9081190673670629, "grad_norm": 0.3594791293144226, "learning_rate": 1.1464839771297738e-05, "loss": 0.5449, "step": 29562 }, { "epoch": 0.9081497865020121, "grad_norm": 0.3460688591003418, "learning_rate": 1.1464361705342661e-05, "loss": 0.5779, "step": 29563 }, { "epoch": 0.9081805056369613, "grad_norm": 0.9237785339355469, "learning_rate": 1.1463883635967467e-05, "loss": 0.574, "step": 29564 }, { "epoch": 0.9082112247719104, "grad_norm": 0.47423872351646423, "learning_rate": 1.1463405563173272e-05, "loss": 0.4919, "step": 29565 }, { "epoch": 0.9082419439068595, "grad_norm": 0.38879773020744324, "learning_rate": 1.1462927486961187e-05, "loss": 0.5354, "step": 29566 }, { "epoch": 0.9082726630418088, "grad_norm": 0.38508936762809753, "learning_rate": 1.1462449407332337e-05, "loss": 0.6059, "step": 29567 }, { "epoch": 0.9083033821767579, "grad_norm": 0.3460991084575653, "learning_rate": 1.1461971324287831e-05, "loss": 0.4906, "step": 29568 }, { "epoch": 0.9083341013117071, "grad_norm": 0.42589500546455383, "learning_rate": 1.146149323782879e-05, "loss": 0.5616, "step": 29569 }, { "epoch": 0.9083648204466562, "grad_norm": 0.3404439091682434, "learning_rate": 1.1461015147956329e-05, "loss": 0.6024, "step": 29570 }, { "epoch": 0.9083955395816053, "grad_norm": 0.37429478764533997, "learning_rate": 1.1460537054671568e-05, "loss": 0.5659, "step": 29571 }, { "epoch": 0.9084262587165546, "grad_norm": 0.3752211928367615, "learning_rate": 1.1460058957975617e-05, "loss": 0.551, "step": 29572 }, { "epoch": 0.9084569778515037, "grad_norm": 0.3669992983341217, "learning_rate": 1.1459580857869599e-05, "loss": 0.4868, "step": 29573 }, { "epoch": 0.9084876969864528, "grad_norm": 0.3541378974914551, "learning_rate": 1.1459102754354624e-05, "loss": 0.542, "step": 29574 }, { "epoch": 0.908518416121402, "grad_norm": 0.3529665470123291, "learning_rate": 1.145862464743182e-05, "loss": 0.5313, "step": 29575 }, { "epoch": 0.9085491352563512, "grad_norm": 0.3300984501838684, "learning_rate": 1.145814653710229e-05, "loss": 0.4686, "step": 29576 }, { "epoch": 0.9085798543913003, "grad_norm": 0.35162582993507385, "learning_rate": 1.1457668423367158e-05, "loss": 0.533, "step": 29577 }, { "epoch": 0.9086105735262495, "grad_norm": 0.36670151352882385, "learning_rate": 1.1457190306227544e-05, "loss": 0.5328, "step": 29578 }, { "epoch": 0.9086412926611986, "grad_norm": 0.3923032581806183, "learning_rate": 1.1456712185684555e-05, "loss": 0.5744, "step": 29579 }, { "epoch": 0.9086720117961479, "grad_norm": 0.36146894097328186, "learning_rate": 1.1456234061739315e-05, "loss": 0.559, "step": 29580 }, { "epoch": 0.908702730931097, "grad_norm": 0.3578030467033386, "learning_rate": 1.1455755934392939e-05, "loss": 0.5277, "step": 29581 }, { "epoch": 0.9087334500660461, "grad_norm": 0.4160575270652771, "learning_rate": 1.1455277803646543e-05, "loss": 0.5024, "step": 29582 }, { "epoch": 0.9087641692009953, "grad_norm": 0.34450289607048035, "learning_rate": 1.1454799669501241e-05, "loss": 0.5811, "step": 29583 }, { "epoch": 0.9087948883359445, "grad_norm": 0.40066733956336975, "learning_rate": 1.1454321531958155e-05, "loss": 0.5567, "step": 29584 }, { "epoch": 0.9088256074708936, "grad_norm": 0.3794383704662323, "learning_rate": 1.1453843391018399e-05, "loss": 0.5647, "step": 29585 }, { "epoch": 0.9088563266058428, "grad_norm": 0.3834681510925293, "learning_rate": 1.1453365246683093e-05, "loss": 0.5756, "step": 29586 }, { "epoch": 0.9088870457407919, "grad_norm": 0.3843802809715271, "learning_rate": 1.145288709895335e-05, "loss": 0.4667, "step": 29587 }, { "epoch": 0.908917764875741, "grad_norm": 0.3940294086933136, "learning_rate": 1.1452408947830284e-05, "loss": 0.5043, "step": 29588 }, { "epoch": 0.9089484840106903, "grad_norm": 0.362973153591156, "learning_rate": 1.145193079331502e-05, "loss": 0.6528, "step": 29589 }, { "epoch": 0.9089792031456394, "grad_norm": 0.37068894505500793, "learning_rate": 1.1451452635408666e-05, "loss": 0.5864, "step": 29590 }, { "epoch": 0.9090099222805886, "grad_norm": 0.3772093653678894, "learning_rate": 1.1450974474112347e-05, "loss": 0.566, "step": 29591 }, { "epoch": 0.9090406414155378, "grad_norm": 0.3309723436832428, "learning_rate": 1.1450496309427176e-05, "loss": 0.4668, "step": 29592 }, { "epoch": 0.9090713605504869, "grad_norm": 0.34717467427253723, "learning_rate": 1.1450018141354267e-05, "loss": 0.5135, "step": 29593 }, { "epoch": 0.9091020796854361, "grad_norm": 0.3379620909690857, "learning_rate": 1.144953996989474e-05, "loss": 0.5358, "step": 29594 }, { "epoch": 0.9091327988203852, "grad_norm": 0.3599550426006317, "learning_rate": 1.1449061795049712e-05, "loss": 0.5284, "step": 29595 }, { "epoch": 0.9091635179553343, "grad_norm": 0.3576062321662903, "learning_rate": 1.14485836168203e-05, "loss": 0.5115, "step": 29596 }, { "epoch": 0.9091942370902836, "grad_norm": 0.38906949758529663, "learning_rate": 1.1448105435207615e-05, "loss": 0.5387, "step": 29597 }, { "epoch": 0.9092249562252327, "grad_norm": 0.3647676110267639, "learning_rate": 1.1447627250212783e-05, "loss": 0.5511, "step": 29598 }, { "epoch": 0.9092556753601818, "grad_norm": 0.4268209636211395, "learning_rate": 1.1447149061836915e-05, "loss": 0.5922, "step": 29599 }, { "epoch": 0.909286394495131, "grad_norm": 0.34292328357696533, "learning_rate": 1.1446670870081128e-05, "loss": 0.5923, "step": 29600 }, { "epoch": 0.9093171136300802, "grad_norm": 0.34966444969177246, "learning_rate": 1.1446192674946544e-05, "loss": 0.5001, "step": 29601 }, { "epoch": 0.9093478327650293, "grad_norm": 0.3914419114589691, "learning_rate": 1.144571447643427e-05, "loss": 0.5117, "step": 29602 }, { "epoch": 0.9093785518999785, "grad_norm": 0.3711423873901367, "learning_rate": 1.1445236274545435e-05, "loss": 0.4769, "step": 29603 }, { "epoch": 0.9094092710349276, "grad_norm": 0.37245336174964905, "learning_rate": 1.1444758069281145e-05, "loss": 0.5018, "step": 29604 }, { "epoch": 0.9094399901698769, "grad_norm": 0.3533836603164673, "learning_rate": 1.1444279860642526e-05, "loss": 0.5077, "step": 29605 }, { "epoch": 0.909470709304826, "grad_norm": 0.3817453384399414, "learning_rate": 1.1443801648630689e-05, "loss": 0.5909, "step": 29606 }, { "epoch": 0.9095014284397751, "grad_norm": 0.3963542580604553, "learning_rate": 1.1443323433246752e-05, "loss": 0.5982, "step": 29607 }, { "epoch": 0.9095321475747243, "grad_norm": 0.36137089133262634, "learning_rate": 1.144284521449183e-05, "loss": 0.5205, "step": 29608 }, { "epoch": 0.9095628667096735, "grad_norm": 0.34880515933036804, "learning_rate": 1.1442366992367046e-05, "loss": 0.5206, "step": 29609 }, { "epoch": 0.9095935858446226, "grad_norm": 0.39471152424812317, "learning_rate": 1.144188876687351e-05, "loss": 0.5713, "step": 29610 }, { "epoch": 0.9096243049795718, "grad_norm": 0.37558987736701965, "learning_rate": 1.1441410538012346e-05, "loss": 0.6332, "step": 29611 }, { "epoch": 0.9096550241145209, "grad_norm": 0.3571426570415497, "learning_rate": 1.1440932305784664e-05, "loss": 0.5003, "step": 29612 }, { "epoch": 0.90968574324947, "grad_norm": 0.3900526165962219, "learning_rate": 1.1440454070191585e-05, "loss": 0.5028, "step": 29613 }, { "epoch": 0.9097164623844193, "grad_norm": 0.4014030992984772, "learning_rate": 1.1439975831234225e-05, "loss": 0.5537, "step": 29614 }, { "epoch": 0.9097471815193684, "grad_norm": 0.37573039531707764, "learning_rate": 1.14394975889137e-05, "loss": 0.5554, "step": 29615 }, { "epoch": 0.9097779006543176, "grad_norm": 0.4053131639957428, "learning_rate": 1.1439019343231129e-05, "loss": 0.5562, "step": 29616 }, { "epoch": 0.9098086197892667, "grad_norm": 0.3360171318054199, "learning_rate": 1.1438541094187625e-05, "loss": 0.5374, "step": 29617 }, { "epoch": 0.9098393389242159, "grad_norm": 0.3634485602378845, "learning_rate": 1.1438062841784312e-05, "loss": 0.5466, "step": 29618 }, { "epoch": 0.9098700580591651, "grad_norm": 0.4060783088207245, "learning_rate": 1.14375845860223e-05, "loss": 0.546, "step": 29619 }, { "epoch": 0.9099007771941142, "grad_norm": 0.39347100257873535, "learning_rate": 1.1437106326902709e-05, "loss": 0.5946, "step": 29620 }, { "epoch": 0.9099314963290633, "grad_norm": 0.34627243876457214, "learning_rate": 1.1436628064426656e-05, "loss": 0.5123, "step": 29621 }, { "epoch": 0.9099622154640126, "grad_norm": 0.3908993601799011, "learning_rate": 1.1436149798595258e-05, "loss": 0.5737, "step": 29622 }, { "epoch": 0.9099929345989617, "grad_norm": 0.34931737184524536, "learning_rate": 1.1435671529409631e-05, "loss": 0.5821, "step": 29623 }, { "epoch": 0.9100236537339108, "grad_norm": 0.8863815665245056, "learning_rate": 1.1435193256870894e-05, "loss": 0.6715, "step": 29624 }, { "epoch": 0.91005437286886, "grad_norm": 0.34750527143478394, "learning_rate": 1.1434714980980163e-05, "loss": 0.52, "step": 29625 }, { "epoch": 0.9100850920038092, "grad_norm": 0.46106791496276855, "learning_rate": 1.1434236701738555e-05, "loss": 0.5807, "step": 29626 }, { "epoch": 0.9101158111387583, "grad_norm": 0.393120676279068, "learning_rate": 1.1433758419147187e-05, "loss": 0.5217, "step": 29627 }, { "epoch": 0.9101465302737075, "grad_norm": 0.3595203757286072, "learning_rate": 1.1433280133207174e-05, "loss": 0.5267, "step": 29628 }, { "epoch": 0.9101772494086566, "grad_norm": 0.36414211988449097, "learning_rate": 1.1432801843919638e-05, "loss": 0.5443, "step": 29629 }, { "epoch": 0.9102079685436059, "grad_norm": 0.3592149019241333, "learning_rate": 1.1432323551285693e-05, "loss": 0.5416, "step": 29630 }, { "epoch": 0.910238687678555, "grad_norm": 0.37364768981933594, "learning_rate": 1.1431845255306455e-05, "loss": 0.5917, "step": 29631 }, { "epoch": 0.9102694068135041, "grad_norm": 0.3437446355819702, "learning_rate": 1.1431366955983045e-05, "loss": 0.5953, "step": 29632 }, { "epoch": 0.9103001259484533, "grad_norm": 0.36530250310897827, "learning_rate": 1.1430888653316575e-05, "loss": 0.5479, "step": 29633 }, { "epoch": 0.9103308450834025, "grad_norm": 0.3453957140445709, "learning_rate": 1.1430410347308164e-05, "loss": 0.5271, "step": 29634 }, { "epoch": 0.9103615642183516, "grad_norm": 0.3812560439109802, "learning_rate": 1.142993203795893e-05, "loss": 0.5854, "step": 29635 }, { "epoch": 0.9103922833533008, "grad_norm": 0.42268821597099304, "learning_rate": 1.1429453725269992e-05, "loss": 0.4342, "step": 29636 }, { "epoch": 0.9104230024882499, "grad_norm": 0.3529781699180603, "learning_rate": 1.1428975409242463e-05, "loss": 0.5574, "step": 29637 }, { "epoch": 0.910453721623199, "grad_norm": 0.34806567430496216, "learning_rate": 1.1428497089877464e-05, "loss": 0.4603, "step": 29638 }, { "epoch": 0.9104844407581483, "grad_norm": 0.36020249128341675, "learning_rate": 1.1428018767176108e-05, "loss": 0.5276, "step": 29639 }, { "epoch": 0.9105151598930974, "grad_norm": 0.3896033763885498, "learning_rate": 1.1427540441139512e-05, "loss": 0.5345, "step": 29640 }, { "epoch": 0.9105458790280466, "grad_norm": 0.33764228224754333, "learning_rate": 1.1427062111768803e-05, "loss": 0.614, "step": 29641 }, { "epoch": 0.9105765981629957, "grad_norm": 0.35604336857795715, "learning_rate": 1.1426583779065086e-05, "loss": 0.5331, "step": 29642 }, { "epoch": 0.9106073172979449, "grad_norm": 0.327065646648407, "learning_rate": 1.1426105443029484e-05, "loss": 0.5168, "step": 29643 }, { "epoch": 0.9106380364328941, "grad_norm": 0.40635308623313904, "learning_rate": 1.142562710366311e-05, "loss": 0.5388, "step": 29644 }, { "epoch": 0.9106687555678432, "grad_norm": 0.38242319226264954, "learning_rate": 1.1425148760967091e-05, "loss": 0.4963, "step": 29645 }, { "epoch": 0.9106994747027923, "grad_norm": 0.3868727385997772, "learning_rate": 1.1424670414942534e-05, "loss": 0.6257, "step": 29646 }, { "epoch": 0.9107301938377416, "grad_norm": 0.34705081582069397, "learning_rate": 1.1424192065590558e-05, "loss": 0.5812, "step": 29647 }, { "epoch": 0.9107609129726907, "grad_norm": 0.3463880717754364, "learning_rate": 1.1423713712912286e-05, "loss": 0.5713, "step": 29648 }, { "epoch": 0.9107916321076398, "grad_norm": 0.3461008369922638, "learning_rate": 1.1423235356908827e-05, "loss": 0.5083, "step": 29649 }, { "epoch": 0.910822351242589, "grad_norm": 0.36267778277397156, "learning_rate": 1.1422756997581305e-05, "loss": 0.6224, "step": 29650 }, { "epoch": 0.9108530703775382, "grad_norm": 0.37913763523101807, "learning_rate": 1.1422278634930833e-05, "loss": 0.5013, "step": 29651 }, { "epoch": 0.9108837895124873, "grad_norm": 0.39018410444259644, "learning_rate": 1.1421800268958532e-05, "loss": 0.5807, "step": 29652 }, { "epoch": 0.9109145086474365, "grad_norm": 0.34863823652267456, "learning_rate": 1.1421321899665514e-05, "loss": 0.5863, "step": 29653 }, { "epoch": 0.9109452277823856, "grad_norm": 0.4285810887813568, "learning_rate": 1.1420843527052902e-05, "loss": 0.5614, "step": 29654 }, { "epoch": 0.9109759469173349, "grad_norm": 0.39849987626075745, "learning_rate": 1.142036515112181e-05, "loss": 0.6271, "step": 29655 }, { "epoch": 0.911006666052284, "grad_norm": 0.3470666706562042, "learning_rate": 1.1419886771873356e-05, "loss": 0.603, "step": 29656 }, { "epoch": 0.9110373851872331, "grad_norm": 0.34127500653266907, "learning_rate": 1.1419408389308656e-05, "loss": 0.5517, "step": 29657 }, { "epoch": 0.9110681043221823, "grad_norm": 0.5493097305297852, "learning_rate": 1.141893000342883e-05, "loss": 0.5826, "step": 29658 }, { "epoch": 0.9110988234571314, "grad_norm": 0.3744521141052246, "learning_rate": 1.1418451614234995e-05, "loss": 0.6696, "step": 29659 }, { "epoch": 0.9111295425920806, "grad_norm": 0.3674258291721344, "learning_rate": 1.1417973221728264e-05, "loss": 0.5317, "step": 29660 }, { "epoch": 0.9111602617270298, "grad_norm": 0.4055071771144867, "learning_rate": 1.141749482590976e-05, "loss": 0.5812, "step": 29661 }, { "epoch": 0.9111909808619789, "grad_norm": 0.37680482864379883, "learning_rate": 1.1417016426780598e-05, "loss": 0.5809, "step": 29662 }, { "epoch": 0.911221699996928, "grad_norm": 0.4018721580505371, "learning_rate": 1.1416538024341894e-05, "loss": 0.5437, "step": 29663 }, { "epoch": 0.9112524191318773, "grad_norm": 0.40797388553619385, "learning_rate": 1.141605961859477e-05, "loss": 0.6179, "step": 29664 }, { "epoch": 0.9112831382668264, "grad_norm": 0.349023699760437, "learning_rate": 1.1415581209540336e-05, "loss": 0.4502, "step": 29665 }, { "epoch": 0.9113138574017756, "grad_norm": 0.3904239535331726, "learning_rate": 1.1415102797179714e-05, "loss": 0.5599, "step": 29666 }, { "epoch": 0.9113445765367247, "grad_norm": 0.3267405033111572, "learning_rate": 1.141462438151402e-05, "loss": 0.4825, "step": 29667 }, { "epoch": 0.9113752956716739, "grad_norm": 0.38066205382347107, "learning_rate": 1.1414145962544375e-05, "loss": 0.5394, "step": 29668 }, { "epoch": 0.9114060148066231, "grad_norm": 0.3368546962738037, "learning_rate": 1.1413667540271888e-05, "loss": 0.5344, "step": 29669 }, { "epoch": 0.9114367339415722, "grad_norm": 0.43169236183166504, "learning_rate": 1.1413189114697686e-05, "loss": 0.5625, "step": 29670 }, { "epoch": 0.9114674530765213, "grad_norm": 0.3608761429786682, "learning_rate": 1.1412710685822881e-05, "loss": 0.5547, "step": 29671 }, { "epoch": 0.9114981722114706, "grad_norm": 0.38702887296676636, "learning_rate": 1.1412232253648592e-05, "loss": 0.5302, "step": 29672 }, { "epoch": 0.9115288913464197, "grad_norm": 0.3371926546096802, "learning_rate": 1.1411753818175935e-05, "loss": 0.5068, "step": 29673 }, { "epoch": 0.9115596104813688, "grad_norm": 0.3894369304180145, "learning_rate": 1.1411275379406029e-05, "loss": 0.5619, "step": 29674 }, { "epoch": 0.911590329616318, "grad_norm": 0.38616418838500977, "learning_rate": 1.141079693733999e-05, "loss": 0.5891, "step": 29675 }, { "epoch": 0.9116210487512671, "grad_norm": 0.363431841135025, "learning_rate": 1.1410318491978936e-05, "loss": 0.5973, "step": 29676 }, { "epoch": 0.9116517678862163, "grad_norm": 0.37978070974349976, "learning_rate": 1.1409840043323985e-05, "loss": 0.5617, "step": 29677 }, { "epoch": 0.9116824870211655, "grad_norm": 0.361229807138443, "learning_rate": 1.1409361591376253e-05, "loss": 0.6387, "step": 29678 }, { "epoch": 0.9117132061561146, "grad_norm": 0.3865732252597809, "learning_rate": 1.1408883136136862e-05, "loss": 0.5824, "step": 29679 }, { "epoch": 0.9117439252910639, "grad_norm": 0.34537753462791443, "learning_rate": 1.1408404677606923e-05, "loss": 0.4473, "step": 29680 }, { "epoch": 0.911774644426013, "grad_norm": 0.40693479776382446, "learning_rate": 1.140792621578756e-05, "loss": 0.554, "step": 29681 }, { "epoch": 0.9118053635609621, "grad_norm": 0.41441506147384644, "learning_rate": 1.1407447750679884e-05, "loss": 0.609, "step": 29682 }, { "epoch": 0.9118360826959113, "grad_norm": 0.7827738523483276, "learning_rate": 1.1406969282285013e-05, "loss": 0.5083, "step": 29683 }, { "epoch": 0.9118668018308604, "grad_norm": 0.40561243891716003, "learning_rate": 1.1406490810604074e-05, "loss": 0.546, "step": 29684 }, { "epoch": 0.9118975209658096, "grad_norm": 0.40101373195648193, "learning_rate": 1.1406012335638173e-05, "loss": 0.6023, "step": 29685 }, { "epoch": 0.9119282401007588, "grad_norm": 0.34067508578300476, "learning_rate": 1.1405533857388433e-05, "loss": 0.5198, "step": 29686 }, { "epoch": 0.9119589592357079, "grad_norm": 0.3718006908893585, "learning_rate": 1.1405055375855968e-05, "loss": 0.5048, "step": 29687 }, { "epoch": 0.911989678370657, "grad_norm": 0.3842949867248535, "learning_rate": 1.1404576891041901e-05, "loss": 0.5079, "step": 29688 }, { "epoch": 0.9120203975056063, "grad_norm": 0.3396540880203247, "learning_rate": 1.1404098402947345e-05, "loss": 0.5521, "step": 29689 }, { "epoch": 0.9120511166405554, "grad_norm": 0.34843090176582336, "learning_rate": 1.140361991157342e-05, "loss": 0.5489, "step": 29690 }, { "epoch": 0.9120818357755046, "grad_norm": 0.43462929129600525, "learning_rate": 1.1403141416921241e-05, "loss": 0.5438, "step": 29691 }, { "epoch": 0.9121125549104537, "grad_norm": 0.4300488233566284, "learning_rate": 1.1402662918991931e-05, "loss": 0.5462, "step": 29692 }, { "epoch": 0.9121432740454029, "grad_norm": 0.36503028869628906, "learning_rate": 1.1402184417786604e-05, "loss": 0.5223, "step": 29693 }, { "epoch": 0.9121739931803521, "grad_norm": 0.3724687397480011, "learning_rate": 1.1401705913306372e-05, "loss": 0.5063, "step": 29694 }, { "epoch": 0.9122047123153012, "grad_norm": 0.38562798500061035, "learning_rate": 1.1401227405552359e-05, "loss": 0.529, "step": 29695 }, { "epoch": 0.9122354314502503, "grad_norm": 0.3860780596733093, "learning_rate": 1.1400748894525684e-05, "loss": 0.5383, "step": 29696 }, { "epoch": 0.9122661505851996, "grad_norm": 0.41153228282928467, "learning_rate": 1.1400270380227464e-05, "loss": 0.5681, "step": 29697 }, { "epoch": 0.9122968697201487, "grad_norm": 0.40747809410095215, "learning_rate": 1.139979186265881e-05, "loss": 0.6081, "step": 29698 }, { "epoch": 0.9123275888550978, "grad_norm": 0.440523236989975, "learning_rate": 1.1399313341820847e-05, "loss": 0.5141, "step": 29699 }, { "epoch": 0.912358307990047, "grad_norm": 0.3612153232097626, "learning_rate": 1.139883481771469e-05, "loss": 0.469, "step": 29700 }, { "epoch": 0.9123890271249961, "grad_norm": 0.4362585246562958, "learning_rate": 1.1398356290341455e-05, "loss": 0.5677, "step": 29701 }, { "epoch": 0.9124197462599454, "grad_norm": 0.3617365062236786, "learning_rate": 1.1397877759702265e-05, "loss": 0.6003, "step": 29702 }, { "epoch": 0.9124504653948945, "grad_norm": 0.35112297534942627, "learning_rate": 1.1397399225798229e-05, "loss": 0.5518, "step": 29703 }, { "epoch": 0.9124811845298436, "grad_norm": 0.38153699040412903, "learning_rate": 1.1396920688630473e-05, "loss": 0.5563, "step": 29704 }, { "epoch": 0.9125119036647928, "grad_norm": 0.3876824676990509, "learning_rate": 1.1396442148200108e-05, "loss": 0.5673, "step": 29705 }, { "epoch": 0.912542622799742, "grad_norm": 0.44587504863739014, "learning_rate": 1.1395963604508257e-05, "loss": 0.5245, "step": 29706 }, { "epoch": 0.9125733419346911, "grad_norm": 0.363484650850296, "learning_rate": 1.1395485057556035e-05, "loss": 0.6121, "step": 29707 }, { "epoch": 0.9126040610696403, "grad_norm": 0.3916821777820587, "learning_rate": 1.1395006507344561e-05, "loss": 0.6365, "step": 29708 }, { "epoch": 0.9126347802045894, "grad_norm": 0.38654905557632446, "learning_rate": 1.1394527953874954e-05, "loss": 0.5975, "step": 29709 }, { "epoch": 0.9126654993395386, "grad_norm": 0.3578326404094696, "learning_rate": 1.1394049397148327e-05, "loss": 0.4958, "step": 29710 }, { "epoch": 0.9126962184744878, "grad_norm": 0.3601352572441101, "learning_rate": 1.13935708371658e-05, "loss": 0.5044, "step": 29711 }, { "epoch": 0.9127269376094369, "grad_norm": 0.41032400727272034, "learning_rate": 1.1393092273928487e-05, "loss": 0.5841, "step": 29712 }, { "epoch": 0.912757656744386, "grad_norm": 0.36684730648994446, "learning_rate": 1.1392613707437514e-05, "loss": 0.5487, "step": 29713 }, { "epoch": 0.9127883758793353, "grad_norm": 0.35705116391181946, "learning_rate": 1.1392135137693994e-05, "loss": 0.4695, "step": 29714 }, { "epoch": 0.9128190950142844, "grad_norm": 0.3365003168582916, "learning_rate": 1.1391656564699047e-05, "loss": 0.4691, "step": 29715 }, { "epoch": 0.9128498141492336, "grad_norm": 0.34280186891555786, "learning_rate": 1.1391177988453787e-05, "loss": 0.5212, "step": 29716 }, { "epoch": 0.9128805332841827, "grad_norm": 0.40295541286468506, "learning_rate": 1.1390699408959333e-05, "loss": 0.5189, "step": 29717 }, { "epoch": 0.9129112524191318, "grad_norm": 0.4262138903141022, "learning_rate": 1.1390220826216807e-05, "loss": 0.563, "step": 29718 }, { "epoch": 0.9129419715540811, "grad_norm": 0.3461266756057739, "learning_rate": 1.138974224022732e-05, "loss": 0.542, "step": 29719 }, { "epoch": 0.9129726906890302, "grad_norm": 0.3847388029098511, "learning_rate": 1.1389263650991993e-05, "loss": 0.4753, "step": 29720 }, { "epoch": 0.9130034098239793, "grad_norm": 0.3396283984184265, "learning_rate": 1.1388785058511944e-05, "loss": 0.5177, "step": 29721 }, { "epoch": 0.9130341289589285, "grad_norm": 0.35261014103889465, "learning_rate": 1.1388306462788293e-05, "loss": 0.5139, "step": 29722 }, { "epoch": 0.9130648480938777, "grad_norm": 0.41299960017204285, "learning_rate": 1.138782786382215e-05, "loss": 0.5614, "step": 29723 }, { "epoch": 0.9130955672288268, "grad_norm": 0.3549261689186096, "learning_rate": 1.1387349261614643e-05, "loss": 0.4937, "step": 29724 }, { "epoch": 0.913126286363776, "grad_norm": 0.3898283541202545, "learning_rate": 1.1386870656166882e-05, "loss": 0.5569, "step": 29725 }, { "epoch": 0.9131570054987251, "grad_norm": 0.35447824001312256, "learning_rate": 1.1386392047479988e-05, "loss": 0.6329, "step": 29726 }, { "epoch": 0.9131877246336744, "grad_norm": 0.3435254693031311, "learning_rate": 1.1385913435555081e-05, "loss": 0.479, "step": 29727 }, { "epoch": 0.9132184437686235, "grad_norm": 0.3747435212135315, "learning_rate": 1.1385434820393276e-05, "loss": 0.5465, "step": 29728 }, { "epoch": 0.9132491629035726, "grad_norm": 0.6024119257926941, "learning_rate": 1.1384956201995689e-05, "loss": 0.4773, "step": 29729 }, { "epoch": 0.9132798820385218, "grad_norm": 0.35046371817588806, "learning_rate": 1.1384477580363438e-05, "loss": 0.5297, "step": 29730 }, { "epoch": 0.913310601173471, "grad_norm": 0.41459399461746216, "learning_rate": 1.1383998955497647e-05, "loss": 0.6471, "step": 29731 }, { "epoch": 0.9133413203084201, "grad_norm": 0.40949124097824097, "learning_rate": 1.1383520327399427e-05, "loss": 0.5412, "step": 29732 }, { "epoch": 0.9133720394433693, "grad_norm": 0.39807960391044617, "learning_rate": 1.13830416960699e-05, "loss": 0.5364, "step": 29733 }, { "epoch": 0.9134027585783184, "grad_norm": 0.3354315757751465, "learning_rate": 1.1382563061510186e-05, "loss": 0.4828, "step": 29734 }, { "epoch": 0.9134334777132675, "grad_norm": 0.3752550780773163, "learning_rate": 1.1382084423721395e-05, "loss": 0.6056, "step": 29735 }, { "epoch": 0.9134641968482168, "grad_norm": 0.3611757457256317, "learning_rate": 1.138160578270465e-05, "loss": 0.5578, "step": 29736 }, { "epoch": 0.9134949159831659, "grad_norm": 0.4289241135120392, "learning_rate": 1.1381127138461068e-05, "loss": 0.5827, "step": 29737 }, { "epoch": 0.913525635118115, "grad_norm": 0.5071880221366882, "learning_rate": 1.138064849099177e-05, "loss": 0.662, "step": 29738 }, { "epoch": 0.9135563542530643, "grad_norm": 0.4324406683444977, "learning_rate": 1.1380169840297868e-05, "loss": 0.4758, "step": 29739 }, { "epoch": 0.9135870733880134, "grad_norm": 0.3971017301082611, "learning_rate": 1.1379691186380483e-05, "loss": 0.4509, "step": 29740 }, { "epoch": 0.9136177925229626, "grad_norm": 0.3495413362979889, "learning_rate": 1.1379212529240733e-05, "loss": 0.6046, "step": 29741 }, { "epoch": 0.9136485116579117, "grad_norm": 0.37461552023887634, "learning_rate": 1.1378733868879738e-05, "loss": 0.5626, "step": 29742 }, { "epoch": 0.9136792307928608, "grad_norm": 0.347076416015625, "learning_rate": 1.1378255205298612e-05, "loss": 0.5285, "step": 29743 }, { "epoch": 0.9137099499278101, "grad_norm": 0.3817490041255951, "learning_rate": 1.1377776538498475e-05, "loss": 0.5355, "step": 29744 }, { "epoch": 0.9137406690627592, "grad_norm": 0.33235490322113037, "learning_rate": 1.1377297868480445e-05, "loss": 0.5448, "step": 29745 }, { "epoch": 0.9137713881977083, "grad_norm": 0.3625320494174957, "learning_rate": 1.1376819195245639e-05, "loss": 0.5328, "step": 29746 }, { "epoch": 0.9138021073326575, "grad_norm": 0.33627426624298096, "learning_rate": 1.1376340518795177e-05, "loss": 0.5368, "step": 29747 }, { "epoch": 0.9138328264676067, "grad_norm": 0.38602083921432495, "learning_rate": 1.1375861839130171e-05, "loss": 0.5452, "step": 29748 }, { "epoch": 0.9138635456025558, "grad_norm": 0.3814568817615509, "learning_rate": 1.1375383156251749e-05, "loss": 0.4651, "step": 29749 }, { "epoch": 0.913894264737505, "grad_norm": 0.3519912362098694, "learning_rate": 1.1374904470161024e-05, "loss": 0.5694, "step": 29750 }, { "epoch": 0.9139249838724541, "grad_norm": 0.3724469840526581, "learning_rate": 1.1374425780859109e-05, "loss": 0.5166, "step": 29751 }, { "epoch": 0.9139557030074034, "grad_norm": 0.3797607123851776, "learning_rate": 1.137394708834713e-05, "loss": 0.5594, "step": 29752 }, { "epoch": 0.9139864221423525, "grad_norm": 0.4920189082622528, "learning_rate": 1.13734683926262e-05, "loss": 0.5357, "step": 29753 }, { "epoch": 0.9140171412773016, "grad_norm": 0.36709848046302795, "learning_rate": 1.137298969369744e-05, "loss": 0.5793, "step": 29754 }, { "epoch": 0.9140478604122508, "grad_norm": 0.4013237953186035, "learning_rate": 1.1372510991561965e-05, "loss": 0.5132, "step": 29755 }, { "epoch": 0.9140785795472, "grad_norm": 0.3612293303012848, "learning_rate": 1.1372032286220896e-05, "loss": 0.4913, "step": 29756 }, { "epoch": 0.9141092986821491, "grad_norm": 0.41800886392593384, "learning_rate": 1.1371553577675348e-05, "loss": 0.5497, "step": 29757 }, { "epoch": 0.9141400178170983, "grad_norm": 0.3713527023792267, "learning_rate": 1.1371074865926443e-05, "loss": 0.5045, "step": 29758 }, { "epoch": 0.9141707369520474, "grad_norm": 0.3809446096420288, "learning_rate": 1.1370596150975295e-05, "loss": 0.5291, "step": 29759 }, { "epoch": 0.9142014560869965, "grad_norm": 0.4202650487422943, "learning_rate": 1.1370117432823028e-05, "loss": 0.5153, "step": 29760 }, { "epoch": 0.9142321752219458, "grad_norm": 0.397167831659317, "learning_rate": 1.1369638711470751e-05, "loss": 0.5738, "step": 29761 }, { "epoch": 0.9142628943568949, "grad_norm": 0.41068416833877563, "learning_rate": 1.1369159986919589e-05, "loss": 0.4806, "step": 29762 }, { "epoch": 0.914293613491844, "grad_norm": 0.37226855754852295, "learning_rate": 1.136868125917066e-05, "loss": 0.5745, "step": 29763 }, { "epoch": 0.9143243326267932, "grad_norm": 0.36325976252555847, "learning_rate": 1.1368202528225078e-05, "loss": 0.5571, "step": 29764 }, { "epoch": 0.9143550517617424, "grad_norm": 0.3801557123661041, "learning_rate": 1.1367723794083965e-05, "loss": 0.567, "step": 29765 }, { "epoch": 0.9143857708966916, "grad_norm": 0.34358882904052734, "learning_rate": 1.1367245056748434e-05, "loss": 0.6037, "step": 29766 }, { "epoch": 0.9144164900316407, "grad_norm": 0.35549283027648926, "learning_rate": 1.1366766316219611e-05, "loss": 0.5617, "step": 29767 }, { "epoch": 0.9144472091665898, "grad_norm": 0.359526664018631, "learning_rate": 1.136628757249861e-05, "loss": 0.5918, "step": 29768 }, { "epoch": 0.9144779283015391, "grad_norm": 0.39478540420532227, "learning_rate": 1.1365808825586545e-05, "loss": 0.5748, "step": 29769 }, { "epoch": 0.9145086474364882, "grad_norm": 0.4241587221622467, "learning_rate": 1.1365330075484542e-05, "loss": 0.5573, "step": 29770 }, { "epoch": 0.9145393665714373, "grad_norm": 0.3986815810203552, "learning_rate": 1.1364851322193711e-05, "loss": 0.5961, "step": 29771 }, { "epoch": 0.9145700857063865, "grad_norm": 0.31327977776527405, "learning_rate": 1.1364372565715178e-05, "loss": 0.4852, "step": 29772 }, { "epoch": 0.9146008048413357, "grad_norm": 0.3652287721633911, "learning_rate": 1.1363893806050057e-05, "loss": 0.5274, "step": 29773 }, { "epoch": 0.9146315239762848, "grad_norm": 0.3577510118484497, "learning_rate": 1.1363415043199466e-05, "loss": 0.5278, "step": 29774 }, { "epoch": 0.914662243111234, "grad_norm": 0.3431572914123535, "learning_rate": 1.1362936277164525e-05, "loss": 0.5086, "step": 29775 }, { "epoch": 0.9146929622461831, "grad_norm": 0.34095799922943115, "learning_rate": 1.136245750794635e-05, "loss": 0.5629, "step": 29776 }, { "epoch": 0.9147236813811324, "grad_norm": 0.3747989535331726, "learning_rate": 1.136197873554606e-05, "loss": 0.609, "step": 29777 }, { "epoch": 0.9147544005160815, "grad_norm": 0.37615111470222473, "learning_rate": 1.1361499959964774e-05, "loss": 0.5991, "step": 29778 }, { "epoch": 0.9147851196510306, "grad_norm": 0.3722657859325409, "learning_rate": 1.1361021181203613e-05, "loss": 0.5718, "step": 29779 }, { "epoch": 0.9148158387859798, "grad_norm": 0.3626370429992676, "learning_rate": 1.1360542399263688e-05, "loss": 0.469, "step": 29780 }, { "epoch": 0.914846557920929, "grad_norm": 0.381083607673645, "learning_rate": 1.1360063614146121e-05, "loss": 0.5655, "step": 29781 }, { "epoch": 0.9148772770558781, "grad_norm": 0.4104706346988678, "learning_rate": 1.1359584825852033e-05, "loss": 0.6219, "step": 29782 }, { "epoch": 0.9149079961908273, "grad_norm": 0.3680785596370697, "learning_rate": 1.1359106034382539e-05, "loss": 0.5782, "step": 29783 }, { "epoch": 0.9149387153257764, "grad_norm": 0.3318933844566345, "learning_rate": 1.1358627239738756e-05, "loss": 0.4448, "step": 29784 }, { "epoch": 0.9149694344607255, "grad_norm": 0.39394259452819824, "learning_rate": 1.1358148441921804e-05, "loss": 0.529, "step": 29785 }, { "epoch": 0.9150001535956748, "grad_norm": 0.39842161536216736, "learning_rate": 1.1357669640932804e-05, "loss": 0.5421, "step": 29786 }, { "epoch": 0.9150308727306239, "grad_norm": 0.3512493073940277, "learning_rate": 1.1357190836772869e-05, "loss": 0.4209, "step": 29787 }, { "epoch": 0.9150615918655731, "grad_norm": 0.450266033411026, "learning_rate": 1.1356712029443125e-05, "loss": 0.5681, "step": 29788 }, { "epoch": 0.9150923110005222, "grad_norm": 0.36422500014305115, "learning_rate": 1.135623321894468e-05, "loss": 0.598, "step": 29789 }, { "epoch": 0.9151230301354714, "grad_norm": 0.3391086161136627, "learning_rate": 1.1355754405278661e-05, "loss": 0.558, "step": 29790 }, { "epoch": 0.9151537492704206, "grad_norm": 0.37135568261146545, "learning_rate": 1.135527558844618e-05, "loss": 0.6172, "step": 29791 }, { "epoch": 0.9151844684053697, "grad_norm": 0.38462555408477783, "learning_rate": 1.135479676844836e-05, "loss": 0.558, "step": 29792 }, { "epoch": 0.9152151875403188, "grad_norm": 0.3729138672351837, "learning_rate": 1.1354317945286317e-05, "loss": 0.5665, "step": 29793 }, { "epoch": 0.9152459066752681, "grad_norm": 0.3788667619228363, "learning_rate": 1.135383911896117e-05, "loss": 0.5742, "step": 29794 }, { "epoch": 0.9152766258102172, "grad_norm": 0.4169248640537262, "learning_rate": 1.1353360289474038e-05, "loss": 0.569, "step": 29795 }, { "epoch": 0.9153073449451663, "grad_norm": 0.3482382893562317, "learning_rate": 1.1352881456826037e-05, "loss": 0.5066, "step": 29796 }, { "epoch": 0.9153380640801155, "grad_norm": 0.35676130652427673, "learning_rate": 1.135240262101829e-05, "loss": 0.3806, "step": 29797 }, { "epoch": 0.9153687832150647, "grad_norm": 0.36297234892845154, "learning_rate": 1.1351923782051908e-05, "loss": 0.5234, "step": 29798 }, { "epoch": 0.9153995023500138, "grad_norm": 0.5203209519386292, "learning_rate": 1.1351444939928018e-05, "loss": 0.6043, "step": 29799 }, { "epoch": 0.915430221484963, "grad_norm": 0.37229931354522705, "learning_rate": 1.1350966094647728e-05, "loss": 0.5163, "step": 29800 }, { "epoch": 0.9154609406199121, "grad_norm": 0.3829618990421295, "learning_rate": 1.1350487246212168e-05, "loss": 0.5604, "step": 29801 }, { "epoch": 0.9154916597548614, "grad_norm": 0.37218207120895386, "learning_rate": 1.1350008394622447e-05, "loss": 0.5361, "step": 29802 }, { "epoch": 0.9155223788898105, "grad_norm": 0.39492088556289673, "learning_rate": 1.134952953987969e-05, "loss": 0.5269, "step": 29803 }, { "epoch": 0.9155530980247596, "grad_norm": 0.47764095664024353, "learning_rate": 1.1349050681985009e-05, "loss": 0.5962, "step": 29804 }, { "epoch": 0.9155838171597088, "grad_norm": 0.3557504117488861, "learning_rate": 1.1348571820939529e-05, "loss": 0.5295, "step": 29805 }, { "epoch": 0.9156145362946579, "grad_norm": 0.42461609840393066, "learning_rate": 1.1348092956744365e-05, "loss": 0.5234, "step": 29806 }, { "epoch": 0.9156452554296071, "grad_norm": 0.34580719470977783, "learning_rate": 1.1347614089400634e-05, "loss": 0.5256, "step": 29807 }, { "epoch": 0.9156759745645563, "grad_norm": 0.36660414934158325, "learning_rate": 1.1347135218909457e-05, "loss": 0.6005, "step": 29808 }, { "epoch": 0.9157066936995054, "grad_norm": 0.38876649737358093, "learning_rate": 1.1346656345271951e-05, "loss": 0.4847, "step": 29809 }, { "epoch": 0.9157374128344545, "grad_norm": 0.35271182656288147, "learning_rate": 1.134617746848924e-05, "loss": 0.6116, "step": 29810 }, { "epoch": 0.9157681319694038, "grad_norm": 0.5267618298530579, "learning_rate": 1.134569858856243e-05, "loss": 0.5432, "step": 29811 }, { "epoch": 0.9157988511043529, "grad_norm": 0.34377336502075195, "learning_rate": 1.1345219705492652e-05, "loss": 0.5142, "step": 29812 }, { "epoch": 0.9158295702393021, "grad_norm": 0.36077767610549927, "learning_rate": 1.1344740819281017e-05, "loss": 0.5498, "step": 29813 }, { "epoch": 0.9158602893742512, "grad_norm": 0.3289026916027069, "learning_rate": 1.1344261929928649e-05, "loss": 0.5473, "step": 29814 }, { "epoch": 0.9158910085092004, "grad_norm": 0.3697982728481293, "learning_rate": 1.1343783037436662e-05, "loss": 0.4948, "step": 29815 }, { "epoch": 0.9159217276441496, "grad_norm": 0.36773011088371277, "learning_rate": 1.1343304141806173e-05, "loss": 0.5251, "step": 29816 }, { "epoch": 0.9159524467790987, "grad_norm": 0.34727299213409424, "learning_rate": 1.1342825243038307e-05, "loss": 0.6086, "step": 29817 }, { "epoch": 0.9159831659140478, "grad_norm": 0.342718243598938, "learning_rate": 1.1342346341134177e-05, "loss": 0.4655, "step": 29818 }, { "epoch": 0.9160138850489971, "grad_norm": 0.35590770840644836, "learning_rate": 1.1341867436094901e-05, "loss": 0.5439, "step": 29819 }, { "epoch": 0.9160446041839462, "grad_norm": 0.35993117094039917, "learning_rate": 1.1341388527921604e-05, "loss": 0.5403, "step": 29820 }, { "epoch": 0.9160753233188953, "grad_norm": 0.35229575634002686, "learning_rate": 1.13409096166154e-05, "loss": 0.5057, "step": 29821 }, { "epoch": 0.9161060424538445, "grad_norm": 0.3748055696487427, "learning_rate": 1.1340430702177406e-05, "loss": 0.5405, "step": 29822 }, { "epoch": 0.9161367615887936, "grad_norm": 0.3849409222602844, "learning_rate": 1.1339951784608743e-05, "loss": 0.6079, "step": 29823 }, { "epoch": 0.9161674807237428, "grad_norm": 0.37229734659194946, "learning_rate": 1.133947286391053e-05, "loss": 0.5004, "step": 29824 }, { "epoch": 0.916198199858692, "grad_norm": 0.43534713983535767, "learning_rate": 1.1338993940083882e-05, "loss": 0.4981, "step": 29825 }, { "epoch": 0.9162289189936411, "grad_norm": 0.36371979117393494, "learning_rate": 1.1338515013129923e-05, "loss": 0.5062, "step": 29826 }, { "epoch": 0.9162596381285903, "grad_norm": 0.3950299918651581, "learning_rate": 1.1338036083049767e-05, "loss": 0.6111, "step": 29827 }, { "epoch": 0.9162903572635395, "grad_norm": 0.3730788230895996, "learning_rate": 1.1337557149844535e-05, "loss": 0.5467, "step": 29828 }, { "epoch": 0.9163210763984886, "grad_norm": 0.37635210156440735, "learning_rate": 1.1337078213515344e-05, "loss": 0.5029, "step": 29829 }, { "epoch": 0.9163517955334378, "grad_norm": 0.3747934401035309, "learning_rate": 1.1336599274063315e-05, "loss": 0.5481, "step": 29830 }, { "epoch": 0.9163825146683869, "grad_norm": 0.38963639736175537, "learning_rate": 1.1336120331489565e-05, "loss": 0.5203, "step": 29831 }, { "epoch": 0.9164132338033361, "grad_norm": 0.40385931730270386, "learning_rate": 1.1335641385795212e-05, "loss": 0.5778, "step": 29832 }, { "epoch": 0.9164439529382853, "grad_norm": 0.43252235651016235, "learning_rate": 1.1335162436981377e-05, "loss": 0.5736, "step": 29833 }, { "epoch": 0.9164746720732344, "grad_norm": 0.3492489159107208, "learning_rate": 1.1334683485049174e-05, "loss": 0.5581, "step": 29834 }, { "epoch": 0.9165053912081835, "grad_norm": 0.3785008490085602, "learning_rate": 1.1334204529999728e-05, "loss": 0.5517, "step": 29835 }, { "epoch": 0.9165361103431328, "grad_norm": 0.4204126298427582, "learning_rate": 1.1333725571834151e-05, "loss": 0.6252, "step": 29836 }, { "epoch": 0.9165668294780819, "grad_norm": 0.3687248229980469, "learning_rate": 1.1333246610553565e-05, "loss": 0.5265, "step": 29837 }, { "epoch": 0.9165975486130311, "grad_norm": 0.4019128382205963, "learning_rate": 1.1332767646159088e-05, "loss": 0.569, "step": 29838 }, { "epoch": 0.9166282677479802, "grad_norm": 0.40042823553085327, "learning_rate": 1.1332288678651842e-05, "loss": 0.561, "step": 29839 }, { "epoch": 0.9166589868829293, "grad_norm": 0.40015658736228943, "learning_rate": 1.133180970803294e-05, "loss": 0.5858, "step": 29840 }, { "epoch": 0.9166897060178786, "grad_norm": 0.3942359685897827, "learning_rate": 1.1331330734303502e-05, "loss": 0.5163, "step": 29841 }, { "epoch": 0.9167204251528277, "grad_norm": 0.38134104013442993, "learning_rate": 1.1330851757464654e-05, "loss": 0.6081, "step": 29842 }, { "epoch": 0.9167511442877768, "grad_norm": 0.5786691308021545, "learning_rate": 1.1330372777517501e-05, "loss": 0.5335, "step": 29843 }, { "epoch": 0.916781863422726, "grad_norm": 0.5383130311965942, "learning_rate": 1.1329893794463176e-05, "loss": 0.5219, "step": 29844 }, { "epoch": 0.9168125825576752, "grad_norm": 0.377191960811615, "learning_rate": 1.1329414808302788e-05, "loss": 0.5723, "step": 29845 }, { "epoch": 0.9168433016926243, "grad_norm": 0.3670126795768738, "learning_rate": 1.1328935819037461e-05, "loss": 0.5311, "step": 29846 }, { "epoch": 0.9168740208275735, "grad_norm": 0.361858606338501, "learning_rate": 1.132845682666831e-05, "loss": 0.631, "step": 29847 }, { "epoch": 0.9169047399625226, "grad_norm": 0.3342001736164093, "learning_rate": 1.1327977831196458e-05, "loss": 0.5699, "step": 29848 }, { "epoch": 0.9169354590974718, "grad_norm": 0.33372342586517334, "learning_rate": 1.1327498832623019e-05, "loss": 0.5333, "step": 29849 }, { "epoch": 0.916966178232421, "grad_norm": 0.36867091059684753, "learning_rate": 1.132701983094911e-05, "loss": 0.599, "step": 29850 }, { "epoch": 0.9169968973673701, "grad_norm": 0.39576247334480286, "learning_rate": 1.1326540826175859e-05, "loss": 0.5481, "step": 29851 }, { "epoch": 0.9170276165023193, "grad_norm": 0.4900851845741272, "learning_rate": 1.1326061818304376e-05, "loss": 0.5836, "step": 29852 }, { "epoch": 0.9170583356372685, "grad_norm": 0.37317702174186707, "learning_rate": 1.1325582807335787e-05, "loss": 0.551, "step": 29853 }, { "epoch": 0.9170890547722176, "grad_norm": 0.419871062040329, "learning_rate": 1.1325103793271201e-05, "loss": 0.5297, "step": 29854 }, { "epoch": 0.9171197739071668, "grad_norm": 0.3580839931964874, "learning_rate": 1.1324624776111748e-05, "loss": 0.6231, "step": 29855 }, { "epoch": 0.9171504930421159, "grad_norm": 0.3872574269771576, "learning_rate": 1.1324145755858536e-05, "loss": 0.5349, "step": 29856 }, { "epoch": 0.917181212177065, "grad_norm": 0.3449633717536926, "learning_rate": 1.1323666732512692e-05, "loss": 0.5028, "step": 29857 }, { "epoch": 0.9172119313120143, "grad_norm": 0.6797711849212646, "learning_rate": 1.1323187706075331e-05, "loss": 0.5172, "step": 29858 }, { "epoch": 0.9172426504469634, "grad_norm": 0.3633214831352234, "learning_rate": 1.1322708676547574e-05, "loss": 0.6291, "step": 29859 }, { "epoch": 0.9172733695819125, "grad_norm": 0.34720665216445923, "learning_rate": 1.1322229643930541e-05, "loss": 0.5191, "step": 29860 }, { "epoch": 0.9173040887168618, "grad_norm": 0.334983766078949, "learning_rate": 1.1321750608225342e-05, "loss": 0.5712, "step": 29861 }, { "epoch": 0.9173348078518109, "grad_norm": 0.3538673222064972, "learning_rate": 1.1321271569433108e-05, "loss": 0.5556, "step": 29862 }, { "epoch": 0.9173655269867601, "grad_norm": 0.39380282163619995, "learning_rate": 1.1320792527554947e-05, "loss": 0.6448, "step": 29863 }, { "epoch": 0.9173962461217092, "grad_norm": 0.357928991317749, "learning_rate": 1.1320313482591987e-05, "loss": 0.5159, "step": 29864 }, { "epoch": 0.9174269652566583, "grad_norm": 0.35758405923843384, "learning_rate": 1.131983443454534e-05, "loss": 0.4601, "step": 29865 }, { "epoch": 0.9174576843916076, "grad_norm": 0.3901301622390747, "learning_rate": 1.1319355383416129e-05, "loss": 0.5113, "step": 29866 }, { "epoch": 0.9174884035265567, "grad_norm": 0.3837173879146576, "learning_rate": 1.1318876329205473e-05, "loss": 0.4472, "step": 29867 }, { "epoch": 0.9175191226615058, "grad_norm": 0.3511482775211334, "learning_rate": 1.1318397271914485e-05, "loss": 0.5755, "step": 29868 }, { "epoch": 0.917549841796455, "grad_norm": 0.41961973905563354, "learning_rate": 1.1317918211544291e-05, "loss": 0.569, "step": 29869 }, { "epoch": 0.9175805609314042, "grad_norm": 0.4299531877040863, "learning_rate": 1.1317439148096006e-05, "loss": 0.4646, "step": 29870 }, { "epoch": 0.9176112800663533, "grad_norm": 0.3936574459075928, "learning_rate": 1.131696008157075e-05, "loss": 0.4695, "step": 29871 }, { "epoch": 0.9176419992013025, "grad_norm": 0.35572701692581177, "learning_rate": 1.131648101196964e-05, "loss": 0.5362, "step": 29872 }, { "epoch": 0.9176727183362516, "grad_norm": 0.366411954164505, "learning_rate": 1.13160019392938e-05, "loss": 0.5112, "step": 29873 }, { "epoch": 0.9177034374712008, "grad_norm": 0.36636224389076233, "learning_rate": 1.1315522863544344e-05, "loss": 0.5118, "step": 29874 }, { "epoch": 0.91773415660615, "grad_norm": 0.4019298851490021, "learning_rate": 1.1315043784722393e-05, "loss": 0.514, "step": 29875 }, { "epoch": 0.9177648757410991, "grad_norm": 0.4121556580066681, "learning_rate": 1.1314564702829062e-05, "loss": 0.5583, "step": 29876 }, { "epoch": 0.9177955948760483, "grad_norm": 0.44091570377349854, "learning_rate": 1.1314085617865476e-05, "loss": 0.5215, "step": 29877 }, { "epoch": 0.9178263140109975, "grad_norm": 0.34990063309669495, "learning_rate": 1.1313606529832752e-05, "loss": 0.5717, "step": 29878 }, { "epoch": 0.9178570331459466, "grad_norm": 0.41568228602409363, "learning_rate": 1.1313127438732008e-05, "loss": 0.5301, "step": 29879 }, { "epoch": 0.9178877522808958, "grad_norm": 0.38292476534843445, "learning_rate": 1.1312648344564364e-05, "loss": 0.5581, "step": 29880 }, { "epoch": 0.9179184714158449, "grad_norm": 0.3670329749584198, "learning_rate": 1.1312169247330936e-05, "loss": 0.5054, "step": 29881 }, { "epoch": 0.917949190550794, "grad_norm": 0.4003210663795471, "learning_rate": 1.1311690147032846e-05, "loss": 0.6062, "step": 29882 }, { "epoch": 0.9179799096857433, "grad_norm": 0.41060930490493774, "learning_rate": 1.1311211043671212e-05, "loss": 0.5489, "step": 29883 }, { "epoch": 0.9180106288206924, "grad_norm": 0.3740139305591583, "learning_rate": 1.1310731937247153e-05, "loss": 0.4545, "step": 29884 }, { "epoch": 0.9180413479556415, "grad_norm": 0.38347122073173523, "learning_rate": 1.131025282776179e-05, "loss": 0.5978, "step": 29885 }, { "epoch": 0.9180720670905907, "grad_norm": 0.37524494528770447, "learning_rate": 1.1309773715216238e-05, "loss": 0.4938, "step": 29886 }, { "epoch": 0.9181027862255399, "grad_norm": 0.38675257563591003, "learning_rate": 1.1309294599611617e-05, "loss": 0.5167, "step": 29887 }, { "epoch": 0.9181335053604891, "grad_norm": 0.38717299699783325, "learning_rate": 1.1308815480949047e-05, "loss": 0.4992, "step": 29888 }, { "epoch": 0.9181642244954382, "grad_norm": 0.36530157923698425, "learning_rate": 1.1308336359229651e-05, "loss": 0.539, "step": 29889 }, { "epoch": 0.9181949436303873, "grad_norm": 0.3768036365509033, "learning_rate": 1.1307857234454538e-05, "loss": 0.6253, "step": 29890 }, { "epoch": 0.9182256627653366, "grad_norm": 0.3879396319389343, "learning_rate": 1.1307378106624837e-05, "loss": 0.5262, "step": 29891 }, { "epoch": 0.9182563819002857, "grad_norm": 0.8011634945869446, "learning_rate": 1.1306898975741663e-05, "loss": 0.5728, "step": 29892 }, { "epoch": 0.9182871010352348, "grad_norm": 0.35654035210609436, "learning_rate": 1.1306419841806135e-05, "loss": 0.5434, "step": 29893 }, { "epoch": 0.918317820170184, "grad_norm": 0.3509308695793152, "learning_rate": 1.1305940704819372e-05, "loss": 0.5713, "step": 29894 }, { "epoch": 0.9183485393051332, "grad_norm": 0.3579682409763336, "learning_rate": 1.130546156478249e-05, "loss": 0.5348, "step": 29895 }, { "epoch": 0.9183792584400823, "grad_norm": 0.38309985399246216, "learning_rate": 1.1304982421696615e-05, "loss": 0.5246, "step": 29896 }, { "epoch": 0.9184099775750315, "grad_norm": 0.4499751627445221, "learning_rate": 1.1304503275562862e-05, "loss": 0.5918, "step": 29897 }, { "epoch": 0.9184406967099806, "grad_norm": 0.3576095402240753, "learning_rate": 1.1304024126382352e-05, "loss": 0.5175, "step": 29898 }, { "epoch": 0.9184714158449299, "grad_norm": 0.3618386387825012, "learning_rate": 1.1303544974156199e-05, "loss": 0.5625, "step": 29899 }, { "epoch": 0.918502134979879, "grad_norm": 0.3974545896053314, "learning_rate": 1.1303065818885529e-05, "loss": 0.5345, "step": 29900 }, { "epoch": 0.9185328541148281, "grad_norm": 0.36657220125198364, "learning_rate": 1.1302586660571457e-05, "loss": 0.5202, "step": 29901 }, { "epoch": 0.9185635732497773, "grad_norm": 0.3758341670036316, "learning_rate": 1.1302107499215102e-05, "loss": 0.6294, "step": 29902 }, { "epoch": 0.9185942923847265, "grad_norm": 0.36284491419792175, "learning_rate": 1.1301628334817584e-05, "loss": 0.53, "step": 29903 }, { "epoch": 0.9186250115196756, "grad_norm": 0.3757610023021698, "learning_rate": 1.1301149167380024e-05, "loss": 0.5434, "step": 29904 }, { "epoch": 0.9186557306546248, "grad_norm": 0.3532731831073761, "learning_rate": 1.1300669996903537e-05, "loss": 0.5733, "step": 29905 }, { "epoch": 0.9186864497895739, "grad_norm": 0.3599115312099457, "learning_rate": 1.1300190823389244e-05, "loss": 0.4619, "step": 29906 }, { "epoch": 0.918717168924523, "grad_norm": 0.3805372714996338, "learning_rate": 1.1299711646838267e-05, "loss": 0.542, "step": 29907 }, { "epoch": 0.9187478880594723, "grad_norm": 0.39002281427383423, "learning_rate": 1.129923246725172e-05, "loss": 0.6274, "step": 29908 }, { "epoch": 0.9187786071944214, "grad_norm": 0.4161083698272705, "learning_rate": 1.1298753284630725e-05, "loss": 0.5509, "step": 29909 }, { "epoch": 0.9188093263293705, "grad_norm": 0.3908417224884033, "learning_rate": 1.1298274098976403e-05, "loss": 0.5634, "step": 29910 }, { "epoch": 0.9188400454643197, "grad_norm": 0.4437156915664673, "learning_rate": 1.129779491028987e-05, "loss": 0.5964, "step": 29911 }, { "epoch": 0.9188707645992689, "grad_norm": 0.36368775367736816, "learning_rate": 1.1297315718572248e-05, "loss": 0.5146, "step": 29912 }, { "epoch": 0.9189014837342181, "grad_norm": 0.39373475313186646, "learning_rate": 1.129683652382465e-05, "loss": 0.6011, "step": 29913 }, { "epoch": 0.9189322028691672, "grad_norm": 0.3667777478694916, "learning_rate": 1.1296357326048204e-05, "loss": 0.5231, "step": 29914 }, { "epoch": 0.9189629220041163, "grad_norm": 0.35077378153800964, "learning_rate": 1.1295878125244024e-05, "loss": 0.5705, "step": 29915 }, { "epoch": 0.9189936411390656, "grad_norm": 2.929400682449341, "learning_rate": 1.129539892141323e-05, "loss": 0.5626, "step": 29916 }, { "epoch": 0.9190243602740147, "grad_norm": 0.34029829502105713, "learning_rate": 1.129491971455694e-05, "loss": 0.4481, "step": 29917 }, { "epoch": 0.9190550794089638, "grad_norm": 0.40010595321655273, "learning_rate": 1.1294440504676276e-05, "loss": 0.4876, "step": 29918 }, { "epoch": 0.919085798543913, "grad_norm": 0.4035710394382477, "learning_rate": 1.1293961291772358e-05, "loss": 0.581, "step": 29919 }, { "epoch": 0.9191165176788622, "grad_norm": 0.33969858288764954, "learning_rate": 1.1293482075846298e-05, "loss": 0.5623, "step": 29920 }, { "epoch": 0.9191472368138113, "grad_norm": 0.37759196758270264, "learning_rate": 1.1293002856899225e-05, "loss": 0.5499, "step": 29921 }, { "epoch": 0.9191779559487605, "grad_norm": 0.3605083227157593, "learning_rate": 1.129252363493225e-05, "loss": 0.5192, "step": 29922 }, { "epoch": 0.9192086750837096, "grad_norm": 0.36669406294822693, "learning_rate": 1.12920444099465e-05, "loss": 0.5409, "step": 29923 }, { "epoch": 0.9192393942186589, "grad_norm": 0.34949761629104614, "learning_rate": 1.1291565181943085e-05, "loss": 0.5268, "step": 29924 }, { "epoch": 0.919270113353608, "grad_norm": 0.562767744064331, "learning_rate": 1.1291085950923132e-05, "loss": 0.4612, "step": 29925 }, { "epoch": 0.9193008324885571, "grad_norm": 0.5294228196144104, "learning_rate": 1.1290606716887758e-05, "loss": 0.6393, "step": 29926 }, { "epoch": 0.9193315516235063, "grad_norm": 0.358151912689209, "learning_rate": 1.1290127479838082e-05, "loss": 0.4837, "step": 29927 }, { "epoch": 0.9193622707584554, "grad_norm": 0.3482421338558197, "learning_rate": 1.128964823977522e-05, "loss": 0.4862, "step": 29928 }, { "epoch": 0.9193929898934046, "grad_norm": 0.4176829159259796, "learning_rate": 1.1289168996700299e-05, "loss": 0.5038, "step": 29929 }, { "epoch": 0.9194237090283538, "grad_norm": 0.3559646010398865, "learning_rate": 1.1288689750614432e-05, "loss": 0.5705, "step": 29930 }, { "epoch": 0.9194544281633029, "grad_norm": 0.3752310872077942, "learning_rate": 1.1288210501518737e-05, "loss": 0.52, "step": 29931 }, { "epoch": 0.919485147298252, "grad_norm": 0.36884787678718567, "learning_rate": 1.128773124941434e-05, "loss": 0.5568, "step": 29932 }, { "epoch": 0.9195158664332013, "grad_norm": 0.39419183135032654, "learning_rate": 1.1287251994302353e-05, "loss": 0.6478, "step": 29933 }, { "epoch": 0.9195465855681504, "grad_norm": 0.36908623576164246, "learning_rate": 1.1286772736183904e-05, "loss": 0.522, "step": 29934 }, { "epoch": 0.9195773047030995, "grad_norm": 0.33384254574775696, "learning_rate": 1.1286293475060105e-05, "loss": 0.5165, "step": 29935 }, { "epoch": 0.9196080238380487, "grad_norm": 0.3534734845161438, "learning_rate": 1.1285814210932078e-05, "loss": 0.5061, "step": 29936 }, { "epoch": 0.9196387429729979, "grad_norm": 0.35465025901794434, "learning_rate": 1.1285334943800944e-05, "loss": 0.5404, "step": 29937 }, { "epoch": 0.9196694621079471, "grad_norm": 0.3753235936164856, "learning_rate": 1.1284855673667817e-05, "loss": 0.4548, "step": 29938 }, { "epoch": 0.9197001812428962, "grad_norm": 0.3529225289821625, "learning_rate": 1.1284376400533821e-05, "loss": 0.4775, "step": 29939 }, { "epoch": 0.9197309003778453, "grad_norm": 0.4067458212375641, "learning_rate": 1.1283897124400074e-05, "loss": 0.4759, "step": 29940 }, { "epoch": 0.9197616195127946, "grad_norm": 0.41151976585388184, "learning_rate": 1.1283417845267696e-05, "loss": 0.4888, "step": 29941 }, { "epoch": 0.9197923386477437, "grad_norm": 0.4397384822368622, "learning_rate": 1.1282938563137806e-05, "loss": 0.5253, "step": 29942 }, { "epoch": 0.9198230577826928, "grad_norm": 0.4017295837402344, "learning_rate": 1.1282459278011526e-05, "loss": 0.4328, "step": 29943 }, { "epoch": 0.919853776917642, "grad_norm": 0.392113596200943, "learning_rate": 1.1281979989889969e-05, "loss": 0.5004, "step": 29944 }, { "epoch": 0.9198844960525911, "grad_norm": 0.3677724003791809, "learning_rate": 1.1281500698774259e-05, "loss": 0.5933, "step": 29945 }, { "epoch": 0.9199152151875403, "grad_norm": 0.4128761887550354, "learning_rate": 1.1281021404665518e-05, "loss": 0.4541, "step": 29946 }, { "epoch": 0.9199459343224895, "grad_norm": 0.5061865448951721, "learning_rate": 1.1280542107564858e-05, "loss": 0.5368, "step": 29947 }, { "epoch": 0.9199766534574386, "grad_norm": 0.34909504652023315, "learning_rate": 1.1280062807473403e-05, "loss": 0.5537, "step": 29948 }, { "epoch": 0.9200073725923879, "grad_norm": 0.3381185233592987, "learning_rate": 1.127958350439227e-05, "loss": 0.5221, "step": 29949 }, { "epoch": 0.920038091727337, "grad_norm": 0.6326496601104736, "learning_rate": 1.1279104198322584e-05, "loss": 0.5419, "step": 29950 }, { "epoch": 0.9200688108622861, "grad_norm": 0.3726564347743988, "learning_rate": 1.1278624889265459e-05, "loss": 0.4852, "step": 29951 }, { "epoch": 0.9200995299972353, "grad_norm": 0.35474318265914917, "learning_rate": 1.1278145577222018e-05, "loss": 0.5644, "step": 29952 }, { "epoch": 0.9201302491321844, "grad_norm": 0.3307761549949646, "learning_rate": 1.1277666262193378e-05, "loss": 0.531, "step": 29953 }, { "epoch": 0.9201609682671336, "grad_norm": 0.41762402653694153, "learning_rate": 1.1277186944180658e-05, "loss": 0.5039, "step": 29954 }, { "epoch": 0.9201916874020828, "grad_norm": 0.46772515773773193, "learning_rate": 1.1276707623184981e-05, "loss": 0.5384, "step": 29955 }, { "epoch": 0.9202224065370319, "grad_norm": 0.3464648723602295, "learning_rate": 1.1276228299207464e-05, "loss": 0.5312, "step": 29956 }, { "epoch": 0.920253125671981, "grad_norm": 0.5225709676742554, "learning_rate": 1.1275748972249226e-05, "loss": 0.5766, "step": 29957 }, { "epoch": 0.9202838448069303, "grad_norm": 0.44617152214050293, "learning_rate": 1.1275269642311385e-05, "loss": 0.531, "step": 29958 }, { "epoch": 0.9203145639418794, "grad_norm": 0.4651534855365753, "learning_rate": 1.1274790309395066e-05, "loss": 0.491, "step": 29959 }, { "epoch": 0.9203452830768285, "grad_norm": 0.37205153703689575, "learning_rate": 1.127431097350138e-05, "loss": 0.4938, "step": 29960 }, { "epoch": 0.9203760022117777, "grad_norm": 0.4140256643295288, "learning_rate": 1.1273831634631459e-05, "loss": 0.4834, "step": 29961 }, { "epoch": 0.9204067213467269, "grad_norm": 0.3615560531616211, "learning_rate": 1.127335229278641e-05, "loss": 0.646, "step": 29962 }, { "epoch": 0.9204374404816761, "grad_norm": 0.4336291551589966, "learning_rate": 1.1272872947967359e-05, "loss": 0.5488, "step": 29963 }, { "epoch": 0.9204681596166252, "grad_norm": 0.4756731390953064, "learning_rate": 1.1272393600175427e-05, "loss": 0.4505, "step": 29964 }, { "epoch": 0.9204988787515743, "grad_norm": 0.4346102178096771, "learning_rate": 1.1271914249411729e-05, "loss": 0.5285, "step": 29965 }, { "epoch": 0.9205295978865236, "grad_norm": 0.38906222581863403, "learning_rate": 1.1271434895677387e-05, "loss": 0.5962, "step": 29966 }, { "epoch": 0.9205603170214727, "grad_norm": 0.3644165098667145, "learning_rate": 1.1270955538973513e-05, "loss": 0.4655, "step": 29967 }, { "epoch": 0.9205910361564218, "grad_norm": 0.36042895913124084, "learning_rate": 1.1270476179301241e-05, "loss": 0.4778, "step": 29968 }, { "epoch": 0.920621755291371, "grad_norm": 0.3721064627170563, "learning_rate": 1.1269996816661681e-05, "loss": 0.561, "step": 29969 }, { "epoch": 0.9206524744263201, "grad_norm": 0.34181132912635803, "learning_rate": 1.1269517451055959e-05, "loss": 0.5515, "step": 29970 }, { "epoch": 0.9206831935612693, "grad_norm": 0.3369958996772766, "learning_rate": 1.1269038082485187e-05, "loss": 0.5687, "step": 29971 }, { "epoch": 0.9207139126962185, "grad_norm": 0.3779838979244232, "learning_rate": 1.1268558710950487e-05, "loss": 0.4935, "step": 29972 }, { "epoch": 0.9207446318311676, "grad_norm": 0.3547717034816742, "learning_rate": 1.126807933645298e-05, "loss": 0.5707, "step": 29973 }, { "epoch": 0.9207753509661168, "grad_norm": 0.35227230191230774, "learning_rate": 1.1267599958993785e-05, "loss": 0.5369, "step": 29974 }, { "epoch": 0.920806070101066, "grad_norm": 0.4852278232574463, "learning_rate": 1.1267120578574022e-05, "loss": 0.4593, "step": 29975 }, { "epoch": 0.9208367892360151, "grad_norm": 0.3880731165409088, "learning_rate": 1.126664119519481e-05, "loss": 0.5216, "step": 29976 }, { "epoch": 0.9208675083709643, "grad_norm": 0.4194117486476898, "learning_rate": 1.1266161808857273e-05, "loss": 0.5379, "step": 29977 }, { "epoch": 0.9208982275059134, "grad_norm": 0.5134453177452087, "learning_rate": 1.126568241956252e-05, "loss": 0.5289, "step": 29978 }, { "epoch": 0.9209289466408626, "grad_norm": 0.36548614501953125, "learning_rate": 1.1265203027311681e-05, "loss": 0.497, "step": 29979 }, { "epoch": 0.9209596657758118, "grad_norm": 0.4193050265312195, "learning_rate": 1.1264723632105872e-05, "loss": 0.6173, "step": 29980 }, { "epoch": 0.9209903849107609, "grad_norm": 0.4160073399543762, "learning_rate": 1.1264244233946214e-05, "loss": 0.5221, "step": 29981 }, { "epoch": 0.92102110404571, "grad_norm": 0.3888459801673889, "learning_rate": 1.1263764832833823e-05, "loss": 0.6169, "step": 29982 }, { "epoch": 0.9210518231806593, "grad_norm": 0.3380714952945709, "learning_rate": 1.1263285428769821e-05, "loss": 0.4709, "step": 29983 }, { "epoch": 0.9210825423156084, "grad_norm": 0.39911699295043945, "learning_rate": 1.1262806021755331e-05, "loss": 0.522, "step": 29984 }, { "epoch": 0.9211132614505576, "grad_norm": 0.37550267577171326, "learning_rate": 1.1262326611791462e-05, "loss": 0.5146, "step": 29985 }, { "epoch": 0.9211439805855067, "grad_norm": 0.3666677176952362, "learning_rate": 1.1261847198879346e-05, "loss": 0.5873, "step": 29986 }, { "epoch": 0.9211746997204558, "grad_norm": 0.3925999104976654, "learning_rate": 1.12613677830201e-05, "loss": 0.547, "step": 29987 }, { "epoch": 0.9212054188554051, "grad_norm": 0.37403222918510437, "learning_rate": 1.1260888364214838e-05, "loss": 0.5192, "step": 29988 }, { "epoch": 0.9212361379903542, "grad_norm": 0.34289970993995667, "learning_rate": 1.1260408942464682e-05, "loss": 0.5011, "step": 29989 }, { "epoch": 0.9212668571253033, "grad_norm": 0.4002101719379425, "learning_rate": 1.1259929517770756e-05, "loss": 0.5646, "step": 29990 }, { "epoch": 0.9212975762602525, "grad_norm": 0.3749113380908966, "learning_rate": 1.1259450090134176e-05, "loss": 0.5658, "step": 29991 }, { "epoch": 0.9213282953952017, "grad_norm": 0.3723420202732086, "learning_rate": 1.1258970659556062e-05, "loss": 0.6112, "step": 29992 }, { "epoch": 0.9213590145301508, "grad_norm": 0.33891499042510986, "learning_rate": 1.1258491226037533e-05, "loss": 0.4365, "step": 29993 }, { "epoch": 0.9213897336651, "grad_norm": 0.39398443698883057, "learning_rate": 1.125801178957971e-05, "loss": 0.5243, "step": 29994 }, { "epoch": 0.9214204528000491, "grad_norm": 0.3891143500804901, "learning_rate": 1.1257532350183715e-05, "loss": 0.5473, "step": 29995 }, { "epoch": 0.9214511719349983, "grad_norm": 0.34050509333610535, "learning_rate": 1.1257052907850661e-05, "loss": 0.5313, "step": 29996 }, { "epoch": 0.9214818910699475, "grad_norm": 0.40583929419517517, "learning_rate": 1.1256573462581677e-05, "loss": 0.5461, "step": 29997 }, { "epoch": 0.9215126102048966, "grad_norm": 0.40817487239837646, "learning_rate": 1.1256094014377877e-05, "loss": 0.568, "step": 29998 }, { "epoch": 0.9215433293398458, "grad_norm": 0.3612138628959656, "learning_rate": 1.1255614563240378e-05, "loss": 0.5039, "step": 29999 }, { "epoch": 0.921574048474795, "grad_norm": 0.3561590313911438, "learning_rate": 1.1255135109170308e-05, "loss": 0.5691, "step": 30000 }, { "epoch": 0.9216047676097441, "grad_norm": 0.3488360345363617, "learning_rate": 1.125465565216878e-05, "loss": 0.4712, "step": 30001 }, { "epoch": 0.9216354867446933, "grad_norm": 0.4168758690357208, "learning_rate": 1.1254176192236917e-05, "loss": 0.551, "step": 30002 }, { "epoch": 0.9216662058796424, "grad_norm": 0.3940725326538086, "learning_rate": 1.1253696729375835e-05, "loss": 0.5932, "step": 30003 }, { "epoch": 0.9216969250145916, "grad_norm": 0.34979456663131714, "learning_rate": 1.1253217263586657e-05, "loss": 0.4473, "step": 30004 }, { "epoch": 0.9217276441495408, "grad_norm": 0.35536709427833557, "learning_rate": 1.1252737794870508e-05, "loss": 0.5934, "step": 30005 }, { "epoch": 0.9217583632844899, "grad_norm": 0.37758785486221313, "learning_rate": 1.1252258323228497e-05, "loss": 0.5712, "step": 30006 }, { "epoch": 0.921789082419439, "grad_norm": 0.4608696401119232, "learning_rate": 1.1251778848661752e-05, "loss": 0.5004, "step": 30007 }, { "epoch": 0.9218198015543883, "grad_norm": 0.3799079358577728, "learning_rate": 1.1251299371171388e-05, "loss": 0.558, "step": 30008 }, { "epoch": 0.9218505206893374, "grad_norm": 0.36049219965934753, "learning_rate": 1.1250819890758529e-05, "loss": 0.6065, "step": 30009 }, { "epoch": 0.9218812398242866, "grad_norm": 0.3438192307949066, "learning_rate": 1.1250340407424292e-05, "loss": 0.5415, "step": 30010 }, { "epoch": 0.9219119589592357, "grad_norm": 0.3826729655265808, "learning_rate": 1.1249860921169797e-05, "loss": 0.5006, "step": 30011 }, { "epoch": 0.9219426780941848, "grad_norm": 0.39334627985954285, "learning_rate": 1.1249381431996165e-05, "loss": 0.572, "step": 30012 }, { "epoch": 0.9219733972291341, "grad_norm": 0.4380052983760834, "learning_rate": 1.1248901939904517e-05, "loss": 0.5409, "step": 30013 }, { "epoch": 0.9220041163640832, "grad_norm": 0.3461383879184723, "learning_rate": 1.1248422444895967e-05, "loss": 0.5715, "step": 30014 }, { "epoch": 0.9220348354990323, "grad_norm": 0.37462317943573, "learning_rate": 1.1247942946971643e-05, "loss": 0.6101, "step": 30015 }, { "epoch": 0.9220655546339815, "grad_norm": 0.33808737993240356, "learning_rate": 1.1247463446132662e-05, "loss": 0.52, "step": 30016 }, { "epoch": 0.9220962737689307, "grad_norm": 0.3869457542896271, "learning_rate": 1.124698394238014e-05, "loss": 0.567, "step": 30017 }, { "epoch": 0.9221269929038798, "grad_norm": 0.3535647392272949, "learning_rate": 1.1246504435715204e-05, "loss": 0.6166, "step": 30018 }, { "epoch": 0.922157712038829, "grad_norm": 0.3918992578983307, "learning_rate": 1.1246024926138965e-05, "loss": 0.5636, "step": 30019 }, { "epoch": 0.9221884311737781, "grad_norm": 0.3863928020000458, "learning_rate": 1.124554541365255e-05, "loss": 0.503, "step": 30020 }, { "epoch": 0.9222191503087273, "grad_norm": 0.3549692928791046, "learning_rate": 1.1245065898257074e-05, "loss": 0.5501, "step": 30021 }, { "epoch": 0.9222498694436765, "grad_norm": 0.3330966830253601, "learning_rate": 1.1244586379953663e-05, "loss": 0.5186, "step": 30022 }, { "epoch": 0.9222805885786256, "grad_norm": 0.35222336649894714, "learning_rate": 1.1244106858743433e-05, "loss": 0.5897, "step": 30023 }, { "epoch": 0.9223113077135748, "grad_norm": 0.3579084575176239, "learning_rate": 1.1243627334627502e-05, "loss": 0.6038, "step": 30024 }, { "epoch": 0.922342026848524, "grad_norm": 0.3734002709388733, "learning_rate": 1.1243147807606998e-05, "loss": 0.5801, "step": 30025 }, { "epoch": 0.9223727459834731, "grad_norm": 0.3645961284637451, "learning_rate": 1.124266827768303e-05, "loss": 0.509, "step": 30026 }, { "epoch": 0.9224034651184223, "grad_norm": 0.4008324444293976, "learning_rate": 1.1242188744856726e-05, "loss": 0.5059, "step": 30027 }, { "epoch": 0.9224341842533714, "grad_norm": 0.3719411790370941, "learning_rate": 1.1241709209129203e-05, "loss": 0.5436, "step": 30028 }, { "epoch": 0.9224649033883205, "grad_norm": 0.44545838236808777, "learning_rate": 1.1241229670501582e-05, "loss": 0.6186, "step": 30029 }, { "epoch": 0.9224956225232698, "grad_norm": 0.381887823343277, "learning_rate": 1.124075012897498e-05, "loss": 0.569, "step": 30030 }, { "epoch": 0.9225263416582189, "grad_norm": 0.3798633813858032, "learning_rate": 1.1240270584550523e-05, "loss": 0.5563, "step": 30031 }, { "epoch": 0.922557060793168, "grad_norm": 0.3891291618347168, "learning_rate": 1.1239791037229325e-05, "loss": 0.5374, "step": 30032 }, { "epoch": 0.9225877799281172, "grad_norm": 0.34691083431243896, "learning_rate": 1.1239311487012509e-05, "loss": 0.547, "step": 30033 }, { "epoch": 0.9226184990630664, "grad_norm": 0.4190441370010376, "learning_rate": 1.1238831933901197e-05, "loss": 0.4987, "step": 30034 }, { "epoch": 0.9226492181980156, "grad_norm": 0.4357995092868805, "learning_rate": 1.1238352377896502e-05, "loss": 0.5934, "step": 30035 }, { "epoch": 0.9226799373329647, "grad_norm": 0.5160508155822754, "learning_rate": 1.1237872818999551e-05, "loss": 0.5693, "step": 30036 }, { "epoch": 0.9227106564679138, "grad_norm": 0.33477482199668884, "learning_rate": 1.1237393257211462e-05, "loss": 0.5258, "step": 30037 }, { "epoch": 0.9227413756028631, "grad_norm": 0.34983745217323303, "learning_rate": 1.1236913692533355e-05, "loss": 0.5481, "step": 30038 }, { "epoch": 0.9227720947378122, "grad_norm": 0.3413762152194977, "learning_rate": 1.1236434124966346e-05, "loss": 0.5023, "step": 30039 }, { "epoch": 0.9228028138727613, "grad_norm": 0.37891146540641785, "learning_rate": 1.1235954554511562e-05, "loss": 0.561, "step": 30040 }, { "epoch": 0.9228335330077105, "grad_norm": 0.47235676646232605, "learning_rate": 1.123547498117012e-05, "loss": 0.6007, "step": 30041 }, { "epoch": 0.9228642521426597, "grad_norm": 0.39065632224082947, "learning_rate": 1.1234995404943136e-05, "loss": 0.4931, "step": 30042 }, { "epoch": 0.9228949712776088, "grad_norm": 0.34963127970695496, "learning_rate": 1.123451582583174e-05, "loss": 0.5039, "step": 30043 }, { "epoch": 0.922925690412558, "grad_norm": 0.36501845717430115, "learning_rate": 1.1234036243837041e-05, "loss": 0.5658, "step": 30044 }, { "epoch": 0.9229564095475071, "grad_norm": 0.43439361453056335, "learning_rate": 1.1233556658960167e-05, "loss": 0.569, "step": 30045 }, { "epoch": 0.9229871286824562, "grad_norm": 0.36586782336235046, "learning_rate": 1.1233077071202235e-05, "loss": 0.5718, "step": 30046 }, { "epoch": 0.9230178478174055, "grad_norm": 0.47001656889915466, "learning_rate": 1.1232597480564365e-05, "loss": 0.5847, "step": 30047 }, { "epoch": 0.9230485669523546, "grad_norm": 0.347569078207016, "learning_rate": 1.1232117887047675e-05, "loss": 0.4886, "step": 30048 }, { "epoch": 0.9230792860873038, "grad_norm": 0.3632048964500427, "learning_rate": 1.1231638290653293e-05, "loss": 0.5485, "step": 30049 }, { "epoch": 0.923110005222253, "grad_norm": 0.3924248218536377, "learning_rate": 1.123115869138233e-05, "loss": 0.491, "step": 30050 }, { "epoch": 0.9231407243572021, "grad_norm": 0.3840333819389343, "learning_rate": 1.123067908923591e-05, "loss": 0.448, "step": 30051 }, { "epoch": 0.9231714434921513, "grad_norm": 0.3638237714767456, "learning_rate": 1.1230199484215155e-05, "loss": 0.5413, "step": 30052 }, { "epoch": 0.9232021626271004, "grad_norm": 0.3240947425365448, "learning_rate": 1.122971987632118e-05, "loss": 0.5109, "step": 30053 }, { "epoch": 0.9232328817620495, "grad_norm": 0.39878329634666443, "learning_rate": 1.1229240265555112e-05, "loss": 0.4908, "step": 30054 }, { "epoch": 0.9232636008969988, "grad_norm": 0.3747442662715912, "learning_rate": 1.1228760651918065e-05, "loss": 0.545, "step": 30055 }, { "epoch": 0.9232943200319479, "grad_norm": 0.47478190064430237, "learning_rate": 1.1228281035411163e-05, "loss": 0.573, "step": 30056 }, { "epoch": 0.923325039166897, "grad_norm": 0.3876458704471588, "learning_rate": 1.1227801416035525e-05, "loss": 0.6162, "step": 30057 }, { "epoch": 0.9233557583018462, "grad_norm": 0.38263174891471863, "learning_rate": 1.122732179379227e-05, "loss": 0.5911, "step": 30058 }, { "epoch": 0.9233864774367954, "grad_norm": 0.4247816801071167, "learning_rate": 1.1226842168682519e-05, "loss": 0.5247, "step": 30059 }, { "epoch": 0.9234171965717446, "grad_norm": 0.3633385896682739, "learning_rate": 1.1226362540707391e-05, "loss": 0.5185, "step": 30060 }, { "epoch": 0.9234479157066937, "grad_norm": 0.36043211817741394, "learning_rate": 1.122588290986801e-05, "loss": 0.5561, "step": 30061 }, { "epoch": 0.9234786348416428, "grad_norm": 0.35157543420791626, "learning_rate": 1.1225403276165495e-05, "loss": 0.6182, "step": 30062 }, { "epoch": 0.9235093539765921, "grad_norm": 0.39171096682548523, "learning_rate": 1.1224923639600961e-05, "loss": 0.4646, "step": 30063 }, { "epoch": 0.9235400731115412, "grad_norm": 0.3847489655017853, "learning_rate": 1.1224444000175537e-05, "loss": 0.5944, "step": 30064 }, { "epoch": 0.9235707922464903, "grad_norm": 0.3767077326774597, "learning_rate": 1.1223964357890336e-05, "loss": 0.6682, "step": 30065 }, { "epoch": 0.9236015113814395, "grad_norm": 0.3323873281478882, "learning_rate": 1.122348471274648e-05, "loss": 0.471, "step": 30066 }, { "epoch": 0.9236322305163887, "grad_norm": 0.3674118220806122, "learning_rate": 1.1223005064745092e-05, "loss": 0.5301, "step": 30067 }, { "epoch": 0.9236629496513378, "grad_norm": 0.3650074899196625, "learning_rate": 1.1222525413887291e-05, "loss": 0.5047, "step": 30068 }, { "epoch": 0.923693668786287, "grad_norm": 0.3513357639312744, "learning_rate": 1.1222045760174194e-05, "loss": 0.5474, "step": 30069 }, { "epoch": 0.9237243879212361, "grad_norm": 0.40105152130126953, "learning_rate": 1.1221566103606925e-05, "loss": 0.542, "step": 30070 }, { "epoch": 0.9237551070561852, "grad_norm": 0.4702177941799164, "learning_rate": 1.1221086444186602e-05, "loss": 0.5239, "step": 30071 }, { "epoch": 0.9237858261911345, "grad_norm": 0.3570261001586914, "learning_rate": 1.1220606781914347e-05, "loss": 0.5343, "step": 30072 }, { "epoch": 0.9238165453260836, "grad_norm": 0.6798921823501587, "learning_rate": 1.122012711679128e-05, "loss": 0.5008, "step": 30073 }, { "epoch": 0.9238472644610328, "grad_norm": 0.34671902656555176, "learning_rate": 1.121964744881852e-05, "loss": 0.5269, "step": 30074 }, { "epoch": 0.9238779835959819, "grad_norm": 0.3652777373790741, "learning_rate": 1.121916777799719e-05, "loss": 0.513, "step": 30075 }, { "epoch": 0.9239087027309311, "grad_norm": 0.36349523067474365, "learning_rate": 1.1218688104328408e-05, "loss": 0.5766, "step": 30076 }, { "epoch": 0.9239394218658803, "grad_norm": 0.3817389905452728, "learning_rate": 1.1218208427813293e-05, "loss": 0.5573, "step": 30077 }, { "epoch": 0.9239701410008294, "grad_norm": 0.3545553982257843, "learning_rate": 1.1217728748452967e-05, "loss": 0.5827, "step": 30078 }, { "epoch": 0.9240008601357785, "grad_norm": 0.3860584497451782, "learning_rate": 1.1217249066248551e-05, "loss": 0.522, "step": 30079 }, { "epoch": 0.9240315792707278, "grad_norm": 0.3380851745605469, "learning_rate": 1.1216769381201165e-05, "loss": 0.4915, "step": 30080 }, { "epoch": 0.9240622984056769, "grad_norm": 0.467037558555603, "learning_rate": 1.121628969331193e-05, "loss": 0.6002, "step": 30081 }, { "epoch": 0.924093017540626, "grad_norm": 0.3553353250026703, "learning_rate": 1.1215810002581966e-05, "loss": 0.6308, "step": 30082 }, { "epoch": 0.9241237366755752, "grad_norm": 0.43807563185691833, "learning_rate": 1.1215330309012393e-05, "loss": 0.6467, "step": 30083 }, { "epoch": 0.9241544558105244, "grad_norm": 0.362082302570343, "learning_rate": 1.121485061260433e-05, "loss": 0.5567, "step": 30084 }, { "epoch": 0.9241851749454736, "grad_norm": 0.3634592890739441, "learning_rate": 1.1214370913358898e-05, "loss": 0.4357, "step": 30085 }, { "epoch": 0.9242158940804227, "grad_norm": 0.3891961872577667, "learning_rate": 1.1213891211277219e-05, "loss": 0.614, "step": 30086 }, { "epoch": 0.9242466132153718, "grad_norm": 0.385306715965271, "learning_rate": 1.1213411506360413e-05, "loss": 0.5815, "step": 30087 }, { "epoch": 0.9242773323503211, "grad_norm": 0.35788872838020325, "learning_rate": 1.1212931798609599e-05, "loss": 0.5365, "step": 30088 }, { "epoch": 0.9243080514852702, "grad_norm": 0.4196898639202118, "learning_rate": 1.1212452088025895e-05, "loss": 0.4995, "step": 30089 }, { "epoch": 0.9243387706202193, "grad_norm": 0.3561948537826538, "learning_rate": 1.121197237461043e-05, "loss": 0.509, "step": 30090 }, { "epoch": 0.9243694897551685, "grad_norm": 0.6514362692832947, "learning_rate": 1.1211492658364314e-05, "loss": 0.5327, "step": 30091 }, { "epoch": 0.9244002088901176, "grad_norm": 0.3332548141479492, "learning_rate": 1.1211012939288675e-05, "loss": 0.5238, "step": 30092 }, { "epoch": 0.9244309280250668, "grad_norm": 0.35330429673194885, "learning_rate": 1.1210533217384629e-05, "loss": 0.4793, "step": 30093 }, { "epoch": 0.924461647160016, "grad_norm": 0.4256698489189148, "learning_rate": 1.1210053492653301e-05, "loss": 0.4688, "step": 30094 }, { "epoch": 0.9244923662949651, "grad_norm": 0.3737295866012573, "learning_rate": 1.1209573765095804e-05, "loss": 0.5143, "step": 30095 }, { "epoch": 0.9245230854299143, "grad_norm": 0.3410843312740326, "learning_rate": 1.1209094034713266e-05, "loss": 0.5006, "step": 30096 }, { "epoch": 0.9245538045648635, "grad_norm": 0.3523876965045929, "learning_rate": 1.1208614301506803e-05, "loss": 0.6028, "step": 30097 }, { "epoch": 0.9245845236998126, "grad_norm": 0.3579404652118683, "learning_rate": 1.1208134565477537e-05, "loss": 0.5306, "step": 30098 }, { "epoch": 0.9246152428347618, "grad_norm": 0.4276377260684967, "learning_rate": 1.1207654826626592e-05, "loss": 0.5569, "step": 30099 }, { "epoch": 0.9246459619697109, "grad_norm": 0.36201366782188416, "learning_rate": 1.120717508495508e-05, "loss": 0.542, "step": 30100 }, { "epoch": 0.9246766811046601, "grad_norm": 0.37701302766799927, "learning_rate": 1.1206695340464131e-05, "loss": 0.5332, "step": 30101 }, { "epoch": 0.9247074002396093, "grad_norm": 0.37165459990501404, "learning_rate": 1.1206215593154856e-05, "loss": 0.597, "step": 30102 }, { "epoch": 0.9247381193745584, "grad_norm": 0.35811248421669006, "learning_rate": 1.1205735843028385e-05, "loss": 0.6412, "step": 30103 }, { "epoch": 0.9247688385095075, "grad_norm": 0.34183159470558167, "learning_rate": 1.1205256090085832e-05, "loss": 0.6135, "step": 30104 }, { "epoch": 0.9247995576444568, "grad_norm": 0.3511563241481781, "learning_rate": 1.1204776334328315e-05, "loss": 0.5247, "step": 30105 }, { "epoch": 0.9248302767794059, "grad_norm": 0.3845004141330719, "learning_rate": 1.1204296575756966e-05, "loss": 0.5535, "step": 30106 }, { "epoch": 0.924860995914355, "grad_norm": 0.33403444290161133, "learning_rate": 1.1203816814372891e-05, "loss": 0.5062, "step": 30107 }, { "epoch": 0.9248917150493042, "grad_norm": 0.34827741980552673, "learning_rate": 1.1203337050177223e-05, "loss": 0.4774, "step": 30108 }, { "epoch": 0.9249224341842534, "grad_norm": 0.3576902449131012, "learning_rate": 1.1202857283171073e-05, "loss": 0.6077, "step": 30109 }, { "epoch": 0.9249531533192026, "grad_norm": 0.36620208621025085, "learning_rate": 1.1202377513355571e-05, "loss": 0.6001, "step": 30110 }, { "epoch": 0.9249838724541517, "grad_norm": 0.42570504546165466, "learning_rate": 1.1201897740731829e-05, "loss": 0.5418, "step": 30111 }, { "epoch": 0.9250145915891008, "grad_norm": 0.49701520800590515, "learning_rate": 1.1201417965300972e-05, "loss": 0.5107, "step": 30112 }, { "epoch": 0.92504531072405, "grad_norm": 0.42950209975242615, "learning_rate": 1.1200938187064116e-05, "loss": 0.5971, "step": 30113 }, { "epoch": 0.9250760298589992, "grad_norm": 0.391383558511734, "learning_rate": 1.1200458406022388e-05, "loss": 0.6149, "step": 30114 }, { "epoch": 0.9251067489939483, "grad_norm": 0.3904705047607422, "learning_rate": 1.1199978622176905e-05, "loss": 0.5551, "step": 30115 }, { "epoch": 0.9251374681288975, "grad_norm": 0.3922196328639984, "learning_rate": 1.1199498835528791e-05, "loss": 0.5689, "step": 30116 }, { "epoch": 0.9251681872638466, "grad_norm": 0.40581902861595154, "learning_rate": 1.119901904607916e-05, "loss": 0.5401, "step": 30117 }, { "epoch": 0.9251989063987958, "grad_norm": 0.3520056903362274, "learning_rate": 1.1198539253829136e-05, "loss": 0.5329, "step": 30118 }, { "epoch": 0.925229625533745, "grad_norm": 0.37194889783859253, "learning_rate": 1.1198059458779844e-05, "loss": 0.5482, "step": 30119 }, { "epoch": 0.9252603446686941, "grad_norm": 0.37963056564331055, "learning_rate": 1.1197579660932399e-05, "loss": 0.4966, "step": 30120 }, { "epoch": 0.9252910638036433, "grad_norm": 0.3649732172489166, "learning_rate": 1.119709986028792e-05, "loss": 0.513, "step": 30121 }, { "epoch": 0.9253217829385925, "grad_norm": 0.3953976035118103, "learning_rate": 1.1196620056847534e-05, "loss": 0.4547, "step": 30122 }, { "epoch": 0.9253525020735416, "grad_norm": 0.38349175453186035, "learning_rate": 1.1196140250612355e-05, "loss": 0.5884, "step": 30123 }, { "epoch": 0.9253832212084908, "grad_norm": 0.35273653268814087, "learning_rate": 1.1195660441583511e-05, "loss": 0.5528, "step": 30124 }, { "epoch": 0.9254139403434399, "grad_norm": 0.39514032006263733, "learning_rate": 1.1195180629762114e-05, "loss": 0.5961, "step": 30125 }, { "epoch": 0.925444659478389, "grad_norm": 0.41808539628982544, "learning_rate": 1.1194700815149294e-05, "loss": 0.5562, "step": 30126 }, { "epoch": 0.9254753786133383, "grad_norm": 0.3773539662361145, "learning_rate": 1.1194220997746163e-05, "loss": 0.4475, "step": 30127 }, { "epoch": 0.9255060977482874, "grad_norm": 0.3471530079841614, "learning_rate": 1.1193741177553847e-05, "loss": 0.5661, "step": 30128 }, { "epoch": 0.9255368168832365, "grad_norm": 0.3452467620372772, "learning_rate": 1.1193261354573464e-05, "loss": 0.5028, "step": 30129 }, { "epoch": 0.9255675360181858, "grad_norm": 0.36960768699645996, "learning_rate": 1.1192781528806139e-05, "loss": 0.5197, "step": 30130 }, { "epoch": 0.9255982551531349, "grad_norm": 0.3780989646911621, "learning_rate": 1.1192301700252987e-05, "loss": 0.6063, "step": 30131 }, { "epoch": 0.925628974288084, "grad_norm": 0.3182419538497925, "learning_rate": 1.1191821868915126e-05, "loss": 0.5079, "step": 30132 }, { "epoch": 0.9256596934230332, "grad_norm": 0.38533058762550354, "learning_rate": 1.119134203479369e-05, "loss": 0.539, "step": 30133 }, { "epoch": 0.9256904125579823, "grad_norm": 0.3389412760734558, "learning_rate": 1.1190862197889788e-05, "loss": 0.5061, "step": 30134 }, { "epoch": 0.9257211316929316, "grad_norm": 0.3827824294567108, "learning_rate": 1.1190382358204546e-05, "loss": 0.5061, "step": 30135 }, { "epoch": 0.9257518508278807, "grad_norm": 0.3931884467601776, "learning_rate": 1.118990251573908e-05, "loss": 0.5273, "step": 30136 }, { "epoch": 0.9257825699628298, "grad_norm": 0.4692617654800415, "learning_rate": 1.1189422670494517e-05, "loss": 0.4984, "step": 30137 }, { "epoch": 0.925813289097779, "grad_norm": 0.3824388384819031, "learning_rate": 1.118894282247197e-05, "loss": 0.4776, "step": 30138 }, { "epoch": 0.9258440082327282, "grad_norm": 0.3953078091144562, "learning_rate": 1.1188462971672567e-05, "loss": 0.5823, "step": 30139 }, { "epoch": 0.9258747273676773, "grad_norm": 0.8896430134773254, "learning_rate": 1.1187983118097425e-05, "loss": 0.4917, "step": 30140 }, { "epoch": 0.9259054465026265, "grad_norm": 0.3522399067878723, "learning_rate": 1.1187503261747666e-05, "loss": 0.5186, "step": 30141 }, { "epoch": 0.9259361656375756, "grad_norm": 0.3614135682582855, "learning_rate": 1.118702340262441e-05, "loss": 0.5321, "step": 30142 }, { "epoch": 0.9259668847725248, "grad_norm": 0.33916154503822327, "learning_rate": 1.1186543540728776e-05, "loss": 0.5302, "step": 30143 }, { "epoch": 0.925997603907474, "grad_norm": 0.3717201054096222, "learning_rate": 1.1186063676061891e-05, "loss": 0.461, "step": 30144 }, { "epoch": 0.9260283230424231, "grad_norm": 0.37520650029182434, "learning_rate": 1.1185583808624867e-05, "loss": 0.567, "step": 30145 }, { "epoch": 0.9260590421773723, "grad_norm": 0.39829209446907043, "learning_rate": 1.1185103938418833e-05, "loss": 0.5492, "step": 30146 }, { "epoch": 0.9260897613123215, "grad_norm": 0.3451083302497864, "learning_rate": 1.1184624065444901e-05, "loss": 0.4873, "step": 30147 }, { "epoch": 0.9261204804472706, "grad_norm": 0.3733082711696625, "learning_rate": 1.1184144189704203e-05, "loss": 0.5129, "step": 30148 }, { "epoch": 0.9261511995822198, "grad_norm": 0.6512208580970764, "learning_rate": 1.118366431119785e-05, "loss": 0.5181, "step": 30149 }, { "epoch": 0.9261819187171689, "grad_norm": 0.3566914498806, "learning_rate": 1.1183184429926962e-05, "loss": 0.477, "step": 30150 }, { "epoch": 0.926212637852118, "grad_norm": 0.4268516004085541, "learning_rate": 1.1182704545892671e-05, "loss": 0.5119, "step": 30151 }, { "epoch": 0.9262433569870673, "grad_norm": 0.4622008502483368, "learning_rate": 1.1182224659096087e-05, "loss": 0.6095, "step": 30152 }, { "epoch": 0.9262740761220164, "grad_norm": 0.3923918604850769, "learning_rate": 1.1181744769538339e-05, "loss": 0.5601, "step": 30153 }, { "epoch": 0.9263047952569655, "grad_norm": 0.36760303378105164, "learning_rate": 1.118126487722054e-05, "loss": 0.501, "step": 30154 }, { "epoch": 0.9263355143919147, "grad_norm": 0.39281052350997925, "learning_rate": 1.1180784982143816e-05, "loss": 0.588, "step": 30155 }, { "epoch": 0.9263662335268639, "grad_norm": 0.4472496211528778, "learning_rate": 1.1180305084309286e-05, "loss": 0.4992, "step": 30156 }, { "epoch": 0.926396952661813, "grad_norm": 0.3810509443283081, "learning_rate": 1.117982518371807e-05, "loss": 0.5128, "step": 30157 }, { "epoch": 0.9264276717967622, "grad_norm": 0.3797038495540619, "learning_rate": 1.1179345280371292e-05, "loss": 0.59, "step": 30158 }, { "epoch": 0.9264583909317113, "grad_norm": 0.3495463728904724, "learning_rate": 1.117886537427007e-05, "loss": 0.4298, "step": 30159 }, { "epoch": 0.9264891100666606, "grad_norm": 0.355399489402771, "learning_rate": 1.1178385465415524e-05, "loss": 0.49, "step": 30160 }, { "epoch": 0.9265198292016097, "grad_norm": 0.3675629496574402, "learning_rate": 1.1177905553808776e-05, "loss": 0.5293, "step": 30161 }, { "epoch": 0.9265505483365588, "grad_norm": 0.347541868686676, "learning_rate": 1.117742563945095e-05, "loss": 0.6078, "step": 30162 }, { "epoch": 0.926581267471508, "grad_norm": 0.4146934449672699, "learning_rate": 1.1176945722343163e-05, "loss": 0.5419, "step": 30163 }, { "epoch": 0.9266119866064572, "grad_norm": 0.4882093071937561, "learning_rate": 1.1176465802486536e-05, "loss": 0.5047, "step": 30164 }, { "epoch": 0.9266427057414063, "grad_norm": 0.3694634735584259, "learning_rate": 1.1175985879882191e-05, "loss": 0.5318, "step": 30165 }, { "epoch": 0.9266734248763555, "grad_norm": 0.361701101064682, "learning_rate": 1.1175505954531252e-05, "loss": 0.5664, "step": 30166 }, { "epoch": 0.9267041440113046, "grad_norm": 0.3858471214771271, "learning_rate": 1.1175026026434834e-05, "loss": 0.5287, "step": 30167 }, { "epoch": 0.9267348631462538, "grad_norm": 0.3563547730445862, "learning_rate": 1.1174546095594058e-05, "loss": 0.617, "step": 30168 }, { "epoch": 0.926765582281203, "grad_norm": 0.41002345085144043, "learning_rate": 1.1174066162010053e-05, "loss": 0.5417, "step": 30169 }, { "epoch": 0.9267963014161521, "grad_norm": 0.4150068461894989, "learning_rate": 1.117358622568393e-05, "loss": 0.5607, "step": 30170 }, { "epoch": 0.9268270205511013, "grad_norm": 0.35791248083114624, "learning_rate": 1.1173106286616818e-05, "loss": 0.5202, "step": 30171 }, { "epoch": 0.9268577396860505, "grad_norm": 0.33973070979118347, "learning_rate": 1.1172626344809833e-05, "loss": 0.5299, "step": 30172 }, { "epoch": 0.9268884588209996, "grad_norm": 0.38376370072364807, "learning_rate": 1.1172146400264096e-05, "loss": 0.4888, "step": 30173 }, { "epoch": 0.9269191779559488, "grad_norm": 0.4016306698322296, "learning_rate": 1.1171666452980732e-05, "loss": 0.6017, "step": 30174 }, { "epoch": 0.9269498970908979, "grad_norm": 0.37777766585350037, "learning_rate": 1.1171186502960855e-05, "loss": 0.4538, "step": 30175 }, { "epoch": 0.926980616225847, "grad_norm": 0.40095362067222595, "learning_rate": 1.1170706550205597e-05, "loss": 0.5504, "step": 30176 }, { "epoch": 0.9270113353607963, "grad_norm": 0.37049660086631775, "learning_rate": 1.1170226594716065e-05, "loss": 0.575, "step": 30177 }, { "epoch": 0.9270420544957454, "grad_norm": 0.359235942363739, "learning_rate": 1.1169746636493391e-05, "loss": 0.5766, "step": 30178 }, { "epoch": 0.9270727736306945, "grad_norm": 0.38208338618278503, "learning_rate": 1.116926667553869e-05, "loss": 0.5173, "step": 30179 }, { "epoch": 0.9271034927656437, "grad_norm": 0.420673668384552, "learning_rate": 1.1168786711853086e-05, "loss": 0.6042, "step": 30180 }, { "epoch": 0.9271342119005929, "grad_norm": 0.39908769726753235, "learning_rate": 1.1168306745437699e-05, "loss": 0.5883, "step": 30181 }, { "epoch": 0.927164931035542, "grad_norm": 0.5924801826477051, "learning_rate": 1.116782677629365e-05, "loss": 0.5061, "step": 30182 }, { "epoch": 0.9271956501704912, "grad_norm": 0.42617547512054443, "learning_rate": 1.1167346804422062e-05, "loss": 0.4808, "step": 30183 }, { "epoch": 0.9272263693054403, "grad_norm": 0.36484843492507935, "learning_rate": 1.116686682982405e-05, "loss": 0.5867, "step": 30184 }, { "epoch": 0.9272570884403896, "grad_norm": 0.35996875166893005, "learning_rate": 1.1166386852500742e-05, "loss": 0.5899, "step": 30185 }, { "epoch": 0.9272878075753387, "grad_norm": 2.797250270843506, "learning_rate": 1.1165906872453252e-05, "loss": 0.5431, "step": 30186 }, { "epoch": 0.9273185267102878, "grad_norm": 0.40430307388305664, "learning_rate": 1.1165426889682709e-05, "loss": 0.5845, "step": 30187 }, { "epoch": 0.927349245845237, "grad_norm": 0.4203284978866577, "learning_rate": 1.1164946904190229e-05, "loss": 0.5638, "step": 30188 }, { "epoch": 0.9273799649801862, "grad_norm": 0.41030803322792053, "learning_rate": 1.1164466915976935e-05, "loss": 0.5009, "step": 30189 }, { "epoch": 0.9274106841151353, "grad_norm": 0.3622567355632782, "learning_rate": 1.1163986925043948e-05, "loss": 0.4143, "step": 30190 }, { "epoch": 0.9274414032500845, "grad_norm": 0.35550570487976074, "learning_rate": 1.1163506931392384e-05, "loss": 0.5843, "step": 30191 }, { "epoch": 0.9274721223850336, "grad_norm": 0.32544004917144775, "learning_rate": 1.1163026935023373e-05, "loss": 0.4901, "step": 30192 }, { "epoch": 0.9275028415199827, "grad_norm": 0.3664722442626953, "learning_rate": 1.1162546935938028e-05, "loss": 0.6014, "step": 30193 }, { "epoch": 0.927533560654932, "grad_norm": 0.33454054594039917, "learning_rate": 1.1162066934137477e-05, "loss": 0.5652, "step": 30194 }, { "epoch": 0.9275642797898811, "grad_norm": 0.371844619512558, "learning_rate": 1.1161586929622834e-05, "loss": 0.525, "step": 30195 }, { "epoch": 0.9275949989248303, "grad_norm": 0.3830709457397461, "learning_rate": 1.1161106922395226e-05, "loss": 0.578, "step": 30196 }, { "epoch": 0.9276257180597794, "grad_norm": 0.36906898021698, "learning_rate": 1.116062691245577e-05, "loss": 0.56, "step": 30197 }, { "epoch": 0.9276564371947286, "grad_norm": 0.3508935272693634, "learning_rate": 1.1160146899805588e-05, "loss": 0.5921, "step": 30198 }, { "epoch": 0.9276871563296778, "grad_norm": 0.3938095271587372, "learning_rate": 1.1159666884445803e-05, "loss": 0.5132, "step": 30199 }, { "epoch": 0.9277178754646269, "grad_norm": 0.3800613284111023, "learning_rate": 1.1159186866377536e-05, "loss": 0.5565, "step": 30200 }, { "epoch": 0.927748594599576, "grad_norm": 0.3776589334011078, "learning_rate": 1.115870684560191e-05, "loss": 0.5867, "step": 30201 }, { "epoch": 0.9277793137345253, "grad_norm": 0.3376885950565338, "learning_rate": 1.1158226822120037e-05, "loss": 0.5782, "step": 30202 }, { "epoch": 0.9278100328694744, "grad_norm": 0.3892041742801666, "learning_rate": 1.115774679593305e-05, "loss": 0.605, "step": 30203 }, { "epoch": 0.9278407520044235, "grad_norm": 0.4134499430656433, "learning_rate": 1.1157266767042059e-05, "loss": 0.5076, "step": 30204 }, { "epoch": 0.9278714711393727, "grad_norm": 0.4356251657009125, "learning_rate": 1.1156786735448193e-05, "loss": 0.5477, "step": 30205 }, { "epoch": 0.9279021902743219, "grad_norm": 0.34721511602401733, "learning_rate": 1.1156306701152569e-05, "loss": 0.5775, "step": 30206 }, { "epoch": 0.9279329094092711, "grad_norm": 0.35710087418556213, "learning_rate": 1.1155826664156313e-05, "loss": 0.5952, "step": 30207 }, { "epoch": 0.9279636285442202, "grad_norm": 0.3986756205558777, "learning_rate": 1.1155346624460543e-05, "loss": 0.5319, "step": 30208 }, { "epoch": 0.9279943476791693, "grad_norm": 0.3309602439403534, "learning_rate": 1.115486658206638e-05, "loss": 0.4545, "step": 30209 }, { "epoch": 0.9280250668141186, "grad_norm": 0.5391010046005249, "learning_rate": 1.1154386536974944e-05, "loss": 0.534, "step": 30210 }, { "epoch": 0.9280557859490677, "grad_norm": 0.5507050156593323, "learning_rate": 1.1153906489187358e-05, "loss": 0.6325, "step": 30211 }, { "epoch": 0.9280865050840168, "grad_norm": 0.39598146080970764, "learning_rate": 1.1153426438704743e-05, "loss": 0.5678, "step": 30212 }, { "epoch": 0.928117224218966, "grad_norm": 0.38939976692199707, "learning_rate": 1.115294638552822e-05, "loss": 0.5814, "step": 30213 }, { "epoch": 0.9281479433539152, "grad_norm": 0.3737190365791321, "learning_rate": 1.1152466329658912e-05, "loss": 0.5561, "step": 30214 }, { "epoch": 0.9281786624888643, "grad_norm": 0.3885956406593323, "learning_rate": 1.1151986271097934e-05, "loss": 0.4476, "step": 30215 }, { "epoch": 0.9282093816238135, "grad_norm": 0.3685022294521332, "learning_rate": 1.1151506209846417e-05, "loss": 0.534, "step": 30216 }, { "epoch": 0.9282401007587626, "grad_norm": 0.6021429896354675, "learning_rate": 1.1151026145905472e-05, "loss": 0.4643, "step": 30217 }, { "epoch": 0.9282708198937117, "grad_norm": 0.3735480308532715, "learning_rate": 1.1150546079276228e-05, "loss": 0.4784, "step": 30218 }, { "epoch": 0.928301539028661, "grad_norm": 0.3666605055332184, "learning_rate": 1.1150066009959804e-05, "loss": 0.521, "step": 30219 }, { "epoch": 0.9283322581636101, "grad_norm": 0.3541179597377777, "learning_rate": 1.114958593795732e-05, "loss": 0.4946, "step": 30220 }, { "epoch": 0.9283629772985593, "grad_norm": 0.48388898372650146, "learning_rate": 1.1149105863269896e-05, "loss": 0.5943, "step": 30221 }, { "epoch": 0.9283936964335084, "grad_norm": 0.36419540643692017, "learning_rate": 1.1148625785898655e-05, "loss": 0.5239, "step": 30222 }, { "epoch": 0.9284244155684576, "grad_norm": 0.37652289867401123, "learning_rate": 1.1148145705844717e-05, "loss": 0.5461, "step": 30223 }, { "epoch": 0.9284551347034068, "grad_norm": 0.3907907009124756, "learning_rate": 1.114766562310921e-05, "loss": 0.6592, "step": 30224 }, { "epoch": 0.9284858538383559, "grad_norm": 0.3776606023311615, "learning_rate": 1.1147185537693243e-05, "loss": 0.5407, "step": 30225 }, { "epoch": 0.928516572973305, "grad_norm": 0.3791624903678894, "learning_rate": 1.114670544959795e-05, "loss": 0.497, "step": 30226 }, { "epoch": 0.9285472921082543, "grad_norm": 0.3809923827648163, "learning_rate": 1.1146225358824443e-05, "loss": 0.5287, "step": 30227 }, { "epoch": 0.9285780112432034, "grad_norm": 0.360485315322876, "learning_rate": 1.1145745265373849e-05, "loss": 0.5419, "step": 30228 }, { "epoch": 0.9286087303781525, "grad_norm": 0.3593321144580841, "learning_rate": 1.1145265169247284e-05, "loss": 0.5842, "step": 30229 }, { "epoch": 0.9286394495131017, "grad_norm": 0.44525986909866333, "learning_rate": 1.1144785070445875e-05, "loss": 0.5806, "step": 30230 }, { "epoch": 0.9286701686480509, "grad_norm": 0.3508704602718353, "learning_rate": 1.1144304968970737e-05, "loss": 0.5289, "step": 30231 }, { "epoch": 0.9287008877830001, "grad_norm": 0.34527403116226196, "learning_rate": 1.1143824864822998e-05, "loss": 0.4758, "step": 30232 }, { "epoch": 0.9287316069179492, "grad_norm": 0.4040375351905823, "learning_rate": 1.1143344758003773e-05, "loss": 0.5765, "step": 30233 }, { "epoch": 0.9287623260528983, "grad_norm": 0.42134901881217957, "learning_rate": 1.1142864648514189e-05, "loss": 0.5058, "step": 30234 }, { "epoch": 0.9287930451878476, "grad_norm": 0.3717963993549347, "learning_rate": 1.1142384536355367e-05, "loss": 0.5393, "step": 30235 }, { "epoch": 0.9288237643227967, "grad_norm": 0.3814767599105835, "learning_rate": 1.1141904421528423e-05, "loss": 0.5479, "step": 30236 }, { "epoch": 0.9288544834577458, "grad_norm": 0.6191640496253967, "learning_rate": 1.114142430403448e-05, "loss": 0.5124, "step": 30237 }, { "epoch": 0.928885202592695, "grad_norm": 0.31743502616882324, "learning_rate": 1.1140944183874662e-05, "loss": 0.5343, "step": 30238 }, { "epoch": 0.9289159217276441, "grad_norm": 0.43037182092666626, "learning_rate": 1.1140464061050092e-05, "loss": 0.5879, "step": 30239 }, { "epoch": 0.9289466408625933, "grad_norm": 0.3716990351676941, "learning_rate": 1.1139983935561885e-05, "loss": 0.5266, "step": 30240 }, { "epoch": 0.9289773599975425, "grad_norm": 0.33153966069221497, "learning_rate": 1.1139503807411167e-05, "loss": 0.5389, "step": 30241 }, { "epoch": 0.9290080791324916, "grad_norm": 0.359673410654068, "learning_rate": 1.1139023676599058e-05, "loss": 0.5387, "step": 30242 }, { "epoch": 0.9290387982674407, "grad_norm": 0.3780340850353241, "learning_rate": 1.1138543543126682e-05, "loss": 0.5567, "step": 30243 }, { "epoch": 0.92906951740239, "grad_norm": 0.3849318027496338, "learning_rate": 1.1138063406995157e-05, "loss": 0.5619, "step": 30244 }, { "epoch": 0.9291002365373391, "grad_norm": 0.36578169465065, "learning_rate": 1.1137583268205603e-05, "loss": 0.6635, "step": 30245 }, { "epoch": 0.9291309556722883, "grad_norm": 0.3579077124595642, "learning_rate": 1.1137103126759145e-05, "loss": 0.6202, "step": 30246 }, { "epoch": 0.9291616748072374, "grad_norm": 0.34753501415252686, "learning_rate": 1.1136622982656905e-05, "loss": 0.5133, "step": 30247 }, { "epoch": 0.9291923939421866, "grad_norm": 0.3555839955806732, "learning_rate": 1.1136142835900002e-05, "loss": 0.6061, "step": 30248 }, { "epoch": 0.9292231130771358, "grad_norm": 0.34870967268943787, "learning_rate": 1.1135662686489557e-05, "loss": 0.5979, "step": 30249 }, { "epoch": 0.9292538322120849, "grad_norm": 0.3530363440513611, "learning_rate": 1.1135182534426692e-05, "loss": 0.4879, "step": 30250 }, { "epoch": 0.929284551347034, "grad_norm": 0.35928890109062195, "learning_rate": 1.113470237971253e-05, "loss": 0.5983, "step": 30251 }, { "epoch": 0.9293152704819833, "grad_norm": 0.34430286288261414, "learning_rate": 1.1134222222348193e-05, "loss": 0.5099, "step": 30252 }, { "epoch": 0.9293459896169324, "grad_norm": 0.38013923168182373, "learning_rate": 1.1133742062334798e-05, "loss": 0.5879, "step": 30253 }, { "epoch": 0.9293767087518815, "grad_norm": 0.39649054408073425, "learning_rate": 1.113326189967347e-05, "loss": 0.6244, "step": 30254 }, { "epoch": 0.9294074278868307, "grad_norm": 0.42225560545921326, "learning_rate": 1.113278173436533e-05, "loss": 0.5627, "step": 30255 }, { "epoch": 0.9294381470217798, "grad_norm": 0.3511589765548706, "learning_rate": 1.1132301566411499e-05, "loss": 0.5287, "step": 30256 }, { "epoch": 0.9294688661567291, "grad_norm": 0.3910181224346161, "learning_rate": 1.11318213958131e-05, "loss": 0.5608, "step": 30257 }, { "epoch": 0.9294995852916782, "grad_norm": 0.3382779061794281, "learning_rate": 1.113134122257125e-05, "loss": 0.5389, "step": 30258 }, { "epoch": 0.9295303044266273, "grad_norm": 0.3790891468524933, "learning_rate": 1.1130861046687074e-05, "loss": 0.5686, "step": 30259 }, { "epoch": 0.9295610235615765, "grad_norm": 0.3785017728805542, "learning_rate": 1.1130380868161696e-05, "loss": 0.4903, "step": 30260 }, { "epoch": 0.9295917426965257, "grad_norm": 0.47548434138298035, "learning_rate": 1.1129900686996231e-05, "loss": 0.5732, "step": 30261 }, { "epoch": 0.9296224618314748, "grad_norm": 0.3643988370895386, "learning_rate": 1.1129420503191808e-05, "loss": 0.5589, "step": 30262 }, { "epoch": 0.929653180966424, "grad_norm": 0.39958712458610535, "learning_rate": 1.1128940316749542e-05, "loss": 0.594, "step": 30263 }, { "epoch": 0.9296839001013731, "grad_norm": 0.35895901918411255, "learning_rate": 1.1128460127670558e-05, "loss": 0.5568, "step": 30264 }, { "epoch": 0.9297146192363223, "grad_norm": 0.44642120599746704, "learning_rate": 1.1127979935955976e-05, "loss": 0.6024, "step": 30265 }, { "epoch": 0.9297453383712715, "grad_norm": 0.7317638397216797, "learning_rate": 1.1127499741606917e-05, "loss": 0.55, "step": 30266 }, { "epoch": 0.9297760575062206, "grad_norm": 0.3856443762779236, "learning_rate": 1.1127019544624505e-05, "loss": 0.474, "step": 30267 }, { "epoch": 0.9298067766411697, "grad_norm": 0.3441910147666931, "learning_rate": 1.1126539345009861e-05, "loss": 0.5254, "step": 30268 }, { "epoch": 0.929837495776119, "grad_norm": 0.3704175353050232, "learning_rate": 1.1126059142764103e-05, "loss": 0.5827, "step": 30269 }, { "epoch": 0.9298682149110681, "grad_norm": 0.3879517912864685, "learning_rate": 1.112557893788836e-05, "loss": 0.5499, "step": 30270 }, { "epoch": 0.9298989340460173, "grad_norm": 0.403178334236145, "learning_rate": 1.1125098730383745e-05, "loss": 0.5546, "step": 30271 }, { "epoch": 0.9299296531809664, "grad_norm": 0.4097563922405243, "learning_rate": 1.1124618520251383e-05, "loss": 0.5936, "step": 30272 }, { "epoch": 0.9299603723159156, "grad_norm": 0.3510383069515228, "learning_rate": 1.1124138307492397e-05, "loss": 0.5386, "step": 30273 }, { "epoch": 0.9299910914508648, "grad_norm": 0.3902590870857239, "learning_rate": 1.1123658092107908e-05, "loss": 0.6001, "step": 30274 }, { "epoch": 0.9300218105858139, "grad_norm": 0.3971397876739502, "learning_rate": 1.1123177874099036e-05, "loss": 0.4856, "step": 30275 }, { "epoch": 0.930052529720763, "grad_norm": 0.36651384830474854, "learning_rate": 1.1122697653466902e-05, "loss": 0.5393, "step": 30276 }, { "epoch": 0.9300832488557123, "grad_norm": 0.3709697425365448, "learning_rate": 1.1122217430212635e-05, "loss": 0.5203, "step": 30277 }, { "epoch": 0.9301139679906614, "grad_norm": 0.35187020897865295, "learning_rate": 1.1121737204337344e-05, "loss": 0.568, "step": 30278 }, { "epoch": 0.9301446871256105, "grad_norm": 0.35930103063583374, "learning_rate": 1.112125697584216e-05, "loss": 0.5387, "step": 30279 }, { "epoch": 0.9301754062605597, "grad_norm": 0.35704857110977173, "learning_rate": 1.1120776744728203e-05, "loss": 0.5125, "step": 30280 }, { "epoch": 0.9302061253955088, "grad_norm": 0.357063353061676, "learning_rate": 1.1120296510996593e-05, "loss": 0.5889, "step": 30281 }, { "epoch": 0.9302368445304581, "grad_norm": 0.37344467639923096, "learning_rate": 1.1119816274648455e-05, "loss": 0.4615, "step": 30282 }, { "epoch": 0.9302675636654072, "grad_norm": 0.37770646810531616, "learning_rate": 1.1119336035684902e-05, "loss": 0.5589, "step": 30283 }, { "epoch": 0.9302982828003563, "grad_norm": 0.3569287359714508, "learning_rate": 1.1118855794107066e-05, "loss": 0.5541, "step": 30284 }, { "epoch": 0.9303290019353055, "grad_norm": 0.3535950183868408, "learning_rate": 1.1118375549916063e-05, "loss": 0.5607, "step": 30285 }, { "epoch": 0.9303597210702547, "grad_norm": 0.3312683403491974, "learning_rate": 1.1117895303113016e-05, "loss": 0.4824, "step": 30286 }, { "epoch": 0.9303904402052038, "grad_norm": 0.40768083930015564, "learning_rate": 1.1117415053699047e-05, "loss": 0.5158, "step": 30287 }, { "epoch": 0.930421159340153, "grad_norm": 0.38447415828704834, "learning_rate": 1.1116934801675276e-05, "loss": 0.4889, "step": 30288 }, { "epoch": 0.9304518784751021, "grad_norm": 0.6174691319465637, "learning_rate": 1.1116454547042827e-05, "loss": 0.562, "step": 30289 }, { "epoch": 0.9304825976100513, "grad_norm": 0.3826034367084503, "learning_rate": 1.1115974289802818e-05, "loss": 0.5107, "step": 30290 }, { "epoch": 0.9305133167450005, "grad_norm": 0.3630359470844269, "learning_rate": 1.1115494029956375e-05, "loss": 0.5957, "step": 30291 }, { "epoch": 0.9305440358799496, "grad_norm": 0.3385410010814667, "learning_rate": 1.1115013767504617e-05, "loss": 0.5497, "step": 30292 }, { "epoch": 0.9305747550148988, "grad_norm": 0.35418665409088135, "learning_rate": 1.1114533502448667e-05, "loss": 0.5506, "step": 30293 }, { "epoch": 0.930605474149848, "grad_norm": 0.371004581451416, "learning_rate": 1.1114053234789646e-05, "loss": 0.5403, "step": 30294 }, { "epoch": 0.9306361932847971, "grad_norm": 0.3869469165802002, "learning_rate": 1.1113572964528679e-05, "loss": 0.5794, "step": 30295 }, { "epoch": 0.9306669124197463, "grad_norm": 0.37821313738822937, "learning_rate": 1.1113092691666878e-05, "loss": 0.5001, "step": 30296 }, { "epoch": 0.9306976315546954, "grad_norm": 0.539116382598877, "learning_rate": 1.1112612416205375e-05, "loss": 0.5189, "step": 30297 }, { "epoch": 0.9307283506896445, "grad_norm": 0.5251274108886719, "learning_rate": 1.111213213814529e-05, "loss": 0.4727, "step": 30298 }, { "epoch": 0.9307590698245938, "grad_norm": 0.832741379737854, "learning_rate": 1.1111651857487738e-05, "loss": 0.5494, "step": 30299 }, { "epoch": 0.9307897889595429, "grad_norm": 0.34394291043281555, "learning_rate": 1.1111171574233847e-05, "loss": 0.5243, "step": 30300 }, { "epoch": 0.930820508094492, "grad_norm": 0.39309871196746826, "learning_rate": 1.111069128838474e-05, "loss": 0.4977, "step": 30301 }, { "epoch": 0.9308512272294412, "grad_norm": 0.39742332696914673, "learning_rate": 1.1110210999941534e-05, "loss": 0.4866, "step": 30302 }, { "epoch": 0.9308819463643904, "grad_norm": 0.36647337675094604, "learning_rate": 1.110973070890535e-05, "loss": 0.5625, "step": 30303 }, { "epoch": 0.9309126654993395, "grad_norm": 0.4457987844944, "learning_rate": 1.1109250415277317e-05, "loss": 0.5083, "step": 30304 }, { "epoch": 0.9309433846342887, "grad_norm": 0.3755800724029541, "learning_rate": 1.1108770119058552e-05, "loss": 0.6149, "step": 30305 }, { "epoch": 0.9309741037692378, "grad_norm": 0.41726958751678467, "learning_rate": 1.1108289820250174e-05, "loss": 0.5807, "step": 30306 }, { "epoch": 0.9310048229041871, "grad_norm": 0.4208855926990509, "learning_rate": 1.110780951885331e-05, "loss": 0.5282, "step": 30307 }, { "epoch": 0.9310355420391362, "grad_norm": 0.3998314142227173, "learning_rate": 1.1107329214869078e-05, "loss": 0.67, "step": 30308 }, { "epoch": 0.9310662611740853, "grad_norm": 0.39345234632492065, "learning_rate": 1.1106848908298601e-05, "loss": 0.5297, "step": 30309 }, { "epoch": 0.9310969803090345, "grad_norm": 0.5335297584533691, "learning_rate": 1.1106368599143002e-05, "loss": 0.5184, "step": 30310 }, { "epoch": 0.9311276994439837, "grad_norm": 0.4061771333217621, "learning_rate": 1.1105888287403404e-05, "loss": 0.5525, "step": 30311 }, { "epoch": 0.9311584185789328, "grad_norm": 0.38005882501602173, "learning_rate": 1.1105407973080921e-05, "loss": 0.5803, "step": 30312 }, { "epoch": 0.931189137713882, "grad_norm": 0.5651965737342834, "learning_rate": 1.1104927656176687e-05, "loss": 0.5456, "step": 30313 }, { "epoch": 0.9312198568488311, "grad_norm": 0.3830072283744812, "learning_rate": 1.1104447336691812e-05, "loss": 0.5991, "step": 30314 }, { "epoch": 0.9312505759837802, "grad_norm": 0.3461000323295593, "learning_rate": 1.1103967014627424e-05, "loss": 0.4919, "step": 30315 }, { "epoch": 0.9312812951187295, "grad_norm": 0.3364415466785431, "learning_rate": 1.1103486689984646e-05, "loss": 0.5092, "step": 30316 }, { "epoch": 0.9313120142536786, "grad_norm": 0.3605083227157593, "learning_rate": 1.1103006362764597e-05, "loss": 0.5148, "step": 30317 }, { "epoch": 0.9313427333886278, "grad_norm": 0.3487533926963806, "learning_rate": 1.1102526032968401e-05, "loss": 0.5759, "step": 30318 }, { "epoch": 0.931373452523577, "grad_norm": 0.3756280839443207, "learning_rate": 1.1102045700597174e-05, "loss": 0.4824, "step": 30319 }, { "epoch": 0.9314041716585261, "grad_norm": 0.8592475652694702, "learning_rate": 1.1101565365652048e-05, "loss": 0.5309, "step": 30320 }, { "epoch": 0.9314348907934753, "grad_norm": 0.35884276032447815, "learning_rate": 1.1101085028134135e-05, "loss": 0.5203, "step": 30321 }, { "epoch": 0.9314656099284244, "grad_norm": 0.3769422471523285, "learning_rate": 1.1100604688044563e-05, "loss": 0.5983, "step": 30322 }, { "epoch": 0.9314963290633735, "grad_norm": 0.3817802965641022, "learning_rate": 1.1100124345384453e-05, "loss": 0.5807, "step": 30323 }, { "epoch": 0.9315270481983228, "grad_norm": 0.38562169671058655, "learning_rate": 1.1099644000154924e-05, "loss": 0.5542, "step": 30324 }, { "epoch": 0.9315577673332719, "grad_norm": 0.3510757088661194, "learning_rate": 1.1099163652357101e-05, "loss": 0.5876, "step": 30325 }, { "epoch": 0.931588486468221, "grad_norm": 0.39286163449287415, "learning_rate": 1.1098683301992102e-05, "loss": 0.6194, "step": 30326 }, { "epoch": 0.9316192056031702, "grad_norm": 0.3503819406032562, "learning_rate": 1.1098202949061054e-05, "loss": 0.5615, "step": 30327 }, { "epoch": 0.9316499247381194, "grad_norm": 0.3372073471546173, "learning_rate": 1.1097722593565072e-05, "loss": 0.515, "step": 30328 }, { "epoch": 0.9316806438730685, "grad_norm": 0.36622095108032227, "learning_rate": 1.1097242235505288e-05, "loss": 0.5506, "step": 30329 }, { "epoch": 0.9317113630080177, "grad_norm": 0.40187743306159973, "learning_rate": 1.1096761874882813e-05, "loss": 0.6105, "step": 30330 }, { "epoch": 0.9317420821429668, "grad_norm": 0.4083707928657532, "learning_rate": 1.1096281511698779e-05, "loss": 0.5528, "step": 30331 }, { "epoch": 0.9317728012779161, "grad_norm": 0.44303038716316223, "learning_rate": 1.10958011459543e-05, "loss": 0.5413, "step": 30332 }, { "epoch": 0.9318035204128652, "grad_norm": 0.35881274938583374, "learning_rate": 1.1095320777650498e-05, "loss": 0.5682, "step": 30333 }, { "epoch": 0.9318342395478143, "grad_norm": 0.5636556148529053, "learning_rate": 1.1094840406788503e-05, "loss": 0.6506, "step": 30334 }, { "epoch": 0.9318649586827635, "grad_norm": 0.38395124673843384, "learning_rate": 1.109436003336943e-05, "loss": 0.6242, "step": 30335 }, { "epoch": 0.9318956778177127, "grad_norm": 0.3429311215877533, "learning_rate": 1.1093879657394401e-05, "loss": 0.544, "step": 30336 }, { "epoch": 0.9319263969526618, "grad_norm": 0.34896957874298096, "learning_rate": 1.1093399278864542e-05, "loss": 0.6278, "step": 30337 }, { "epoch": 0.931957116087611, "grad_norm": 0.3332914710044861, "learning_rate": 1.1092918897780972e-05, "loss": 0.5157, "step": 30338 }, { "epoch": 0.9319878352225601, "grad_norm": 0.37697938084602356, "learning_rate": 1.1092438514144814e-05, "loss": 0.5447, "step": 30339 }, { "epoch": 0.9320185543575092, "grad_norm": 0.3488859236240387, "learning_rate": 1.109195812795719e-05, "loss": 0.5722, "step": 30340 }, { "epoch": 0.9320492734924585, "grad_norm": 0.3985539376735687, "learning_rate": 1.109147773921922e-05, "loss": 0.5252, "step": 30341 }, { "epoch": 0.9320799926274076, "grad_norm": 0.3363508880138397, "learning_rate": 1.1090997347932028e-05, "loss": 0.5824, "step": 30342 }, { "epoch": 0.9321107117623568, "grad_norm": 0.37577423453330994, "learning_rate": 1.1090516954096736e-05, "loss": 0.5989, "step": 30343 }, { "epoch": 0.932141430897306, "grad_norm": 0.47941818833351135, "learning_rate": 1.1090036557714466e-05, "loss": 0.4084, "step": 30344 }, { "epoch": 0.9321721500322551, "grad_norm": 0.43231022357940674, "learning_rate": 1.1089556158786339e-05, "loss": 0.5275, "step": 30345 }, { "epoch": 0.9322028691672043, "grad_norm": 0.3653135597705841, "learning_rate": 1.1089075757313474e-05, "loss": 0.5319, "step": 30346 }, { "epoch": 0.9322335883021534, "grad_norm": 0.3376675546169281, "learning_rate": 1.1088595353297002e-05, "loss": 0.5, "step": 30347 }, { "epoch": 0.9322643074371025, "grad_norm": 0.34869813919067383, "learning_rate": 1.1088114946738035e-05, "loss": 0.5214, "step": 30348 }, { "epoch": 0.9322950265720518, "grad_norm": 0.3511779308319092, "learning_rate": 1.1087634537637704e-05, "loss": 0.5232, "step": 30349 }, { "epoch": 0.9323257457070009, "grad_norm": 0.37601909041404724, "learning_rate": 1.1087154125997124e-05, "loss": 0.5251, "step": 30350 }, { "epoch": 0.93235646484195, "grad_norm": 0.39936646819114685, "learning_rate": 1.1086673711817417e-05, "loss": 0.5758, "step": 30351 }, { "epoch": 0.9323871839768992, "grad_norm": 0.3818790912628174, "learning_rate": 1.1086193295099712e-05, "loss": 0.57, "step": 30352 }, { "epoch": 0.9324179031118484, "grad_norm": 0.35319772362709045, "learning_rate": 1.1085712875845124e-05, "loss": 0.6077, "step": 30353 }, { "epoch": 0.9324486222467975, "grad_norm": 0.37438344955444336, "learning_rate": 1.1085232454054779e-05, "loss": 0.5357, "step": 30354 }, { "epoch": 0.9324793413817467, "grad_norm": 0.37220096588134766, "learning_rate": 1.1084752029729797e-05, "loss": 0.4531, "step": 30355 }, { "epoch": 0.9325100605166958, "grad_norm": 0.4069632291793823, "learning_rate": 1.1084271602871303e-05, "loss": 0.5223, "step": 30356 }, { "epoch": 0.9325407796516451, "grad_norm": 0.3822646737098694, "learning_rate": 1.1083791173480419e-05, "loss": 0.5588, "step": 30357 }, { "epoch": 0.9325714987865942, "grad_norm": 0.3690261244773865, "learning_rate": 1.108331074155826e-05, "loss": 0.5791, "step": 30358 }, { "epoch": 0.9326022179215433, "grad_norm": 0.37075576186180115, "learning_rate": 1.1082830307105954e-05, "loss": 0.5407, "step": 30359 }, { "epoch": 0.9326329370564925, "grad_norm": 0.3640204966068268, "learning_rate": 1.1082349870124622e-05, "loss": 0.5901, "step": 30360 }, { "epoch": 0.9326636561914416, "grad_norm": 0.35000869631767273, "learning_rate": 1.1081869430615389e-05, "loss": 0.4996, "step": 30361 }, { "epoch": 0.9326943753263908, "grad_norm": 0.34881195425987244, "learning_rate": 1.1081388988579372e-05, "loss": 0.5556, "step": 30362 }, { "epoch": 0.93272509446134, "grad_norm": 0.37116649746894836, "learning_rate": 1.1080908544017696e-05, "loss": 0.6141, "step": 30363 }, { "epoch": 0.9327558135962891, "grad_norm": 0.3539467751979828, "learning_rate": 1.1080428096931482e-05, "loss": 0.5615, "step": 30364 }, { "epoch": 0.9327865327312382, "grad_norm": 0.40236207842826843, "learning_rate": 1.1079947647321854e-05, "loss": 0.5997, "step": 30365 }, { "epoch": 0.9328172518661875, "grad_norm": 0.40072304010391235, "learning_rate": 1.1079467195189933e-05, "loss": 0.5476, "step": 30366 }, { "epoch": 0.9328479710011366, "grad_norm": 0.33370593190193176, "learning_rate": 1.1078986740536838e-05, "loss": 0.5105, "step": 30367 }, { "epoch": 0.9328786901360858, "grad_norm": 0.42500945925712585, "learning_rate": 1.1078506283363699e-05, "loss": 0.5092, "step": 30368 }, { "epoch": 0.9329094092710349, "grad_norm": 1.5651564598083496, "learning_rate": 1.1078025823671627e-05, "loss": 0.525, "step": 30369 }, { "epoch": 0.9329401284059841, "grad_norm": 0.3538157045841217, "learning_rate": 1.1077545361461754e-05, "loss": 0.5467, "step": 30370 }, { "epoch": 0.9329708475409333, "grad_norm": 0.3625772297382355, "learning_rate": 1.1077064896735198e-05, "loss": 0.5738, "step": 30371 }, { "epoch": 0.9330015666758824, "grad_norm": 0.3862822353839874, "learning_rate": 1.1076584429493085e-05, "loss": 0.5228, "step": 30372 }, { "epoch": 0.9330322858108315, "grad_norm": 0.33536428213119507, "learning_rate": 1.1076103959736528e-05, "loss": 0.5485, "step": 30373 }, { "epoch": 0.9330630049457808, "grad_norm": 0.36813119053840637, "learning_rate": 1.107562348746666e-05, "loss": 0.4551, "step": 30374 }, { "epoch": 0.9330937240807299, "grad_norm": 0.41439634561538696, "learning_rate": 1.1075143012684597e-05, "loss": 0.5269, "step": 30375 }, { "epoch": 0.933124443215679, "grad_norm": 0.3621596693992615, "learning_rate": 1.1074662535391462e-05, "loss": 0.467, "step": 30376 }, { "epoch": 0.9331551623506282, "grad_norm": 0.7658784985542297, "learning_rate": 1.107418205558838e-05, "loss": 0.5768, "step": 30377 }, { "epoch": 0.9331858814855774, "grad_norm": 0.38123631477355957, "learning_rate": 1.1073701573276466e-05, "loss": 0.5695, "step": 30378 }, { "epoch": 0.9332166006205265, "grad_norm": 0.4125577509403229, "learning_rate": 1.1073221088456848e-05, "loss": 0.6159, "step": 30379 }, { "epoch": 0.9332473197554757, "grad_norm": 0.40151122212409973, "learning_rate": 1.1072740601130649e-05, "loss": 0.5936, "step": 30380 }, { "epoch": 0.9332780388904248, "grad_norm": 0.33082494139671326, "learning_rate": 1.1072260111298989e-05, "loss": 0.5551, "step": 30381 }, { "epoch": 0.933308758025374, "grad_norm": 0.41849684715270996, "learning_rate": 1.107177961896299e-05, "loss": 0.4983, "step": 30382 }, { "epoch": 0.9333394771603232, "grad_norm": 0.38790836930274963, "learning_rate": 1.1071299124123777e-05, "loss": 0.5225, "step": 30383 }, { "epoch": 0.9333701962952723, "grad_norm": 0.3678896129131317, "learning_rate": 1.1070818626782467e-05, "loss": 0.5683, "step": 30384 }, { "epoch": 0.9334009154302215, "grad_norm": 0.34562885761260986, "learning_rate": 1.1070338126940188e-05, "loss": 0.5088, "step": 30385 }, { "epoch": 0.9334316345651706, "grad_norm": 0.3572862148284912, "learning_rate": 1.106985762459806e-05, "loss": 0.5565, "step": 30386 }, { "epoch": 0.9334623537001198, "grad_norm": 0.5405210256576538, "learning_rate": 1.1069377119757199e-05, "loss": 0.5807, "step": 30387 }, { "epoch": 0.933493072835069, "grad_norm": 0.3750610947608948, "learning_rate": 1.1068896612418738e-05, "loss": 0.6382, "step": 30388 }, { "epoch": 0.9335237919700181, "grad_norm": 0.3921282887458801, "learning_rate": 1.1068416102583792e-05, "loss": 0.4999, "step": 30389 }, { "epoch": 0.9335545111049672, "grad_norm": 0.4546433687210083, "learning_rate": 1.1067935590253488e-05, "loss": 0.5867, "step": 30390 }, { "epoch": 0.9335852302399165, "grad_norm": 0.3721960186958313, "learning_rate": 1.1067455075428944e-05, "loss": 0.5662, "step": 30391 }, { "epoch": 0.9336159493748656, "grad_norm": 0.36129632592201233, "learning_rate": 1.1066974558111286e-05, "loss": 0.5461, "step": 30392 }, { "epoch": 0.9336466685098148, "grad_norm": 0.3666982054710388, "learning_rate": 1.1066494038301634e-05, "loss": 0.5909, "step": 30393 }, { "epoch": 0.9336773876447639, "grad_norm": 0.37953704595565796, "learning_rate": 1.1066013516001109e-05, "loss": 0.5915, "step": 30394 }, { "epoch": 0.933708106779713, "grad_norm": 0.4282543361186981, "learning_rate": 1.1065532991210837e-05, "loss": 0.5552, "step": 30395 }, { "epoch": 0.9337388259146623, "grad_norm": 0.3558879494667053, "learning_rate": 1.1065052463931936e-05, "loss": 0.6569, "step": 30396 }, { "epoch": 0.9337695450496114, "grad_norm": 0.5090029835700989, "learning_rate": 1.1064571934165534e-05, "loss": 0.4801, "step": 30397 }, { "epoch": 0.9338002641845605, "grad_norm": 0.3559035360813141, "learning_rate": 1.1064091401912746e-05, "loss": 0.4851, "step": 30398 }, { "epoch": 0.9338309833195098, "grad_norm": 0.3810226321220398, "learning_rate": 1.10636108671747e-05, "loss": 0.5287, "step": 30399 }, { "epoch": 0.9338617024544589, "grad_norm": 0.3929925858974457, "learning_rate": 1.1063130329952516e-05, "loss": 0.492, "step": 30400 }, { "epoch": 0.933892421589408, "grad_norm": 0.37412703037261963, "learning_rate": 1.106264979024732e-05, "loss": 0.5285, "step": 30401 }, { "epoch": 0.9339231407243572, "grad_norm": 0.40237346291542053, "learning_rate": 1.1062169248060227e-05, "loss": 0.5307, "step": 30402 }, { "epoch": 0.9339538598593063, "grad_norm": 0.42197373509407043, "learning_rate": 1.1061688703392367e-05, "loss": 0.551, "step": 30403 }, { "epoch": 0.9339845789942556, "grad_norm": 0.4056549072265625, "learning_rate": 1.1061208156244856e-05, "loss": 0.5296, "step": 30404 }, { "epoch": 0.9340152981292047, "grad_norm": 0.4414810240268707, "learning_rate": 1.1060727606618818e-05, "loss": 0.4717, "step": 30405 }, { "epoch": 0.9340460172641538, "grad_norm": 0.3619079291820526, "learning_rate": 1.1060247054515379e-05, "loss": 0.5343, "step": 30406 }, { "epoch": 0.934076736399103, "grad_norm": 0.3521060049533844, "learning_rate": 1.1059766499935658e-05, "loss": 0.5257, "step": 30407 }, { "epoch": 0.9341074555340522, "grad_norm": 0.36023157835006714, "learning_rate": 1.105928594288078e-05, "loss": 0.4708, "step": 30408 }, { "epoch": 0.9341381746690013, "grad_norm": 0.5210719108581543, "learning_rate": 1.1058805383351867e-05, "loss": 0.5893, "step": 30409 }, { "epoch": 0.9341688938039505, "grad_norm": 0.38019904494285583, "learning_rate": 1.1058324821350035e-05, "loss": 0.5886, "step": 30410 }, { "epoch": 0.9341996129388996, "grad_norm": 0.3812773525714874, "learning_rate": 1.1057844256876416e-05, "loss": 0.5106, "step": 30411 }, { "epoch": 0.9342303320738488, "grad_norm": 0.3476789891719818, "learning_rate": 1.1057363689932126e-05, "loss": 0.5209, "step": 30412 }, { "epoch": 0.934261051208798, "grad_norm": 0.3741928040981293, "learning_rate": 1.105688312051829e-05, "loss": 0.5542, "step": 30413 }, { "epoch": 0.9342917703437471, "grad_norm": 0.34308159351348877, "learning_rate": 1.1056402548636027e-05, "loss": 0.5173, "step": 30414 }, { "epoch": 0.9343224894786962, "grad_norm": 0.38292208313941956, "learning_rate": 1.1055921974286465e-05, "loss": 0.5474, "step": 30415 }, { "epoch": 0.9343532086136455, "grad_norm": 0.4737255871295929, "learning_rate": 1.1055441397470724e-05, "loss": 0.5287, "step": 30416 }, { "epoch": 0.9343839277485946, "grad_norm": 0.38519009947776794, "learning_rate": 1.1054960818189924e-05, "loss": 0.6544, "step": 30417 }, { "epoch": 0.9344146468835438, "grad_norm": 0.36267155408859253, "learning_rate": 1.105448023644519e-05, "loss": 0.4885, "step": 30418 }, { "epoch": 0.9344453660184929, "grad_norm": 0.3529633581638336, "learning_rate": 1.1053999652237643e-05, "loss": 0.4649, "step": 30419 }, { "epoch": 0.934476085153442, "grad_norm": 0.42582646012306213, "learning_rate": 1.105351906556841e-05, "loss": 0.5379, "step": 30420 }, { "epoch": 0.9345068042883913, "grad_norm": 0.3851524591445923, "learning_rate": 1.1053038476438604e-05, "loss": 0.5039, "step": 30421 }, { "epoch": 0.9345375234233404, "grad_norm": 0.5443708896636963, "learning_rate": 1.1052557884849357e-05, "loss": 0.5273, "step": 30422 }, { "epoch": 0.9345682425582895, "grad_norm": 0.3715219795703888, "learning_rate": 1.1052077290801785e-05, "loss": 0.543, "step": 30423 }, { "epoch": 0.9345989616932388, "grad_norm": 0.38686370849609375, "learning_rate": 1.1051596694297013e-05, "loss": 0.4744, "step": 30424 }, { "epoch": 0.9346296808281879, "grad_norm": 0.37719622254371643, "learning_rate": 1.1051116095336166e-05, "loss": 0.6243, "step": 30425 }, { "epoch": 0.934660399963137, "grad_norm": 0.3858301043510437, "learning_rate": 1.1050635493920363e-05, "loss": 0.5325, "step": 30426 }, { "epoch": 0.9346911190980862, "grad_norm": 0.4668706953525543, "learning_rate": 1.1050154890050729e-05, "loss": 0.5497, "step": 30427 }, { "epoch": 0.9347218382330353, "grad_norm": 0.3703922927379608, "learning_rate": 1.1049674283728379e-05, "loss": 0.5355, "step": 30428 }, { "epoch": 0.9347525573679846, "grad_norm": 0.4908474385738373, "learning_rate": 1.1049193674954448e-05, "loss": 0.5374, "step": 30429 }, { "epoch": 0.9347832765029337, "grad_norm": 0.42486608028411865, "learning_rate": 1.1048713063730048e-05, "loss": 0.5526, "step": 30430 }, { "epoch": 0.9348139956378828, "grad_norm": 0.44306185841560364, "learning_rate": 1.1048232450056306e-05, "loss": 0.598, "step": 30431 }, { "epoch": 0.934844714772832, "grad_norm": 0.37743595242500305, "learning_rate": 1.1047751833934344e-05, "loss": 0.6303, "step": 30432 }, { "epoch": 0.9348754339077812, "grad_norm": 0.3774903118610382, "learning_rate": 1.1047271215365287e-05, "loss": 0.4842, "step": 30433 }, { "epoch": 0.9349061530427303, "grad_norm": 0.33440661430358887, "learning_rate": 1.1046790594350253e-05, "loss": 0.5463, "step": 30434 }, { "epoch": 0.9349368721776795, "grad_norm": 0.38640710711479187, "learning_rate": 1.1046309970890365e-05, "loss": 0.5513, "step": 30435 }, { "epoch": 0.9349675913126286, "grad_norm": 0.388054758310318, "learning_rate": 1.104582934498675e-05, "loss": 0.4894, "step": 30436 }, { "epoch": 0.9349983104475778, "grad_norm": 0.3746173083782196, "learning_rate": 1.1045348716640525e-05, "loss": 0.5463, "step": 30437 }, { "epoch": 0.935029029582527, "grad_norm": 0.3446078598499298, "learning_rate": 1.1044868085852817e-05, "loss": 0.5037, "step": 30438 }, { "epoch": 0.9350597487174761, "grad_norm": 0.3576319217681885, "learning_rate": 1.1044387452624746e-05, "loss": 0.5738, "step": 30439 }, { "epoch": 0.9350904678524252, "grad_norm": 0.34665703773498535, "learning_rate": 1.1043906816957436e-05, "loss": 0.5015, "step": 30440 }, { "epoch": 0.9351211869873745, "grad_norm": 0.399756520986557, "learning_rate": 1.1043426178852006e-05, "loss": 0.5621, "step": 30441 }, { "epoch": 0.9351519061223236, "grad_norm": 0.4014807641506195, "learning_rate": 1.1042945538309582e-05, "loss": 0.5959, "step": 30442 }, { "epoch": 0.9351826252572728, "grad_norm": 0.3342481851577759, "learning_rate": 1.1042464895331284e-05, "loss": 0.5775, "step": 30443 }, { "epoch": 0.9352133443922219, "grad_norm": 0.3667951226234436, "learning_rate": 1.104198424991824e-05, "loss": 0.5725, "step": 30444 }, { "epoch": 0.935244063527171, "grad_norm": 0.31376245617866516, "learning_rate": 1.104150360207157e-05, "loss": 0.5161, "step": 30445 }, { "epoch": 0.9352747826621203, "grad_norm": 0.36095085740089417, "learning_rate": 1.1041022951792393e-05, "loss": 0.485, "step": 30446 }, { "epoch": 0.9353055017970694, "grad_norm": 0.40169069170951843, "learning_rate": 1.1040542299081835e-05, "loss": 0.5982, "step": 30447 }, { "epoch": 0.9353362209320185, "grad_norm": 0.337018221616745, "learning_rate": 1.1040061643941016e-05, "loss": 0.4876, "step": 30448 }, { "epoch": 0.9353669400669677, "grad_norm": 0.3413563668727875, "learning_rate": 1.1039580986371064e-05, "loss": 0.4244, "step": 30449 }, { "epoch": 0.9353976592019169, "grad_norm": 0.37029215693473816, "learning_rate": 1.1039100326373095e-05, "loss": 0.5634, "step": 30450 }, { "epoch": 0.935428378336866, "grad_norm": 0.3702857494354248, "learning_rate": 1.1038619663948236e-05, "loss": 0.4873, "step": 30451 }, { "epoch": 0.9354590974718152, "grad_norm": 0.3662261366844177, "learning_rate": 1.1038138999097607e-05, "loss": 0.4875, "step": 30452 }, { "epoch": 0.9354898166067643, "grad_norm": 0.38516971468925476, "learning_rate": 1.1037658331822334e-05, "loss": 0.573, "step": 30453 }, { "epoch": 0.9355205357417136, "grad_norm": 0.3602786660194397, "learning_rate": 1.1037177662123534e-05, "loss": 0.4715, "step": 30454 }, { "epoch": 0.9355512548766627, "grad_norm": 0.45929694175720215, "learning_rate": 1.1036696990002339e-05, "loss": 0.5863, "step": 30455 }, { "epoch": 0.9355819740116118, "grad_norm": 0.3505461812019348, "learning_rate": 1.1036216315459862e-05, "loss": 0.474, "step": 30456 }, { "epoch": 0.935612693146561, "grad_norm": 0.392701655626297, "learning_rate": 1.103573563849723e-05, "loss": 0.5313, "step": 30457 }, { "epoch": 0.9356434122815102, "grad_norm": 0.8423426747322083, "learning_rate": 1.1035254959115564e-05, "loss": 0.5123, "step": 30458 }, { "epoch": 0.9356741314164593, "grad_norm": 0.3674393594264984, "learning_rate": 1.1034774277315987e-05, "loss": 0.4749, "step": 30459 }, { "epoch": 0.9357048505514085, "grad_norm": 0.3928945064544678, "learning_rate": 1.1034293593099623e-05, "loss": 0.4661, "step": 30460 }, { "epoch": 0.9357355696863576, "grad_norm": 0.3649788200855255, "learning_rate": 1.1033812906467596e-05, "loss": 0.4968, "step": 30461 }, { "epoch": 0.9357662888213067, "grad_norm": 0.39478060603141785, "learning_rate": 1.1033332217421024e-05, "loss": 0.5613, "step": 30462 }, { "epoch": 0.935797007956256, "grad_norm": 0.3862840533256531, "learning_rate": 1.1032851525961034e-05, "loss": 0.4998, "step": 30463 }, { "epoch": 0.9358277270912051, "grad_norm": 0.4269610047340393, "learning_rate": 1.1032370832088748e-05, "loss": 0.5287, "step": 30464 }, { "epoch": 0.9358584462261542, "grad_norm": 0.3816654086112976, "learning_rate": 1.1031890135805288e-05, "loss": 0.5941, "step": 30465 }, { "epoch": 0.9358891653611034, "grad_norm": 0.35906827449798584, "learning_rate": 1.1031409437111775e-05, "loss": 0.5733, "step": 30466 }, { "epoch": 0.9359198844960526, "grad_norm": 0.35455194115638733, "learning_rate": 1.1030928736009335e-05, "loss": 0.5297, "step": 30467 }, { "epoch": 0.9359506036310018, "grad_norm": 0.31200411915779114, "learning_rate": 1.1030448032499085e-05, "loss": 0.5509, "step": 30468 }, { "epoch": 0.9359813227659509, "grad_norm": 0.37960201501846313, "learning_rate": 1.1029967326582156e-05, "loss": 0.6164, "step": 30469 }, { "epoch": 0.9360120419009, "grad_norm": 0.5749216079711914, "learning_rate": 1.1029486618259663e-05, "loss": 0.543, "step": 30470 }, { "epoch": 0.9360427610358493, "grad_norm": 0.34579917788505554, "learning_rate": 1.1029005907532737e-05, "loss": 0.5735, "step": 30471 }, { "epoch": 0.9360734801707984, "grad_norm": 0.4105397164821625, "learning_rate": 1.1028525194402492e-05, "loss": 0.6368, "step": 30472 }, { "epoch": 0.9361041993057475, "grad_norm": 0.38152557611465454, "learning_rate": 1.1028044478870055e-05, "loss": 0.5342, "step": 30473 }, { "epoch": 0.9361349184406967, "grad_norm": 0.3708680272102356, "learning_rate": 1.102756376093655e-05, "loss": 0.5648, "step": 30474 }, { "epoch": 0.9361656375756459, "grad_norm": 0.3293423652648926, "learning_rate": 1.1027083040603095e-05, "loss": 0.5685, "step": 30475 }, { "epoch": 0.936196356710595, "grad_norm": 0.4050220549106598, "learning_rate": 1.1026602317870819e-05, "loss": 0.5413, "step": 30476 }, { "epoch": 0.9362270758455442, "grad_norm": 0.3598083555698395, "learning_rate": 1.102612159274084e-05, "loss": 0.5526, "step": 30477 }, { "epoch": 0.9362577949804933, "grad_norm": 0.36917465925216675, "learning_rate": 1.102564086521428e-05, "loss": 0.4909, "step": 30478 }, { "epoch": 0.9362885141154426, "grad_norm": 0.37562596797943115, "learning_rate": 1.1025160135292268e-05, "loss": 0.579, "step": 30479 }, { "epoch": 0.9363192332503917, "grad_norm": 0.40075400471687317, "learning_rate": 1.102467940297592e-05, "loss": 0.5671, "step": 30480 }, { "epoch": 0.9363499523853408, "grad_norm": 0.34429940581321716, "learning_rate": 1.1024198668266364e-05, "loss": 0.5523, "step": 30481 }, { "epoch": 0.93638067152029, "grad_norm": 0.3654640316963196, "learning_rate": 1.1023717931164719e-05, "loss": 0.5146, "step": 30482 }, { "epoch": 0.9364113906552392, "grad_norm": 0.34985607862472534, "learning_rate": 1.1023237191672112e-05, "loss": 0.5373, "step": 30483 }, { "epoch": 0.9364421097901883, "grad_norm": 0.3714081346988678, "learning_rate": 1.102275644978966e-05, "loss": 0.4518, "step": 30484 }, { "epoch": 0.9364728289251375, "grad_norm": 0.38884812593460083, "learning_rate": 1.102227570551849e-05, "loss": 0.4925, "step": 30485 }, { "epoch": 0.9365035480600866, "grad_norm": 0.3605530261993408, "learning_rate": 1.1021794958859721e-05, "loss": 0.4931, "step": 30486 }, { "epoch": 0.9365342671950357, "grad_norm": 0.39612045884132385, "learning_rate": 1.1021314209814485e-05, "loss": 0.6224, "step": 30487 }, { "epoch": 0.936564986329985, "grad_norm": 0.35809189081192017, "learning_rate": 1.1020833458383894e-05, "loss": 0.5784, "step": 30488 }, { "epoch": 0.9365957054649341, "grad_norm": 0.386057585477829, "learning_rate": 1.1020352704569076e-05, "loss": 0.5128, "step": 30489 }, { "epoch": 0.9366264245998833, "grad_norm": 0.6502811908721924, "learning_rate": 1.1019871948371153e-05, "loss": 0.5442, "step": 30490 }, { "epoch": 0.9366571437348324, "grad_norm": 0.36868345737457275, "learning_rate": 1.1019391189791246e-05, "loss": 0.6053, "step": 30491 }, { "epoch": 0.9366878628697816, "grad_norm": 0.3854413330554962, "learning_rate": 1.1018910428830483e-05, "loss": 0.4629, "step": 30492 }, { "epoch": 0.9367185820047308, "grad_norm": 0.3991881012916565, "learning_rate": 1.101842966548998e-05, "loss": 0.5386, "step": 30493 }, { "epoch": 0.9367493011396799, "grad_norm": 0.37113523483276367, "learning_rate": 1.1017948899770865e-05, "loss": 0.5331, "step": 30494 }, { "epoch": 0.936780020274629, "grad_norm": 0.37614697217941284, "learning_rate": 1.1017468131674258e-05, "loss": 0.5577, "step": 30495 }, { "epoch": 0.9368107394095783, "grad_norm": 0.34639772772789, "learning_rate": 1.1016987361201285e-05, "loss": 0.5683, "step": 30496 }, { "epoch": 0.9368414585445274, "grad_norm": 0.3526081144809723, "learning_rate": 1.1016506588353066e-05, "loss": 0.5333, "step": 30497 }, { "epoch": 0.9368721776794765, "grad_norm": 0.3944303095340729, "learning_rate": 1.1016025813130724e-05, "loss": 0.5867, "step": 30498 }, { "epoch": 0.9369028968144257, "grad_norm": 0.33467525243759155, "learning_rate": 1.1015545035535386e-05, "loss": 0.5438, "step": 30499 }, { "epoch": 0.9369336159493749, "grad_norm": 0.3856748640537262, "learning_rate": 1.1015064255568169e-05, "loss": 0.5435, "step": 30500 }, { "epoch": 0.936964335084324, "grad_norm": 0.5413875579833984, "learning_rate": 1.10145834732302e-05, "loss": 0.5802, "step": 30501 }, { "epoch": 0.9369950542192732, "grad_norm": 0.3418818712234497, "learning_rate": 1.1014102688522599e-05, "loss": 0.5173, "step": 30502 }, { "epoch": 0.9370257733542223, "grad_norm": 0.424415647983551, "learning_rate": 1.1013621901446491e-05, "loss": 0.4916, "step": 30503 }, { "epoch": 0.9370564924891716, "grad_norm": 0.31459856033325195, "learning_rate": 1.1013141112002999e-05, "loss": 0.4694, "step": 30504 }, { "epoch": 0.9370872116241207, "grad_norm": 0.3602941930294037, "learning_rate": 1.1012660320193244e-05, "loss": 0.5899, "step": 30505 }, { "epoch": 0.9371179307590698, "grad_norm": 0.352975994348526, "learning_rate": 1.101217952601835e-05, "loss": 0.5487, "step": 30506 }, { "epoch": 0.937148649894019, "grad_norm": 0.3918739855289459, "learning_rate": 1.1011698729479441e-05, "loss": 0.6075, "step": 30507 }, { "epoch": 0.9371793690289681, "grad_norm": 0.3351406455039978, "learning_rate": 1.101121793057764e-05, "loss": 0.5228, "step": 30508 }, { "epoch": 0.9372100881639173, "grad_norm": 0.371866375207901, "learning_rate": 1.1010737129314066e-05, "loss": 0.5792, "step": 30509 }, { "epoch": 0.9372408072988665, "grad_norm": 0.33267083764076233, "learning_rate": 1.1010256325689848e-05, "loss": 0.4676, "step": 30510 }, { "epoch": 0.9372715264338156, "grad_norm": 0.3816780745983124, "learning_rate": 1.1009775519706103e-05, "loss": 0.5918, "step": 30511 }, { "epoch": 0.9373022455687647, "grad_norm": 0.3800414800643921, "learning_rate": 1.1009294711363958e-05, "loss": 0.5515, "step": 30512 }, { "epoch": 0.937332964703714, "grad_norm": 0.3644031882286072, "learning_rate": 1.1008813900664535e-05, "loss": 0.5743, "step": 30513 }, { "epoch": 0.9373636838386631, "grad_norm": 0.3885006308555603, "learning_rate": 1.1008333087608956e-05, "loss": 0.5923, "step": 30514 }, { "epoch": 0.9373944029736123, "grad_norm": 0.3762899339199066, "learning_rate": 1.1007852272198343e-05, "loss": 0.6011, "step": 30515 }, { "epoch": 0.9374251221085614, "grad_norm": 0.3698633313179016, "learning_rate": 1.1007371454433821e-05, "loss": 0.5071, "step": 30516 }, { "epoch": 0.9374558412435106, "grad_norm": 0.38348883390426636, "learning_rate": 1.1006890634316517e-05, "loss": 0.5393, "step": 30517 }, { "epoch": 0.9374865603784598, "grad_norm": 0.38565918803215027, "learning_rate": 1.1006409811847544e-05, "loss": 0.4897, "step": 30518 }, { "epoch": 0.9375172795134089, "grad_norm": 0.40739551186561584, "learning_rate": 1.1005928987028036e-05, "loss": 0.5786, "step": 30519 }, { "epoch": 0.937547998648358, "grad_norm": 0.36479464173316956, "learning_rate": 1.1005448159859106e-05, "loss": 0.5541, "step": 30520 }, { "epoch": 0.9375787177833073, "grad_norm": 0.35180723667144775, "learning_rate": 1.1004967330341884e-05, "loss": 0.485, "step": 30521 }, { "epoch": 0.9376094369182564, "grad_norm": 0.4682829976081848, "learning_rate": 1.100448649847749e-05, "loss": 0.5227, "step": 30522 }, { "epoch": 0.9376401560532055, "grad_norm": 0.3561836779117584, "learning_rate": 1.1004005664267048e-05, "loss": 0.4781, "step": 30523 }, { "epoch": 0.9376708751881547, "grad_norm": 0.39238440990448, "learning_rate": 1.1003524827711683e-05, "loss": 0.5635, "step": 30524 }, { "epoch": 0.9377015943231038, "grad_norm": 0.3665147125720978, "learning_rate": 1.1003043988812512e-05, "loss": 0.4943, "step": 30525 }, { "epoch": 0.937732313458053, "grad_norm": 0.34298229217529297, "learning_rate": 1.1002563147570664e-05, "loss": 0.5721, "step": 30526 }, { "epoch": 0.9377630325930022, "grad_norm": 0.3582199513912201, "learning_rate": 1.1002082303987257e-05, "loss": 0.5608, "step": 30527 }, { "epoch": 0.9377937517279513, "grad_norm": 0.36220067739486694, "learning_rate": 1.100160145806342e-05, "loss": 0.4902, "step": 30528 }, { "epoch": 0.9378244708629005, "grad_norm": 0.3550640344619751, "learning_rate": 1.100112060980027e-05, "loss": 0.5657, "step": 30529 }, { "epoch": 0.9378551899978497, "grad_norm": 0.41666603088378906, "learning_rate": 1.1000639759198935e-05, "loss": 0.5847, "step": 30530 }, { "epoch": 0.9378859091327988, "grad_norm": 0.39002132415771484, "learning_rate": 1.1000158906260534e-05, "loss": 0.5678, "step": 30531 }, { "epoch": 0.937916628267748, "grad_norm": 0.36284101009368896, "learning_rate": 1.0999678050986193e-05, "loss": 0.4974, "step": 30532 }, { "epoch": 0.9379473474026971, "grad_norm": 0.4138340651988983, "learning_rate": 1.0999197193377034e-05, "loss": 0.5227, "step": 30533 }, { "epoch": 0.9379780665376463, "grad_norm": 0.3592665195465088, "learning_rate": 1.0998716333434178e-05, "loss": 0.5209, "step": 30534 }, { "epoch": 0.9380087856725955, "grad_norm": 0.35422003269195557, "learning_rate": 1.0998235471158753e-05, "loss": 0.5936, "step": 30535 }, { "epoch": 0.9380395048075446, "grad_norm": 0.42085981369018555, "learning_rate": 1.0997754606551878e-05, "loss": 0.5299, "step": 30536 }, { "epoch": 0.9380702239424937, "grad_norm": 0.39680832624435425, "learning_rate": 1.099727373961468e-05, "loss": 0.5807, "step": 30537 }, { "epoch": 0.938100943077443, "grad_norm": 0.36455219984054565, "learning_rate": 1.0996792870348275e-05, "loss": 0.5051, "step": 30538 }, { "epoch": 0.9381316622123921, "grad_norm": 0.38291677832603455, "learning_rate": 1.0996311998753797e-05, "loss": 0.5521, "step": 30539 }, { "epoch": 0.9381623813473413, "grad_norm": 0.6399614810943604, "learning_rate": 1.0995831124832359e-05, "loss": 0.4586, "step": 30540 }, { "epoch": 0.9381931004822904, "grad_norm": 0.4435330927371979, "learning_rate": 1.0995350248585088e-05, "loss": 0.5538, "step": 30541 }, { "epoch": 0.9382238196172396, "grad_norm": 0.4028201997280121, "learning_rate": 1.0994869370013108e-05, "loss": 0.4922, "step": 30542 }, { "epoch": 0.9382545387521888, "grad_norm": 0.3791435658931732, "learning_rate": 1.099438848911754e-05, "loss": 0.5813, "step": 30543 }, { "epoch": 0.9382852578871379, "grad_norm": 0.3636995553970337, "learning_rate": 1.099390760589951e-05, "loss": 0.5704, "step": 30544 }, { "epoch": 0.938315977022087, "grad_norm": 0.3340231776237488, "learning_rate": 1.0993426720360136e-05, "loss": 0.5009, "step": 30545 }, { "epoch": 0.9383466961570363, "grad_norm": 0.3629789650440216, "learning_rate": 1.0992945832500548e-05, "loss": 0.4889, "step": 30546 }, { "epoch": 0.9383774152919854, "grad_norm": 0.36056530475616455, "learning_rate": 1.0992464942321864e-05, "loss": 0.6249, "step": 30547 }, { "epoch": 0.9384081344269345, "grad_norm": 0.40736493468284607, "learning_rate": 1.099198404982521e-05, "loss": 0.5651, "step": 30548 }, { "epoch": 0.9384388535618837, "grad_norm": 0.4686693251132965, "learning_rate": 1.0991503155011705e-05, "loss": 0.5981, "step": 30549 }, { "epoch": 0.9384695726968328, "grad_norm": 0.4550022780895233, "learning_rate": 1.0991022257882478e-05, "loss": 0.4363, "step": 30550 }, { "epoch": 0.938500291831782, "grad_norm": 0.3507893979549408, "learning_rate": 1.0990541358438647e-05, "loss": 0.5053, "step": 30551 }, { "epoch": 0.9385310109667312, "grad_norm": 0.33585864305496216, "learning_rate": 1.0990060456681338e-05, "loss": 0.4345, "step": 30552 }, { "epoch": 0.9385617301016803, "grad_norm": 0.41759976744651794, "learning_rate": 1.0989579552611674e-05, "loss": 0.588, "step": 30553 }, { "epoch": 0.9385924492366295, "grad_norm": 1.5914274454116821, "learning_rate": 1.0989098646230777e-05, "loss": 0.505, "step": 30554 }, { "epoch": 0.9386231683715787, "grad_norm": 0.38381126523017883, "learning_rate": 1.0988617737539773e-05, "loss": 0.5402, "step": 30555 }, { "epoch": 0.9386538875065278, "grad_norm": 0.38524991273880005, "learning_rate": 1.0988136826539782e-05, "loss": 0.5307, "step": 30556 }, { "epoch": 0.938684606641477, "grad_norm": 0.3492327034473419, "learning_rate": 1.098765591323193e-05, "loss": 0.5706, "step": 30557 }, { "epoch": 0.9387153257764261, "grad_norm": 0.3999633491039276, "learning_rate": 1.0987174997617336e-05, "loss": 0.5013, "step": 30558 }, { "epoch": 0.9387460449113753, "grad_norm": 0.3781188130378723, "learning_rate": 1.0986694079697128e-05, "loss": 0.5611, "step": 30559 }, { "epoch": 0.9387767640463245, "grad_norm": 0.39904001355171204, "learning_rate": 1.0986213159472428e-05, "loss": 0.5358, "step": 30560 }, { "epoch": 0.9388074831812736, "grad_norm": 0.42379456758499146, "learning_rate": 1.0985732236944355e-05, "loss": 0.6249, "step": 30561 }, { "epoch": 0.9388382023162227, "grad_norm": 0.3560948371887207, "learning_rate": 1.098525131211404e-05, "loss": 0.537, "step": 30562 }, { "epoch": 0.938868921451172, "grad_norm": 0.33941134810447693, "learning_rate": 1.0984770384982597e-05, "loss": 0.5181, "step": 30563 }, { "epoch": 0.9388996405861211, "grad_norm": 0.37272125482559204, "learning_rate": 1.0984289455551159e-05, "loss": 0.5583, "step": 30564 }, { "epoch": 0.9389303597210703, "grad_norm": 0.3448246717453003, "learning_rate": 1.0983808523820838e-05, "loss": 0.4691, "step": 30565 }, { "epoch": 0.9389610788560194, "grad_norm": 0.38180193305015564, "learning_rate": 1.0983327589792769e-05, "loss": 0.62, "step": 30566 }, { "epoch": 0.9389917979909685, "grad_norm": 0.3451305627822876, "learning_rate": 1.0982846653468067e-05, "loss": 0.5637, "step": 30567 }, { "epoch": 0.9390225171259178, "grad_norm": 0.35697197914123535, "learning_rate": 1.0982365714847858e-05, "loss": 0.4963, "step": 30568 }, { "epoch": 0.9390532362608669, "grad_norm": 0.5609351992607117, "learning_rate": 1.0981884773933264e-05, "loss": 0.5659, "step": 30569 }, { "epoch": 0.939083955395816, "grad_norm": 0.3856447637081146, "learning_rate": 1.0981403830725413e-05, "loss": 0.559, "step": 30570 }, { "epoch": 0.9391146745307652, "grad_norm": 0.3715640604496002, "learning_rate": 1.0980922885225423e-05, "loss": 0.5167, "step": 30571 }, { "epoch": 0.9391453936657144, "grad_norm": 0.44372087717056274, "learning_rate": 1.0980441937434419e-05, "loss": 0.6017, "step": 30572 }, { "epoch": 0.9391761128006635, "grad_norm": 0.35631072521209717, "learning_rate": 1.0979960987353524e-05, "loss": 0.5969, "step": 30573 }, { "epoch": 0.9392068319356127, "grad_norm": 0.37863269448280334, "learning_rate": 1.097948003498386e-05, "loss": 0.5143, "step": 30574 }, { "epoch": 0.9392375510705618, "grad_norm": 0.36044996976852417, "learning_rate": 1.0978999080326555e-05, "loss": 0.4955, "step": 30575 }, { "epoch": 0.939268270205511, "grad_norm": 0.34307265281677246, "learning_rate": 1.0978518123382732e-05, "loss": 0.5303, "step": 30576 }, { "epoch": 0.9392989893404602, "grad_norm": 0.3604673147201538, "learning_rate": 1.0978037164153505e-05, "loss": 0.5333, "step": 30577 }, { "epoch": 0.9393297084754093, "grad_norm": 0.3738802373409271, "learning_rate": 1.097755620264001e-05, "loss": 0.469, "step": 30578 }, { "epoch": 0.9393604276103585, "grad_norm": 0.39931657910346985, "learning_rate": 1.097707523884336e-05, "loss": 0.5739, "step": 30579 }, { "epoch": 0.9393911467453077, "grad_norm": 0.38523465394973755, "learning_rate": 1.0976594272764685e-05, "loss": 0.5402, "step": 30580 }, { "epoch": 0.9394218658802568, "grad_norm": 0.4134034812450409, "learning_rate": 1.0976113304405103e-05, "loss": 0.5623, "step": 30581 }, { "epoch": 0.939452585015206, "grad_norm": 0.3752863109111786, "learning_rate": 1.0975632333765743e-05, "loss": 0.5936, "step": 30582 }, { "epoch": 0.9394833041501551, "grad_norm": 0.3676816523075104, "learning_rate": 1.0975151360847722e-05, "loss": 0.5012, "step": 30583 }, { "epoch": 0.9395140232851042, "grad_norm": 0.3982495069503784, "learning_rate": 1.0974670385652171e-05, "loss": 0.6132, "step": 30584 }, { "epoch": 0.9395447424200535, "grad_norm": 0.38514381647109985, "learning_rate": 1.0974189408180208e-05, "loss": 0.5052, "step": 30585 }, { "epoch": 0.9395754615550026, "grad_norm": 0.4124583601951599, "learning_rate": 1.0973708428432956e-05, "loss": 0.5222, "step": 30586 }, { "epoch": 0.9396061806899517, "grad_norm": 0.3395598232746124, "learning_rate": 1.0973227446411543e-05, "loss": 0.5423, "step": 30587 }, { "epoch": 0.939636899824901, "grad_norm": 0.4008364975452423, "learning_rate": 1.0972746462117083e-05, "loss": 0.5585, "step": 30588 }, { "epoch": 0.9396676189598501, "grad_norm": 0.36572718620300293, "learning_rate": 1.0972265475550711e-05, "loss": 0.5137, "step": 30589 }, { "epoch": 0.9396983380947993, "grad_norm": 0.3857976794242859, "learning_rate": 1.0971784486713544e-05, "loss": 0.5895, "step": 30590 }, { "epoch": 0.9397290572297484, "grad_norm": 0.3478478491306305, "learning_rate": 1.0971303495606707e-05, "loss": 0.5533, "step": 30591 }, { "epoch": 0.9397597763646975, "grad_norm": 0.383031964302063, "learning_rate": 1.0970822502231322e-05, "loss": 0.4685, "step": 30592 }, { "epoch": 0.9397904954996468, "grad_norm": 0.3461166322231293, "learning_rate": 1.0970341506588516e-05, "loss": 0.5378, "step": 30593 }, { "epoch": 0.9398212146345959, "grad_norm": 0.3745899498462677, "learning_rate": 1.0969860508679406e-05, "loss": 0.4918, "step": 30594 }, { "epoch": 0.939851933769545, "grad_norm": 0.3817872405052185, "learning_rate": 1.096937950850512e-05, "loss": 0.541, "step": 30595 }, { "epoch": 0.9398826529044942, "grad_norm": 0.3677142858505249, "learning_rate": 1.0968898506066782e-05, "loss": 0.5276, "step": 30596 }, { "epoch": 0.9399133720394434, "grad_norm": 0.4343697726726532, "learning_rate": 1.0968417501365511e-05, "loss": 0.5507, "step": 30597 }, { "epoch": 0.9399440911743925, "grad_norm": 0.352441668510437, "learning_rate": 1.0967936494402437e-05, "loss": 0.5552, "step": 30598 }, { "epoch": 0.9399748103093417, "grad_norm": 0.3439881205558777, "learning_rate": 1.0967455485178678e-05, "loss": 0.5449, "step": 30599 }, { "epoch": 0.9400055294442908, "grad_norm": 0.3587932586669922, "learning_rate": 1.096697447369536e-05, "loss": 0.5658, "step": 30600 }, { "epoch": 0.9400362485792401, "grad_norm": 0.37353864312171936, "learning_rate": 1.0966493459953602e-05, "loss": 0.5587, "step": 30601 }, { "epoch": 0.9400669677141892, "grad_norm": 0.3950367867946625, "learning_rate": 1.0966012443954534e-05, "loss": 0.5085, "step": 30602 }, { "epoch": 0.9400976868491383, "grad_norm": 0.36878931522369385, "learning_rate": 1.0965531425699276e-05, "loss": 0.5029, "step": 30603 }, { "epoch": 0.9401284059840875, "grad_norm": 0.5952804088592529, "learning_rate": 1.0965050405188953e-05, "loss": 0.4704, "step": 30604 }, { "epoch": 0.9401591251190367, "grad_norm": 0.377076119184494, "learning_rate": 1.0964569382424688e-05, "loss": 0.6289, "step": 30605 }, { "epoch": 0.9401898442539858, "grad_norm": 0.3976678252220154, "learning_rate": 1.0964088357407598e-05, "loss": 0.5118, "step": 30606 }, { "epoch": 0.940220563388935, "grad_norm": 0.41511476039886475, "learning_rate": 1.0963607330138816e-05, "loss": 0.4957, "step": 30607 }, { "epoch": 0.9402512825238841, "grad_norm": 0.3688296675682068, "learning_rate": 1.0963126300619462e-05, "loss": 0.5584, "step": 30608 }, { "epoch": 0.9402820016588332, "grad_norm": 0.3909655511379242, "learning_rate": 1.096264526885066e-05, "loss": 0.5197, "step": 30609 }, { "epoch": 0.9403127207937825, "grad_norm": 0.4365016222000122, "learning_rate": 1.0962164234833532e-05, "loss": 0.5768, "step": 30610 }, { "epoch": 0.9403434399287316, "grad_norm": 0.3880998194217682, "learning_rate": 1.0961683198569204e-05, "loss": 0.6037, "step": 30611 }, { "epoch": 0.9403741590636807, "grad_norm": 0.39228084683418274, "learning_rate": 1.0961202160058796e-05, "loss": 0.5221, "step": 30612 }, { "epoch": 0.94040487819863, "grad_norm": 0.33076387643814087, "learning_rate": 1.0960721119303433e-05, "loss": 0.5421, "step": 30613 }, { "epoch": 0.9404355973335791, "grad_norm": 0.40231987833976746, "learning_rate": 1.096024007630424e-05, "loss": 0.5614, "step": 30614 }, { "epoch": 0.9404663164685283, "grad_norm": 0.37394943833351135, "learning_rate": 1.0959759031062339e-05, "loss": 0.5859, "step": 30615 }, { "epoch": 0.9404970356034774, "grad_norm": 0.42474156618118286, "learning_rate": 1.0959277983578854e-05, "loss": 0.5792, "step": 30616 }, { "epoch": 0.9405277547384265, "grad_norm": 0.3770480155944824, "learning_rate": 1.0958796933854905e-05, "loss": 0.4568, "step": 30617 }, { "epoch": 0.9405584738733758, "grad_norm": 0.35851266980171204, "learning_rate": 1.0958315881891623e-05, "loss": 0.4985, "step": 30618 }, { "epoch": 0.9405891930083249, "grad_norm": 0.3842771053314209, "learning_rate": 1.0957834827690123e-05, "loss": 0.5149, "step": 30619 }, { "epoch": 0.940619912143274, "grad_norm": 0.3565131723880768, "learning_rate": 1.0957353771251538e-05, "loss": 0.5931, "step": 30620 }, { "epoch": 0.9406506312782232, "grad_norm": 0.37876391410827637, "learning_rate": 1.0956872712576982e-05, "loss": 0.5303, "step": 30621 }, { "epoch": 0.9406813504131724, "grad_norm": 0.3564164340496063, "learning_rate": 1.0956391651667586e-05, "loss": 0.5833, "step": 30622 }, { "epoch": 0.9407120695481215, "grad_norm": 0.34185802936553955, "learning_rate": 1.095591058852447e-05, "loss": 0.5126, "step": 30623 }, { "epoch": 0.9407427886830707, "grad_norm": 0.3650984764099121, "learning_rate": 1.0955429523148756e-05, "loss": 0.5597, "step": 30624 }, { "epoch": 0.9407735078180198, "grad_norm": 0.37044936418533325, "learning_rate": 1.095494845554157e-05, "loss": 0.5899, "step": 30625 }, { "epoch": 0.9408042269529691, "grad_norm": 0.33271145820617676, "learning_rate": 1.0954467385704035e-05, "loss": 0.5779, "step": 30626 }, { "epoch": 0.9408349460879182, "grad_norm": 0.3642820417881012, "learning_rate": 1.0953986313637277e-05, "loss": 0.5926, "step": 30627 }, { "epoch": 0.9408656652228673, "grad_norm": 0.36204296350479126, "learning_rate": 1.0953505239342417e-05, "loss": 0.5159, "step": 30628 }, { "epoch": 0.9408963843578165, "grad_norm": 0.38862529397010803, "learning_rate": 1.0953024162820579e-05, "loss": 0.5847, "step": 30629 }, { "epoch": 0.9409271034927656, "grad_norm": 0.35233786702156067, "learning_rate": 1.0952543084072887e-05, "loss": 0.5669, "step": 30630 }, { "epoch": 0.9409578226277148, "grad_norm": 0.3567686378955841, "learning_rate": 1.0952062003100462e-05, "loss": 0.5383, "step": 30631 }, { "epoch": 0.940988541762664, "grad_norm": 0.3495686650276184, "learning_rate": 1.095158091990443e-05, "loss": 0.4313, "step": 30632 }, { "epoch": 0.9410192608976131, "grad_norm": 0.3990044593811035, "learning_rate": 1.0951099834485914e-05, "loss": 0.524, "step": 30633 }, { "epoch": 0.9410499800325622, "grad_norm": 0.42440304160118103, "learning_rate": 1.095061874684604e-05, "loss": 0.6276, "step": 30634 }, { "epoch": 0.9410806991675115, "grad_norm": 0.3947928547859192, "learning_rate": 1.0950137656985927e-05, "loss": 0.4481, "step": 30635 }, { "epoch": 0.9411114183024606, "grad_norm": 0.3798501491546631, "learning_rate": 1.0949656564906704e-05, "loss": 0.4202, "step": 30636 }, { "epoch": 0.9411421374374097, "grad_norm": 0.3592594563961029, "learning_rate": 1.0949175470609488e-05, "loss": 0.6285, "step": 30637 }, { "epoch": 0.9411728565723589, "grad_norm": 0.4066087603569031, "learning_rate": 1.094869437409541e-05, "loss": 0.5373, "step": 30638 }, { "epoch": 0.9412035757073081, "grad_norm": 0.3720725476741791, "learning_rate": 1.0948213275365587e-05, "loss": 0.5372, "step": 30639 }, { "epoch": 0.9412342948422573, "grad_norm": 0.378385454416275, "learning_rate": 1.094773217442115e-05, "loss": 0.558, "step": 30640 }, { "epoch": 0.9412650139772064, "grad_norm": 0.3698541224002838, "learning_rate": 1.0947251071263215e-05, "loss": 0.5489, "step": 30641 }, { "epoch": 0.9412957331121555, "grad_norm": 0.33005666732788086, "learning_rate": 1.094676996589291e-05, "loss": 0.5048, "step": 30642 }, { "epoch": 0.9413264522471048, "grad_norm": 0.4004095792770386, "learning_rate": 1.0946288858311353e-05, "loss": 0.5098, "step": 30643 }, { "epoch": 0.9413571713820539, "grad_norm": 0.35159602761268616, "learning_rate": 1.0945807748519677e-05, "loss": 0.5487, "step": 30644 }, { "epoch": 0.941387890517003, "grad_norm": 0.3710736036300659, "learning_rate": 1.0945326636519e-05, "loss": 0.5557, "step": 30645 }, { "epoch": 0.9414186096519522, "grad_norm": 0.3679526150226593, "learning_rate": 1.094484552231045e-05, "loss": 0.5574, "step": 30646 }, { "epoch": 0.9414493287869014, "grad_norm": 0.36016786098480225, "learning_rate": 1.0944364405895142e-05, "loss": 0.6006, "step": 30647 }, { "epoch": 0.9414800479218505, "grad_norm": 0.35138291120529175, "learning_rate": 1.0943883287274208e-05, "loss": 0.5249, "step": 30648 }, { "epoch": 0.9415107670567997, "grad_norm": 0.330581396818161, "learning_rate": 1.0943402166448767e-05, "loss": 0.6269, "step": 30649 }, { "epoch": 0.9415414861917488, "grad_norm": 0.39819324016571045, "learning_rate": 1.0942921043419948e-05, "loss": 0.4877, "step": 30650 }, { "epoch": 0.941572205326698, "grad_norm": 0.3623211979866028, "learning_rate": 1.0942439918188865e-05, "loss": 0.5281, "step": 30651 }, { "epoch": 0.9416029244616472, "grad_norm": 0.4391518533229828, "learning_rate": 1.0941958790756652e-05, "loss": 0.5441, "step": 30652 }, { "epoch": 0.9416336435965963, "grad_norm": 0.3672217130661011, "learning_rate": 1.0941477661124429e-05, "loss": 0.5284, "step": 30653 }, { "epoch": 0.9416643627315455, "grad_norm": 0.3744504749774933, "learning_rate": 1.0940996529293318e-05, "loss": 0.5303, "step": 30654 }, { "epoch": 0.9416950818664946, "grad_norm": 0.37162771821022034, "learning_rate": 1.0940515395264441e-05, "loss": 0.5591, "step": 30655 }, { "epoch": 0.9417258010014438, "grad_norm": 0.32833045721054077, "learning_rate": 1.094003425903893e-05, "loss": 0.546, "step": 30656 }, { "epoch": 0.941756520136393, "grad_norm": 0.35805678367614746, "learning_rate": 1.09395531206179e-05, "loss": 0.5487, "step": 30657 }, { "epoch": 0.9417872392713421, "grad_norm": 0.3968590497970581, "learning_rate": 1.0939071980002479e-05, "loss": 0.6006, "step": 30658 }, { "epoch": 0.9418179584062912, "grad_norm": 0.3920024335384369, "learning_rate": 1.093859083719379e-05, "loss": 0.5981, "step": 30659 }, { "epoch": 0.9418486775412405, "grad_norm": 0.3350933790206909, "learning_rate": 1.0938109692192955e-05, "loss": 0.4547, "step": 30660 }, { "epoch": 0.9418793966761896, "grad_norm": 0.3847745656967163, "learning_rate": 1.09376285450011e-05, "loss": 0.5487, "step": 30661 }, { "epoch": 0.9419101158111387, "grad_norm": 0.41053205728530884, "learning_rate": 1.0937147395619345e-05, "loss": 0.5483, "step": 30662 }, { "epoch": 0.9419408349460879, "grad_norm": 0.38871651887893677, "learning_rate": 1.0936666244048824e-05, "loss": 0.6229, "step": 30663 }, { "epoch": 0.941971554081037, "grad_norm": 0.4028288424015045, "learning_rate": 1.093618509029065e-05, "loss": 0.6392, "step": 30664 }, { "epoch": 0.9420022732159863, "grad_norm": 0.4544677436351776, "learning_rate": 1.093570393434595e-05, "loss": 0.5262, "step": 30665 }, { "epoch": 0.9420329923509354, "grad_norm": 0.3976188600063324, "learning_rate": 1.0935222776215848e-05, "loss": 0.5819, "step": 30666 }, { "epoch": 0.9420637114858845, "grad_norm": 0.347687304019928, "learning_rate": 1.0934741615901467e-05, "loss": 0.4844, "step": 30667 }, { "epoch": 0.9420944306208338, "grad_norm": 0.36080387234687805, "learning_rate": 1.0934260453403933e-05, "loss": 0.5257, "step": 30668 }, { "epoch": 0.9421251497557829, "grad_norm": 0.36903485655784607, "learning_rate": 1.0933779288724366e-05, "loss": 0.5471, "step": 30669 }, { "epoch": 0.942155868890732, "grad_norm": 0.39570382237434387, "learning_rate": 1.0933298121863896e-05, "loss": 0.5324, "step": 30670 }, { "epoch": 0.9421865880256812, "grad_norm": 0.3322794437408447, "learning_rate": 1.0932816952823642e-05, "loss": 0.5214, "step": 30671 }, { "epoch": 0.9422173071606303, "grad_norm": 0.36827924847602844, "learning_rate": 1.0932335781604729e-05, "loss": 0.5477, "step": 30672 }, { "epoch": 0.9422480262955795, "grad_norm": 0.37766700983047485, "learning_rate": 1.093185460820828e-05, "loss": 0.5418, "step": 30673 }, { "epoch": 0.9422787454305287, "grad_norm": 0.34616997838020325, "learning_rate": 1.093137343263542e-05, "loss": 0.4954, "step": 30674 }, { "epoch": 0.9423094645654778, "grad_norm": 0.4389040172100067, "learning_rate": 1.0930892254887273e-05, "loss": 0.5746, "step": 30675 }, { "epoch": 0.942340183700427, "grad_norm": 0.41306641697883606, "learning_rate": 1.093041107496496e-05, "loss": 0.5234, "step": 30676 }, { "epoch": 0.9423709028353762, "grad_norm": 0.3939557671546936, "learning_rate": 1.0929929892869608e-05, "loss": 0.6104, "step": 30677 }, { "epoch": 0.9424016219703253, "grad_norm": 0.3664713203907013, "learning_rate": 1.092944870860234e-05, "loss": 0.6444, "step": 30678 }, { "epoch": 0.9424323411052745, "grad_norm": 0.4531330466270447, "learning_rate": 1.0928967522164275e-05, "loss": 0.5452, "step": 30679 }, { "epoch": 0.9424630602402236, "grad_norm": 0.3812418282032013, "learning_rate": 1.0928486333556547e-05, "loss": 0.535, "step": 30680 }, { "epoch": 0.9424937793751728, "grad_norm": 0.6591178178787231, "learning_rate": 1.0928005142780273e-05, "loss": 0.5737, "step": 30681 }, { "epoch": 0.942524498510122, "grad_norm": 0.3918472230434418, "learning_rate": 1.092752394983658e-05, "loss": 0.6348, "step": 30682 }, { "epoch": 0.9425552176450711, "grad_norm": 0.41105133295059204, "learning_rate": 1.0927042754726587e-05, "loss": 0.5409, "step": 30683 }, { "epoch": 0.9425859367800202, "grad_norm": 1.6519687175750732, "learning_rate": 1.0926561557451424e-05, "loss": 0.596, "step": 30684 }, { "epoch": 0.9426166559149695, "grad_norm": 0.35563623905181885, "learning_rate": 1.0926080358012208e-05, "loss": 0.5689, "step": 30685 }, { "epoch": 0.9426473750499186, "grad_norm": 0.33878716826438904, "learning_rate": 1.092559915641007e-05, "loss": 0.5104, "step": 30686 }, { "epoch": 0.9426780941848677, "grad_norm": 0.3320094347000122, "learning_rate": 1.0925117952646127e-05, "loss": 0.6018, "step": 30687 }, { "epoch": 0.9427088133198169, "grad_norm": 0.3782365918159485, "learning_rate": 1.092463674672151e-05, "loss": 0.5098, "step": 30688 }, { "epoch": 0.942739532454766, "grad_norm": 0.37028658390045166, "learning_rate": 1.0924155538637338e-05, "loss": 0.4378, "step": 30689 }, { "epoch": 0.9427702515897153, "grad_norm": 0.35953789949417114, "learning_rate": 1.0923674328394735e-05, "loss": 0.5965, "step": 30690 }, { "epoch": 0.9428009707246644, "grad_norm": 0.40814805030822754, "learning_rate": 1.0923193115994828e-05, "loss": 0.5056, "step": 30691 }, { "epoch": 0.9428316898596135, "grad_norm": 0.44681328535079956, "learning_rate": 1.0922711901438739e-05, "loss": 0.5016, "step": 30692 }, { "epoch": 0.9428624089945628, "grad_norm": 0.4427691698074341, "learning_rate": 1.0922230684727593e-05, "loss": 0.5967, "step": 30693 }, { "epoch": 0.9428931281295119, "grad_norm": 0.36629268527030945, "learning_rate": 1.0921749465862508e-05, "loss": 0.5653, "step": 30694 }, { "epoch": 0.942923847264461, "grad_norm": 0.3426606059074402, "learning_rate": 1.0921268244844618e-05, "loss": 0.5872, "step": 30695 }, { "epoch": 0.9429545663994102, "grad_norm": 0.396567702293396, "learning_rate": 1.0920787021675036e-05, "loss": 0.5502, "step": 30696 }, { "epoch": 0.9429852855343593, "grad_norm": 0.3817180097103119, "learning_rate": 1.0920305796354895e-05, "loss": 0.4769, "step": 30697 }, { "epoch": 0.9430160046693085, "grad_norm": 0.3242032825946808, "learning_rate": 1.0919824568885317e-05, "loss": 0.553, "step": 30698 }, { "epoch": 0.9430467238042577, "grad_norm": 0.3894921839237213, "learning_rate": 1.091934333926742e-05, "loss": 0.5186, "step": 30699 }, { "epoch": 0.9430774429392068, "grad_norm": 0.38750123977661133, "learning_rate": 1.091886210750234e-05, "loss": 0.4083, "step": 30700 }, { "epoch": 0.943108162074156, "grad_norm": 0.38312217593193054, "learning_rate": 1.0918380873591185e-05, "loss": 0.5465, "step": 30701 }, { "epoch": 0.9431388812091052, "grad_norm": 0.3582525849342346, "learning_rate": 1.0917899637535094e-05, "loss": 0.5589, "step": 30702 }, { "epoch": 0.9431696003440543, "grad_norm": 0.37599971890449524, "learning_rate": 1.091741839933518e-05, "loss": 0.4115, "step": 30703 }, { "epoch": 0.9432003194790035, "grad_norm": 0.4013470709323883, "learning_rate": 1.0916937158992573e-05, "loss": 0.6143, "step": 30704 }, { "epoch": 0.9432310386139526, "grad_norm": 0.4118158519268036, "learning_rate": 1.0916455916508395e-05, "loss": 0.5386, "step": 30705 }, { "epoch": 0.9432617577489018, "grad_norm": 0.38869982957839966, "learning_rate": 1.0915974671883772e-05, "loss": 0.6044, "step": 30706 }, { "epoch": 0.943292476883851, "grad_norm": 0.3812267780303955, "learning_rate": 1.091549342511982e-05, "loss": 0.5315, "step": 30707 }, { "epoch": 0.9433231960188001, "grad_norm": 0.47151172161102295, "learning_rate": 1.0915012176217677e-05, "loss": 0.675, "step": 30708 }, { "epoch": 0.9433539151537492, "grad_norm": 0.4259050786495209, "learning_rate": 1.0914530925178454e-05, "loss": 0.5835, "step": 30709 }, { "epoch": 0.9433846342886985, "grad_norm": 0.3968071937561035, "learning_rate": 1.0914049672003282e-05, "loss": 0.5265, "step": 30710 }, { "epoch": 0.9434153534236476, "grad_norm": 0.3967810273170471, "learning_rate": 1.0913568416693284e-05, "loss": 0.5175, "step": 30711 }, { "epoch": 0.9434460725585968, "grad_norm": 0.4038400650024414, "learning_rate": 1.091308715924958e-05, "loss": 0.5731, "step": 30712 }, { "epoch": 0.9434767916935459, "grad_norm": 0.3953440487384796, "learning_rate": 1.09126058996733e-05, "loss": 0.5828, "step": 30713 }, { "epoch": 0.943507510828495, "grad_norm": 0.3524687886238098, "learning_rate": 1.0912124637965564e-05, "loss": 0.578, "step": 30714 }, { "epoch": 0.9435382299634443, "grad_norm": 0.36862215399742126, "learning_rate": 1.0911643374127499e-05, "loss": 0.4966, "step": 30715 }, { "epoch": 0.9435689490983934, "grad_norm": 0.35657447576522827, "learning_rate": 1.0911162108160225e-05, "loss": 0.5427, "step": 30716 }, { "epoch": 0.9435996682333425, "grad_norm": 0.3504733145236969, "learning_rate": 1.0910680840064869e-05, "loss": 0.5363, "step": 30717 }, { "epoch": 0.9436303873682917, "grad_norm": 0.3724963665008545, "learning_rate": 1.0910199569842555e-05, "loss": 0.5762, "step": 30718 }, { "epoch": 0.9436611065032409, "grad_norm": 0.5166528224945068, "learning_rate": 1.0909718297494404e-05, "loss": 0.5107, "step": 30719 }, { "epoch": 0.94369182563819, "grad_norm": 0.9325889945030212, "learning_rate": 1.0909237023021546e-05, "loss": 0.5806, "step": 30720 }, { "epoch": 0.9437225447731392, "grad_norm": 0.3726601004600525, "learning_rate": 1.0908755746425098e-05, "loss": 0.5292, "step": 30721 }, { "epoch": 0.9437532639080883, "grad_norm": 0.34769582748413086, "learning_rate": 1.090827446770619e-05, "loss": 0.6006, "step": 30722 }, { "epoch": 0.9437839830430375, "grad_norm": 0.40030744671821594, "learning_rate": 1.0907793186865941e-05, "loss": 0.5485, "step": 30723 }, { "epoch": 0.9438147021779867, "grad_norm": 0.37545162439346313, "learning_rate": 1.090731190390548e-05, "loss": 0.555, "step": 30724 }, { "epoch": 0.9438454213129358, "grad_norm": 0.3553171455860138, "learning_rate": 1.0906830618825928e-05, "loss": 0.5979, "step": 30725 }, { "epoch": 0.943876140447885, "grad_norm": 0.3591116964817047, "learning_rate": 1.090634933162841e-05, "loss": 0.5006, "step": 30726 }, { "epoch": 0.9439068595828342, "grad_norm": 0.40418368577957153, "learning_rate": 1.0905868042314052e-05, "loss": 0.6645, "step": 30727 }, { "epoch": 0.9439375787177833, "grad_norm": 0.4203190505504608, "learning_rate": 1.090538675088397e-05, "loss": 0.6149, "step": 30728 }, { "epoch": 0.9439682978527325, "grad_norm": 0.3460955321788788, "learning_rate": 1.0904905457339299e-05, "loss": 0.578, "step": 30729 }, { "epoch": 0.9439990169876816, "grad_norm": 0.4423180818557739, "learning_rate": 1.0904424161681155e-05, "loss": 0.504, "step": 30730 }, { "epoch": 0.9440297361226307, "grad_norm": 0.39953377842903137, "learning_rate": 1.0903942863910669e-05, "loss": 0.5689, "step": 30731 }, { "epoch": 0.94406045525758, "grad_norm": 0.37462472915649414, "learning_rate": 1.0903461564028958e-05, "loss": 0.4585, "step": 30732 }, { "epoch": 0.9440911743925291, "grad_norm": 0.7479169964790344, "learning_rate": 1.0902980262037151e-05, "loss": 0.5272, "step": 30733 }, { "epoch": 0.9441218935274782, "grad_norm": 0.3691602349281311, "learning_rate": 1.090249895793637e-05, "loss": 0.5266, "step": 30734 }, { "epoch": 0.9441526126624274, "grad_norm": 0.3551304340362549, "learning_rate": 1.0902017651727736e-05, "loss": 0.5178, "step": 30735 }, { "epoch": 0.9441833317973766, "grad_norm": 0.437679260969162, "learning_rate": 1.0901536343412382e-05, "loss": 0.5783, "step": 30736 }, { "epoch": 0.9442140509323258, "grad_norm": 0.38654035329818726, "learning_rate": 1.0901055032991425e-05, "loss": 0.5313, "step": 30737 }, { "epoch": 0.9442447700672749, "grad_norm": 0.35758712887763977, "learning_rate": 1.0900573720465992e-05, "loss": 0.5831, "step": 30738 }, { "epoch": 0.944275489202224, "grad_norm": 0.3819088041782379, "learning_rate": 1.0900092405837206e-05, "loss": 0.669, "step": 30739 }, { "epoch": 0.9443062083371733, "grad_norm": 0.4337395131587982, "learning_rate": 1.089961108910619e-05, "loss": 0.4155, "step": 30740 }, { "epoch": 0.9443369274721224, "grad_norm": 0.3885762393474579, "learning_rate": 1.089912977027407e-05, "loss": 0.5867, "step": 30741 }, { "epoch": 0.9443676466070715, "grad_norm": 0.3640156090259552, "learning_rate": 1.089864844934197e-05, "loss": 0.5715, "step": 30742 }, { "epoch": 0.9443983657420207, "grad_norm": 0.3953651189804077, "learning_rate": 1.0898167126311015e-05, "loss": 0.5848, "step": 30743 }, { "epoch": 0.9444290848769699, "grad_norm": 0.3452005088329315, "learning_rate": 1.0897685801182327e-05, "loss": 0.4987, "step": 30744 }, { "epoch": 0.944459804011919, "grad_norm": 0.33986711502075195, "learning_rate": 1.0897204473957032e-05, "loss": 0.5113, "step": 30745 }, { "epoch": 0.9444905231468682, "grad_norm": 2.3873989582061768, "learning_rate": 1.0896723144636252e-05, "loss": 0.6544, "step": 30746 }, { "epoch": 0.9445212422818173, "grad_norm": 0.35634103417396545, "learning_rate": 1.0896241813221113e-05, "loss": 0.5061, "step": 30747 }, { "epoch": 0.9445519614167665, "grad_norm": 0.37158727645874023, "learning_rate": 1.0895760479712736e-05, "loss": 0.5508, "step": 30748 }, { "epoch": 0.9445826805517157, "grad_norm": 0.40026381611824036, "learning_rate": 1.0895279144112252e-05, "loss": 0.6468, "step": 30749 }, { "epoch": 0.9446133996866648, "grad_norm": 0.3809809386730194, "learning_rate": 1.0894797806420778e-05, "loss": 0.5191, "step": 30750 }, { "epoch": 0.944644118821614, "grad_norm": 0.36660927534103394, "learning_rate": 1.0894316466639443e-05, "loss": 0.5367, "step": 30751 }, { "epoch": 0.9446748379565632, "grad_norm": 0.35769328474998474, "learning_rate": 1.0893835124769367e-05, "loss": 0.5259, "step": 30752 }, { "epoch": 0.9447055570915123, "grad_norm": 0.35556861758232117, "learning_rate": 1.0893353780811677e-05, "loss": 0.5785, "step": 30753 }, { "epoch": 0.9447362762264615, "grad_norm": 0.474855899810791, "learning_rate": 1.0892872434767501e-05, "loss": 0.4606, "step": 30754 }, { "epoch": 0.9447669953614106, "grad_norm": 0.34195569157600403, "learning_rate": 1.0892391086637955e-05, "loss": 0.492, "step": 30755 }, { "epoch": 0.9447977144963597, "grad_norm": 0.36304664611816406, "learning_rate": 1.0891909736424169e-05, "loss": 0.5814, "step": 30756 }, { "epoch": 0.944828433631309, "grad_norm": 0.3815009295940399, "learning_rate": 1.0891428384127265e-05, "loss": 0.5233, "step": 30757 }, { "epoch": 0.9448591527662581, "grad_norm": 0.38864272832870483, "learning_rate": 1.089094702974837e-05, "loss": 0.546, "step": 30758 }, { "epoch": 0.9448898719012072, "grad_norm": 0.3925333023071289, "learning_rate": 1.0890465673288602e-05, "loss": 0.5727, "step": 30759 }, { "epoch": 0.9449205910361564, "grad_norm": 0.36815911531448364, "learning_rate": 1.0889984314749092e-05, "loss": 0.5659, "step": 30760 }, { "epoch": 0.9449513101711056, "grad_norm": 0.38197362422943115, "learning_rate": 1.0889502954130961e-05, "loss": 0.5537, "step": 30761 }, { "epoch": 0.9449820293060548, "grad_norm": 0.35442760586738586, "learning_rate": 1.0889021591435332e-05, "loss": 0.5588, "step": 30762 }, { "epoch": 0.9450127484410039, "grad_norm": 0.3832736909389496, "learning_rate": 1.0888540226663334e-05, "loss": 0.4952, "step": 30763 }, { "epoch": 0.945043467575953, "grad_norm": 0.4046028256416321, "learning_rate": 1.0888058859816086e-05, "loss": 0.5475, "step": 30764 }, { "epoch": 0.9450741867109023, "grad_norm": 0.3734009563922882, "learning_rate": 1.0887577490894717e-05, "loss": 0.5675, "step": 30765 }, { "epoch": 0.9451049058458514, "grad_norm": 0.4119701385498047, "learning_rate": 1.0887096119900345e-05, "loss": 0.523, "step": 30766 }, { "epoch": 0.9451356249808005, "grad_norm": 0.3440074026584625, "learning_rate": 1.0886614746834101e-05, "loss": 0.5819, "step": 30767 }, { "epoch": 0.9451663441157497, "grad_norm": 0.38046497106552124, "learning_rate": 1.0886133371697103e-05, "loss": 0.4392, "step": 30768 }, { "epoch": 0.9451970632506989, "grad_norm": 0.3558594286441803, "learning_rate": 1.0885651994490483e-05, "loss": 0.5645, "step": 30769 }, { "epoch": 0.945227782385648, "grad_norm": 0.3685606122016907, "learning_rate": 1.0885170615215358e-05, "loss": 0.507, "step": 30770 }, { "epoch": 0.9452585015205972, "grad_norm": 0.35258328914642334, "learning_rate": 1.0884689233872853e-05, "loss": 0.5212, "step": 30771 }, { "epoch": 0.9452892206555463, "grad_norm": 0.4346478581428528, "learning_rate": 1.0884207850464099e-05, "loss": 0.562, "step": 30772 }, { "epoch": 0.9453199397904954, "grad_norm": 0.4406599998474121, "learning_rate": 1.0883726464990214e-05, "loss": 0.5902, "step": 30773 }, { "epoch": 0.9453506589254447, "grad_norm": 0.3500063717365265, "learning_rate": 1.0883245077452324e-05, "loss": 0.5501, "step": 30774 }, { "epoch": 0.9453813780603938, "grad_norm": 0.4959089457988739, "learning_rate": 1.0882763687851554e-05, "loss": 0.6084, "step": 30775 }, { "epoch": 0.945412097195343, "grad_norm": 0.537386953830719, "learning_rate": 1.0882282296189029e-05, "loss": 0.5818, "step": 30776 }, { "epoch": 0.9454428163302921, "grad_norm": 0.49674904346466064, "learning_rate": 1.088180090246587e-05, "loss": 0.5293, "step": 30777 }, { "epoch": 0.9454735354652413, "grad_norm": 0.3675539493560791, "learning_rate": 1.0881319506683205e-05, "loss": 0.5649, "step": 30778 }, { "epoch": 0.9455042546001905, "grad_norm": 0.5166592001914978, "learning_rate": 1.0880838108842155e-05, "loss": 0.5106, "step": 30779 }, { "epoch": 0.9455349737351396, "grad_norm": 0.3945609927177429, "learning_rate": 1.0880356708943847e-05, "loss": 0.502, "step": 30780 }, { "epoch": 0.9455656928700887, "grad_norm": 0.3702213764190674, "learning_rate": 1.0879875306989405e-05, "loss": 0.5555, "step": 30781 }, { "epoch": 0.945596412005038, "grad_norm": 0.40363165736198425, "learning_rate": 1.0879393902979952e-05, "loss": 0.5664, "step": 30782 }, { "epoch": 0.9456271311399871, "grad_norm": 0.3684442937374115, "learning_rate": 1.0878912496916613e-05, "loss": 0.5317, "step": 30783 }, { "epoch": 0.9456578502749362, "grad_norm": 0.3604065179824829, "learning_rate": 1.0878431088800512e-05, "loss": 0.593, "step": 30784 }, { "epoch": 0.9456885694098854, "grad_norm": 0.6360969543457031, "learning_rate": 1.0877949678632777e-05, "loss": 0.529, "step": 30785 }, { "epoch": 0.9457192885448346, "grad_norm": 0.41391491889953613, "learning_rate": 1.0877468266414526e-05, "loss": 0.4774, "step": 30786 }, { "epoch": 0.9457500076797838, "grad_norm": 0.365797758102417, "learning_rate": 1.0876986852146886e-05, "loss": 0.5158, "step": 30787 }, { "epoch": 0.9457807268147329, "grad_norm": 0.3944757878780365, "learning_rate": 1.0876505435830985e-05, "loss": 0.6236, "step": 30788 }, { "epoch": 0.945811445949682, "grad_norm": 0.3921526372432709, "learning_rate": 1.0876024017467942e-05, "loss": 0.5708, "step": 30789 }, { "epoch": 0.9458421650846313, "grad_norm": 0.38548731803894043, "learning_rate": 1.0875542597058884e-05, "loss": 0.536, "step": 30790 }, { "epoch": 0.9458728842195804, "grad_norm": 0.38636600971221924, "learning_rate": 1.0875061174604934e-05, "loss": 0.5988, "step": 30791 }, { "epoch": 0.9459036033545295, "grad_norm": 0.42805007100105286, "learning_rate": 1.0874579750107221e-05, "loss": 0.5322, "step": 30792 }, { "epoch": 0.9459343224894787, "grad_norm": 0.3599318563938141, "learning_rate": 1.0874098323566861e-05, "loss": 0.5701, "step": 30793 }, { "epoch": 0.9459650416244278, "grad_norm": 0.3465520739555359, "learning_rate": 1.087361689498499e-05, "loss": 0.5698, "step": 30794 }, { "epoch": 0.945995760759377, "grad_norm": 0.3748539686203003, "learning_rate": 1.0873135464362718e-05, "loss": 0.5525, "step": 30795 }, { "epoch": 0.9460264798943262, "grad_norm": 0.34569424390792847, "learning_rate": 1.0872654031701185e-05, "loss": 0.5627, "step": 30796 }, { "epoch": 0.9460571990292753, "grad_norm": 0.3614463210105896, "learning_rate": 1.0872172597001505e-05, "loss": 0.5297, "step": 30797 }, { "epoch": 0.9460879181642246, "grad_norm": 0.38440608978271484, "learning_rate": 1.0871691160264802e-05, "loss": 0.5501, "step": 30798 }, { "epoch": 0.9461186372991737, "grad_norm": 0.39415857195854187, "learning_rate": 1.0871209721492207e-05, "loss": 0.5994, "step": 30799 }, { "epoch": 0.9461493564341228, "grad_norm": 0.3998890817165375, "learning_rate": 1.0870728280684837e-05, "loss": 0.5854, "step": 30800 }, { "epoch": 0.946180075569072, "grad_norm": 0.38441187143325806, "learning_rate": 1.0870246837843823e-05, "loss": 0.5379, "step": 30801 }, { "epoch": 0.9462107947040211, "grad_norm": 0.3825955390930176, "learning_rate": 1.0869765392970283e-05, "loss": 0.5308, "step": 30802 }, { "epoch": 0.9462415138389703, "grad_norm": 0.3708370625972748, "learning_rate": 1.086928394606535e-05, "loss": 0.5746, "step": 30803 }, { "epoch": 0.9462722329739195, "grad_norm": 0.39425474405288696, "learning_rate": 1.0868802497130142e-05, "loss": 0.5709, "step": 30804 }, { "epoch": 0.9463029521088686, "grad_norm": 0.430525004863739, "learning_rate": 1.0868321046165786e-05, "loss": 0.5649, "step": 30805 }, { "epoch": 0.9463336712438177, "grad_norm": 0.357363224029541, "learning_rate": 1.0867839593173403e-05, "loss": 0.5741, "step": 30806 }, { "epoch": 0.946364390378767, "grad_norm": 0.37563878297805786, "learning_rate": 1.086735813815412e-05, "loss": 0.568, "step": 30807 }, { "epoch": 0.9463951095137161, "grad_norm": 1.2520571947097778, "learning_rate": 1.0866876681109064e-05, "loss": 0.5641, "step": 30808 }, { "epoch": 0.9464258286486652, "grad_norm": 0.38165706396102905, "learning_rate": 1.0866395222039355e-05, "loss": 0.5742, "step": 30809 }, { "epoch": 0.9464565477836144, "grad_norm": 0.43859684467315674, "learning_rate": 1.086591376094612e-05, "loss": 0.5575, "step": 30810 }, { "epoch": 0.9464872669185636, "grad_norm": 0.39867863059043884, "learning_rate": 1.0865432297830483e-05, "loss": 0.5563, "step": 30811 }, { "epoch": 0.9465179860535128, "grad_norm": 0.4017806649208069, "learning_rate": 1.086495083269357e-05, "loss": 0.5604, "step": 30812 }, { "epoch": 0.9465487051884619, "grad_norm": 0.38888779282569885, "learning_rate": 1.0864469365536504e-05, "loss": 0.5783, "step": 30813 }, { "epoch": 0.946579424323411, "grad_norm": 0.3396161198616028, "learning_rate": 1.0863987896360406e-05, "loss": 0.534, "step": 30814 }, { "epoch": 0.9466101434583603, "grad_norm": 0.36972159147262573, "learning_rate": 1.0863506425166407e-05, "loss": 0.5674, "step": 30815 }, { "epoch": 0.9466408625933094, "grad_norm": 0.4062753915786743, "learning_rate": 1.0863024951955628e-05, "loss": 0.5699, "step": 30816 }, { "epoch": 0.9466715817282585, "grad_norm": 0.35333114862442017, "learning_rate": 1.0862543476729195e-05, "loss": 0.521, "step": 30817 }, { "epoch": 0.9467023008632077, "grad_norm": 0.39264345169067383, "learning_rate": 1.0862061999488228e-05, "loss": 0.4942, "step": 30818 }, { "epoch": 0.9467330199981568, "grad_norm": 0.3768382966518402, "learning_rate": 1.0861580520233858e-05, "loss": 0.5135, "step": 30819 }, { "epoch": 0.946763739133106, "grad_norm": 0.38220781087875366, "learning_rate": 1.0861099038967204e-05, "loss": 0.5434, "step": 30820 }, { "epoch": 0.9467944582680552, "grad_norm": 0.37298721075057983, "learning_rate": 1.0860617555689394e-05, "loss": 0.5633, "step": 30821 }, { "epoch": 0.9468251774030043, "grad_norm": 0.3746001124382019, "learning_rate": 1.0860136070401553e-05, "loss": 0.4882, "step": 30822 }, { "epoch": 0.9468558965379535, "grad_norm": 0.34808748960494995, "learning_rate": 1.0859654583104804e-05, "loss": 0.5367, "step": 30823 }, { "epoch": 0.9468866156729027, "grad_norm": 0.39319562911987305, "learning_rate": 1.0859173093800273e-05, "loss": 0.5484, "step": 30824 }, { "epoch": 0.9469173348078518, "grad_norm": 0.34816837310791016, "learning_rate": 1.0858691602489077e-05, "loss": 0.5578, "step": 30825 }, { "epoch": 0.946948053942801, "grad_norm": 0.35941609740257263, "learning_rate": 1.0858210109172352e-05, "loss": 0.5909, "step": 30826 }, { "epoch": 0.9469787730777501, "grad_norm": 0.38683420419692993, "learning_rate": 1.0857728613851215e-05, "loss": 0.468, "step": 30827 }, { "epoch": 0.9470094922126993, "grad_norm": 0.36542654037475586, "learning_rate": 1.0857247116526794e-05, "loss": 0.5695, "step": 30828 }, { "epoch": 0.9470402113476485, "grad_norm": 0.37170901894569397, "learning_rate": 1.0856765617200212e-05, "loss": 0.6178, "step": 30829 }, { "epoch": 0.9470709304825976, "grad_norm": 0.3401474356651306, "learning_rate": 1.0856284115872595e-05, "loss": 0.4959, "step": 30830 }, { "epoch": 0.9471016496175467, "grad_norm": 0.37856990098953247, "learning_rate": 1.0855802612545068e-05, "loss": 0.4816, "step": 30831 }, { "epoch": 0.947132368752496, "grad_norm": 0.38005298376083374, "learning_rate": 1.0855321107218752e-05, "loss": 0.4635, "step": 30832 }, { "epoch": 0.9471630878874451, "grad_norm": 0.3534003496170044, "learning_rate": 1.0854839599894776e-05, "loss": 0.5216, "step": 30833 }, { "epoch": 0.9471938070223942, "grad_norm": 0.38012373447418213, "learning_rate": 1.085435809057426e-05, "loss": 0.5819, "step": 30834 }, { "epoch": 0.9472245261573434, "grad_norm": 0.34466177225112915, "learning_rate": 1.0853876579258333e-05, "loss": 0.5866, "step": 30835 }, { "epoch": 0.9472552452922925, "grad_norm": 0.32831907272338867, "learning_rate": 1.0853395065948113e-05, "loss": 0.4584, "step": 30836 }, { "epoch": 0.9472859644272418, "grad_norm": 0.37011852860450745, "learning_rate": 1.0852913550644734e-05, "loss": 0.5666, "step": 30837 }, { "epoch": 0.9473166835621909, "grad_norm": 0.3392943739891052, "learning_rate": 1.0852432033349314e-05, "loss": 0.5249, "step": 30838 }, { "epoch": 0.94734740269714, "grad_norm": 0.38693496584892273, "learning_rate": 1.0851950514062981e-05, "loss": 0.5691, "step": 30839 }, { "epoch": 0.9473781218320892, "grad_norm": 0.4698086380958557, "learning_rate": 1.0851468992786858e-05, "loss": 0.5186, "step": 30840 }, { "epoch": 0.9474088409670384, "grad_norm": 0.4185049831867218, "learning_rate": 1.0850987469522068e-05, "loss": 0.6051, "step": 30841 }, { "epoch": 0.9474395601019875, "grad_norm": 0.4219761788845062, "learning_rate": 1.085050594426974e-05, "loss": 0.5556, "step": 30842 }, { "epoch": 0.9474702792369367, "grad_norm": 0.32831040024757385, "learning_rate": 1.0850024417030991e-05, "loss": 0.5813, "step": 30843 }, { "epoch": 0.9475009983718858, "grad_norm": 0.5203371644020081, "learning_rate": 1.0849542887806955e-05, "loss": 0.5656, "step": 30844 }, { "epoch": 0.947531717506835, "grad_norm": 0.3890923261642456, "learning_rate": 1.0849061356598749e-05, "loss": 0.5553, "step": 30845 }, { "epoch": 0.9475624366417842, "grad_norm": 0.3956112265586853, "learning_rate": 1.0848579823407503e-05, "loss": 0.5053, "step": 30846 }, { "epoch": 0.9475931557767333, "grad_norm": 0.3891233205795288, "learning_rate": 1.084809828823434e-05, "loss": 0.6278, "step": 30847 }, { "epoch": 0.9476238749116825, "grad_norm": 0.3502945005893707, "learning_rate": 1.0847616751080384e-05, "loss": 0.5304, "step": 30848 }, { "epoch": 0.9476545940466317, "grad_norm": 0.3877919614315033, "learning_rate": 1.084713521194676e-05, "loss": 0.6233, "step": 30849 }, { "epoch": 0.9476853131815808, "grad_norm": 0.3559962809085846, "learning_rate": 1.084665367083459e-05, "loss": 0.5837, "step": 30850 }, { "epoch": 0.94771603231653, "grad_norm": 0.37164273858070374, "learning_rate": 1.0846172127745004e-05, "loss": 0.4871, "step": 30851 }, { "epoch": 0.9477467514514791, "grad_norm": 0.4073159694671631, "learning_rate": 1.0845690582679122e-05, "loss": 0.5196, "step": 30852 }, { "epoch": 0.9477774705864283, "grad_norm": 0.3453143537044525, "learning_rate": 1.0845209035638073e-05, "loss": 0.5977, "step": 30853 }, { "epoch": 0.9478081897213775, "grad_norm": 0.42367732524871826, "learning_rate": 1.084472748662298e-05, "loss": 0.5732, "step": 30854 }, { "epoch": 0.9478389088563266, "grad_norm": 0.36757591366767883, "learning_rate": 1.0844245935634965e-05, "loss": 0.5139, "step": 30855 }, { "epoch": 0.9478696279912757, "grad_norm": 0.3743816316127777, "learning_rate": 1.0843764382675153e-05, "loss": 0.536, "step": 30856 }, { "epoch": 0.947900347126225, "grad_norm": 0.37106308341026306, "learning_rate": 1.0843282827744673e-05, "loss": 0.5705, "step": 30857 }, { "epoch": 0.9479310662611741, "grad_norm": 0.40849408507347107, "learning_rate": 1.0842801270844645e-05, "loss": 0.565, "step": 30858 }, { "epoch": 0.9479617853961232, "grad_norm": 0.3713688850402832, "learning_rate": 1.0842319711976197e-05, "loss": 0.5016, "step": 30859 }, { "epoch": 0.9479925045310724, "grad_norm": 0.36145952343940735, "learning_rate": 1.0841838151140453e-05, "loss": 0.5674, "step": 30860 }, { "epoch": 0.9480232236660215, "grad_norm": 0.3769175708293915, "learning_rate": 1.0841356588338537e-05, "loss": 0.5747, "step": 30861 }, { "epoch": 0.9480539428009708, "grad_norm": 0.3778441548347473, "learning_rate": 1.0840875023571571e-05, "loss": 0.5585, "step": 30862 }, { "epoch": 0.9480846619359199, "grad_norm": 0.35089170932769775, "learning_rate": 1.0840393456840686e-05, "loss": 0.4725, "step": 30863 }, { "epoch": 0.948115381070869, "grad_norm": 0.5021405816078186, "learning_rate": 1.0839911888147001e-05, "loss": 0.6247, "step": 30864 }, { "epoch": 0.9481461002058182, "grad_norm": 0.3611185550689697, "learning_rate": 1.0839430317491646e-05, "loss": 0.5467, "step": 30865 }, { "epoch": 0.9481768193407674, "grad_norm": 0.42122790217399597, "learning_rate": 1.0838948744875741e-05, "loss": 0.5368, "step": 30866 }, { "epoch": 0.9482075384757165, "grad_norm": 0.38480815291404724, "learning_rate": 1.0838467170300415e-05, "loss": 0.5316, "step": 30867 }, { "epoch": 0.9482382576106657, "grad_norm": 0.3519192636013031, "learning_rate": 1.0837985593766787e-05, "loss": 0.4874, "step": 30868 }, { "epoch": 0.9482689767456148, "grad_norm": 0.34229815006256104, "learning_rate": 1.0837504015275989e-05, "loss": 0.5842, "step": 30869 }, { "epoch": 0.948299695880564, "grad_norm": 0.3751688003540039, "learning_rate": 1.0837022434829136e-05, "loss": 0.468, "step": 30870 }, { "epoch": 0.9483304150155132, "grad_norm": 0.41560089588165283, "learning_rate": 1.0836540852427364e-05, "loss": 0.4928, "step": 30871 }, { "epoch": 0.9483611341504623, "grad_norm": 0.3451233208179474, "learning_rate": 1.0836059268071788e-05, "loss": 0.5064, "step": 30872 }, { "epoch": 0.9483918532854115, "grad_norm": 0.4089796543121338, "learning_rate": 1.0835577681763543e-05, "loss": 0.6016, "step": 30873 }, { "epoch": 0.9484225724203607, "grad_norm": 0.5191814303398132, "learning_rate": 1.0835096093503743e-05, "loss": 0.504, "step": 30874 }, { "epoch": 0.9484532915553098, "grad_norm": 0.38024112582206726, "learning_rate": 1.0834614503293519e-05, "loss": 0.486, "step": 30875 }, { "epoch": 0.948484010690259, "grad_norm": 0.38176390528678894, "learning_rate": 1.0834132911133994e-05, "loss": 0.5295, "step": 30876 }, { "epoch": 0.9485147298252081, "grad_norm": 0.355461984872818, "learning_rate": 1.0833651317026297e-05, "loss": 0.5174, "step": 30877 }, { "epoch": 0.9485454489601572, "grad_norm": 0.4096698462963104, "learning_rate": 1.0833169720971546e-05, "loss": 0.5736, "step": 30878 }, { "epoch": 0.9485761680951065, "grad_norm": 0.38363775610923767, "learning_rate": 1.0832688122970869e-05, "loss": 0.5046, "step": 30879 }, { "epoch": 0.9486068872300556, "grad_norm": 0.4012843370437622, "learning_rate": 1.083220652302539e-05, "loss": 0.4949, "step": 30880 }, { "epoch": 0.9486376063650047, "grad_norm": 0.41232892870903015, "learning_rate": 1.0831724921136235e-05, "loss": 0.6238, "step": 30881 }, { "epoch": 0.948668325499954, "grad_norm": 0.379808634519577, "learning_rate": 1.0831243317304529e-05, "loss": 0.5991, "step": 30882 }, { "epoch": 0.9486990446349031, "grad_norm": 0.37171101570129395, "learning_rate": 1.0830761711531397e-05, "loss": 0.5858, "step": 30883 }, { "epoch": 0.9487297637698522, "grad_norm": 0.47273218631744385, "learning_rate": 1.0830280103817961e-05, "loss": 0.584, "step": 30884 }, { "epoch": 0.9487604829048014, "grad_norm": 0.42394542694091797, "learning_rate": 1.0829798494165351e-05, "loss": 0.5406, "step": 30885 }, { "epoch": 0.9487912020397505, "grad_norm": 0.5417384505271912, "learning_rate": 1.0829316882574685e-05, "loss": 0.4411, "step": 30886 }, { "epoch": 0.9488219211746998, "grad_norm": 0.3566627502441406, "learning_rate": 1.0828835269047096e-05, "loss": 0.6008, "step": 30887 }, { "epoch": 0.9488526403096489, "grad_norm": 0.3553386926651001, "learning_rate": 1.0828353653583698e-05, "loss": 0.6158, "step": 30888 }, { "epoch": 0.948883359444598, "grad_norm": 0.3821137845516205, "learning_rate": 1.0827872036185627e-05, "loss": 0.5678, "step": 30889 }, { "epoch": 0.9489140785795472, "grad_norm": 0.46718865633010864, "learning_rate": 1.0827390416854e-05, "loss": 0.5011, "step": 30890 }, { "epoch": 0.9489447977144964, "grad_norm": 0.3752416968345642, "learning_rate": 1.082690879558995e-05, "loss": 0.5971, "step": 30891 }, { "epoch": 0.9489755168494455, "grad_norm": 0.381106436252594, "learning_rate": 1.0826427172394592e-05, "loss": 0.6055, "step": 30892 }, { "epoch": 0.9490062359843947, "grad_norm": 0.3784431219100952, "learning_rate": 1.0825945547269057e-05, "loss": 0.5045, "step": 30893 }, { "epoch": 0.9490369551193438, "grad_norm": 0.40219590067863464, "learning_rate": 1.082546392021447e-05, "loss": 0.502, "step": 30894 }, { "epoch": 0.949067674254293, "grad_norm": 0.37085556983947754, "learning_rate": 1.0824982291231951e-05, "loss": 0.6095, "step": 30895 }, { "epoch": 0.9490983933892422, "grad_norm": 0.38054779171943665, "learning_rate": 1.0824500660322633e-05, "loss": 0.5684, "step": 30896 }, { "epoch": 0.9491291125241913, "grad_norm": 0.40322959423065186, "learning_rate": 1.082401902748763e-05, "loss": 0.5979, "step": 30897 }, { "epoch": 0.9491598316591405, "grad_norm": 0.4010585844516754, "learning_rate": 1.0823537392728076e-05, "loss": 0.5882, "step": 30898 }, { "epoch": 0.9491905507940896, "grad_norm": 0.4444984197616577, "learning_rate": 1.0823055756045092e-05, "loss": 0.5629, "step": 30899 }, { "epoch": 0.9492212699290388, "grad_norm": 0.3968735933303833, "learning_rate": 1.0822574117439806e-05, "loss": 0.5655, "step": 30900 }, { "epoch": 0.949251989063988, "grad_norm": 0.39215174317359924, "learning_rate": 1.082209247691334e-05, "loss": 0.5762, "step": 30901 }, { "epoch": 0.9492827081989371, "grad_norm": 0.37365272641181946, "learning_rate": 1.0821610834466818e-05, "loss": 0.5672, "step": 30902 }, { "epoch": 0.9493134273338862, "grad_norm": 0.38274842500686646, "learning_rate": 1.082112919010137e-05, "loss": 0.5454, "step": 30903 }, { "epoch": 0.9493441464688355, "grad_norm": 0.38718533515930176, "learning_rate": 1.0820647543818115e-05, "loss": 0.5757, "step": 30904 }, { "epoch": 0.9493748656037846, "grad_norm": 0.38718634843826294, "learning_rate": 1.082016589561818e-05, "loss": 0.5017, "step": 30905 }, { "epoch": 0.9494055847387337, "grad_norm": 0.3836512565612793, "learning_rate": 1.0819684245502691e-05, "loss": 0.54, "step": 30906 }, { "epoch": 0.9494363038736829, "grad_norm": 0.36414843797683716, "learning_rate": 1.0819202593472772e-05, "loss": 0.63, "step": 30907 }, { "epoch": 0.9494670230086321, "grad_norm": 0.38565129041671753, "learning_rate": 1.0818720939529548e-05, "loss": 0.5487, "step": 30908 }, { "epoch": 0.9494977421435813, "grad_norm": 0.3537083864212036, "learning_rate": 1.0818239283674145e-05, "loss": 0.5167, "step": 30909 }, { "epoch": 0.9495284612785304, "grad_norm": 0.4190846383571625, "learning_rate": 1.0817757625907686e-05, "loss": 0.5863, "step": 30910 }, { "epoch": 0.9495591804134795, "grad_norm": 0.3769286572933197, "learning_rate": 1.0817275966231297e-05, "loss": 0.6037, "step": 30911 }, { "epoch": 0.9495898995484288, "grad_norm": 0.375468373298645, "learning_rate": 1.0816794304646104e-05, "loss": 0.6385, "step": 30912 }, { "epoch": 0.9496206186833779, "grad_norm": 0.4135298728942871, "learning_rate": 1.0816312641153229e-05, "loss": 0.517, "step": 30913 }, { "epoch": 0.949651337818327, "grad_norm": 0.37074559926986694, "learning_rate": 1.0815830975753801e-05, "loss": 0.5291, "step": 30914 }, { "epoch": 0.9496820569532762, "grad_norm": 0.3413192331790924, "learning_rate": 1.0815349308448939e-05, "loss": 0.5391, "step": 30915 }, { "epoch": 0.9497127760882254, "grad_norm": 0.38806870579719543, "learning_rate": 1.0814867639239775e-05, "loss": 0.5667, "step": 30916 }, { "epoch": 0.9497434952231745, "grad_norm": 0.5278379321098328, "learning_rate": 1.0814385968127427e-05, "loss": 0.5868, "step": 30917 }, { "epoch": 0.9497742143581237, "grad_norm": 0.34025296568870544, "learning_rate": 1.0813904295113029e-05, "loss": 0.4902, "step": 30918 }, { "epoch": 0.9498049334930728, "grad_norm": 0.36522406339645386, "learning_rate": 1.08134226201977e-05, "loss": 0.5744, "step": 30919 }, { "epoch": 0.9498356526280219, "grad_norm": 0.40396592020988464, "learning_rate": 1.0812940943382561e-05, "loss": 0.5802, "step": 30920 }, { "epoch": 0.9498663717629712, "grad_norm": 0.3551875650882721, "learning_rate": 1.0812459264668746e-05, "loss": 0.5456, "step": 30921 }, { "epoch": 0.9498970908979203, "grad_norm": 0.3771699368953705, "learning_rate": 1.0811977584057374e-05, "loss": 0.5916, "step": 30922 }, { "epoch": 0.9499278100328695, "grad_norm": 0.4529913663864136, "learning_rate": 1.0811495901549573e-05, "loss": 0.5713, "step": 30923 }, { "epoch": 0.9499585291678186, "grad_norm": 0.3813149034976959, "learning_rate": 1.0811014217146464e-05, "loss": 0.6139, "step": 30924 }, { "epoch": 0.9499892483027678, "grad_norm": 0.3374125361442566, "learning_rate": 1.0810532530849179e-05, "loss": 0.4998, "step": 30925 }, { "epoch": 0.950019967437717, "grad_norm": 0.3609704077243805, "learning_rate": 1.0810050842658835e-05, "loss": 0.4985, "step": 30926 }, { "epoch": 0.9500506865726661, "grad_norm": 0.40962982177734375, "learning_rate": 1.0809569152576562e-05, "loss": 0.5052, "step": 30927 }, { "epoch": 0.9500814057076152, "grad_norm": 0.4080347716808319, "learning_rate": 1.0809087460603482e-05, "loss": 0.5781, "step": 30928 }, { "epoch": 0.9501121248425645, "grad_norm": 0.45644158124923706, "learning_rate": 1.0808605766740724e-05, "loss": 0.5164, "step": 30929 }, { "epoch": 0.9501428439775136, "grad_norm": 0.37348800897598267, "learning_rate": 1.0808124070989413e-05, "loss": 0.6234, "step": 30930 }, { "epoch": 0.9501735631124627, "grad_norm": 0.3790205121040344, "learning_rate": 1.0807642373350666e-05, "loss": 0.5205, "step": 30931 }, { "epoch": 0.9502042822474119, "grad_norm": 0.35059595108032227, "learning_rate": 1.0807160673825617e-05, "loss": 0.602, "step": 30932 }, { "epoch": 0.950235001382361, "grad_norm": 0.39221686124801636, "learning_rate": 1.0806678972415385e-05, "loss": 0.5605, "step": 30933 }, { "epoch": 0.9502657205173103, "grad_norm": 0.3685643672943115, "learning_rate": 1.0806197269121102e-05, "loss": 0.549, "step": 30934 }, { "epoch": 0.9502964396522594, "grad_norm": 0.3706153333187103, "learning_rate": 1.0805715563943886e-05, "loss": 0.5472, "step": 30935 }, { "epoch": 0.9503271587872085, "grad_norm": 0.36282363533973694, "learning_rate": 1.0805233856884865e-05, "loss": 0.4787, "step": 30936 }, { "epoch": 0.9503578779221578, "grad_norm": 0.3842688202857971, "learning_rate": 1.0804752147945167e-05, "loss": 0.4754, "step": 30937 }, { "epoch": 0.9503885970571069, "grad_norm": 0.36942124366760254, "learning_rate": 1.0804270437125912e-05, "loss": 0.588, "step": 30938 }, { "epoch": 0.950419316192056, "grad_norm": 0.4372921586036682, "learning_rate": 1.0803788724428228e-05, "loss": 0.6241, "step": 30939 }, { "epoch": 0.9504500353270052, "grad_norm": 0.3549700975418091, "learning_rate": 1.0803307009853239e-05, "loss": 0.5405, "step": 30940 }, { "epoch": 0.9504807544619543, "grad_norm": 0.3994918465614319, "learning_rate": 1.080282529340207e-05, "loss": 0.5243, "step": 30941 }, { "epoch": 0.9505114735969035, "grad_norm": 0.47728633880615234, "learning_rate": 1.0802343575075846e-05, "loss": 0.5556, "step": 30942 }, { "epoch": 0.9505421927318527, "grad_norm": 0.34720802307128906, "learning_rate": 1.0801861854875695e-05, "loss": 0.4975, "step": 30943 }, { "epoch": 0.9505729118668018, "grad_norm": 0.37236812710762024, "learning_rate": 1.0801380132802736e-05, "loss": 0.5435, "step": 30944 }, { "epoch": 0.9506036310017509, "grad_norm": 0.40105491876602173, "learning_rate": 1.0800898408858102e-05, "loss": 0.6174, "step": 30945 }, { "epoch": 0.9506343501367002, "grad_norm": 0.42225512862205505, "learning_rate": 1.0800416683042911e-05, "loss": 0.5442, "step": 30946 }, { "epoch": 0.9506650692716493, "grad_norm": 0.39088818430900574, "learning_rate": 1.0799934955358291e-05, "loss": 0.5234, "step": 30947 }, { "epoch": 0.9506957884065985, "grad_norm": 0.3738718330860138, "learning_rate": 1.079945322580537e-05, "loss": 0.5049, "step": 30948 }, { "epoch": 0.9507265075415476, "grad_norm": 0.3710094094276428, "learning_rate": 1.0798971494385266e-05, "loss": 0.554, "step": 30949 }, { "epoch": 0.9507572266764968, "grad_norm": 0.355217844247818, "learning_rate": 1.0798489761099112e-05, "loss": 0.5337, "step": 30950 }, { "epoch": 0.950787945811446, "grad_norm": 0.38979461789131165, "learning_rate": 1.0798008025948025e-05, "loss": 0.6029, "step": 30951 }, { "epoch": 0.9508186649463951, "grad_norm": 0.3462592661380768, "learning_rate": 1.0797526288933137e-05, "loss": 0.5142, "step": 30952 }, { "epoch": 0.9508493840813442, "grad_norm": 0.3972148895263672, "learning_rate": 1.0797044550055571e-05, "loss": 0.5401, "step": 30953 }, { "epoch": 0.9508801032162935, "grad_norm": 0.37774044275283813, "learning_rate": 1.0796562809316451e-05, "loss": 0.5053, "step": 30954 }, { "epoch": 0.9509108223512426, "grad_norm": 0.3407493233680725, "learning_rate": 1.0796081066716904e-05, "loss": 0.5791, "step": 30955 }, { "epoch": 0.9509415414861917, "grad_norm": 0.4368727207183838, "learning_rate": 1.0795599322258053e-05, "loss": 0.582, "step": 30956 }, { "epoch": 0.9509722606211409, "grad_norm": 0.40769559144973755, "learning_rate": 1.0795117575941022e-05, "loss": 0.5361, "step": 30957 }, { "epoch": 0.95100297975609, "grad_norm": 0.34875115752220154, "learning_rate": 1.0794635827766941e-05, "loss": 0.5038, "step": 30958 }, { "epoch": 0.9510336988910393, "grad_norm": 0.37398770451545715, "learning_rate": 1.0794154077736937e-05, "loss": 0.5996, "step": 30959 }, { "epoch": 0.9510644180259884, "grad_norm": 0.35486823320388794, "learning_rate": 1.0793672325852123e-05, "loss": 0.4876, "step": 30960 }, { "epoch": 0.9510951371609375, "grad_norm": 0.38553789258003235, "learning_rate": 1.0793190572113637e-05, "loss": 0.5738, "step": 30961 }, { "epoch": 0.9511258562958868, "grad_norm": 1.3310694694519043, "learning_rate": 1.0792708816522595e-05, "loss": 0.5561, "step": 30962 }, { "epoch": 0.9511565754308359, "grad_norm": 0.40530306100845337, "learning_rate": 1.079222705908013e-05, "loss": 0.6066, "step": 30963 }, { "epoch": 0.951187294565785, "grad_norm": 0.35542547702789307, "learning_rate": 1.0791745299787362e-05, "loss": 0.5, "step": 30964 }, { "epoch": 0.9512180137007342, "grad_norm": 0.4095938503742218, "learning_rate": 1.0791263538645415e-05, "loss": 0.506, "step": 30965 }, { "epoch": 0.9512487328356833, "grad_norm": 0.41990503668785095, "learning_rate": 1.0790781775655422e-05, "loss": 0.6299, "step": 30966 }, { "epoch": 0.9512794519706325, "grad_norm": 0.3851116895675659, "learning_rate": 1.0790300010818497e-05, "loss": 0.5764, "step": 30967 }, { "epoch": 0.9513101711055817, "grad_norm": 0.5524551868438721, "learning_rate": 1.0789818244135777e-05, "loss": 0.5293, "step": 30968 }, { "epoch": 0.9513408902405308, "grad_norm": 0.5793049931526184, "learning_rate": 1.0789336475608377e-05, "loss": 0.548, "step": 30969 }, { "epoch": 0.9513716093754799, "grad_norm": 0.6024867296218872, "learning_rate": 1.0788854705237428e-05, "loss": 0.6463, "step": 30970 }, { "epoch": 0.9514023285104292, "grad_norm": 1.3629859685897827, "learning_rate": 1.0788372933024052e-05, "loss": 0.4293, "step": 30971 }, { "epoch": 0.9514330476453783, "grad_norm": 0.38339847326278687, "learning_rate": 1.0787891158969378e-05, "loss": 0.5568, "step": 30972 }, { "epoch": 0.9514637667803275, "grad_norm": 0.3673935532569885, "learning_rate": 1.0787409383074529e-05, "loss": 0.532, "step": 30973 }, { "epoch": 0.9514944859152766, "grad_norm": 0.3607647716999054, "learning_rate": 1.078692760534063e-05, "loss": 0.5708, "step": 30974 }, { "epoch": 0.9515252050502258, "grad_norm": 0.35404273867607117, "learning_rate": 1.0786445825768807e-05, "loss": 0.5188, "step": 30975 }, { "epoch": 0.951555924185175, "grad_norm": 0.3698168694972992, "learning_rate": 1.0785964044360182e-05, "loss": 0.5737, "step": 30976 }, { "epoch": 0.9515866433201241, "grad_norm": 0.3860945999622345, "learning_rate": 1.0785482261115888e-05, "loss": 0.5207, "step": 30977 }, { "epoch": 0.9516173624550732, "grad_norm": 0.4588707983493805, "learning_rate": 1.0785000476037042e-05, "loss": 0.5623, "step": 30978 }, { "epoch": 0.9516480815900225, "grad_norm": 0.4128292500972748, "learning_rate": 1.0784518689124775e-05, "loss": 0.6081, "step": 30979 }, { "epoch": 0.9516788007249716, "grad_norm": 0.48906612396240234, "learning_rate": 1.0784036900380206e-05, "loss": 0.6055, "step": 30980 }, { "epoch": 0.9517095198599207, "grad_norm": 0.3795595169067383, "learning_rate": 1.0783555109804469e-05, "loss": 0.5491, "step": 30981 }, { "epoch": 0.9517402389948699, "grad_norm": 0.41110214591026306, "learning_rate": 1.0783073317398683e-05, "loss": 0.5164, "step": 30982 }, { "epoch": 0.951770958129819, "grad_norm": 0.3734653890132904, "learning_rate": 1.078259152316397e-05, "loss": 0.5991, "step": 30983 }, { "epoch": 0.9518016772647683, "grad_norm": 0.3675665855407715, "learning_rate": 1.0782109727101467e-05, "loss": 0.5192, "step": 30984 }, { "epoch": 0.9518323963997174, "grad_norm": 0.3482363224029541, "learning_rate": 1.0781627929212286e-05, "loss": 0.5227, "step": 30985 }, { "epoch": 0.9518631155346665, "grad_norm": 0.5390978455543518, "learning_rate": 1.0781146129497561e-05, "loss": 0.5726, "step": 30986 }, { "epoch": 0.9518938346696157, "grad_norm": 0.5234819054603577, "learning_rate": 1.0780664327958413e-05, "loss": 0.5463, "step": 30987 }, { "epoch": 0.9519245538045649, "grad_norm": 0.41684797406196594, "learning_rate": 1.0780182524595973e-05, "loss": 0.5678, "step": 30988 }, { "epoch": 0.951955272939514, "grad_norm": 0.3566279113292694, "learning_rate": 1.0779700719411357e-05, "loss": 0.5812, "step": 30989 }, { "epoch": 0.9519859920744632, "grad_norm": 0.33322274684906006, "learning_rate": 1.0779218912405698e-05, "loss": 0.521, "step": 30990 }, { "epoch": 0.9520167112094123, "grad_norm": 0.3635595142841339, "learning_rate": 1.0778737103580119e-05, "loss": 0.5, "step": 30991 }, { "epoch": 0.9520474303443615, "grad_norm": 0.374435693025589, "learning_rate": 1.0778255292935745e-05, "loss": 0.5973, "step": 30992 }, { "epoch": 0.9520781494793107, "grad_norm": 0.4385298788547516, "learning_rate": 1.0777773480473701e-05, "loss": 0.5203, "step": 30993 }, { "epoch": 0.9521088686142598, "grad_norm": 0.3397486209869385, "learning_rate": 1.077729166619511e-05, "loss": 0.4835, "step": 30994 }, { "epoch": 0.952139587749209, "grad_norm": 0.31903403997421265, "learning_rate": 1.0776809850101107e-05, "loss": 0.4989, "step": 30995 }, { "epoch": 0.9521703068841582, "grad_norm": 0.4623032510280609, "learning_rate": 1.0776328032192804e-05, "loss": 0.5527, "step": 30996 }, { "epoch": 0.9522010260191073, "grad_norm": 0.3519303798675537, "learning_rate": 1.0775846212471337e-05, "loss": 0.5841, "step": 30997 }, { "epoch": 0.9522317451540565, "grad_norm": 0.3923332095146179, "learning_rate": 1.0775364390937826e-05, "loss": 0.5565, "step": 30998 }, { "epoch": 0.9522624642890056, "grad_norm": 0.3302658200263977, "learning_rate": 1.0774882567593395e-05, "loss": 0.5102, "step": 30999 }, { "epoch": 0.9522931834239547, "grad_norm": 0.5237910747528076, "learning_rate": 1.0774400742439176e-05, "loss": 0.5526, "step": 31000 }, { "epoch": 0.952323902558904, "grad_norm": 0.39163893461227417, "learning_rate": 1.0773918915476285e-05, "loss": 0.5363, "step": 31001 }, { "epoch": 0.9523546216938531, "grad_norm": 0.3617075979709625, "learning_rate": 1.0773437086705854e-05, "loss": 0.5873, "step": 31002 }, { "epoch": 0.9523853408288022, "grad_norm": 0.36217838525772095, "learning_rate": 1.0772955256129008e-05, "loss": 0.4849, "step": 31003 }, { "epoch": 0.9524160599637514, "grad_norm": 0.38370147347450256, "learning_rate": 1.077247342374687e-05, "loss": 0.5889, "step": 31004 }, { "epoch": 0.9524467790987006, "grad_norm": 0.40930983424186707, "learning_rate": 1.0771991589560564e-05, "loss": 0.5681, "step": 31005 }, { "epoch": 0.9524774982336497, "grad_norm": 0.3676809072494507, "learning_rate": 1.0771509753571223e-05, "loss": 0.5152, "step": 31006 }, { "epoch": 0.9525082173685989, "grad_norm": 0.3903096914291382, "learning_rate": 1.0771027915779962e-05, "loss": 0.5962, "step": 31007 }, { "epoch": 0.952538936503548, "grad_norm": 0.41065743565559387, "learning_rate": 1.0770546076187914e-05, "loss": 0.6267, "step": 31008 }, { "epoch": 0.9525696556384973, "grad_norm": 0.3601507544517517, "learning_rate": 1.07700642347962e-05, "loss": 0.4774, "step": 31009 }, { "epoch": 0.9526003747734464, "grad_norm": 0.36511093378067017, "learning_rate": 1.0769582391605949e-05, "loss": 0.5602, "step": 31010 }, { "epoch": 0.9526310939083955, "grad_norm": 0.36941829323768616, "learning_rate": 1.0769100546618286e-05, "loss": 0.5243, "step": 31011 }, { "epoch": 0.9526618130433447, "grad_norm": 0.44703564047813416, "learning_rate": 1.0768618699834332e-05, "loss": 0.5197, "step": 31012 }, { "epoch": 0.9526925321782939, "grad_norm": 0.34644606709480286, "learning_rate": 1.0768136851255218e-05, "loss": 0.5097, "step": 31013 }, { "epoch": 0.952723251313243, "grad_norm": 0.36371251940727234, "learning_rate": 1.0767655000882062e-05, "loss": 0.5846, "step": 31014 }, { "epoch": 0.9527539704481922, "grad_norm": 0.3661706745624542, "learning_rate": 1.0767173148715999e-05, "loss": 0.5753, "step": 31015 }, { "epoch": 0.9527846895831413, "grad_norm": 0.3599836826324463, "learning_rate": 1.0766691294758148e-05, "loss": 0.5966, "step": 31016 }, { "epoch": 0.9528154087180905, "grad_norm": 0.3794582784175873, "learning_rate": 1.0766209439009636e-05, "loss": 0.5155, "step": 31017 }, { "epoch": 0.9528461278530397, "grad_norm": 0.474394291639328, "learning_rate": 1.076572758147159e-05, "loss": 0.554, "step": 31018 }, { "epoch": 0.9528768469879888, "grad_norm": 0.4326396584510803, "learning_rate": 1.076524572214513e-05, "loss": 0.4917, "step": 31019 }, { "epoch": 0.952907566122938, "grad_norm": 0.36827903985977173, "learning_rate": 1.076476386103139e-05, "loss": 0.5768, "step": 31020 }, { "epoch": 0.9529382852578872, "grad_norm": 0.3426353931427002, "learning_rate": 1.0764281998131485e-05, "loss": 0.5188, "step": 31021 }, { "epoch": 0.9529690043928363, "grad_norm": 0.34185877442359924, "learning_rate": 1.0763800133446551e-05, "loss": 0.5245, "step": 31022 }, { "epoch": 0.9529997235277855, "grad_norm": 0.42596349120140076, "learning_rate": 1.0763318266977705e-05, "loss": 0.4824, "step": 31023 }, { "epoch": 0.9530304426627346, "grad_norm": 0.46154919266700745, "learning_rate": 1.0762836398726077e-05, "loss": 0.4264, "step": 31024 }, { "epoch": 0.9530611617976837, "grad_norm": 0.38565388321876526, "learning_rate": 1.0762354528692792e-05, "loss": 0.4503, "step": 31025 }, { "epoch": 0.953091880932633, "grad_norm": 0.34229665994644165, "learning_rate": 1.0761872656878971e-05, "loss": 0.5313, "step": 31026 }, { "epoch": 0.9531226000675821, "grad_norm": 0.3626265823841095, "learning_rate": 1.0761390783285748e-05, "loss": 0.5622, "step": 31027 }, { "epoch": 0.9531533192025312, "grad_norm": 0.43914955854415894, "learning_rate": 1.0760908907914242e-05, "loss": 0.6465, "step": 31028 }, { "epoch": 0.9531840383374804, "grad_norm": 0.3192178010940552, "learning_rate": 1.0760427030765582e-05, "loss": 0.5019, "step": 31029 }, { "epoch": 0.9532147574724296, "grad_norm": 0.36597269773483276, "learning_rate": 1.0759945151840888e-05, "loss": 0.4591, "step": 31030 }, { "epoch": 0.9532454766073787, "grad_norm": 0.45514678955078125, "learning_rate": 1.075946327114129e-05, "loss": 0.5218, "step": 31031 }, { "epoch": 0.9532761957423279, "grad_norm": 0.35683944821357727, "learning_rate": 1.0758981388667913e-05, "loss": 0.564, "step": 31032 }, { "epoch": 0.953306914877277, "grad_norm": 0.39912697672843933, "learning_rate": 1.0758499504421884e-05, "loss": 0.5235, "step": 31033 }, { "epoch": 0.9533376340122263, "grad_norm": 0.334819495677948, "learning_rate": 1.0758017618404326e-05, "loss": 0.5945, "step": 31034 }, { "epoch": 0.9533683531471754, "grad_norm": 0.40810495615005493, "learning_rate": 1.0757535730616362e-05, "loss": 0.5342, "step": 31035 }, { "epoch": 0.9533990722821245, "grad_norm": 0.38793641328811646, "learning_rate": 1.0757053841059123e-05, "loss": 0.5975, "step": 31036 }, { "epoch": 0.9534297914170737, "grad_norm": 0.4117545187473297, "learning_rate": 1.0756571949733729e-05, "loss": 0.5469, "step": 31037 }, { "epoch": 0.9534605105520229, "grad_norm": 0.3754781484603882, "learning_rate": 1.0756090056641311e-05, "loss": 0.6148, "step": 31038 }, { "epoch": 0.953491229686972, "grad_norm": 0.35030680894851685, "learning_rate": 1.075560816178299e-05, "loss": 0.4898, "step": 31039 }, { "epoch": 0.9535219488219212, "grad_norm": 0.47073888778686523, "learning_rate": 1.0755126265159896e-05, "loss": 0.5989, "step": 31040 }, { "epoch": 0.9535526679568703, "grad_norm": 0.37108463048934937, "learning_rate": 1.0754644366773151e-05, "loss": 0.5317, "step": 31041 }, { "epoch": 0.9535833870918194, "grad_norm": 0.5055146217346191, "learning_rate": 1.0754162466623882e-05, "loss": 0.5458, "step": 31042 }, { "epoch": 0.9536141062267687, "grad_norm": 0.3737885057926178, "learning_rate": 1.0753680564713209e-05, "loss": 0.53, "step": 31043 }, { "epoch": 0.9536448253617178, "grad_norm": 0.35530543327331543, "learning_rate": 1.0753198661042266e-05, "loss": 0.5207, "step": 31044 }, { "epoch": 0.953675544496667, "grad_norm": 0.3642294406890869, "learning_rate": 1.0752716755612176e-05, "loss": 0.5585, "step": 31045 }, { "epoch": 0.9537062636316161, "grad_norm": 0.3617432713508606, "learning_rate": 1.0752234848424061e-05, "loss": 0.5052, "step": 31046 }, { "epoch": 0.9537369827665653, "grad_norm": 0.3821350038051605, "learning_rate": 1.075175293947905e-05, "loss": 0.5941, "step": 31047 }, { "epoch": 0.9537677019015145, "grad_norm": 0.3533734083175659, "learning_rate": 1.0751271028778267e-05, "loss": 0.5998, "step": 31048 }, { "epoch": 0.9537984210364636, "grad_norm": 0.36095690727233887, "learning_rate": 1.075078911632284e-05, "loss": 0.5433, "step": 31049 }, { "epoch": 0.9538291401714127, "grad_norm": 0.4086596667766571, "learning_rate": 1.0750307202113893e-05, "loss": 0.5026, "step": 31050 }, { "epoch": 0.953859859306362, "grad_norm": 0.38475295901298523, "learning_rate": 1.074982528615255e-05, "loss": 0.5033, "step": 31051 }, { "epoch": 0.9538905784413111, "grad_norm": 0.37277552485466003, "learning_rate": 1.0749343368439937e-05, "loss": 0.5188, "step": 31052 }, { "epoch": 0.9539212975762602, "grad_norm": 0.42490410804748535, "learning_rate": 1.074886144897718e-05, "loss": 0.6137, "step": 31053 }, { "epoch": 0.9539520167112094, "grad_norm": 0.35291436314582825, "learning_rate": 1.0748379527765404e-05, "loss": 0.5365, "step": 31054 }, { "epoch": 0.9539827358461586, "grad_norm": 0.3947669565677643, "learning_rate": 1.0747897604805737e-05, "loss": 0.5262, "step": 31055 }, { "epoch": 0.9540134549811077, "grad_norm": 0.35671401023864746, "learning_rate": 1.0747415680099302e-05, "loss": 0.5634, "step": 31056 }, { "epoch": 0.9540441741160569, "grad_norm": 0.3493051826953888, "learning_rate": 1.0746933753647227e-05, "loss": 0.537, "step": 31057 }, { "epoch": 0.954074893251006, "grad_norm": 0.34591493010520935, "learning_rate": 1.0746451825450635e-05, "loss": 0.5009, "step": 31058 }, { "epoch": 0.9541056123859553, "grad_norm": 0.36736592650413513, "learning_rate": 1.0745969895510652e-05, "loss": 0.5417, "step": 31059 }, { "epoch": 0.9541363315209044, "grad_norm": 0.40323376655578613, "learning_rate": 1.0745487963828408e-05, "loss": 0.5871, "step": 31060 }, { "epoch": 0.9541670506558535, "grad_norm": 0.3677736520767212, "learning_rate": 1.0745006030405021e-05, "loss": 0.5425, "step": 31061 }, { "epoch": 0.9541977697908027, "grad_norm": 0.37404665350914, "learning_rate": 1.0744524095241617e-05, "loss": 0.6127, "step": 31062 }, { "epoch": 0.9542284889257518, "grad_norm": 0.3697444498538971, "learning_rate": 1.0744042158339332e-05, "loss": 0.5496, "step": 31063 }, { "epoch": 0.954259208060701, "grad_norm": 0.3573072552680969, "learning_rate": 1.0743560219699278e-05, "loss": 0.5034, "step": 31064 }, { "epoch": 0.9542899271956502, "grad_norm": 0.38660964369773865, "learning_rate": 1.0743078279322594e-05, "loss": 0.5313, "step": 31065 }, { "epoch": 0.9543206463305993, "grad_norm": 0.3655413091182709, "learning_rate": 1.0742596337210393e-05, "loss": 0.6189, "step": 31066 }, { "epoch": 0.9543513654655484, "grad_norm": 0.39382922649383545, "learning_rate": 1.0742114393363812e-05, "loss": 0.5727, "step": 31067 }, { "epoch": 0.9543820846004977, "grad_norm": 0.42909032106399536, "learning_rate": 1.0741632447783967e-05, "loss": 0.5323, "step": 31068 }, { "epoch": 0.9544128037354468, "grad_norm": 0.38690218329429626, "learning_rate": 1.0741150500471988e-05, "loss": 0.518, "step": 31069 }, { "epoch": 0.954443522870396, "grad_norm": 0.35824382305145264, "learning_rate": 1.0740668551429004e-05, "loss": 0.5567, "step": 31070 }, { "epoch": 0.9544742420053451, "grad_norm": 0.3571988344192505, "learning_rate": 1.0740186600656131e-05, "loss": 0.6099, "step": 31071 }, { "epoch": 0.9545049611402943, "grad_norm": 0.36972352862358093, "learning_rate": 1.0739704648154503e-05, "loss": 0.5454, "step": 31072 }, { "epoch": 0.9545356802752435, "grad_norm": 0.3783293664455414, "learning_rate": 1.0739222693925244e-05, "loss": 0.4876, "step": 31073 }, { "epoch": 0.9545663994101926, "grad_norm": 0.3639947474002838, "learning_rate": 1.0738740737969477e-05, "loss": 0.5029, "step": 31074 }, { "epoch": 0.9545971185451417, "grad_norm": 0.3684832453727722, "learning_rate": 1.073825878028833e-05, "loss": 0.5868, "step": 31075 }, { "epoch": 0.954627837680091, "grad_norm": 0.3908505439758301, "learning_rate": 1.073777682088293e-05, "loss": 0.5761, "step": 31076 }, { "epoch": 0.9546585568150401, "grad_norm": 0.39796268939971924, "learning_rate": 1.07372948597544e-05, "loss": 0.5164, "step": 31077 }, { "epoch": 0.9546892759499892, "grad_norm": 0.4063141345977783, "learning_rate": 1.0736812896903865e-05, "loss": 0.5804, "step": 31078 }, { "epoch": 0.9547199950849384, "grad_norm": 0.3271752595901489, "learning_rate": 1.0736330932332454e-05, "loss": 0.5427, "step": 31079 }, { "epoch": 0.9547507142198876, "grad_norm": 0.3891965448856354, "learning_rate": 1.073584896604129e-05, "loss": 0.5576, "step": 31080 }, { "epoch": 0.9547814333548367, "grad_norm": 0.3719733953475952, "learning_rate": 1.0735366998031496e-05, "loss": 0.5433, "step": 31081 }, { "epoch": 0.9548121524897859, "grad_norm": 0.418241024017334, "learning_rate": 1.0734885028304202e-05, "loss": 0.5817, "step": 31082 }, { "epoch": 0.954842871624735, "grad_norm": 0.367776483297348, "learning_rate": 1.0734403056860536e-05, "loss": 0.5449, "step": 31083 }, { "epoch": 0.9548735907596843, "grad_norm": 0.7414613366127014, "learning_rate": 1.0733921083701617e-05, "loss": 0.6168, "step": 31084 }, { "epoch": 0.9549043098946334, "grad_norm": 0.37466633319854736, "learning_rate": 1.0733439108828578e-05, "loss": 0.5515, "step": 31085 }, { "epoch": 0.9549350290295825, "grad_norm": 0.3651575744152069, "learning_rate": 1.0732957132242539e-05, "loss": 0.5434, "step": 31086 }, { "epoch": 0.9549657481645317, "grad_norm": 0.4150734841823578, "learning_rate": 1.0732475153944626e-05, "loss": 0.5756, "step": 31087 }, { "epoch": 0.9549964672994808, "grad_norm": 0.4145182967185974, "learning_rate": 1.0731993173935968e-05, "loss": 0.5323, "step": 31088 }, { "epoch": 0.95502718643443, "grad_norm": 0.3986591696739197, "learning_rate": 1.0731511192217687e-05, "loss": 0.6459, "step": 31089 }, { "epoch": 0.9550579055693792, "grad_norm": 0.5021080374717712, "learning_rate": 1.0731029208790913e-05, "loss": 0.5727, "step": 31090 }, { "epoch": 0.9550886247043283, "grad_norm": 0.3459963798522949, "learning_rate": 1.0730547223656766e-05, "loss": 0.4805, "step": 31091 }, { "epoch": 0.9551193438392774, "grad_norm": 0.3794079124927521, "learning_rate": 1.0730065236816378e-05, "loss": 0.5623, "step": 31092 }, { "epoch": 0.9551500629742267, "grad_norm": 0.42028912901878357, "learning_rate": 1.072958324827087e-05, "loss": 0.5187, "step": 31093 }, { "epoch": 0.9551807821091758, "grad_norm": 0.36280569434165955, "learning_rate": 1.072910125802137e-05, "loss": 0.5551, "step": 31094 }, { "epoch": 0.955211501244125, "grad_norm": 0.4109530746936798, "learning_rate": 1.0728619266069003e-05, "loss": 0.4859, "step": 31095 }, { "epoch": 0.9552422203790741, "grad_norm": 0.3339897394180298, "learning_rate": 1.0728137272414896e-05, "loss": 0.4998, "step": 31096 }, { "epoch": 0.9552729395140233, "grad_norm": 0.367399126291275, "learning_rate": 1.0727655277060172e-05, "loss": 0.5763, "step": 31097 }, { "epoch": 0.9553036586489725, "grad_norm": 0.3619154691696167, "learning_rate": 1.0727173280005957e-05, "loss": 0.5249, "step": 31098 }, { "epoch": 0.9553343777839216, "grad_norm": 0.3608294725418091, "learning_rate": 1.0726691281253379e-05, "loss": 0.5534, "step": 31099 }, { "epoch": 0.9553650969188707, "grad_norm": 0.38171514868736267, "learning_rate": 1.072620928080356e-05, "loss": 0.5525, "step": 31100 }, { "epoch": 0.95539581605382, "grad_norm": 0.36142656207084656, "learning_rate": 1.0725727278657634e-05, "loss": 0.5284, "step": 31101 }, { "epoch": 0.9554265351887691, "grad_norm": 0.4538683295249939, "learning_rate": 1.0725245274816721e-05, "loss": 0.5329, "step": 31102 }, { "epoch": 0.9554572543237182, "grad_norm": 0.40785178542137146, "learning_rate": 1.0724763269281943e-05, "loss": 0.4813, "step": 31103 }, { "epoch": 0.9554879734586674, "grad_norm": 0.3817589282989502, "learning_rate": 1.0724281262054435e-05, "loss": 0.5488, "step": 31104 }, { "epoch": 0.9555186925936165, "grad_norm": 0.40095341205596924, "learning_rate": 1.0723799253135311e-05, "loss": 0.5557, "step": 31105 }, { "epoch": 0.9555494117285658, "grad_norm": 0.384059876203537, "learning_rate": 1.0723317242525708e-05, "loss": 0.4778, "step": 31106 }, { "epoch": 0.9555801308635149, "grad_norm": 0.35819414258003235, "learning_rate": 1.0722835230226744e-05, "loss": 0.5715, "step": 31107 }, { "epoch": 0.955610849998464, "grad_norm": 0.36972275376319885, "learning_rate": 1.0722353216239552e-05, "loss": 0.5393, "step": 31108 }, { "epoch": 0.9556415691334132, "grad_norm": 0.3692964017391205, "learning_rate": 1.0721871200565247e-05, "loss": 0.5224, "step": 31109 }, { "epoch": 0.9556722882683624, "grad_norm": 0.3484252095222473, "learning_rate": 1.0721389183204968e-05, "loss": 0.3976, "step": 31110 }, { "epoch": 0.9557030074033115, "grad_norm": 0.4097770154476166, "learning_rate": 1.072090716415983e-05, "loss": 0.5451, "step": 31111 }, { "epoch": 0.9557337265382607, "grad_norm": 0.35587674379348755, "learning_rate": 1.0720425143430969e-05, "loss": 0.5148, "step": 31112 }, { "epoch": 0.9557644456732098, "grad_norm": 0.39030206203460693, "learning_rate": 1.07199431210195e-05, "loss": 0.5867, "step": 31113 }, { "epoch": 0.955795164808159, "grad_norm": 0.33973848819732666, "learning_rate": 1.0719461096926554e-05, "loss": 0.6168, "step": 31114 }, { "epoch": 0.9558258839431082, "grad_norm": 0.38966819643974304, "learning_rate": 1.0718979071153256e-05, "loss": 0.6515, "step": 31115 }, { "epoch": 0.9558566030780573, "grad_norm": 0.4711534380912781, "learning_rate": 1.0718497043700731e-05, "loss": 0.5674, "step": 31116 }, { "epoch": 0.9558873222130064, "grad_norm": 0.3775947093963623, "learning_rate": 1.0718015014570108e-05, "loss": 0.5551, "step": 31117 }, { "epoch": 0.9559180413479557, "grad_norm": 0.3504534959793091, "learning_rate": 1.071753298376251e-05, "loss": 0.544, "step": 31118 }, { "epoch": 0.9559487604829048, "grad_norm": 0.39083459973335266, "learning_rate": 1.0717050951279062e-05, "loss": 0.5057, "step": 31119 }, { "epoch": 0.955979479617854, "grad_norm": 0.4404184818267822, "learning_rate": 1.0716568917120896e-05, "loss": 0.5252, "step": 31120 }, { "epoch": 0.9560101987528031, "grad_norm": 0.36017128825187683, "learning_rate": 1.0716086881289129e-05, "loss": 0.5563, "step": 31121 }, { "epoch": 0.9560409178877523, "grad_norm": 0.408235639333725, "learning_rate": 1.0715604843784893e-05, "loss": 0.5153, "step": 31122 }, { "epoch": 0.9560716370227015, "grad_norm": 0.3616859018802643, "learning_rate": 1.0715122804609311e-05, "loss": 0.5031, "step": 31123 }, { "epoch": 0.9561023561576506, "grad_norm": 0.35563409328460693, "learning_rate": 1.071464076376351e-05, "loss": 0.5001, "step": 31124 }, { "epoch": 0.9561330752925997, "grad_norm": 0.3692292869091034, "learning_rate": 1.0714158721248616e-05, "loss": 0.515, "step": 31125 }, { "epoch": 0.956163794427549, "grad_norm": 0.40356799960136414, "learning_rate": 1.0713676677065754e-05, "loss": 0.5562, "step": 31126 }, { "epoch": 0.9561945135624981, "grad_norm": 0.3936729431152344, "learning_rate": 1.071319463121605e-05, "loss": 0.5605, "step": 31127 }, { "epoch": 0.9562252326974472, "grad_norm": 0.37917360663414, "learning_rate": 1.0712712583700632e-05, "loss": 0.5354, "step": 31128 }, { "epoch": 0.9562559518323964, "grad_norm": 0.36767256259918213, "learning_rate": 1.0712230534520622e-05, "loss": 0.4948, "step": 31129 }, { "epoch": 0.9562866709673455, "grad_norm": 0.4002554416656494, "learning_rate": 1.0711748483677149e-05, "loss": 0.5618, "step": 31130 }, { "epoch": 0.9563173901022948, "grad_norm": 0.3604064881801605, "learning_rate": 1.0711266431171338e-05, "loss": 0.5199, "step": 31131 }, { "epoch": 0.9563481092372439, "grad_norm": 0.3642093241214752, "learning_rate": 1.0710784377004313e-05, "loss": 0.6542, "step": 31132 }, { "epoch": 0.956378828372193, "grad_norm": 0.39119064807891846, "learning_rate": 1.0710302321177204e-05, "loss": 0.5906, "step": 31133 }, { "epoch": 0.9564095475071422, "grad_norm": 0.3996908962726593, "learning_rate": 1.0709820263691131e-05, "loss": 0.5383, "step": 31134 }, { "epoch": 0.9564402666420914, "grad_norm": 0.36287838220596313, "learning_rate": 1.0709338204547223e-05, "loss": 0.4934, "step": 31135 }, { "epoch": 0.9564709857770405, "grad_norm": 0.3638654947280884, "learning_rate": 1.0708856143746607e-05, "loss": 0.568, "step": 31136 }, { "epoch": 0.9565017049119897, "grad_norm": 0.3651708960533142, "learning_rate": 1.0708374081290409e-05, "loss": 0.5024, "step": 31137 }, { "epoch": 0.9565324240469388, "grad_norm": 0.39108192920684814, "learning_rate": 1.0707892017179755e-05, "loss": 0.5342, "step": 31138 }, { "epoch": 0.956563143181888, "grad_norm": 0.3625302016735077, "learning_rate": 1.0707409951415768e-05, "loss": 0.5392, "step": 31139 }, { "epoch": 0.9565938623168372, "grad_norm": 0.5079867243766785, "learning_rate": 1.0706927883999579e-05, "loss": 0.446, "step": 31140 }, { "epoch": 0.9566245814517863, "grad_norm": 0.368571400642395, "learning_rate": 1.0706445814932306e-05, "loss": 0.4762, "step": 31141 }, { "epoch": 0.9566553005867354, "grad_norm": 0.35284143686294556, "learning_rate": 1.0705963744215081e-05, "loss": 0.5413, "step": 31142 }, { "epoch": 0.9566860197216847, "grad_norm": 0.38096883893013, "learning_rate": 1.0705481671849028e-05, "loss": 0.6281, "step": 31143 }, { "epoch": 0.9567167388566338, "grad_norm": 0.3574945032596588, "learning_rate": 1.0704999597835275e-05, "loss": 0.5185, "step": 31144 }, { "epoch": 0.956747457991583, "grad_norm": 0.39579108357429504, "learning_rate": 1.0704517522174945e-05, "loss": 0.5769, "step": 31145 }, { "epoch": 0.9567781771265321, "grad_norm": 0.46602070331573486, "learning_rate": 1.0704035444869165e-05, "loss": 0.587, "step": 31146 }, { "epoch": 0.9568088962614812, "grad_norm": 0.4970080554485321, "learning_rate": 1.070355336591906e-05, "loss": 0.6237, "step": 31147 }, { "epoch": 0.9568396153964305, "grad_norm": 0.4302845299243927, "learning_rate": 1.0703071285325759e-05, "loss": 0.5996, "step": 31148 }, { "epoch": 0.9568703345313796, "grad_norm": 0.4191899299621582, "learning_rate": 1.0702589203090386e-05, "loss": 0.5593, "step": 31149 }, { "epoch": 0.9569010536663287, "grad_norm": 0.4484550654888153, "learning_rate": 1.0702107119214064e-05, "loss": 0.6234, "step": 31150 }, { "epoch": 0.956931772801278, "grad_norm": 0.3731340169906616, "learning_rate": 1.0701625033697924e-05, "loss": 0.4936, "step": 31151 }, { "epoch": 0.9569624919362271, "grad_norm": 0.4104960262775421, "learning_rate": 1.0701142946543091e-05, "loss": 0.5503, "step": 31152 }, { "epoch": 0.9569932110711762, "grad_norm": 0.3299347460269928, "learning_rate": 1.0700660857750689e-05, "loss": 0.5679, "step": 31153 }, { "epoch": 0.9570239302061254, "grad_norm": 0.42125409841537476, "learning_rate": 1.0700178767321839e-05, "loss": 0.5684, "step": 31154 }, { "epoch": 0.9570546493410745, "grad_norm": 0.396264910697937, "learning_rate": 1.0699696675257679e-05, "loss": 0.5263, "step": 31155 }, { "epoch": 0.9570853684760238, "grad_norm": 0.3439236581325531, "learning_rate": 1.0699214581559327e-05, "loss": 0.5236, "step": 31156 }, { "epoch": 0.9571160876109729, "grad_norm": 0.3468596935272217, "learning_rate": 1.069873248622791e-05, "loss": 0.567, "step": 31157 }, { "epoch": 0.957146806745922, "grad_norm": 0.3387076258659363, "learning_rate": 1.0698250389264556e-05, "loss": 0.5726, "step": 31158 }, { "epoch": 0.9571775258808712, "grad_norm": 0.3570272624492645, "learning_rate": 1.0697768290670388e-05, "loss": 0.4543, "step": 31159 }, { "epoch": 0.9572082450158204, "grad_norm": 0.4091201722621918, "learning_rate": 1.0697286190446534e-05, "loss": 0.6373, "step": 31160 }, { "epoch": 0.9572389641507695, "grad_norm": 0.34831514954566956, "learning_rate": 1.0696804088594117e-05, "loss": 0.5589, "step": 31161 }, { "epoch": 0.9572696832857187, "grad_norm": 0.34645330905914307, "learning_rate": 1.0696321985114268e-05, "loss": 0.5493, "step": 31162 }, { "epoch": 0.9573004024206678, "grad_norm": 0.3438960015773773, "learning_rate": 1.069583988000811e-05, "loss": 0.5094, "step": 31163 }, { "epoch": 0.957331121555617, "grad_norm": 0.3519696593284607, "learning_rate": 1.069535777327677e-05, "loss": 0.5094, "step": 31164 }, { "epoch": 0.9573618406905662, "grad_norm": 0.3144344091415405, "learning_rate": 1.0694875664921372e-05, "loss": 0.4798, "step": 31165 }, { "epoch": 0.9573925598255153, "grad_norm": 0.363679975271225, "learning_rate": 1.0694393554943046e-05, "loss": 0.5466, "step": 31166 }, { "epoch": 0.9574232789604644, "grad_norm": 0.38615596294403076, "learning_rate": 1.0693911443342912e-05, "loss": 0.5188, "step": 31167 }, { "epoch": 0.9574539980954136, "grad_norm": 0.3939090967178345, "learning_rate": 1.06934293301221e-05, "loss": 0.5999, "step": 31168 }, { "epoch": 0.9574847172303628, "grad_norm": 0.3297935128211975, "learning_rate": 1.0692947215281737e-05, "loss": 0.4745, "step": 31169 }, { "epoch": 0.957515436365312, "grad_norm": 0.3635217547416687, "learning_rate": 1.0692465098822947e-05, "loss": 0.6292, "step": 31170 }, { "epoch": 0.9575461555002611, "grad_norm": 0.7239215970039368, "learning_rate": 1.0691982980746855e-05, "loss": 0.5553, "step": 31171 }, { "epoch": 0.9575768746352102, "grad_norm": 0.3399271070957184, "learning_rate": 1.069150086105459e-05, "loss": 0.4836, "step": 31172 }, { "epoch": 0.9576075937701595, "grad_norm": 0.3971153497695923, "learning_rate": 1.0691018739747274e-05, "loss": 0.5727, "step": 31173 }, { "epoch": 0.9576383129051086, "grad_norm": 0.3430200219154358, "learning_rate": 1.0690536616826037e-05, "loss": 0.4668, "step": 31174 }, { "epoch": 0.9576690320400577, "grad_norm": 0.3712984025478363, "learning_rate": 1.0690054492292004e-05, "loss": 0.5567, "step": 31175 }, { "epoch": 0.9576997511750069, "grad_norm": 0.5242653489112854, "learning_rate": 1.0689572366146302e-05, "loss": 0.5183, "step": 31176 }, { "epoch": 0.9577304703099561, "grad_norm": 0.3577764332294464, "learning_rate": 1.0689090238390052e-05, "loss": 0.4832, "step": 31177 }, { "epoch": 0.9577611894449052, "grad_norm": 0.5000536441802979, "learning_rate": 1.0688608109024385e-05, "loss": 0.4822, "step": 31178 }, { "epoch": 0.9577919085798544, "grad_norm": 0.3390420377254486, "learning_rate": 1.0688125978050425e-05, "loss": 0.5216, "step": 31179 }, { "epoch": 0.9578226277148035, "grad_norm": 0.4083571135997772, "learning_rate": 1.0687643845469302e-05, "loss": 0.5534, "step": 31180 }, { "epoch": 0.9578533468497528, "grad_norm": 0.3600711226463318, "learning_rate": 1.0687161711282134e-05, "loss": 0.5378, "step": 31181 }, { "epoch": 0.9578840659847019, "grad_norm": 0.42300164699554443, "learning_rate": 1.0686679575490057e-05, "loss": 0.4767, "step": 31182 }, { "epoch": 0.957914785119651, "grad_norm": 0.43008291721343994, "learning_rate": 1.068619743809419e-05, "loss": 0.5286, "step": 31183 }, { "epoch": 0.9579455042546002, "grad_norm": 0.3900984525680542, "learning_rate": 1.0685715299095661e-05, "loss": 0.5527, "step": 31184 }, { "epoch": 0.9579762233895494, "grad_norm": 0.47899574041366577, "learning_rate": 1.0685233158495596e-05, "loss": 0.5348, "step": 31185 }, { "epoch": 0.9580069425244985, "grad_norm": 0.4063452184200287, "learning_rate": 1.0684751016295119e-05, "loss": 0.5992, "step": 31186 }, { "epoch": 0.9580376616594477, "grad_norm": 0.34418004751205444, "learning_rate": 1.068426887249536e-05, "loss": 0.5578, "step": 31187 }, { "epoch": 0.9580683807943968, "grad_norm": 0.37643858790397644, "learning_rate": 1.0683786727097443e-05, "loss": 0.5171, "step": 31188 }, { "epoch": 0.9580990999293459, "grad_norm": 0.3899693787097931, "learning_rate": 1.0683304580102493e-05, "loss": 0.5823, "step": 31189 }, { "epoch": 0.9581298190642952, "grad_norm": 0.36122435331344604, "learning_rate": 1.0682822431511639e-05, "loss": 0.6121, "step": 31190 }, { "epoch": 0.9581605381992443, "grad_norm": 0.44431084394454956, "learning_rate": 1.0682340281326002e-05, "loss": 0.5048, "step": 31191 }, { "epoch": 0.9581912573341934, "grad_norm": 0.3539047837257385, "learning_rate": 1.0681858129546718e-05, "loss": 0.5662, "step": 31192 }, { "epoch": 0.9582219764691426, "grad_norm": 0.37932583689689636, "learning_rate": 1.0681375976174901e-05, "loss": 0.5185, "step": 31193 }, { "epoch": 0.9582526956040918, "grad_norm": 0.41849416494369507, "learning_rate": 1.0680893821211687e-05, "loss": 0.552, "step": 31194 }, { "epoch": 0.958283414739041, "grad_norm": 0.35941359400749207, "learning_rate": 1.0680411664658194e-05, "loss": 0.5362, "step": 31195 }, { "epoch": 0.9583141338739901, "grad_norm": 0.31945210695266724, "learning_rate": 1.0679929506515557e-05, "loss": 0.5791, "step": 31196 }, { "epoch": 0.9583448530089392, "grad_norm": 0.40678009390830994, "learning_rate": 1.0679447346784892e-05, "loss": 0.5712, "step": 31197 }, { "epoch": 0.9583755721438885, "grad_norm": 0.3873791992664337, "learning_rate": 1.0678965185467333e-05, "loss": 0.5926, "step": 31198 }, { "epoch": 0.9584062912788376, "grad_norm": 0.3951398730278015, "learning_rate": 1.0678483022564004e-05, "loss": 0.5294, "step": 31199 }, { "epoch": 0.9584370104137867, "grad_norm": 0.3724425733089447, "learning_rate": 1.067800085807603e-05, "loss": 0.5407, "step": 31200 }, { "epoch": 0.9584677295487359, "grad_norm": 0.4112881124019623, "learning_rate": 1.0677518692004539e-05, "loss": 0.5424, "step": 31201 }, { "epoch": 0.9584984486836851, "grad_norm": 0.40565699338912964, "learning_rate": 1.0677036524350652e-05, "loss": 0.5419, "step": 31202 }, { "epoch": 0.9585291678186342, "grad_norm": 0.42522022128105164, "learning_rate": 1.0676554355115503e-05, "loss": 0.545, "step": 31203 }, { "epoch": 0.9585598869535834, "grad_norm": 0.3484309911727905, "learning_rate": 1.067607218430021e-05, "loss": 0.5767, "step": 31204 }, { "epoch": 0.9585906060885325, "grad_norm": 0.354068398475647, "learning_rate": 1.0675590011905908e-05, "loss": 0.5406, "step": 31205 }, { "epoch": 0.9586213252234818, "grad_norm": 0.4337655007839203, "learning_rate": 1.0675107837933714e-05, "loss": 0.602, "step": 31206 }, { "epoch": 0.9586520443584309, "grad_norm": 0.3696839213371277, "learning_rate": 1.0674625662384762e-05, "loss": 0.4514, "step": 31207 }, { "epoch": 0.95868276349338, "grad_norm": 0.37757787108421326, "learning_rate": 1.0674143485260169e-05, "loss": 0.5718, "step": 31208 }, { "epoch": 0.9587134826283292, "grad_norm": 0.3516896963119507, "learning_rate": 1.0673661306561069e-05, "loss": 0.4962, "step": 31209 }, { "epoch": 0.9587442017632783, "grad_norm": 0.39465653896331787, "learning_rate": 1.0673179126288588e-05, "loss": 0.5584, "step": 31210 }, { "epoch": 0.9587749208982275, "grad_norm": 0.3471616804599762, "learning_rate": 1.067269694444385e-05, "loss": 0.5448, "step": 31211 }, { "epoch": 0.9588056400331767, "grad_norm": 0.3962997794151306, "learning_rate": 1.067221476102798e-05, "loss": 0.554, "step": 31212 }, { "epoch": 0.9588363591681258, "grad_norm": 0.38729000091552734, "learning_rate": 1.0671732576042107e-05, "loss": 0.5405, "step": 31213 }, { "epoch": 0.9588670783030749, "grad_norm": 0.33640921115875244, "learning_rate": 1.0671250389487356e-05, "loss": 0.5183, "step": 31214 }, { "epoch": 0.9588977974380242, "grad_norm": 0.4182981848716736, "learning_rate": 1.0670768201364852e-05, "loss": 0.5879, "step": 31215 }, { "epoch": 0.9589285165729733, "grad_norm": 0.39197954535484314, "learning_rate": 1.0670286011675721e-05, "loss": 0.5439, "step": 31216 }, { "epoch": 0.9589592357079225, "grad_norm": 0.3769283890724182, "learning_rate": 1.0669803820421092e-05, "loss": 0.5693, "step": 31217 }, { "epoch": 0.9589899548428716, "grad_norm": 0.3680107891559601, "learning_rate": 1.066932162760209e-05, "loss": 0.5372, "step": 31218 }, { "epoch": 0.9590206739778208, "grad_norm": 0.3899257779121399, "learning_rate": 1.066883943321984e-05, "loss": 0.579, "step": 31219 }, { "epoch": 0.95905139311277, "grad_norm": 0.4734916388988495, "learning_rate": 1.0668357237275467e-05, "loss": 0.5599, "step": 31220 }, { "epoch": 0.9590821122477191, "grad_norm": 0.36482611298561096, "learning_rate": 1.06678750397701e-05, "loss": 0.4969, "step": 31221 }, { "epoch": 0.9591128313826682, "grad_norm": 0.437812864780426, "learning_rate": 1.0667392840704863e-05, "loss": 0.6634, "step": 31222 }, { "epoch": 0.9591435505176175, "grad_norm": 0.3597114384174347, "learning_rate": 1.0666910640080886e-05, "loss": 0.5275, "step": 31223 }, { "epoch": 0.9591742696525666, "grad_norm": 0.3695373833179474, "learning_rate": 1.066642843789929e-05, "loss": 0.5357, "step": 31224 }, { "epoch": 0.9592049887875157, "grad_norm": 0.35906362533569336, "learning_rate": 1.0665946234161204e-05, "loss": 0.5686, "step": 31225 }, { "epoch": 0.9592357079224649, "grad_norm": 0.3392479419708252, "learning_rate": 1.0665464028867757e-05, "loss": 0.558, "step": 31226 }, { "epoch": 0.959266427057414, "grad_norm": 0.4014118015766144, "learning_rate": 1.0664981822020066e-05, "loss": 0.577, "step": 31227 }, { "epoch": 0.9592971461923632, "grad_norm": 0.34549570083618164, "learning_rate": 1.066449961361927e-05, "loss": 0.521, "step": 31228 }, { "epoch": 0.9593278653273124, "grad_norm": 0.3726067543029785, "learning_rate": 1.0664017403666485e-05, "loss": 0.4754, "step": 31229 }, { "epoch": 0.9593585844622615, "grad_norm": 0.3768826127052307, "learning_rate": 1.0663535192162844e-05, "loss": 0.5152, "step": 31230 }, { "epoch": 0.9593893035972108, "grad_norm": 0.3453803062438965, "learning_rate": 1.0663052979109467e-05, "loss": 0.5252, "step": 31231 }, { "epoch": 0.9594200227321599, "grad_norm": 0.37634333968162537, "learning_rate": 1.0662570764507488e-05, "loss": 0.5155, "step": 31232 }, { "epoch": 0.959450741867109, "grad_norm": 0.4971604347229004, "learning_rate": 1.0662088548358023e-05, "loss": 0.4861, "step": 31233 }, { "epoch": 0.9594814610020582, "grad_norm": 0.3878370523452759, "learning_rate": 1.0661606330662209e-05, "loss": 0.5822, "step": 31234 }, { "epoch": 0.9595121801370073, "grad_norm": 0.3863724172115326, "learning_rate": 1.0661124111421166e-05, "loss": 0.5312, "step": 31235 }, { "epoch": 0.9595428992719565, "grad_norm": 0.44514790177345276, "learning_rate": 1.066064189063602e-05, "loss": 0.5069, "step": 31236 }, { "epoch": 0.9595736184069057, "grad_norm": 0.3894006311893463, "learning_rate": 1.0660159668307899e-05, "loss": 0.5764, "step": 31237 }, { "epoch": 0.9596043375418548, "grad_norm": 0.3814907670021057, "learning_rate": 1.0659677444437928e-05, "loss": 0.5826, "step": 31238 }, { "epoch": 0.9596350566768039, "grad_norm": 0.3832489848136902, "learning_rate": 1.0659195219027236e-05, "loss": 0.5611, "step": 31239 }, { "epoch": 0.9596657758117532, "grad_norm": 0.3605572283267975, "learning_rate": 1.0658712992076948e-05, "loss": 0.5976, "step": 31240 }, { "epoch": 0.9596964949467023, "grad_norm": 0.37168827652931213, "learning_rate": 1.0658230763588189e-05, "loss": 0.4739, "step": 31241 }, { "epoch": 0.9597272140816515, "grad_norm": 0.604569137096405, "learning_rate": 1.0657748533562084e-05, "loss": 0.5661, "step": 31242 }, { "epoch": 0.9597579332166006, "grad_norm": 0.3844488263130188, "learning_rate": 1.0657266301999764e-05, "loss": 0.5334, "step": 31243 }, { "epoch": 0.9597886523515498, "grad_norm": 0.3375336527824402, "learning_rate": 1.0656784068902353e-05, "loss": 0.5727, "step": 31244 }, { "epoch": 0.959819371486499, "grad_norm": 0.38923221826553345, "learning_rate": 1.0656301834270974e-05, "loss": 0.5222, "step": 31245 }, { "epoch": 0.9598500906214481, "grad_norm": 0.36969664692878723, "learning_rate": 1.0655819598106758e-05, "loss": 0.5764, "step": 31246 }, { "epoch": 0.9598808097563972, "grad_norm": 0.3323362171649933, "learning_rate": 1.0655337360410829e-05, "loss": 0.5002, "step": 31247 }, { "epoch": 0.9599115288913465, "grad_norm": 0.3293166756629944, "learning_rate": 1.0654855121184315e-05, "loss": 0.4929, "step": 31248 }, { "epoch": 0.9599422480262956, "grad_norm": 0.41125425696372986, "learning_rate": 1.065437288042834e-05, "loss": 0.5561, "step": 31249 }, { "epoch": 0.9599729671612447, "grad_norm": 0.361122727394104, "learning_rate": 1.0653890638144034e-05, "loss": 0.4779, "step": 31250 }, { "epoch": 0.9600036862961939, "grad_norm": 0.3337033987045288, "learning_rate": 1.0653408394332518e-05, "loss": 0.5806, "step": 31251 }, { "epoch": 0.960034405431143, "grad_norm": 0.3735812306404114, "learning_rate": 1.0652926148994922e-05, "loss": 0.5807, "step": 31252 }, { "epoch": 0.9600651245660922, "grad_norm": 0.38690319657325745, "learning_rate": 1.0652443902132372e-05, "loss": 0.543, "step": 31253 }, { "epoch": 0.9600958437010414, "grad_norm": 0.3715425133705139, "learning_rate": 1.0651961653745992e-05, "loss": 0.4945, "step": 31254 }, { "epoch": 0.9601265628359905, "grad_norm": 0.44294771552085876, "learning_rate": 1.0651479403836912e-05, "loss": 0.5583, "step": 31255 }, { "epoch": 0.9601572819709397, "grad_norm": 0.3510442078113556, "learning_rate": 1.0650997152406256e-05, "loss": 0.4916, "step": 31256 }, { "epoch": 0.9601880011058889, "grad_norm": 0.3968023359775543, "learning_rate": 1.065051489945515e-05, "loss": 0.5689, "step": 31257 }, { "epoch": 0.960218720240838, "grad_norm": 0.40577244758605957, "learning_rate": 1.065003264498472e-05, "loss": 0.5204, "step": 31258 }, { "epoch": 0.9602494393757872, "grad_norm": 0.3590373992919922, "learning_rate": 1.0649550388996095e-05, "loss": 0.5715, "step": 31259 }, { "epoch": 0.9602801585107363, "grad_norm": 0.3640611469745636, "learning_rate": 1.0649068131490398e-05, "loss": 0.541, "step": 31260 }, { "epoch": 0.9603108776456855, "grad_norm": 0.3978162407875061, "learning_rate": 1.064858587246876e-05, "loss": 0.6015, "step": 31261 }, { "epoch": 0.9603415967806347, "grad_norm": 0.3801581859588623, "learning_rate": 1.06481036119323e-05, "loss": 0.5605, "step": 31262 }, { "epoch": 0.9603723159155838, "grad_norm": 0.35424119234085083, "learning_rate": 1.0647621349882152e-05, "loss": 0.5722, "step": 31263 }, { "epoch": 0.9604030350505329, "grad_norm": 0.36806753277778625, "learning_rate": 1.0647139086319437e-05, "loss": 0.5549, "step": 31264 }, { "epoch": 0.9604337541854822, "grad_norm": 0.397953599691391, "learning_rate": 1.0646656821245286e-05, "loss": 0.4979, "step": 31265 }, { "epoch": 0.9604644733204313, "grad_norm": 0.356113076210022, "learning_rate": 1.0646174554660822e-05, "loss": 0.4912, "step": 31266 }, { "epoch": 0.9604951924553805, "grad_norm": 0.4235452115535736, "learning_rate": 1.064569228656717e-05, "loss": 0.5302, "step": 31267 }, { "epoch": 0.9605259115903296, "grad_norm": 0.4105539321899414, "learning_rate": 1.0645210016965462e-05, "loss": 0.5718, "step": 31268 }, { "epoch": 0.9605566307252787, "grad_norm": 0.379419207572937, "learning_rate": 1.0644727745856817e-05, "loss": 0.4715, "step": 31269 }, { "epoch": 0.960587349860228, "grad_norm": 0.4355173408985138, "learning_rate": 1.064424547324237e-05, "loss": 0.5729, "step": 31270 }, { "epoch": 0.9606180689951771, "grad_norm": 0.427031546831131, "learning_rate": 1.0643763199123241e-05, "loss": 0.5492, "step": 31271 }, { "epoch": 0.9606487881301262, "grad_norm": 0.3636293411254883, "learning_rate": 1.0643280923500556e-05, "loss": 0.5752, "step": 31272 }, { "epoch": 0.9606795072650754, "grad_norm": 0.340276837348938, "learning_rate": 1.0642798646375447e-05, "loss": 0.5157, "step": 31273 }, { "epoch": 0.9607102264000246, "grad_norm": 0.37847182154655457, "learning_rate": 1.0642316367749032e-05, "loss": 0.4237, "step": 31274 }, { "epoch": 0.9607409455349737, "grad_norm": 0.36110520362854004, "learning_rate": 1.0641834087622446e-05, "loss": 0.5207, "step": 31275 }, { "epoch": 0.9607716646699229, "grad_norm": 0.42602360248565674, "learning_rate": 1.0641351805996812e-05, "loss": 0.5539, "step": 31276 }, { "epoch": 0.960802383804872, "grad_norm": 0.38110750913619995, "learning_rate": 1.0640869522873256e-05, "loss": 0.4703, "step": 31277 }, { "epoch": 0.9608331029398212, "grad_norm": 0.40462446212768555, "learning_rate": 1.06403872382529e-05, "loss": 0.5547, "step": 31278 }, { "epoch": 0.9608638220747704, "grad_norm": 0.38750994205474854, "learning_rate": 1.0639904952136882e-05, "loss": 0.5025, "step": 31279 }, { "epoch": 0.9608945412097195, "grad_norm": 0.36655667424201965, "learning_rate": 1.0639422664526316e-05, "loss": 0.4848, "step": 31280 }, { "epoch": 0.9609252603446687, "grad_norm": 0.4096637964248657, "learning_rate": 1.0638940375422333e-05, "loss": 0.4956, "step": 31281 }, { "epoch": 0.9609559794796179, "grad_norm": 0.3832806348800659, "learning_rate": 1.0638458084826064e-05, "loss": 0.523, "step": 31282 }, { "epoch": 0.960986698614567, "grad_norm": 0.4190296232700348, "learning_rate": 1.0637975792738629e-05, "loss": 0.5493, "step": 31283 }, { "epoch": 0.9610174177495162, "grad_norm": 0.33006811141967773, "learning_rate": 1.0637493499161158e-05, "loss": 0.5031, "step": 31284 }, { "epoch": 0.9610481368844653, "grad_norm": 0.4006226658821106, "learning_rate": 1.0637011204094776e-05, "loss": 0.5676, "step": 31285 }, { "epoch": 0.9610788560194145, "grad_norm": 0.43273675441741943, "learning_rate": 1.063652890754061e-05, "loss": 0.5159, "step": 31286 }, { "epoch": 0.9611095751543637, "grad_norm": 0.3524719476699829, "learning_rate": 1.0636046609499788e-05, "loss": 0.5288, "step": 31287 }, { "epoch": 0.9611402942893128, "grad_norm": 0.35332077741622925, "learning_rate": 1.0635564309973432e-05, "loss": 0.541, "step": 31288 }, { "epoch": 0.9611710134242619, "grad_norm": 0.3744797706604004, "learning_rate": 1.0635082008962674e-05, "loss": 0.5384, "step": 31289 }, { "epoch": 0.9612017325592112, "grad_norm": 0.3802827298641205, "learning_rate": 1.0634599706468633e-05, "loss": 0.5815, "step": 31290 }, { "epoch": 0.9612324516941603, "grad_norm": 0.37270259857177734, "learning_rate": 1.0634117402492445e-05, "loss": 0.5422, "step": 31291 }, { "epoch": 0.9612631708291095, "grad_norm": 0.48703011870384216, "learning_rate": 1.0633635097035229e-05, "loss": 0.4929, "step": 31292 }, { "epoch": 0.9612938899640586, "grad_norm": 0.3363785743713379, "learning_rate": 1.0633152790098115e-05, "loss": 0.5421, "step": 31293 }, { "epoch": 0.9613246090990077, "grad_norm": 0.38242611289024353, "learning_rate": 1.0632670481682226e-05, "loss": 0.6323, "step": 31294 }, { "epoch": 0.961355328233957, "grad_norm": 0.4389020502567291, "learning_rate": 1.0632188171788695e-05, "loss": 0.5672, "step": 31295 }, { "epoch": 0.9613860473689061, "grad_norm": 0.36584120988845825, "learning_rate": 1.0631705860418642e-05, "loss": 0.5281, "step": 31296 }, { "epoch": 0.9614167665038552, "grad_norm": 1.0208768844604492, "learning_rate": 1.0631223547573196e-05, "loss": 0.4507, "step": 31297 }, { "epoch": 0.9614474856388044, "grad_norm": 0.3511597812175751, "learning_rate": 1.0630741233253485e-05, "loss": 0.5347, "step": 31298 }, { "epoch": 0.9614782047737536, "grad_norm": 0.3445207178592682, "learning_rate": 1.0630258917460629e-05, "loss": 0.4642, "step": 31299 }, { "epoch": 0.9615089239087027, "grad_norm": 0.40512973070144653, "learning_rate": 1.0629776600195763e-05, "loss": 0.5474, "step": 31300 }, { "epoch": 0.9615396430436519, "grad_norm": 0.37555253505706787, "learning_rate": 1.0629294281460007e-05, "loss": 0.5846, "step": 31301 }, { "epoch": 0.961570362178601, "grad_norm": 0.38207152485847473, "learning_rate": 1.0628811961254496e-05, "loss": 0.4329, "step": 31302 }, { "epoch": 0.9616010813135503, "grad_norm": 0.35642457008361816, "learning_rate": 1.0628329639580344e-05, "loss": 0.5626, "step": 31303 }, { "epoch": 0.9616318004484994, "grad_norm": 0.42203488945961, "learning_rate": 1.0627847316438689e-05, "loss": 0.5854, "step": 31304 }, { "epoch": 0.9616625195834485, "grad_norm": 0.479033887386322, "learning_rate": 1.0627364991830654e-05, "loss": 0.5924, "step": 31305 }, { "epoch": 0.9616932387183977, "grad_norm": 0.36524534225463867, "learning_rate": 1.0626882665757358e-05, "loss": 0.6084, "step": 31306 }, { "epoch": 0.9617239578533469, "grad_norm": 0.5206381678581238, "learning_rate": 1.0626400338219938e-05, "loss": 0.5155, "step": 31307 }, { "epoch": 0.961754676988296, "grad_norm": 0.4083087742328644, "learning_rate": 1.0625918009219515e-05, "loss": 0.5257, "step": 31308 }, { "epoch": 0.9617853961232452, "grad_norm": 0.351176917552948, "learning_rate": 1.0625435678757219e-05, "loss": 0.5465, "step": 31309 }, { "epoch": 0.9618161152581943, "grad_norm": 0.3691273629665375, "learning_rate": 1.062495334683417e-05, "loss": 0.5552, "step": 31310 }, { "epoch": 0.9618468343931434, "grad_norm": 0.34286707639694214, "learning_rate": 1.0624471013451504e-05, "loss": 0.5297, "step": 31311 }, { "epoch": 0.9618775535280927, "grad_norm": 0.3986319899559021, "learning_rate": 1.0623988678610338e-05, "loss": 0.5498, "step": 31312 }, { "epoch": 0.9619082726630418, "grad_norm": 0.3914359509944916, "learning_rate": 1.0623506342311805e-05, "loss": 0.5119, "step": 31313 }, { "epoch": 0.9619389917979909, "grad_norm": 0.43056178092956543, "learning_rate": 1.0623024004557028e-05, "loss": 0.5066, "step": 31314 }, { "epoch": 0.9619697109329401, "grad_norm": 0.37845563888549805, "learning_rate": 1.0622541665347137e-05, "loss": 0.4759, "step": 31315 }, { "epoch": 0.9620004300678893, "grad_norm": 0.4149250090122223, "learning_rate": 1.0622059324683255e-05, "loss": 0.5882, "step": 31316 }, { "epoch": 0.9620311492028385, "grad_norm": 0.39439117908477783, "learning_rate": 1.062157698256651e-05, "loss": 0.5176, "step": 31317 }, { "epoch": 0.9620618683377876, "grad_norm": 0.3358139395713806, "learning_rate": 1.0621094638998028e-05, "loss": 0.5882, "step": 31318 }, { "epoch": 0.9620925874727367, "grad_norm": 0.3814910650253296, "learning_rate": 1.0620612293978936e-05, "loss": 0.5481, "step": 31319 }, { "epoch": 0.962123306607686, "grad_norm": 0.37419167160987854, "learning_rate": 1.0620129947510363e-05, "loss": 0.5879, "step": 31320 }, { "epoch": 0.9621540257426351, "grad_norm": 0.36219146847724915, "learning_rate": 1.061964759959343e-05, "loss": 0.541, "step": 31321 }, { "epoch": 0.9621847448775842, "grad_norm": 0.39330047369003296, "learning_rate": 1.0619165250229271e-05, "loss": 0.6335, "step": 31322 }, { "epoch": 0.9622154640125334, "grad_norm": 0.3540416359901428, "learning_rate": 1.0618682899419006e-05, "loss": 0.4923, "step": 31323 }, { "epoch": 0.9622461831474826, "grad_norm": 0.36720576882362366, "learning_rate": 1.0618200547163764e-05, "loss": 0.4838, "step": 31324 }, { "epoch": 0.9622769022824317, "grad_norm": 0.4478447437286377, "learning_rate": 1.061771819346467e-05, "loss": 0.5416, "step": 31325 }, { "epoch": 0.9623076214173809, "grad_norm": 0.35928502678871155, "learning_rate": 1.0617235838322853e-05, "loss": 0.6008, "step": 31326 }, { "epoch": 0.96233834055233, "grad_norm": 0.4029443860054016, "learning_rate": 1.061675348173944e-05, "loss": 0.4528, "step": 31327 }, { "epoch": 0.9623690596872793, "grad_norm": 0.40657132863998413, "learning_rate": 1.0616271123715553e-05, "loss": 0.5925, "step": 31328 }, { "epoch": 0.9623997788222284, "grad_norm": 0.5462896823883057, "learning_rate": 1.0615788764252326e-05, "loss": 0.5456, "step": 31329 }, { "epoch": 0.9624304979571775, "grad_norm": 0.3442525565624237, "learning_rate": 1.0615306403350879e-05, "loss": 0.4739, "step": 31330 }, { "epoch": 0.9624612170921267, "grad_norm": 0.38281258940696716, "learning_rate": 1.0614824041012342e-05, "loss": 0.4758, "step": 31331 }, { "epoch": 0.9624919362270759, "grad_norm": 0.3696674406528473, "learning_rate": 1.0614341677237835e-05, "loss": 0.5161, "step": 31332 }, { "epoch": 0.962522655362025, "grad_norm": 0.3680787980556488, "learning_rate": 1.0613859312028498e-05, "loss": 0.5682, "step": 31333 }, { "epoch": 0.9625533744969742, "grad_norm": 0.3768875300884247, "learning_rate": 1.0613376945385448e-05, "loss": 0.6073, "step": 31334 }, { "epoch": 0.9625840936319233, "grad_norm": 0.3823074996471405, "learning_rate": 1.061289457730981e-05, "loss": 0.4594, "step": 31335 }, { "epoch": 0.9626148127668724, "grad_norm": 0.3811758756637573, "learning_rate": 1.0612412207802713e-05, "loss": 0.6691, "step": 31336 }, { "epoch": 0.9626455319018217, "grad_norm": 0.3744005858898163, "learning_rate": 1.0611929836865288e-05, "loss": 0.5609, "step": 31337 }, { "epoch": 0.9626762510367708, "grad_norm": 0.37993812561035156, "learning_rate": 1.0611447464498657e-05, "loss": 0.5641, "step": 31338 }, { "epoch": 0.9627069701717199, "grad_norm": 0.38699930906295776, "learning_rate": 1.0610965090703949e-05, "loss": 0.5539, "step": 31339 }, { "epoch": 0.9627376893066691, "grad_norm": 0.40495556592941284, "learning_rate": 1.0610482715482287e-05, "loss": 0.5994, "step": 31340 }, { "epoch": 0.9627684084416183, "grad_norm": 0.36308714747428894, "learning_rate": 1.0610000338834802e-05, "loss": 0.5627, "step": 31341 }, { "epoch": 0.9627991275765675, "grad_norm": 0.6773108243942261, "learning_rate": 1.0609517960762617e-05, "loss": 0.469, "step": 31342 }, { "epoch": 0.9628298467115166, "grad_norm": 0.3962970972061157, "learning_rate": 1.0609035581266862e-05, "loss": 0.5807, "step": 31343 }, { "epoch": 0.9628605658464657, "grad_norm": 0.3716757297515869, "learning_rate": 1.060855320034866e-05, "loss": 0.4727, "step": 31344 }, { "epoch": 0.962891284981415, "grad_norm": 0.3341767489910126, "learning_rate": 1.0608070818009142e-05, "loss": 0.564, "step": 31345 }, { "epoch": 0.9629220041163641, "grad_norm": 0.4499187767505646, "learning_rate": 1.060758843424943e-05, "loss": 0.581, "step": 31346 }, { "epoch": 0.9629527232513132, "grad_norm": 0.3993331491947174, "learning_rate": 1.0607106049070655e-05, "loss": 0.5849, "step": 31347 }, { "epoch": 0.9629834423862624, "grad_norm": 0.3474039137363434, "learning_rate": 1.0606623662473939e-05, "loss": 0.4829, "step": 31348 }, { "epoch": 0.9630141615212116, "grad_norm": 0.3436499834060669, "learning_rate": 1.060614127446041e-05, "loss": 0.5594, "step": 31349 }, { "epoch": 0.9630448806561607, "grad_norm": 0.36353278160095215, "learning_rate": 1.06056588850312e-05, "loss": 0.5294, "step": 31350 }, { "epoch": 0.9630755997911099, "grad_norm": 0.3413015902042389, "learning_rate": 1.0605176494187428e-05, "loss": 0.5597, "step": 31351 }, { "epoch": 0.963106318926059, "grad_norm": 0.3846340775489807, "learning_rate": 1.0604694101930226e-05, "loss": 0.4732, "step": 31352 }, { "epoch": 0.9631370380610083, "grad_norm": 0.3929237723350525, "learning_rate": 1.0604211708260716e-05, "loss": 0.5292, "step": 31353 }, { "epoch": 0.9631677571959574, "grad_norm": 0.38955169916152954, "learning_rate": 1.060372931318003e-05, "loss": 0.5863, "step": 31354 }, { "epoch": 0.9631984763309065, "grad_norm": 0.4047471284866333, "learning_rate": 1.0603246916689289e-05, "loss": 0.5612, "step": 31355 }, { "epoch": 0.9632291954658557, "grad_norm": 0.3864074647426605, "learning_rate": 1.0602764518789626e-05, "loss": 0.5245, "step": 31356 }, { "epoch": 0.9632599146008048, "grad_norm": 0.3699687421321869, "learning_rate": 1.0602282119482163e-05, "loss": 0.6139, "step": 31357 }, { "epoch": 0.963290633735754, "grad_norm": 0.3962550163269043, "learning_rate": 1.0601799718768028e-05, "loss": 0.5732, "step": 31358 }, { "epoch": 0.9633213528707032, "grad_norm": 0.35786476731300354, "learning_rate": 1.0601317316648349e-05, "loss": 0.4934, "step": 31359 }, { "epoch": 0.9633520720056523, "grad_norm": 0.36887112259864807, "learning_rate": 1.060083491312425e-05, "loss": 0.5312, "step": 31360 }, { "epoch": 0.9633827911406014, "grad_norm": 0.4184873402118683, "learning_rate": 1.0600352508196861e-05, "loss": 0.6065, "step": 31361 }, { "epoch": 0.9634135102755507, "grad_norm": 0.6548122763633728, "learning_rate": 1.0599870101867302e-05, "loss": 0.5824, "step": 31362 }, { "epoch": 0.9634442294104998, "grad_norm": 0.4044815003871918, "learning_rate": 1.0599387694136709e-05, "loss": 0.5433, "step": 31363 }, { "epoch": 0.9634749485454489, "grad_norm": 0.4959205687046051, "learning_rate": 1.0598905285006202e-05, "loss": 0.4638, "step": 31364 }, { "epoch": 0.9635056676803981, "grad_norm": 0.3668917119503021, "learning_rate": 1.0598422874476913e-05, "loss": 0.5723, "step": 31365 }, { "epoch": 0.9635363868153473, "grad_norm": 0.390249103307724, "learning_rate": 1.0597940462549961e-05, "loss": 0.6071, "step": 31366 }, { "epoch": 0.9635671059502965, "grad_norm": 0.3708314895629883, "learning_rate": 1.0597458049226482e-05, "loss": 0.5236, "step": 31367 }, { "epoch": 0.9635978250852456, "grad_norm": 0.7216207385063171, "learning_rate": 1.0596975634507595e-05, "loss": 0.4787, "step": 31368 }, { "epoch": 0.9636285442201947, "grad_norm": 0.34182995557785034, "learning_rate": 1.059649321839443e-05, "loss": 0.5463, "step": 31369 }, { "epoch": 0.963659263355144, "grad_norm": 0.38303259015083313, "learning_rate": 1.0596010800888114e-05, "loss": 0.5786, "step": 31370 }, { "epoch": 0.9636899824900931, "grad_norm": 0.37822821736335754, "learning_rate": 1.0595528381989772e-05, "loss": 0.5962, "step": 31371 }, { "epoch": 0.9637207016250422, "grad_norm": 0.376499205827713, "learning_rate": 1.0595045961700535e-05, "loss": 0.5387, "step": 31372 }, { "epoch": 0.9637514207599914, "grad_norm": 0.3780275881290436, "learning_rate": 1.0594563540021521e-05, "loss": 0.5153, "step": 31373 }, { "epoch": 0.9637821398949405, "grad_norm": 0.3739379048347473, "learning_rate": 1.0594081116953866e-05, "loss": 0.5739, "step": 31374 }, { "epoch": 0.9638128590298897, "grad_norm": 0.4480989873409271, "learning_rate": 1.0593598692498694e-05, "loss": 0.5491, "step": 31375 }, { "epoch": 0.9638435781648389, "grad_norm": 0.4056874215602875, "learning_rate": 1.0593116266657128e-05, "loss": 0.619, "step": 31376 }, { "epoch": 0.963874297299788, "grad_norm": 0.39930009841918945, "learning_rate": 1.0592633839430303e-05, "loss": 0.5845, "step": 31377 }, { "epoch": 0.9639050164347372, "grad_norm": 0.37627869844436646, "learning_rate": 1.0592151410819334e-05, "loss": 0.5617, "step": 31378 }, { "epoch": 0.9639357355696864, "grad_norm": 0.40860515832901, "learning_rate": 1.0591668980825355e-05, "loss": 0.627, "step": 31379 }, { "epoch": 0.9639664547046355, "grad_norm": 0.40317055583000183, "learning_rate": 1.0591186549449495e-05, "loss": 0.5345, "step": 31380 }, { "epoch": 0.9639971738395847, "grad_norm": 0.41351887583732605, "learning_rate": 1.0590704116692876e-05, "loss": 0.5546, "step": 31381 }, { "epoch": 0.9640278929745338, "grad_norm": 0.4152762293815613, "learning_rate": 1.0590221682556625e-05, "loss": 0.5748, "step": 31382 }, { "epoch": 0.964058612109483, "grad_norm": 0.3787285387516022, "learning_rate": 1.0589739247041873e-05, "loss": 0.5002, "step": 31383 }, { "epoch": 0.9640893312444322, "grad_norm": 0.35376080870628357, "learning_rate": 1.058925681014974e-05, "loss": 0.5045, "step": 31384 }, { "epoch": 0.9641200503793813, "grad_norm": 0.3771818280220032, "learning_rate": 1.0588774371881359e-05, "loss": 0.5747, "step": 31385 }, { "epoch": 0.9641507695143304, "grad_norm": 0.3313387334346771, "learning_rate": 1.0588291932237855e-05, "loss": 0.5125, "step": 31386 }, { "epoch": 0.9641814886492797, "grad_norm": 0.3711093068122864, "learning_rate": 1.0587809491220351e-05, "loss": 0.5922, "step": 31387 }, { "epoch": 0.9642122077842288, "grad_norm": 0.37837427854537964, "learning_rate": 1.058732704882998e-05, "loss": 0.6412, "step": 31388 }, { "epoch": 0.9642429269191779, "grad_norm": 0.5178149342536926, "learning_rate": 1.0586844605067863e-05, "loss": 0.5595, "step": 31389 }, { "epoch": 0.9642736460541271, "grad_norm": 0.3593289852142334, "learning_rate": 1.0586362159935133e-05, "loss": 0.5724, "step": 31390 }, { "epoch": 0.9643043651890763, "grad_norm": 0.4578056037425995, "learning_rate": 1.058587971343291e-05, "loss": 0.6273, "step": 31391 }, { "epoch": 0.9643350843240255, "grad_norm": 0.3900642395019531, "learning_rate": 1.0585397265562322e-05, "loss": 0.492, "step": 31392 }, { "epoch": 0.9643658034589746, "grad_norm": 0.38649871945381165, "learning_rate": 1.0584914816324502e-05, "loss": 0.607, "step": 31393 }, { "epoch": 0.9643965225939237, "grad_norm": 0.45116767287254333, "learning_rate": 1.0584432365720572e-05, "loss": 0.5167, "step": 31394 }, { "epoch": 0.964427241728873, "grad_norm": 0.41964033246040344, "learning_rate": 1.0583949913751658e-05, "loss": 0.4943, "step": 31395 }, { "epoch": 0.9644579608638221, "grad_norm": 0.42506253719329834, "learning_rate": 1.058346746041889e-05, "loss": 0.5367, "step": 31396 }, { "epoch": 0.9644886799987712, "grad_norm": 0.3502868115901947, "learning_rate": 1.0582985005723392e-05, "loss": 0.5239, "step": 31397 }, { "epoch": 0.9645193991337204, "grad_norm": 0.4167330861091614, "learning_rate": 1.0582502549666291e-05, "loss": 0.5693, "step": 31398 }, { "epoch": 0.9645501182686695, "grad_norm": 0.3796357214450836, "learning_rate": 1.0582020092248718e-05, "loss": 0.5148, "step": 31399 }, { "epoch": 0.9645808374036187, "grad_norm": 0.3608141839504242, "learning_rate": 1.0581537633471792e-05, "loss": 0.5905, "step": 31400 }, { "epoch": 0.9646115565385679, "grad_norm": 0.39116305112838745, "learning_rate": 1.0581055173336647e-05, "loss": 0.5835, "step": 31401 }, { "epoch": 0.964642275673517, "grad_norm": 0.33313265442848206, "learning_rate": 1.0580572711844409e-05, "loss": 0.5026, "step": 31402 }, { "epoch": 0.9646729948084662, "grad_norm": 0.37582555413246155, "learning_rate": 1.0580090248996197e-05, "loss": 0.5058, "step": 31403 }, { "epoch": 0.9647037139434154, "grad_norm": 0.3657222390174866, "learning_rate": 1.057960778479315e-05, "loss": 0.5176, "step": 31404 }, { "epoch": 0.9647344330783645, "grad_norm": 0.40661758184432983, "learning_rate": 1.0579125319236382e-05, "loss": 0.5807, "step": 31405 }, { "epoch": 0.9647651522133137, "grad_norm": 0.5013903379440308, "learning_rate": 1.057864285232703e-05, "loss": 0.5451, "step": 31406 }, { "epoch": 0.9647958713482628, "grad_norm": 0.39998942613601685, "learning_rate": 1.0578160384066216e-05, "loss": 0.5281, "step": 31407 }, { "epoch": 0.964826590483212, "grad_norm": 0.33370065689086914, "learning_rate": 1.0577677914455072e-05, "loss": 0.4954, "step": 31408 }, { "epoch": 0.9648573096181612, "grad_norm": 0.40260404348373413, "learning_rate": 1.0577195443494717e-05, "loss": 0.5138, "step": 31409 }, { "epoch": 0.9648880287531103, "grad_norm": 0.3968352973461151, "learning_rate": 1.057671297118628e-05, "loss": 0.5617, "step": 31410 }, { "epoch": 0.9649187478880594, "grad_norm": 0.35828548669815063, "learning_rate": 1.0576230497530893e-05, "loss": 0.5406, "step": 31411 }, { "epoch": 0.9649494670230087, "grad_norm": 0.4532454013824463, "learning_rate": 1.0575748022529678e-05, "loss": 0.4945, "step": 31412 }, { "epoch": 0.9649801861579578, "grad_norm": 0.3827739357948303, "learning_rate": 1.0575265546183764e-05, "loss": 0.5546, "step": 31413 }, { "epoch": 0.965010905292907, "grad_norm": 0.4335762560367584, "learning_rate": 1.0574783068494277e-05, "loss": 0.5359, "step": 31414 }, { "epoch": 0.9650416244278561, "grad_norm": 0.3695290684700012, "learning_rate": 1.0574300589462345e-05, "loss": 0.5459, "step": 31415 }, { "epoch": 0.9650723435628052, "grad_norm": 0.38871148228645325, "learning_rate": 1.0573818109089092e-05, "loss": 0.569, "step": 31416 }, { "epoch": 0.9651030626977545, "grad_norm": 0.5736449360847473, "learning_rate": 1.0573335627375649e-05, "loss": 0.552, "step": 31417 }, { "epoch": 0.9651337818327036, "grad_norm": 0.34838420152664185, "learning_rate": 1.0572853144323137e-05, "loss": 0.5387, "step": 31418 }, { "epoch": 0.9651645009676527, "grad_norm": 0.36492660641670227, "learning_rate": 1.0572370659932688e-05, "loss": 0.5433, "step": 31419 }, { "epoch": 0.965195220102602, "grad_norm": 0.34290167689323425, "learning_rate": 1.0571888174205431e-05, "loss": 0.5134, "step": 31420 }, { "epoch": 0.9652259392375511, "grad_norm": 0.3379954993724823, "learning_rate": 1.0571405687142487e-05, "loss": 0.5096, "step": 31421 }, { "epoch": 0.9652566583725002, "grad_norm": 0.39643022418022156, "learning_rate": 1.0570923198744985e-05, "loss": 0.5587, "step": 31422 }, { "epoch": 0.9652873775074494, "grad_norm": 0.4122602641582489, "learning_rate": 1.0570440709014049e-05, "loss": 0.5093, "step": 31423 }, { "epoch": 0.9653180966423985, "grad_norm": 0.35781171917915344, "learning_rate": 1.0569958217950814e-05, "loss": 0.532, "step": 31424 }, { "epoch": 0.9653488157773477, "grad_norm": 0.38870760798454285, "learning_rate": 1.0569475725556397e-05, "loss": 0.517, "step": 31425 }, { "epoch": 0.9653795349122969, "grad_norm": 0.3740333020687103, "learning_rate": 1.0568993231831933e-05, "loss": 0.5553, "step": 31426 }, { "epoch": 0.965410254047246, "grad_norm": 0.4045684337615967, "learning_rate": 1.0568510736778544e-05, "loss": 0.5737, "step": 31427 }, { "epoch": 0.9654409731821952, "grad_norm": 0.3622325360774994, "learning_rate": 1.0568028240397357e-05, "loss": 0.5221, "step": 31428 }, { "epoch": 0.9654716923171444, "grad_norm": 0.6681498885154724, "learning_rate": 1.0567545742689503e-05, "loss": 0.5764, "step": 31429 }, { "epoch": 0.9655024114520935, "grad_norm": 0.38864004611968994, "learning_rate": 1.0567063243656103e-05, "loss": 0.5021, "step": 31430 }, { "epoch": 0.9655331305870427, "grad_norm": 0.37636426091194153, "learning_rate": 1.0566580743298291e-05, "loss": 0.5505, "step": 31431 }, { "epoch": 0.9655638497219918, "grad_norm": 0.40853533148765564, "learning_rate": 1.0566098241617188e-05, "loss": 0.5153, "step": 31432 }, { "epoch": 0.965594568856941, "grad_norm": 0.38884061574935913, "learning_rate": 1.0565615738613923e-05, "loss": 0.5556, "step": 31433 }, { "epoch": 0.9656252879918902, "grad_norm": 0.36034825444221497, "learning_rate": 1.0565133234289623e-05, "loss": 0.479, "step": 31434 }, { "epoch": 0.9656560071268393, "grad_norm": 0.40104350447654724, "learning_rate": 1.0564650728645418e-05, "loss": 0.5388, "step": 31435 }, { "epoch": 0.9656867262617884, "grad_norm": 0.3575049042701721, "learning_rate": 1.0564168221682428e-05, "loss": 0.5834, "step": 31436 }, { "epoch": 0.9657174453967377, "grad_norm": 0.3453225791454315, "learning_rate": 1.0563685713401786e-05, "loss": 0.5605, "step": 31437 }, { "epoch": 0.9657481645316868, "grad_norm": 0.3570595383644104, "learning_rate": 1.0563203203804617e-05, "loss": 0.5202, "step": 31438 }, { "epoch": 0.965778883666636, "grad_norm": 0.3556460738182068, "learning_rate": 1.0562720692892046e-05, "loss": 0.5858, "step": 31439 }, { "epoch": 0.9658096028015851, "grad_norm": 0.35979539155960083, "learning_rate": 1.0562238180665202e-05, "loss": 0.5457, "step": 31440 }, { "epoch": 0.9658403219365342, "grad_norm": 0.3801931142807007, "learning_rate": 1.056175566712521e-05, "loss": 0.5727, "step": 31441 }, { "epoch": 0.9658710410714835, "grad_norm": 0.3284153938293457, "learning_rate": 1.0561273152273201e-05, "loss": 0.4318, "step": 31442 }, { "epoch": 0.9659017602064326, "grad_norm": 0.41104915738105774, "learning_rate": 1.0560790636110296e-05, "loss": 0.5771, "step": 31443 }, { "epoch": 0.9659324793413817, "grad_norm": 0.4236783981323242, "learning_rate": 1.0560308118637628e-05, "loss": 0.5985, "step": 31444 }, { "epoch": 0.9659631984763309, "grad_norm": 0.3547949194908142, "learning_rate": 1.055982559985632e-05, "loss": 0.5269, "step": 31445 }, { "epoch": 0.9659939176112801, "grad_norm": 0.3523111641407013, "learning_rate": 1.0559343079767498e-05, "loss": 0.4984, "step": 31446 }, { "epoch": 0.9660246367462292, "grad_norm": 0.39730459451675415, "learning_rate": 1.0558860558372296e-05, "loss": 0.5085, "step": 31447 }, { "epoch": 0.9660553558811784, "grad_norm": 1.2049907445907593, "learning_rate": 1.0558378035671834e-05, "loss": 0.4765, "step": 31448 }, { "epoch": 0.9660860750161275, "grad_norm": 0.35346519947052, "learning_rate": 1.0557895511667242e-05, "loss": 0.4796, "step": 31449 }, { "epoch": 0.9661167941510767, "grad_norm": 0.3422947824001312, "learning_rate": 1.0557412986359644e-05, "loss": 0.5284, "step": 31450 }, { "epoch": 0.9661475132860259, "grad_norm": 0.4590795934200287, "learning_rate": 1.0556930459750172e-05, "loss": 0.5526, "step": 31451 }, { "epoch": 0.966178232420975, "grad_norm": 0.42062535881996155, "learning_rate": 1.0556447931839946e-05, "loss": 0.5892, "step": 31452 }, { "epoch": 0.9662089515559242, "grad_norm": 0.3570277690887451, "learning_rate": 1.0555965402630101e-05, "loss": 0.6167, "step": 31453 }, { "epoch": 0.9662396706908734, "grad_norm": 0.35375475883483887, "learning_rate": 1.0555482872121758e-05, "loss": 0.5617, "step": 31454 }, { "epoch": 0.9662703898258225, "grad_norm": 0.36927419900894165, "learning_rate": 1.0555000340316045e-05, "loss": 0.5659, "step": 31455 }, { "epoch": 0.9663011089607717, "grad_norm": 0.3982950747013092, "learning_rate": 1.0554517807214093e-05, "loss": 0.5786, "step": 31456 }, { "epoch": 0.9663318280957208, "grad_norm": 0.37177377939224243, "learning_rate": 1.0554035272817025e-05, "loss": 0.5924, "step": 31457 }, { "epoch": 0.9663625472306699, "grad_norm": 0.411020427942276, "learning_rate": 1.0553552737125967e-05, "loss": 0.5831, "step": 31458 }, { "epoch": 0.9663932663656192, "grad_norm": 0.3554731607437134, "learning_rate": 1.0553070200142048e-05, "loss": 0.5156, "step": 31459 }, { "epoch": 0.9664239855005683, "grad_norm": 0.39531394839286804, "learning_rate": 1.0552587661866397e-05, "loss": 0.5625, "step": 31460 }, { "epoch": 0.9664547046355174, "grad_norm": 0.40776386857032776, "learning_rate": 1.0552105122300138e-05, "loss": 0.5411, "step": 31461 }, { "epoch": 0.9664854237704666, "grad_norm": 0.3652268350124359, "learning_rate": 1.0551622581444399e-05, "loss": 0.5387, "step": 31462 }, { "epoch": 0.9665161429054158, "grad_norm": 0.359662801027298, "learning_rate": 1.0551140039300305e-05, "loss": 0.5873, "step": 31463 }, { "epoch": 0.966546862040365, "grad_norm": 0.3693923354148865, "learning_rate": 1.0550657495868986e-05, "loss": 0.5874, "step": 31464 }, { "epoch": 0.9665775811753141, "grad_norm": 0.36592432856559753, "learning_rate": 1.055017495115157e-05, "loss": 0.5161, "step": 31465 }, { "epoch": 0.9666083003102632, "grad_norm": 0.4071306586265564, "learning_rate": 1.0549692405149179e-05, "loss": 0.5595, "step": 31466 }, { "epoch": 0.9666390194452125, "grad_norm": 0.3637971878051758, "learning_rate": 1.0549209857862945e-05, "loss": 0.534, "step": 31467 }, { "epoch": 0.9666697385801616, "grad_norm": 0.6144012808799744, "learning_rate": 1.0548727309293994e-05, "loss": 0.5782, "step": 31468 }, { "epoch": 0.9667004577151107, "grad_norm": 0.5172902345657349, "learning_rate": 1.0548244759443451e-05, "loss": 0.5159, "step": 31469 }, { "epoch": 0.9667311768500599, "grad_norm": 0.4003470838069916, "learning_rate": 1.0547762208312443e-05, "loss": 0.597, "step": 31470 }, { "epoch": 0.9667618959850091, "grad_norm": 0.3976392447948456, "learning_rate": 1.0547279655902098e-05, "loss": 0.6008, "step": 31471 }, { "epoch": 0.9667926151199582, "grad_norm": 0.6284521818161011, "learning_rate": 1.0546797102213547e-05, "loss": 0.5403, "step": 31472 }, { "epoch": 0.9668233342549074, "grad_norm": 0.3489667773246765, "learning_rate": 1.0546314547247908e-05, "loss": 0.5404, "step": 31473 }, { "epoch": 0.9668540533898565, "grad_norm": 0.4498499631881714, "learning_rate": 1.0545831991006317e-05, "loss": 0.4928, "step": 31474 }, { "epoch": 0.9668847725248056, "grad_norm": 0.3613872230052948, "learning_rate": 1.0545349433489893e-05, "loss": 0.547, "step": 31475 }, { "epoch": 0.9669154916597549, "grad_norm": 0.5393545627593994, "learning_rate": 1.0544866874699772e-05, "loss": 0.5299, "step": 31476 }, { "epoch": 0.966946210794704, "grad_norm": 0.36444997787475586, "learning_rate": 1.0544384314637073e-05, "loss": 0.5236, "step": 31477 }, { "epoch": 0.9669769299296532, "grad_norm": 0.3298209309577942, "learning_rate": 1.054390175330293e-05, "loss": 0.4995, "step": 31478 }, { "epoch": 0.9670076490646023, "grad_norm": 0.3340143859386444, "learning_rate": 1.0543419190698463e-05, "loss": 0.5124, "step": 31479 }, { "epoch": 0.9670383681995515, "grad_norm": 0.37353119254112244, "learning_rate": 1.0542936626824805e-05, "loss": 0.5819, "step": 31480 }, { "epoch": 0.9670690873345007, "grad_norm": 0.38200345635414124, "learning_rate": 1.0542454061683076e-05, "loss": 0.591, "step": 31481 }, { "epoch": 0.9670998064694498, "grad_norm": 0.37436559796333313, "learning_rate": 1.054197149527441e-05, "loss": 0.5418, "step": 31482 }, { "epoch": 0.9671305256043989, "grad_norm": 0.3718794584274292, "learning_rate": 1.0541488927599934e-05, "loss": 0.5481, "step": 31483 }, { "epoch": 0.9671612447393482, "grad_norm": 0.3828454613685608, "learning_rate": 1.054100635866077e-05, "loss": 0.5219, "step": 31484 }, { "epoch": 0.9671919638742973, "grad_norm": 0.4051169455051422, "learning_rate": 1.054052378845805e-05, "loss": 0.5, "step": 31485 }, { "epoch": 0.9672226830092464, "grad_norm": 0.3911679685115814, "learning_rate": 1.0540041216992898e-05, "loss": 0.5962, "step": 31486 }, { "epoch": 0.9672534021441956, "grad_norm": 0.36748436093330383, "learning_rate": 1.0539558644266443e-05, "loss": 0.5676, "step": 31487 }, { "epoch": 0.9672841212791448, "grad_norm": 0.340848445892334, "learning_rate": 1.053907607027981e-05, "loss": 0.4915, "step": 31488 }, { "epoch": 0.967314840414094, "grad_norm": 0.3733859360218048, "learning_rate": 1.0538593495034129e-05, "loss": 0.531, "step": 31489 }, { "epoch": 0.9673455595490431, "grad_norm": 0.38750743865966797, "learning_rate": 1.0538110918530525e-05, "loss": 0.5465, "step": 31490 }, { "epoch": 0.9673762786839922, "grad_norm": 0.3798360526561737, "learning_rate": 1.0537628340770124e-05, "loss": 0.5106, "step": 31491 }, { "epoch": 0.9674069978189415, "grad_norm": 0.3573945164680481, "learning_rate": 1.0537145761754056e-05, "loss": 0.4858, "step": 31492 }, { "epoch": 0.9674377169538906, "grad_norm": 0.39133062958717346, "learning_rate": 1.0536663181483442e-05, "loss": 0.5527, "step": 31493 }, { "epoch": 0.9674684360888397, "grad_norm": 0.37457939982414246, "learning_rate": 1.0536180599959418e-05, "loss": 0.5302, "step": 31494 }, { "epoch": 0.9674991552237889, "grad_norm": 0.442684531211853, "learning_rate": 1.0535698017183104e-05, "loss": 0.4507, "step": 31495 }, { "epoch": 0.967529874358738, "grad_norm": 0.3378961682319641, "learning_rate": 1.0535215433155634e-05, "loss": 0.5964, "step": 31496 }, { "epoch": 0.9675605934936872, "grad_norm": 0.3808746635913849, "learning_rate": 1.0534732847878127e-05, "loss": 0.5634, "step": 31497 }, { "epoch": 0.9675913126286364, "grad_norm": 0.38587966561317444, "learning_rate": 1.0534250261351718e-05, "loss": 0.4861, "step": 31498 }, { "epoch": 0.9676220317635855, "grad_norm": 0.3573414385318756, "learning_rate": 1.0533767673577525e-05, "loss": 0.5078, "step": 31499 }, { "epoch": 0.9676527508985348, "grad_norm": 0.3637157678604126, "learning_rate": 1.0533285084556682e-05, "loss": 0.5345, "step": 31500 }, { "epoch": 0.9676834700334839, "grad_norm": 0.4425986707210541, "learning_rate": 1.0532802494290317e-05, "loss": 0.551, "step": 31501 }, { "epoch": 0.967714189168433, "grad_norm": 0.32927370071411133, "learning_rate": 1.0532319902779553e-05, "loss": 0.5594, "step": 31502 }, { "epoch": 0.9677449083033822, "grad_norm": 0.39324477314949036, "learning_rate": 1.0531837310025518e-05, "loss": 0.577, "step": 31503 }, { "epoch": 0.9677756274383313, "grad_norm": 0.396894633769989, "learning_rate": 1.053135471602934e-05, "loss": 0.5924, "step": 31504 }, { "epoch": 0.9678063465732805, "grad_norm": 0.36470773816108704, "learning_rate": 1.0530872120792148e-05, "loss": 0.4895, "step": 31505 }, { "epoch": 0.9678370657082297, "grad_norm": 0.34801948070526123, "learning_rate": 1.0530389524315066e-05, "loss": 0.4695, "step": 31506 }, { "epoch": 0.9678677848431788, "grad_norm": 0.3997515141963959, "learning_rate": 1.0529906926599223e-05, "loss": 0.6078, "step": 31507 }, { "epoch": 0.9678985039781279, "grad_norm": 0.3805995285511017, "learning_rate": 1.0529424327645743e-05, "loss": 0.5846, "step": 31508 }, { "epoch": 0.9679292231130772, "grad_norm": 0.336215615272522, "learning_rate": 1.0528941727455757e-05, "loss": 0.5871, "step": 31509 }, { "epoch": 0.9679599422480263, "grad_norm": 0.38454991579055786, "learning_rate": 1.052845912603039e-05, "loss": 0.5551, "step": 31510 }, { "epoch": 0.9679906613829754, "grad_norm": 0.3563859462738037, "learning_rate": 1.0527976523370771e-05, "loss": 0.553, "step": 31511 }, { "epoch": 0.9680213805179246, "grad_norm": 0.3329734802246094, "learning_rate": 1.0527493919478027e-05, "loss": 0.5011, "step": 31512 }, { "epoch": 0.9680520996528738, "grad_norm": 0.43396300077438354, "learning_rate": 1.052701131435328e-05, "loss": 0.4838, "step": 31513 }, { "epoch": 0.968082818787823, "grad_norm": 0.3934822976589203, "learning_rate": 1.0526528707997666e-05, "loss": 0.5026, "step": 31514 }, { "epoch": 0.9681135379227721, "grad_norm": 0.3989558815956116, "learning_rate": 1.0526046100412301e-05, "loss": 0.5567, "step": 31515 }, { "epoch": 0.9681442570577212, "grad_norm": 0.3961544930934906, "learning_rate": 1.0525563491598326e-05, "loss": 0.5225, "step": 31516 }, { "epoch": 0.9681749761926705, "grad_norm": 0.351864218711853, "learning_rate": 1.0525080881556855e-05, "loss": 0.427, "step": 31517 }, { "epoch": 0.9682056953276196, "grad_norm": 0.3699679374694824, "learning_rate": 1.0524598270289024e-05, "loss": 0.4486, "step": 31518 }, { "epoch": 0.9682364144625687, "grad_norm": 0.48269155621528625, "learning_rate": 1.0524115657795957e-05, "loss": 0.5139, "step": 31519 }, { "epoch": 0.9682671335975179, "grad_norm": 0.3711334466934204, "learning_rate": 1.0523633044078779e-05, "loss": 0.6341, "step": 31520 }, { "epoch": 0.968297852732467, "grad_norm": 0.39163607358932495, "learning_rate": 1.0523150429138622e-05, "loss": 0.5484, "step": 31521 }, { "epoch": 0.9683285718674162, "grad_norm": 0.3598252534866333, "learning_rate": 1.0522667812976608e-05, "loss": 0.5413, "step": 31522 }, { "epoch": 0.9683592910023654, "grad_norm": 0.3469293713569641, "learning_rate": 1.052218519559387e-05, "loss": 0.5342, "step": 31523 }, { "epoch": 0.9683900101373145, "grad_norm": 0.39953160285949707, "learning_rate": 1.0521702576991532e-05, "loss": 0.5208, "step": 31524 }, { "epoch": 0.9684207292722637, "grad_norm": 0.4100295603275299, "learning_rate": 1.0521219957170719e-05, "loss": 0.5818, "step": 31525 }, { "epoch": 0.9684514484072129, "grad_norm": 0.3594414293766022, "learning_rate": 1.052073733613256e-05, "loss": 0.5599, "step": 31526 }, { "epoch": 0.968482167542162, "grad_norm": 0.3953518867492676, "learning_rate": 1.0520254713878185e-05, "loss": 0.5052, "step": 31527 }, { "epoch": 0.9685128866771112, "grad_norm": 0.3680560886859894, "learning_rate": 1.0519772090408718e-05, "loss": 0.5418, "step": 31528 }, { "epoch": 0.9685436058120603, "grad_norm": 0.33910897374153137, "learning_rate": 1.0519289465725283e-05, "loss": 0.6193, "step": 31529 }, { "epoch": 0.9685743249470095, "grad_norm": 0.37358948588371277, "learning_rate": 1.0518806839829018e-05, "loss": 0.5152, "step": 31530 }, { "epoch": 0.9686050440819587, "grad_norm": 0.3909909725189209, "learning_rate": 1.0518324212721038e-05, "loss": 0.5387, "step": 31531 }, { "epoch": 0.9686357632169078, "grad_norm": 0.32781150937080383, "learning_rate": 1.0517841584402479e-05, "loss": 0.4774, "step": 31532 }, { "epoch": 0.9686664823518569, "grad_norm": 0.3959876596927643, "learning_rate": 1.0517358954874464e-05, "loss": 0.6351, "step": 31533 }, { "epoch": 0.9686972014868062, "grad_norm": 0.452894002199173, "learning_rate": 1.0516876324138122e-05, "loss": 0.5214, "step": 31534 }, { "epoch": 0.9687279206217553, "grad_norm": 0.353475958108902, "learning_rate": 1.0516393692194578e-05, "loss": 0.5339, "step": 31535 }, { "epoch": 0.9687586397567044, "grad_norm": 0.4886999726295471, "learning_rate": 1.0515911059044958e-05, "loss": 0.5897, "step": 31536 }, { "epoch": 0.9687893588916536, "grad_norm": 0.39769744873046875, "learning_rate": 1.0515428424690393e-05, "loss": 0.5243, "step": 31537 }, { "epoch": 0.9688200780266027, "grad_norm": 0.3857365548610687, "learning_rate": 1.0514945789132009e-05, "loss": 0.5935, "step": 31538 }, { "epoch": 0.968850797161552, "grad_norm": 0.3668992817401886, "learning_rate": 1.0514463152370935e-05, "loss": 0.5897, "step": 31539 }, { "epoch": 0.9688815162965011, "grad_norm": 0.3751409947872162, "learning_rate": 1.0513980514408294e-05, "loss": 0.5162, "step": 31540 }, { "epoch": 0.9689122354314502, "grad_norm": 0.3752150237560272, "learning_rate": 1.0513497875245218e-05, "loss": 0.5124, "step": 31541 }, { "epoch": 0.9689429545663995, "grad_norm": 0.3490377366542816, "learning_rate": 1.0513015234882833e-05, "loss": 0.4411, "step": 31542 }, { "epoch": 0.9689736737013486, "grad_norm": 0.3692469000816345, "learning_rate": 1.0512532593322262e-05, "loss": 0.57, "step": 31543 }, { "epoch": 0.9690043928362977, "grad_norm": 0.4128754436969757, "learning_rate": 1.0512049950564639e-05, "loss": 0.6015, "step": 31544 }, { "epoch": 0.9690351119712469, "grad_norm": 0.3690427243709564, "learning_rate": 1.0511567306611081e-05, "loss": 0.4721, "step": 31545 }, { "epoch": 0.969065831106196, "grad_norm": 0.4351033568382263, "learning_rate": 1.0511084661462728e-05, "loss": 0.5308, "step": 31546 }, { "epoch": 0.9690965502411452, "grad_norm": 0.3727462589740753, "learning_rate": 1.0510602015120696e-05, "loss": 0.5526, "step": 31547 }, { "epoch": 0.9691272693760944, "grad_norm": 0.37958335876464844, "learning_rate": 1.0510119367586122e-05, "loss": 0.5951, "step": 31548 }, { "epoch": 0.9691579885110435, "grad_norm": 0.3608228862285614, "learning_rate": 1.0509636718860125e-05, "loss": 0.5698, "step": 31549 }, { "epoch": 0.9691887076459927, "grad_norm": 0.375479519367218, "learning_rate": 1.0509154068943839e-05, "loss": 0.625, "step": 31550 }, { "epoch": 0.9692194267809419, "grad_norm": 0.34613195061683655, "learning_rate": 1.0508671417838385e-05, "loss": 0.4573, "step": 31551 }, { "epoch": 0.969250145915891, "grad_norm": 0.36412933468818665, "learning_rate": 1.0508188765544897e-05, "loss": 0.6029, "step": 31552 }, { "epoch": 0.9692808650508402, "grad_norm": 0.36040350794792175, "learning_rate": 1.0507706112064498e-05, "loss": 0.555, "step": 31553 }, { "epoch": 0.9693115841857893, "grad_norm": 0.4057673215866089, "learning_rate": 1.0507223457398313e-05, "loss": 0.5588, "step": 31554 }, { "epoch": 0.9693423033207385, "grad_norm": 0.3463574945926666, "learning_rate": 1.0506740801547473e-05, "loss": 0.5484, "step": 31555 }, { "epoch": 0.9693730224556877, "grad_norm": 0.3506704568862915, "learning_rate": 1.0506258144513104e-05, "loss": 0.4744, "step": 31556 }, { "epoch": 0.9694037415906368, "grad_norm": 0.34461966156959534, "learning_rate": 1.0505775486296337e-05, "loss": 0.4877, "step": 31557 }, { "epoch": 0.9694344607255859, "grad_norm": 0.39006686210632324, "learning_rate": 1.0505292826898292e-05, "loss": 0.5907, "step": 31558 }, { "epoch": 0.9694651798605352, "grad_norm": 0.4030389189720154, "learning_rate": 1.0504810166320104e-05, "loss": 0.5157, "step": 31559 }, { "epoch": 0.9694958989954843, "grad_norm": 0.3452884554862976, "learning_rate": 1.0504327504562894e-05, "loss": 0.5202, "step": 31560 }, { "epoch": 0.9695266181304334, "grad_norm": 0.36559900641441345, "learning_rate": 1.0503844841627792e-05, "loss": 0.5124, "step": 31561 }, { "epoch": 0.9695573372653826, "grad_norm": 0.3887709975242615, "learning_rate": 1.0503362177515928e-05, "loss": 0.555, "step": 31562 }, { "epoch": 0.9695880564003317, "grad_norm": 0.3724515736103058, "learning_rate": 1.0502879512228423e-05, "loss": 0.5277, "step": 31563 }, { "epoch": 0.969618775535281, "grad_norm": 0.4354902505874634, "learning_rate": 1.0502396845766411e-05, "loss": 0.5301, "step": 31564 }, { "epoch": 0.9696494946702301, "grad_norm": 0.3187417984008789, "learning_rate": 1.0501914178131014e-05, "loss": 0.4623, "step": 31565 }, { "epoch": 0.9696802138051792, "grad_norm": 0.34548917412757874, "learning_rate": 1.0501431509323361e-05, "loss": 0.6017, "step": 31566 }, { "epoch": 0.9697109329401284, "grad_norm": 0.37835949659347534, "learning_rate": 1.0500948839344581e-05, "loss": 0.6232, "step": 31567 }, { "epoch": 0.9697416520750776, "grad_norm": 0.47944435477256775, "learning_rate": 1.0500466168195799e-05, "loss": 0.5418, "step": 31568 }, { "epoch": 0.9697723712100267, "grad_norm": 0.3762350082397461, "learning_rate": 1.0499983495878144e-05, "loss": 0.4577, "step": 31569 }, { "epoch": 0.9698030903449759, "grad_norm": 0.35220322012901306, "learning_rate": 1.0499500822392744e-05, "loss": 0.5186, "step": 31570 }, { "epoch": 0.969833809479925, "grad_norm": 0.3742513060569763, "learning_rate": 1.0499018147740723e-05, "loss": 0.5553, "step": 31571 }, { "epoch": 0.9698645286148742, "grad_norm": 0.3501766622066498, "learning_rate": 1.049853547192321e-05, "loss": 0.5275, "step": 31572 }, { "epoch": 0.9698952477498234, "grad_norm": 0.4040467143058777, "learning_rate": 1.0498052794941331e-05, "loss": 0.5583, "step": 31573 }, { "epoch": 0.9699259668847725, "grad_norm": 0.3993037939071655, "learning_rate": 1.0497570116796217e-05, "loss": 0.5009, "step": 31574 }, { "epoch": 0.9699566860197217, "grad_norm": 0.374704509973526, "learning_rate": 1.0497087437488993e-05, "loss": 0.5261, "step": 31575 }, { "epoch": 0.9699874051546709, "grad_norm": 0.38204294443130493, "learning_rate": 1.049660475702079e-05, "loss": 0.5902, "step": 31576 }, { "epoch": 0.97001812428962, "grad_norm": 0.38777968287467957, "learning_rate": 1.0496122075392727e-05, "loss": 0.5505, "step": 31577 }, { "epoch": 0.9700488434245692, "grad_norm": 0.3447237014770508, "learning_rate": 1.0495639392605939e-05, "loss": 0.5833, "step": 31578 }, { "epoch": 0.9700795625595183, "grad_norm": 0.3643571138381958, "learning_rate": 1.0495156708661549e-05, "loss": 0.5747, "step": 31579 }, { "epoch": 0.9701102816944674, "grad_norm": 0.48193737864494324, "learning_rate": 1.0494674023560688e-05, "loss": 0.6558, "step": 31580 }, { "epoch": 0.9701410008294167, "grad_norm": 0.382099986076355, "learning_rate": 1.0494191337304478e-05, "loss": 0.5856, "step": 31581 }, { "epoch": 0.9701717199643658, "grad_norm": 0.3432740569114685, "learning_rate": 1.0493708649894053e-05, "loss": 0.5247, "step": 31582 }, { "epoch": 0.9702024390993149, "grad_norm": 0.3925546705722809, "learning_rate": 1.0493225961330533e-05, "loss": 0.5365, "step": 31583 }, { "epoch": 0.9702331582342641, "grad_norm": 0.41769132018089294, "learning_rate": 1.0492743271615053e-05, "loss": 0.5429, "step": 31584 }, { "epoch": 0.9702638773692133, "grad_norm": 0.4180874824523926, "learning_rate": 1.0492260580748735e-05, "loss": 0.5159, "step": 31585 }, { "epoch": 0.9702945965041624, "grad_norm": 0.35358813405036926, "learning_rate": 1.0491777888732708e-05, "loss": 0.5016, "step": 31586 }, { "epoch": 0.9703253156391116, "grad_norm": 0.6353484988212585, "learning_rate": 1.0491295195568104e-05, "loss": 0.6104, "step": 31587 }, { "epoch": 0.9703560347740607, "grad_norm": 0.3773576319217682, "learning_rate": 1.0490812501256039e-05, "loss": 0.5059, "step": 31588 }, { "epoch": 0.97038675390901, "grad_norm": 0.32948657870292664, "learning_rate": 1.049032980579765e-05, "loss": 0.4449, "step": 31589 }, { "epoch": 0.9704174730439591, "grad_norm": 0.3576734662055969, "learning_rate": 1.048984710919406e-05, "loss": 0.4935, "step": 31590 }, { "epoch": 0.9704481921789082, "grad_norm": 0.4327628016471863, "learning_rate": 1.0489364411446401e-05, "loss": 0.5403, "step": 31591 }, { "epoch": 0.9704789113138574, "grad_norm": 0.385507196187973, "learning_rate": 1.0488881712555791e-05, "loss": 0.5763, "step": 31592 }, { "epoch": 0.9705096304488066, "grad_norm": 0.3564428985118866, "learning_rate": 1.0488399012523369e-05, "loss": 0.5152, "step": 31593 }, { "epoch": 0.9705403495837557, "grad_norm": 0.3512336015701294, "learning_rate": 1.0487916311350257e-05, "loss": 0.5417, "step": 31594 }, { "epoch": 0.9705710687187049, "grad_norm": 0.3721691370010376, "learning_rate": 1.0487433609037581e-05, "loss": 0.573, "step": 31595 }, { "epoch": 0.970601787853654, "grad_norm": 0.4147168695926666, "learning_rate": 1.0486950905586471e-05, "loss": 0.5263, "step": 31596 }, { "epoch": 0.9706325069886031, "grad_norm": 0.4040277898311615, "learning_rate": 1.0486468200998051e-05, "loss": 0.524, "step": 31597 }, { "epoch": 0.9706632261235524, "grad_norm": 0.40527936816215515, "learning_rate": 1.0485985495273452e-05, "loss": 0.5279, "step": 31598 }, { "epoch": 0.9706939452585015, "grad_norm": 0.36210545897483826, "learning_rate": 1.0485502788413799e-05, "loss": 0.5743, "step": 31599 }, { "epoch": 0.9707246643934507, "grad_norm": 0.36723068356513977, "learning_rate": 1.0485020080420224e-05, "loss": 0.5649, "step": 31600 }, { "epoch": 0.9707553835283999, "grad_norm": 0.42020702362060547, "learning_rate": 1.0484537371293845e-05, "loss": 0.4629, "step": 31601 }, { "epoch": 0.970786102663349, "grad_norm": 0.41366374492645264, "learning_rate": 1.0484054661035799e-05, "loss": 0.6018, "step": 31602 }, { "epoch": 0.9708168217982982, "grad_norm": 0.38865697383880615, "learning_rate": 1.0483571949647207e-05, "loss": 0.5556, "step": 31603 }, { "epoch": 0.9708475409332473, "grad_norm": 0.3723064064979553, "learning_rate": 1.0483089237129202e-05, "loss": 0.5055, "step": 31604 }, { "epoch": 0.9708782600681964, "grad_norm": 0.3696882426738739, "learning_rate": 1.0482606523482907e-05, "loss": 0.5215, "step": 31605 }, { "epoch": 0.9709089792031457, "grad_norm": 0.7731230854988098, "learning_rate": 1.048212380870945e-05, "loss": 0.5397, "step": 31606 }, { "epoch": 0.9709396983380948, "grad_norm": 0.3783917725086212, "learning_rate": 1.0481641092809963e-05, "loss": 0.6254, "step": 31607 }, { "epoch": 0.9709704174730439, "grad_norm": 0.4347817003726959, "learning_rate": 1.0481158375785563e-05, "loss": 0.5114, "step": 31608 }, { "epoch": 0.9710011366079931, "grad_norm": 0.45428428053855896, "learning_rate": 1.048067565763739e-05, "loss": 0.6319, "step": 31609 }, { "epoch": 0.9710318557429423, "grad_norm": 0.3613196015357971, "learning_rate": 1.0480192938366558e-05, "loss": 0.5101, "step": 31610 }, { "epoch": 0.9710625748778915, "grad_norm": 0.34813493490219116, "learning_rate": 1.0479710217974208e-05, "loss": 0.4984, "step": 31611 }, { "epoch": 0.9710932940128406, "grad_norm": 0.3677864670753479, "learning_rate": 1.0479227496461459e-05, "loss": 0.5052, "step": 31612 }, { "epoch": 0.9711240131477897, "grad_norm": 0.39821264147758484, "learning_rate": 1.0478744773829442e-05, "loss": 0.5297, "step": 31613 }, { "epoch": 0.971154732282739, "grad_norm": 0.3579888343811035, "learning_rate": 1.0478262050079283e-05, "loss": 0.5161, "step": 31614 }, { "epoch": 0.9711854514176881, "grad_norm": 0.3287578225135803, "learning_rate": 1.0477779325212109e-05, "loss": 0.5549, "step": 31615 }, { "epoch": 0.9712161705526372, "grad_norm": 0.34841588139533997, "learning_rate": 1.047729659922905e-05, "loss": 0.5574, "step": 31616 }, { "epoch": 0.9712468896875864, "grad_norm": 0.33949705958366394, "learning_rate": 1.0476813872131228e-05, "loss": 0.5941, "step": 31617 }, { "epoch": 0.9712776088225356, "grad_norm": 0.34355372190475464, "learning_rate": 1.0476331143919776e-05, "loss": 0.5199, "step": 31618 }, { "epoch": 0.9713083279574847, "grad_norm": 0.3672342300415039, "learning_rate": 1.0475848414595818e-05, "loss": 0.474, "step": 31619 }, { "epoch": 0.9713390470924339, "grad_norm": 0.3922313153743744, "learning_rate": 1.0475365684160486e-05, "loss": 0.4933, "step": 31620 }, { "epoch": 0.971369766227383, "grad_norm": 0.3746148645877838, "learning_rate": 1.04748829526149e-05, "loss": 0.5395, "step": 31621 }, { "epoch": 0.9714004853623321, "grad_norm": 0.5751687288284302, "learning_rate": 1.0474400219960195e-05, "loss": 0.4846, "step": 31622 }, { "epoch": 0.9714312044972814, "grad_norm": 0.39324694871902466, "learning_rate": 1.0473917486197496e-05, "loss": 0.6266, "step": 31623 }, { "epoch": 0.9714619236322305, "grad_norm": 0.34184303879737854, "learning_rate": 1.0473434751327924e-05, "loss": 0.3962, "step": 31624 }, { "epoch": 0.9714926427671797, "grad_norm": 0.38958415389060974, "learning_rate": 1.0472952015352618e-05, "loss": 0.5343, "step": 31625 }, { "epoch": 0.9715233619021288, "grad_norm": 0.3596343398094177, "learning_rate": 1.0472469278272697e-05, "loss": 0.5559, "step": 31626 }, { "epoch": 0.971554081037078, "grad_norm": 0.4262900650501251, "learning_rate": 1.0471986540089292e-05, "loss": 0.5328, "step": 31627 }, { "epoch": 0.9715848001720272, "grad_norm": 0.3851257860660553, "learning_rate": 1.0471503800803527e-05, "loss": 0.583, "step": 31628 }, { "epoch": 0.9716155193069763, "grad_norm": 0.3810208737850189, "learning_rate": 1.047102106041653e-05, "loss": 0.5048, "step": 31629 }, { "epoch": 0.9716462384419254, "grad_norm": 0.35892030596733093, "learning_rate": 1.0470538318929438e-05, "loss": 0.5009, "step": 31630 }, { "epoch": 0.9716769575768747, "grad_norm": 0.3408537805080414, "learning_rate": 1.0470055576343366e-05, "loss": 0.517, "step": 31631 }, { "epoch": 0.9717076767118238, "grad_norm": 0.3496520519256592, "learning_rate": 1.0469572832659448e-05, "loss": 0.4703, "step": 31632 }, { "epoch": 0.9717383958467729, "grad_norm": 0.36936572194099426, "learning_rate": 1.0469090087878806e-05, "loss": 0.5443, "step": 31633 }, { "epoch": 0.9717691149817221, "grad_norm": 0.37108927965164185, "learning_rate": 1.0468607342002577e-05, "loss": 0.5556, "step": 31634 }, { "epoch": 0.9717998341166713, "grad_norm": 0.39539986848831177, "learning_rate": 1.0468124595031878e-05, "loss": 0.5166, "step": 31635 }, { "epoch": 0.9718305532516205, "grad_norm": 0.3903703987598419, "learning_rate": 1.0467641846967844e-05, "loss": 0.5101, "step": 31636 }, { "epoch": 0.9718612723865696, "grad_norm": 0.4007001519203186, "learning_rate": 1.0467159097811599e-05, "loss": 0.575, "step": 31637 }, { "epoch": 0.9718919915215187, "grad_norm": 0.48600417375564575, "learning_rate": 1.0466676347564273e-05, "loss": 0.5884, "step": 31638 }, { "epoch": 0.971922710656468, "grad_norm": 0.3581136465072632, "learning_rate": 1.0466193596226992e-05, "loss": 0.51, "step": 31639 }, { "epoch": 0.9719534297914171, "grad_norm": 0.3584838807582855, "learning_rate": 1.046571084380088e-05, "loss": 0.5212, "step": 31640 }, { "epoch": 0.9719841489263662, "grad_norm": 0.39481422305107117, "learning_rate": 1.0465228090287072e-05, "loss": 0.5961, "step": 31641 }, { "epoch": 0.9720148680613154, "grad_norm": 0.3776954114437103, "learning_rate": 1.0464745335686685e-05, "loss": 0.5838, "step": 31642 }, { "epoch": 0.9720455871962645, "grad_norm": 0.3926045894622803, "learning_rate": 1.0464262580000859e-05, "loss": 0.5238, "step": 31643 }, { "epoch": 0.9720763063312137, "grad_norm": 0.36088451743125916, "learning_rate": 1.0463779823230714e-05, "loss": 0.5852, "step": 31644 }, { "epoch": 0.9721070254661629, "grad_norm": 0.34080055356025696, "learning_rate": 1.0463297065377378e-05, "loss": 0.5782, "step": 31645 }, { "epoch": 0.972137744601112, "grad_norm": 0.3612198829650879, "learning_rate": 1.0462814306441978e-05, "loss": 0.6262, "step": 31646 }, { "epoch": 0.9721684637360611, "grad_norm": 0.41339752078056335, "learning_rate": 1.0462331546425641e-05, "loss": 0.5588, "step": 31647 }, { "epoch": 0.9721991828710104, "grad_norm": 0.4051739573478699, "learning_rate": 1.0461848785329502e-05, "loss": 0.5651, "step": 31648 }, { "epoch": 0.9722299020059595, "grad_norm": 0.37674662470817566, "learning_rate": 1.0461366023154677e-05, "loss": 0.4878, "step": 31649 }, { "epoch": 0.9722606211409087, "grad_norm": 0.378358393907547, "learning_rate": 1.0460883259902306e-05, "loss": 0.4558, "step": 31650 }, { "epoch": 0.9722913402758578, "grad_norm": 0.3620191216468811, "learning_rate": 1.0460400495573504e-05, "loss": 0.4766, "step": 31651 }, { "epoch": 0.972322059410807, "grad_norm": 0.4066103398799896, "learning_rate": 1.0459917730169408e-05, "loss": 0.5549, "step": 31652 }, { "epoch": 0.9723527785457562, "grad_norm": 0.39105284214019775, "learning_rate": 1.0459434963691143e-05, "loss": 0.6346, "step": 31653 }, { "epoch": 0.9723834976807053, "grad_norm": 0.4198315143585205, "learning_rate": 1.0458952196139833e-05, "loss": 0.604, "step": 31654 }, { "epoch": 0.9724142168156544, "grad_norm": 0.38698410987854004, "learning_rate": 1.0458469427516609e-05, "loss": 0.4075, "step": 31655 }, { "epoch": 0.9724449359506037, "grad_norm": 0.3670664131641388, "learning_rate": 1.04579866578226e-05, "loss": 0.473, "step": 31656 }, { "epoch": 0.9724756550855528, "grad_norm": 0.664344310760498, "learning_rate": 1.0457503887058928e-05, "loss": 0.4964, "step": 31657 }, { "epoch": 0.9725063742205019, "grad_norm": 0.33645278215408325, "learning_rate": 1.0457021115226722e-05, "loss": 0.5785, "step": 31658 }, { "epoch": 0.9725370933554511, "grad_norm": 0.35189223289489746, "learning_rate": 1.0456538342327117e-05, "loss": 0.4989, "step": 31659 }, { "epoch": 0.9725678124904003, "grad_norm": 0.3407069444656372, "learning_rate": 1.045605556836123e-05, "loss": 0.4471, "step": 31660 }, { "epoch": 0.9725985316253495, "grad_norm": 0.3706635534763336, "learning_rate": 1.0455572793330195e-05, "loss": 0.5072, "step": 31661 }, { "epoch": 0.9726292507602986, "grad_norm": 0.34976404905319214, "learning_rate": 1.0455090017235137e-05, "loss": 0.5305, "step": 31662 }, { "epoch": 0.9726599698952477, "grad_norm": 0.3936966061592102, "learning_rate": 1.0454607240077187e-05, "loss": 0.5266, "step": 31663 }, { "epoch": 0.972690689030197, "grad_norm": 0.3550461530685425, "learning_rate": 1.0454124461857467e-05, "loss": 0.5152, "step": 31664 }, { "epoch": 0.9727214081651461, "grad_norm": 0.36760470271110535, "learning_rate": 1.0453641682577108e-05, "loss": 0.5461, "step": 31665 }, { "epoch": 0.9727521273000952, "grad_norm": 0.37979018688201904, "learning_rate": 1.0453158902237237e-05, "loss": 0.5931, "step": 31666 }, { "epoch": 0.9727828464350444, "grad_norm": 0.3222266137599945, "learning_rate": 1.0452676120838982e-05, "loss": 0.581, "step": 31667 }, { "epoch": 0.9728135655699935, "grad_norm": 0.36130547523498535, "learning_rate": 1.0452193338383471e-05, "loss": 0.577, "step": 31668 }, { "epoch": 0.9728442847049427, "grad_norm": 0.4214039444923401, "learning_rate": 1.0451710554871832e-05, "loss": 0.5517, "step": 31669 }, { "epoch": 0.9728750038398919, "grad_norm": 0.36164847016334534, "learning_rate": 1.045122777030519e-05, "loss": 0.578, "step": 31670 }, { "epoch": 0.972905722974841, "grad_norm": 0.4615993797779083, "learning_rate": 1.0450744984684675e-05, "loss": 0.5701, "step": 31671 }, { "epoch": 0.9729364421097901, "grad_norm": 0.37631139159202576, "learning_rate": 1.0450262198011414e-05, "loss": 0.5881, "step": 31672 }, { "epoch": 0.9729671612447394, "grad_norm": 0.34962451457977295, "learning_rate": 1.0449779410286531e-05, "loss": 0.4971, "step": 31673 }, { "epoch": 0.9729978803796885, "grad_norm": 0.3916904926300049, "learning_rate": 1.044929662151116e-05, "loss": 0.505, "step": 31674 }, { "epoch": 0.9730285995146377, "grad_norm": 0.34764787554740906, "learning_rate": 1.0448813831686426e-05, "loss": 0.4994, "step": 31675 }, { "epoch": 0.9730593186495868, "grad_norm": 0.37518173456192017, "learning_rate": 1.0448331040813454e-05, "loss": 0.5452, "step": 31676 }, { "epoch": 0.973090037784536, "grad_norm": 0.3292928636074066, "learning_rate": 1.0447848248893376e-05, "loss": 0.5279, "step": 31677 }, { "epoch": 0.9731207569194852, "grad_norm": 0.3505244255065918, "learning_rate": 1.0447365455927312e-05, "loss": 0.4692, "step": 31678 }, { "epoch": 0.9731514760544343, "grad_norm": 0.3917117118835449, "learning_rate": 1.04468826619164e-05, "loss": 0.5611, "step": 31679 }, { "epoch": 0.9731821951893834, "grad_norm": 0.3418664038181305, "learning_rate": 1.0446399866861758e-05, "loss": 0.5036, "step": 31680 }, { "epoch": 0.9732129143243327, "grad_norm": 0.3909062445163727, "learning_rate": 1.0445917070764521e-05, "loss": 0.4917, "step": 31681 }, { "epoch": 0.9732436334592818, "grad_norm": 0.39646798372268677, "learning_rate": 1.0445434273625813e-05, "loss": 0.5429, "step": 31682 }, { "epoch": 0.9732743525942309, "grad_norm": 0.39745762944221497, "learning_rate": 1.044495147544676e-05, "loss": 0.6324, "step": 31683 }, { "epoch": 0.9733050717291801, "grad_norm": 0.36094731092453003, "learning_rate": 1.0444468676228495e-05, "loss": 0.5216, "step": 31684 }, { "epoch": 0.9733357908641292, "grad_norm": 0.37656477093696594, "learning_rate": 1.0443985875972137e-05, "loss": 0.5422, "step": 31685 }, { "epoch": 0.9733665099990785, "grad_norm": 0.42133378982543945, "learning_rate": 1.0443503074678825e-05, "loss": 0.5523, "step": 31686 }, { "epoch": 0.9733972291340276, "grad_norm": 0.39036816358566284, "learning_rate": 1.0443020272349678e-05, "loss": 0.5555, "step": 31687 }, { "epoch": 0.9734279482689767, "grad_norm": 0.3580911159515381, "learning_rate": 1.0442537468985827e-05, "loss": 0.535, "step": 31688 }, { "epoch": 0.973458667403926, "grad_norm": 0.40291744470596313, "learning_rate": 1.0442054664588397e-05, "loss": 0.5134, "step": 31689 }, { "epoch": 0.9734893865388751, "grad_norm": 0.3464770019054413, "learning_rate": 1.0441571859158521e-05, "loss": 0.5608, "step": 31690 }, { "epoch": 0.9735201056738242, "grad_norm": 0.3484649956226349, "learning_rate": 1.044108905269732e-05, "loss": 0.5251, "step": 31691 }, { "epoch": 0.9735508248087734, "grad_norm": 0.394458532333374, "learning_rate": 1.0440606245205926e-05, "loss": 0.5567, "step": 31692 }, { "epoch": 0.9735815439437225, "grad_norm": 0.40247583389282227, "learning_rate": 1.0440123436685467e-05, "loss": 0.5668, "step": 31693 }, { "epoch": 0.9736122630786717, "grad_norm": 0.39186885952949524, "learning_rate": 1.0439640627137067e-05, "loss": 0.5068, "step": 31694 }, { "epoch": 0.9736429822136209, "grad_norm": 0.3667524755001068, "learning_rate": 1.0439157816561856e-05, "loss": 0.6079, "step": 31695 }, { "epoch": 0.97367370134857, "grad_norm": 2.2328598499298096, "learning_rate": 1.0438675004960959e-05, "loss": 0.5182, "step": 31696 }, { "epoch": 0.9737044204835192, "grad_norm": 0.4311334788799286, "learning_rate": 1.043819219233551e-05, "loss": 0.5637, "step": 31697 }, { "epoch": 0.9737351396184684, "grad_norm": 0.3893047273159027, "learning_rate": 1.043770937868663e-05, "loss": 0.5601, "step": 31698 }, { "epoch": 0.9737658587534175, "grad_norm": 0.4187517464160919, "learning_rate": 1.0437226564015448e-05, "loss": 0.6214, "step": 31699 }, { "epoch": 0.9737965778883667, "grad_norm": 0.5849623680114746, "learning_rate": 1.0436743748323094e-05, "loss": 0.5997, "step": 31700 }, { "epoch": 0.9738272970233158, "grad_norm": 0.36580339074134827, "learning_rate": 1.0436260931610694e-05, "loss": 0.5466, "step": 31701 }, { "epoch": 0.973858016158265, "grad_norm": 0.3744162917137146, "learning_rate": 1.0435778113879378e-05, "loss": 0.5806, "step": 31702 }, { "epoch": 0.9738887352932142, "grad_norm": 0.3684847950935364, "learning_rate": 1.043529529513027e-05, "loss": 0.5683, "step": 31703 }, { "epoch": 0.9739194544281633, "grad_norm": 0.3652074933052063, "learning_rate": 1.04348124753645e-05, "loss": 0.5142, "step": 31704 }, { "epoch": 0.9739501735631124, "grad_norm": 0.3457178473472595, "learning_rate": 1.0434329654583194e-05, "loss": 0.5297, "step": 31705 }, { "epoch": 0.9739808926980617, "grad_norm": 0.3881971538066864, "learning_rate": 1.0433846832787484e-05, "loss": 0.5788, "step": 31706 }, { "epoch": 0.9740116118330108, "grad_norm": 0.6442381143569946, "learning_rate": 1.0433364009978491e-05, "loss": 0.5637, "step": 31707 }, { "epoch": 0.9740423309679599, "grad_norm": 0.3546704053878784, "learning_rate": 1.0432881186157348e-05, "loss": 0.5168, "step": 31708 }, { "epoch": 0.9740730501029091, "grad_norm": 0.3925181031227112, "learning_rate": 1.0432398361325182e-05, "loss": 0.5327, "step": 31709 }, { "epoch": 0.9741037692378582, "grad_norm": 0.38471102714538574, "learning_rate": 1.0431915535483116e-05, "loss": 0.5677, "step": 31710 }, { "epoch": 0.9741344883728075, "grad_norm": 0.389973908662796, "learning_rate": 1.0431432708632286e-05, "loss": 0.51, "step": 31711 }, { "epoch": 0.9741652075077566, "grad_norm": 0.35187774896621704, "learning_rate": 1.0430949880773808e-05, "loss": 0.5094, "step": 31712 }, { "epoch": 0.9741959266427057, "grad_norm": 0.36420732736587524, "learning_rate": 1.0430467051908821e-05, "loss": 0.5367, "step": 31713 }, { "epoch": 0.9742266457776549, "grad_norm": 0.3667846918106079, "learning_rate": 1.0429984222038447e-05, "loss": 0.4926, "step": 31714 }, { "epoch": 0.9742573649126041, "grad_norm": 0.3470425307750702, "learning_rate": 1.0429501391163817e-05, "loss": 0.5527, "step": 31715 }, { "epoch": 0.9742880840475532, "grad_norm": 0.3793902099132538, "learning_rate": 1.0429018559286052e-05, "loss": 0.5463, "step": 31716 }, { "epoch": 0.9743188031825024, "grad_norm": 0.4066644310951233, "learning_rate": 1.0428535726406288e-05, "loss": 0.5913, "step": 31717 }, { "epoch": 0.9743495223174515, "grad_norm": 0.386191725730896, "learning_rate": 1.0428052892525647e-05, "loss": 0.5781, "step": 31718 }, { "epoch": 0.9743802414524007, "grad_norm": 0.3767086863517761, "learning_rate": 1.0427570057645257e-05, "loss": 0.5873, "step": 31719 }, { "epoch": 0.9744109605873499, "grad_norm": 0.4875081777572632, "learning_rate": 1.042708722176625e-05, "loss": 0.61, "step": 31720 }, { "epoch": 0.974441679722299, "grad_norm": 0.3366873562335968, "learning_rate": 1.042660438488975e-05, "loss": 0.4985, "step": 31721 }, { "epoch": 0.9744723988572482, "grad_norm": 0.3486553728580475, "learning_rate": 1.0426121547016887e-05, "loss": 0.5688, "step": 31722 }, { "epoch": 0.9745031179921974, "grad_norm": 0.37633031606674194, "learning_rate": 1.0425638708148786e-05, "loss": 0.5882, "step": 31723 }, { "epoch": 0.9745338371271465, "grad_norm": 0.3910212218761444, "learning_rate": 1.0425155868286578e-05, "loss": 0.5548, "step": 31724 }, { "epoch": 0.9745645562620957, "grad_norm": 0.3756842613220215, "learning_rate": 1.0424673027431384e-05, "loss": 0.5761, "step": 31725 }, { "epoch": 0.9745952753970448, "grad_norm": 0.3567489683628082, "learning_rate": 1.0424190185584341e-05, "loss": 0.5363, "step": 31726 }, { "epoch": 0.9746259945319939, "grad_norm": 0.40549495816230774, "learning_rate": 1.0423707342746574e-05, "loss": 0.6353, "step": 31727 }, { "epoch": 0.9746567136669432, "grad_norm": 0.39330926537513733, "learning_rate": 1.0423224498919204e-05, "loss": 0.6192, "step": 31728 }, { "epoch": 0.9746874328018923, "grad_norm": 0.7695876955986023, "learning_rate": 1.0422741654103365e-05, "loss": 0.5153, "step": 31729 }, { "epoch": 0.9747181519368414, "grad_norm": 0.3837186098098755, "learning_rate": 1.0422258808300186e-05, "loss": 0.537, "step": 31730 }, { "epoch": 0.9747488710717906, "grad_norm": 0.35759156942367554, "learning_rate": 1.0421775961510789e-05, "loss": 0.3948, "step": 31731 }, { "epoch": 0.9747795902067398, "grad_norm": 0.36448514461517334, "learning_rate": 1.0421293113736303e-05, "loss": 0.4855, "step": 31732 }, { "epoch": 0.9748103093416889, "grad_norm": 0.42020484805107117, "learning_rate": 1.0420810264977863e-05, "loss": 0.5701, "step": 31733 }, { "epoch": 0.9748410284766381, "grad_norm": 0.4496251046657562, "learning_rate": 1.0420327415236586e-05, "loss": 0.5213, "step": 31734 }, { "epoch": 0.9748717476115872, "grad_norm": 0.45878395438194275, "learning_rate": 1.041984456451361e-05, "loss": 0.5214, "step": 31735 }, { "epoch": 0.9749024667465365, "grad_norm": 0.373402863740921, "learning_rate": 1.0419361712810054e-05, "loss": 0.5099, "step": 31736 }, { "epoch": 0.9749331858814856, "grad_norm": 0.4473849833011627, "learning_rate": 1.0418878860127051e-05, "loss": 0.5218, "step": 31737 }, { "epoch": 0.9749639050164347, "grad_norm": 0.40925267338752747, "learning_rate": 1.0418396006465725e-05, "loss": 0.5567, "step": 31738 }, { "epoch": 0.9749946241513839, "grad_norm": 0.3737342953681946, "learning_rate": 1.0417913151827208e-05, "loss": 0.5364, "step": 31739 }, { "epoch": 0.9750253432863331, "grad_norm": 0.3575590252876282, "learning_rate": 1.0417430296212626e-05, "loss": 0.4604, "step": 31740 }, { "epoch": 0.9750560624212822, "grad_norm": 0.3412824869155884, "learning_rate": 1.0416947439623104e-05, "loss": 0.5577, "step": 31741 }, { "epoch": 0.9750867815562314, "grad_norm": 0.3810481131076813, "learning_rate": 1.0416464582059775e-05, "loss": 0.5402, "step": 31742 }, { "epoch": 0.9751175006911805, "grad_norm": 0.40847325325012207, "learning_rate": 1.0415981723523761e-05, "loss": 0.5092, "step": 31743 }, { "epoch": 0.9751482198261296, "grad_norm": 0.35818222165107727, "learning_rate": 1.0415498864016195e-05, "loss": 0.5439, "step": 31744 }, { "epoch": 0.9751789389610789, "grad_norm": 0.48065656423568726, "learning_rate": 1.0415016003538204e-05, "loss": 0.4885, "step": 31745 }, { "epoch": 0.975209658096028, "grad_norm": 0.3533862829208374, "learning_rate": 1.041453314209091e-05, "loss": 0.5185, "step": 31746 }, { "epoch": 0.9752403772309772, "grad_norm": 0.38209784030914307, "learning_rate": 1.0414050279675447e-05, "loss": 0.5107, "step": 31747 }, { "epoch": 0.9752710963659263, "grad_norm": 0.42415711283683777, "learning_rate": 1.041356741629294e-05, "loss": 0.5655, "step": 31748 }, { "epoch": 0.9753018155008755, "grad_norm": 0.3698064982891083, "learning_rate": 1.0413084551944519e-05, "loss": 0.5449, "step": 31749 }, { "epoch": 0.9753325346358247, "grad_norm": 0.43311718106269836, "learning_rate": 1.0412601686631308e-05, "loss": 0.5008, "step": 31750 }, { "epoch": 0.9753632537707738, "grad_norm": 0.39226260781288147, "learning_rate": 1.0412118820354439e-05, "loss": 0.5675, "step": 31751 }, { "epoch": 0.9753939729057229, "grad_norm": 0.5346664786338806, "learning_rate": 1.0411635953115036e-05, "loss": 0.6131, "step": 31752 }, { "epoch": 0.9754246920406722, "grad_norm": 0.4796568751335144, "learning_rate": 1.0411153084914229e-05, "loss": 0.462, "step": 31753 }, { "epoch": 0.9754554111756213, "grad_norm": 0.35523420572280884, "learning_rate": 1.0410670215753145e-05, "loss": 0.5367, "step": 31754 }, { "epoch": 0.9754861303105704, "grad_norm": 0.37570008635520935, "learning_rate": 1.0410187345632914e-05, "loss": 0.5205, "step": 31755 }, { "epoch": 0.9755168494455196, "grad_norm": 0.36763226985931396, "learning_rate": 1.0409704474554658e-05, "loss": 0.5508, "step": 31756 }, { "epoch": 0.9755475685804688, "grad_norm": 0.3975234031677246, "learning_rate": 1.040922160251951e-05, "loss": 0.5523, "step": 31757 }, { "epoch": 0.9755782877154179, "grad_norm": 0.3496290147304535, "learning_rate": 1.0408738729528598e-05, "loss": 0.4697, "step": 31758 }, { "epoch": 0.9756090068503671, "grad_norm": 0.3768047094345093, "learning_rate": 1.0408255855583047e-05, "loss": 0.5602, "step": 31759 }, { "epoch": 0.9756397259853162, "grad_norm": 0.3603276312351227, "learning_rate": 1.0407772980683986e-05, "loss": 0.5286, "step": 31760 }, { "epoch": 0.9756704451202655, "grad_norm": 0.37816518545150757, "learning_rate": 1.0407290104832544e-05, "loss": 0.5104, "step": 31761 }, { "epoch": 0.9757011642552146, "grad_norm": 0.41026052832603455, "learning_rate": 1.0406807228029846e-05, "loss": 0.4927, "step": 31762 }, { "epoch": 0.9757318833901637, "grad_norm": 0.38986316323280334, "learning_rate": 1.0406324350277022e-05, "loss": 0.6257, "step": 31763 }, { "epoch": 0.9757626025251129, "grad_norm": 0.3849495053291321, "learning_rate": 1.0405841471575197e-05, "loss": 0.575, "step": 31764 }, { "epoch": 0.975793321660062, "grad_norm": 0.40273213386535645, "learning_rate": 1.0405358591925504e-05, "loss": 0.5342, "step": 31765 }, { "epoch": 0.9758240407950112, "grad_norm": 0.37506210803985596, "learning_rate": 1.0404875711329066e-05, "loss": 0.6326, "step": 31766 }, { "epoch": 0.9758547599299604, "grad_norm": 0.3436989486217499, "learning_rate": 1.0404392829787016e-05, "loss": 0.5642, "step": 31767 }, { "epoch": 0.9758854790649095, "grad_norm": 0.3619338572025299, "learning_rate": 1.0403909947300472e-05, "loss": 0.5895, "step": 31768 }, { "epoch": 0.9759161981998586, "grad_norm": 0.33303943276405334, "learning_rate": 1.0403427063870572e-05, "loss": 0.5089, "step": 31769 }, { "epoch": 0.9759469173348079, "grad_norm": 0.36806824803352356, "learning_rate": 1.040294417949844e-05, "loss": 0.513, "step": 31770 }, { "epoch": 0.975977636469757, "grad_norm": 0.39438775181770325, "learning_rate": 1.0402461294185204e-05, "loss": 0.5132, "step": 31771 }, { "epoch": 0.9760083556047062, "grad_norm": 0.45494526624679565, "learning_rate": 1.0401978407931992e-05, "loss": 0.5594, "step": 31772 }, { "epoch": 0.9760390747396553, "grad_norm": 0.3860984146595001, "learning_rate": 1.0401495520739928e-05, "loss": 0.5502, "step": 31773 }, { "epoch": 0.9760697938746045, "grad_norm": 0.3698650598526001, "learning_rate": 1.0401012632610143e-05, "loss": 0.5233, "step": 31774 }, { "epoch": 0.9761005130095537, "grad_norm": 0.35371729731559753, "learning_rate": 1.0400529743543765e-05, "loss": 0.5801, "step": 31775 }, { "epoch": 0.9761312321445028, "grad_norm": 0.34774452447891235, "learning_rate": 1.0400046853541927e-05, "loss": 0.4963, "step": 31776 }, { "epoch": 0.9761619512794519, "grad_norm": 0.40209072828292847, "learning_rate": 1.0399563962605748e-05, "loss": 0.5427, "step": 31777 }, { "epoch": 0.9761926704144012, "grad_norm": 0.49042102694511414, "learning_rate": 1.0399081070736362e-05, "loss": 0.5437, "step": 31778 }, { "epoch": 0.9762233895493503, "grad_norm": 0.3823484778404236, "learning_rate": 1.0398598177934892e-05, "loss": 0.524, "step": 31779 }, { "epoch": 0.9762541086842994, "grad_norm": 0.49533891677856445, "learning_rate": 1.0398115284202465e-05, "loss": 0.582, "step": 31780 }, { "epoch": 0.9762848278192486, "grad_norm": 0.3466375172138214, "learning_rate": 1.0397632389540218e-05, "loss": 0.5622, "step": 31781 }, { "epoch": 0.9763155469541978, "grad_norm": 0.3666720390319824, "learning_rate": 1.039714949394927e-05, "loss": 0.5269, "step": 31782 }, { "epoch": 0.9763462660891469, "grad_norm": 0.38320642709732056, "learning_rate": 1.039666659743075e-05, "loss": 0.5887, "step": 31783 }, { "epoch": 0.9763769852240961, "grad_norm": 0.45933330059051514, "learning_rate": 1.039618369998579e-05, "loss": 0.5229, "step": 31784 }, { "epoch": 0.9764077043590452, "grad_norm": 0.3685220181941986, "learning_rate": 1.0395700801615516e-05, "loss": 0.5767, "step": 31785 }, { "epoch": 0.9764384234939945, "grad_norm": 0.400175541639328, "learning_rate": 1.0395217902321053e-05, "loss": 0.5102, "step": 31786 }, { "epoch": 0.9764691426289436, "grad_norm": 0.39334940910339355, "learning_rate": 1.0394735002103533e-05, "loss": 0.5452, "step": 31787 }, { "epoch": 0.9764998617638927, "grad_norm": 0.49555397033691406, "learning_rate": 1.0394252100964078e-05, "loss": 0.6094, "step": 31788 }, { "epoch": 0.9765305808988419, "grad_norm": 0.344706654548645, "learning_rate": 1.0393769198903823e-05, "loss": 0.4994, "step": 31789 }, { "epoch": 0.976561300033791, "grad_norm": 0.37986457347869873, "learning_rate": 1.0393286295923892e-05, "loss": 0.6368, "step": 31790 }, { "epoch": 0.9765920191687402, "grad_norm": 0.38612863421440125, "learning_rate": 1.0392803392025413e-05, "loss": 0.4875, "step": 31791 }, { "epoch": 0.9766227383036894, "grad_norm": 0.35136961936950684, "learning_rate": 1.0392320487209513e-05, "loss": 0.5404, "step": 31792 }, { "epoch": 0.9766534574386385, "grad_norm": 0.34633854031562805, "learning_rate": 1.0391837581477323e-05, "loss": 0.598, "step": 31793 }, { "epoch": 0.9766841765735876, "grad_norm": 0.39858752489089966, "learning_rate": 1.0391354674829968e-05, "loss": 0.5489, "step": 31794 }, { "epoch": 0.9767148957085369, "grad_norm": 0.4272110164165497, "learning_rate": 1.0390871767268576e-05, "loss": 0.564, "step": 31795 }, { "epoch": 0.976745614843486, "grad_norm": 0.44686657190322876, "learning_rate": 1.0390388858794278e-05, "loss": 0.571, "step": 31796 }, { "epoch": 0.9767763339784352, "grad_norm": 0.36476486921310425, "learning_rate": 1.03899059494082e-05, "loss": 0.5838, "step": 31797 }, { "epoch": 0.9768070531133843, "grad_norm": 0.3562127351760864, "learning_rate": 1.0389423039111466e-05, "loss": 0.5581, "step": 31798 }, { "epoch": 0.9768377722483335, "grad_norm": 0.39606910943984985, "learning_rate": 1.0388940127905212e-05, "loss": 0.5395, "step": 31799 }, { "epoch": 0.9768684913832827, "grad_norm": 0.37542226910591125, "learning_rate": 1.0388457215790555e-05, "loss": 0.5523, "step": 31800 }, { "epoch": 0.9768992105182318, "grad_norm": 0.5666344165802002, "learning_rate": 1.0387974302768635e-05, "loss": 0.5352, "step": 31801 }, { "epoch": 0.9769299296531809, "grad_norm": 0.3550601303577423, "learning_rate": 1.038749138884057e-05, "loss": 0.4912, "step": 31802 }, { "epoch": 0.9769606487881302, "grad_norm": 0.39865022897720337, "learning_rate": 1.0387008474007494e-05, "loss": 0.5264, "step": 31803 }, { "epoch": 0.9769913679230793, "grad_norm": 0.3354753851890564, "learning_rate": 1.038652555827053e-05, "loss": 0.4761, "step": 31804 }, { "epoch": 0.9770220870580284, "grad_norm": 0.3739648163318634, "learning_rate": 1.0386042641630812e-05, "loss": 0.568, "step": 31805 }, { "epoch": 0.9770528061929776, "grad_norm": 0.49911996722221375, "learning_rate": 1.0385559724089463e-05, "loss": 0.5441, "step": 31806 }, { "epoch": 0.9770835253279267, "grad_norm": 0.4647774398326874, "learning_rate": 1.0385076805647611e-05, "loss": 0.4541, "step": 31807 }, { "epoch": 0.977114244462876, "grad_norm": 0.39837074279785156, "learning_rate": 1.0384593886306388e-05, "loss": 0.5621, "step": 31808 }, { "epoch": 0.9771449635978251, "grad_norm": 0.3670060634613037, "learning_rate": 1.0384110966066916e-05, "loss": 0.4954, "step": 31809 }, { "epoch": 0.9771756827327742, "grad_norm": 0.4054301679134369, "learning_rate": 1.0383628044930329e-05, "loss": 0.5312, "step": 31810 }, { "epoch": 0.9772064018677235, "grad_norm": 0.3611025810241699, "learning_rate": 1.0383145122897747e-05, "loss": 0.5586, "step": 31811 }, { "epoch": 0.9772371210026726, "grad_norm": 0.3635924160480499, "learning_rate": 1.0382662199970309e-05, "loss": 0.4587, "step": 31812 }, { "epoch": 0.9772678401376217, "grad_norm": 0.4039316773414612, "learning_rate": 1.0382179276149134e-05, "loss": 0.5011, "step": 31813 }, { "epoch": 0.9772985592725709, "grad_norm": 0.3753242790699005, "learning_rate": 1.038169635143535e-05, "loss": 0.5692, "step": 31814 }, { "epoch": 0.97732927840752, "grad_norm": 0.34865444898605347, "learning_rate": 1.0381213425830092e-05, "loss": 0.5438, "step": 31815 }, { "epoch": 0.9773599975424692, "grad_norm": 0.36565086245536804, "learning_rate": 1.0380730499334482e-05, "loss": 0.5365, "step": 31816 }, { "epoch": 0.9773907166774184, "grad_norm": 0.3835081458091736, "learning_rate": 1.038024757194965e-05, "loss": 0.5625, "step": 31817 }, { "epoch": 0.9774214358123675, "grad_norm": 0.4596021771430969, "learning_rate": 1.037976464367672e-05, "loss": 0.5298, "step": 31818 }, { "epoch": 0.9774521549473166, "grad_norm": 0.3540317714214325, "learning_rate": 1.0379281714516825e-05, "loss": 0.5216, "step": 31819 }, { "epoch": 0.9774828740822659, "grad_norm": 0.3067481815814972, "learning_rate": 1.0378798784471091e-05, "loss": 0.5326, "step": 31820 }, { "epoch": 0.977513593217215, "grad_norm": 0.38077476620674133, "learning_rate": 1.0378315853540648e-05, "loss": 0.5737, "step": 31821 }, { "epoch": 0.9775443123521642, "grad_norm": 0.3966257870197296, "learning_rate": 1.0377832921726618e-05, "loss": 0.6019, "step": 31822 }, { "epoch": 0.9775750314871133, "grad_norm": 0.5639654994010925, "learning_rate": 1.0377349989030137e-05, "loss": 0.5193, "step": 31823 }, { "epoch": 0.9776057506220625, "grad_norm": 0.33979514241218567, "learning_rate": 1.0376867055452327e-05, "loss": 0.5075, "step": 31824 }, { "epoch": 0.9776364697570117, "grad_norm": 0.8125075697898865, "learning_rate": 1.0376384120994315e-05, "loss": 0.5751, "step": 31825 }, { "epoch": 0.9776671888919608, "grad_norm": 0.4882851839065552, "learning_rate": 1.0375901185657236e-05, "loss": 0.6143, "step": 31826 }, { "epoch": 0.9776979080269099, "grad_norm": 0.3492678105831146, "learning_rate": 1.037541824944221e-05, "loss": 0.522, "step": 31827 }, { "epoch": 0.9777286271618592, "grad_norm": 0.5231125950813293, "learning_rate": 1.0374935312350372e-05, "loss": 0.5082, "step": 31828 }, { "epoch": 0.9777593462968083, "grad_norm": 0.4409152865409851, "learning_rate": 1.037445237438284e-05, "loss": 0.6011, "step": 31829 }, { "epoch": 0.9777900654317574, "grad_norm": 0.36328673362731934, "learning_rate": 1.0373969435540755e-05, "loss": 0.5614, "step": 31830 }, { "epoch": 0.9778207845667066, "grad_norm": 0.41297417879104614, "learning_rate": 1.0373486495825235e-05, "loss": 0.5598, "step": 31831 }, { "epoch": 0.9778515037016557, "grad_norm": 0.3741563558578491, "learning_rate": 1.0373003555237414e-05, "loss": 0.5036, "step": 31832 }, { "epoch": 0.977882222836605, "grad_norm": 0.34850892424583435, "learning_rate": 1.0372520613778414e-05, "loss": 0.5655, "step": 31833 }, { "epoch": 0.9779129419715541, "grad_norm": 0.6769770383834839, "learning_rate": 1.0372037671449366e-05, "loss": 0.6138, "step": 31834 }, { "epoch": 0.9779436611065032, "grad_norm": 0.44338032603263855, "learning_rate": 1.0371554728251401e-05, "loss": 0.4858, "step": 31835 }, { "epoch": 0.9779743802414524, "grad_norm": 0.3721037805080414, "learning_rate": 1.0371071784185642e-05, "loss": 0.5192, "step": 31836 }, { "epoch": 0.9780050993764016, "grad_norm": 0.4055984616279602, "learning_rate": 1.037058883925322e-05, "loss": 0.5113, "step": 31837 }, { "epoch": 0.9780358185113507, "grad_norm": 0.33181315660476685, "learning_rate": 1.037010589345526e-05, "loss": 0.4828, "step": 31838 }, { "epoch": 0.9780665376462999, "grad_norm": 0.3731290400028229, "learning_rate": 1.0369622946792895e-05, "loss": 0.4927, "step": 31839 }, { "epoch": 0.978097256781249, "grad_norm": 0.3782953917980194, "learning_rate": 1.0369139999267245e-05, "loss": 0.5773, "step": 31840 }, { "epoch": 0.9781279759161982, "grad_norm": 0.37980955839157104, "learning_rate": 1.0368657050879447e-05, "loss": 0.6255, "step": 31841 }, { "epoch": 0.9781586950511474, "grad_norm": 0.3949737846851349, "learning_rate": 1.0368174101630625e-05, "loss": 0.6028, "step": 31842 }, { "epoch": 0.9781894141860965, "grad_norm": 0.4198267459869385, "learning_rate": 1.0367691151521903e-05, "loss": 0.5527, "step": 31843 }, { "epoch": 0.9782201333210456, "grad_norm": 0.36444488167762756, "learning_rate": 1.0367208200554417e-05, "loss": 0.5722, "step": 31844 }, { "epoch": 0.9782508524559949, "grad_norm": 0.522026538848877, "learning_rate": 1.0366725248729287e-05, "loss": 0.5233, "step": 31845 }, { "epoch": 0.978281571590944, "grad_norm": 0.3235233426094055, "learning_rate": 1.0366242296047647e-05, "loss": 0.5257, "step": 31846 }, { "epoch": 0.9783122907258932, "grad_norm": 0.4039902985095978, "learning_rate": 1.0365759342510618e-05, "loss": 0.5523, "step": 31847 }, { "epoch": 0.9783430098608423, "grad_norm": 0.36408674716949463, "learning_rate": 1.0365276388119336e-05, "loss": 0.6072, "step": 31848 }, { "epoch": 0.9783737289957914, "grad_norm": 0.36652442812919617, "learning_rate": 1.0364793432874926e-05, "loss": 0.5448, "step": 31849 }, { "epoch": 0.9784044481307407, "grad_norm": 0.35579001903533936, "learning_rate": 1.0364310476778514e-05, "loss": 0.5637, "step": 31850 }, { "epoch": 0.9784351672656898, "grad_norm": 0.46509498357772827, "learning_rate": 1.0363827519831233e-05, "loss": 0.585, "step": 31851 }, { "epoch": 0.9784658864006389, "grad_norm": 0.4034658968448639, "learning_rate": 1.0363344562034204e-05, "loss": 0.5424, "step": 31852 }, { "epoch": 0.9784966055355881, "grad_norm": 0.3502601385116577, "learning_rate": 1.0362861603388559e-05, "loss": 0.6203, "step": 31853 }, { "epoch": 0.9785273246705373, "grad_norm": 0.40994277596473694, "learning_rate": 1.0362378643895426e-05, "loss": 0.5638, "step": 31854 }, { "epoch": 0.9785580438054864, "grad_norm": 0.3465573191642761, "learning_rate": 1.0361895683555932e-05, "loss": 0.5124, "step": 31855 }, { "epoch": 0.9785887629404356, "grad_norm": 0.35198840498924255, "learning_rate": 1.0361412722371202e-05, "loss": 0.4921, "step": 31856 }, { "epoch": 0.9786194820753847, "grad_norm": 0.3710830807685852, "learning_rate": 1.0360929760342373e-05, "loss": 0.595, "step": 31857 }, { "epoch": 0.978650201210334, "grad_norm": 0.38003620505332947, "learning_rate": 1.0360446797470565e-05, "loss": 0.5588, "step": 31858 }, { "epoch": 0.9786809203452831, "grad_norm": 0.352708101272583, "learning_rate": 1.0359963833756908e-05, "loss": 0.5666, "step": 31859 }, { "epoch": 0.9787116394802322, "grad_norm": 0.44546350836753845, "learning_rate": 1.0359480869202532e-05, "loss": 0.6706, "step": 31860 }, { "epoch": 0.9787423586151814, "grad_norm": 0.3723531663417816, "learning_rate": 1.0358997903808562e-05, "loss": 0.6182, "step": 31861 }, { "epoch": 0.9787730777501306, "grad_norm": 0.8350784778594971, "learning_rate": 1.0358514937576127e-05, "loss": 0.6341, "step": 31862 }, { "epoch": 0.9788037968850797, "grad_norm": 0.5879863500595093, "learning_rate": 1.0358031970506354e-05, "loss": 0.5348, "step": 31863 }, { "epoch": 0.9788345160200289, "grad_norm": 0.3423125445842743, "learning_rate": 1.0357549002600375e-05, "loss": 0.6145, "step": 31864 }, { "epoch": 0.978865235154978, "grad_norm": 0.3890472650527954, "learning_rate": 1.0357066033859313e-05, "loss": 0.5264, "step": 31865 }, { "epoch": 0.9788959542899272, "grad_norm": 0.36578455567359924, "learning_rate": 1.0356583064284299e-05, "loss": 0.5261, "step": 31866 }, { "epoch": 0.9789266734248764, "grad_norm": 0.46511027216911316, "learning_rate": 1.035610009387646e-05, "loss": 0.5467, "step": 31867 }, { "epoch": 0.9789573925598255, "grad_norm": 0.36492007970809937, "learning_rate": 1.0355617122636924e-05, "loss": 0.5813, "step": 31868 }, { "epoch": 0.9789881116947746, "grad_norm": 0.37562286853790283, "learning_rate": 1.0355134150566823e-05, "loss": 0.61, "step": 31869 }, { "epoch": 0.9790188308297239, "grad_norm": 0.40181756019592285, "learning_rate": 1.0354651177667277e-05, "loss": 0.4971, "step": 31870 }, { "epoch": 0.979049549964673, "grad_norm": 0.4207066595554352, "learning_rate": 1.035416820393942e-05, "loss": 0.5728, "step": 31871 }, { "epoch": 0.9790802690996222, "grad_norm": 0.34430626034736633, "learning_rate": 1.035368522938438e-05, "loss": 0.4963, "step": 31872 }, { "epoch": 0.9791109882345713, "grad_norm": 0.3439619541168213, "learning_rate": 1.0353202254003283e-05, "loss": 0.5687, "step": 31873 }, { "epoch": 0.9791417073695204, "grad_norm": 0.3822717070579529, "learning_rate": 1.0352719277797256e-05, "loss": 0.4935, "step": 31874 }, { "epoch": 0.9791724265044697, "grad_norm": 0.386073499917984, "learning_rate": 1.035223630076743e-05, "loss": 0.5209, "step": 31875 }, { "epoch": 0.9792031456394188, "grad_norm": 0.3851942718029022, "learning_rate": 1.0351753322914932e-05, "loss": 0.5298, "step": 31876 }, { "epoch": 0.9792338647743679, "grad_norm": 0.3423268496990204, "learning_rate": 1.0351270344240886e-05, "loss": 0.518, "step": 31877 }, { "epoch": 0.9792645839093171, "grad_norm": 0.45519986748695374, "learning_rate": 1.0350787364746428e-05, "loss": 0.5242, "step": 31878 }, { "epoch": 0.9792953030442663, "grad_norm": 0.34202367067337036, "learning_rate": 1.0350304384432678e-05, "loss": 0.5443, "step": 31879 }, { "epoch": 0.9793260221792154, "grad_norm": 0.32598042488098145, "learning_rate": 1.034982140330077e-05, "loss": 0.5695, "step": 31880 }, { "epoch": 0.9793567413141646, "grad_norm": 0.3484920561313629, "learning_rate": 1.0349338421351827e-05, "loss": 0.5117, "step": 31881 }, { "epoch": 0.9793874604491137, "grad_norm": 0.3661588728427887, "learning_rate": 1.0348855438586984e-05, "loss": 0.5164, "step": 31882 }, { "epoch": 0.979418179584063, "grad_norm": 0.46072494983673096, "learning_rate": 1.034837245500736e-05, "loss": 0.51, "step": 31883 }, { "epoch": 0.9794488987190121, "grad_norm": 0.3719431161880493, "learning_rate": 1.034788947061409e-05, "loss": 0.6088, "step": 31884 }, { "epoch": 0.9794796178539612, "grad_norm": 0.357313871383667, "learning_rate": 1.03474064854083e-05, "loss": 0.5345, "step": 31885 }, { "epoch": 0.9795103369889104, "grad_norm": 0.35182565450668335, "learning_rate": 1.0346923499391117e-05, "loss": 0.4839, "step": 31886 }, { "epoch": 0.9795410561238596, "grad_norm": 0.36317554116249084, "learning_rate": 1.0346440512563671e-05, "loss": 0.5109, "step": 31887 }, { "epoch": 0.9795717752588087, "grad_norm": 0.3221729099750519, "learning_rate": 1.0345957524927088e-05, "loss": 0.4168, "step": 31888 }, { "epoch": 0.9796024943937579, "grad_norm": 0.40131086111068726, "learning_rate": 1.0345474536482498e-05, "loss": 0.5459, "step": 31889 }, { "epoch": 0.979633213528707, "grad_norm": 0.3480820655822754, "learning_rate": 1.0344991547231027e-05, "loss": 0.5431, "step": 31890 }, { "epoch": 0.9796639326636561, "grad_norm": 0.36039528250694275, "learning_rate": 1.0344508557173806e-05, "loss": 0.511, "step": 31891 }, { "epoch": 0.9796946517986054, "grad_norm": 0.6765239834785461, "learning_rate": 1.0344025566311959e-05, "loss": 0.5545, "step": 31892 }, { "epoch": 0.9797253709335545, "grad_norm": 0.38327866792678833, "learning_rate": 1.034354257464662e-05, "loss": 0.6169, "step": 31893 }, { "epoch": 0.9797560900685036, "grad_norm": 0.3840734660625458, "learning_rate": 1.0343059582178911e-05, "loss": 0.521, "step": 31894 }, { "epoch": 0.9797868092034528, "grad_norm": 0.4977242946624756, "learning_rate": 1.0342576588909962e-05, "loss": 0.5478, "step": 31895 }, { "epoch": 0.979817528338402, "grad_norm": 0.33487609028816223, "learning_rate": 1.0342093594840903e-05, "loss": 0.5518, "step": 31896 }, { "epoch": 0.9798482474733512, "grad_norm": 0.33996671438217163, "learning_rate": 1.0341610599972857e-05, "loss": 0.4968, "step": 31897 }, { "epoch": 0.9798789666083003, "grad_norm": 0.4277598261833191, "learning_rate": 1.034112760430696e-05, "loss": 0.5142, "step": 31898 }, { "epoch": 0.9799096857432494, "grad_norm": 0.3711586594581604, "learning_rate": 1.0340644607844332e-05, "loss": 0.517, "step": 31899 }, { "epoch": 0.9799404048781987, "grad_norm": 0.3923964500427246, "learning_rate": 1.0340161610586105e-05, "loss": 0.5706, "step": 31900 }, { "epoch": 0.9799711240131478, "grad_norm": 0.3833853602409363, "learning_rate": 1.0339678612533407e-05, "loss": 0.4999, "step": 31901 }, { "epoch": 0.9800018431480969, "grad_norm": 0.34039515256881714, "learning_rate": 1.0339195613687367e-05, "loss": 0.5408, "step": 31902 }, { "epoch": 0.9800325622830461, "grad_norm": 0.39804625511169434, "learning_rate": 1.0338712614049113e-05, "loss": 0.5483, "step": 31903 }, { "epoch": 0.9800632814179953, "grad_norm": 0.39987415075302124, "learning_rate": 1.033822961361977e-05, "loss": 0.5393, "step": 31904 }, { "epoch": 0.9800940005529444, "grad_norm": 0.4445105195045471, "learning_rate": 1.033774661240047e-05, "loss": 0.5516, "step": 31905 }, { "epoch": 0.9801247196878936, "grad_norm": 0.41202959418296814, "learning_rate": 1.0337263610392336e-05, "loss": 0.5278, "step": 31906 }, { "epoch": 0.9801554388228427, "grad_norm": 0.36656907200813293, "learning_rate": 1.0336780607596504e-05, "loss": 0.5884, "step": 31907 }, { "epoch": 0.980186157957792, "grad_norm": 0.4329138994216919, "learning_rate": 1.0336297604014092e-05, "loss": 0.5252, "step": 31908 }, { "epoch": 0.9802168770927411, "grad_norm": 0.40376463532447815, "learning_rate": 1.0335814599646238e-05, "loss": 0.5173, "step": 31909 }, { "epoch": 0.9802475962276902, "grad_norm": 0.35905522108078003, "learning_rate": 1.0335331594494062e-05, "loss": 0.5701, "step": 31910 }, { "epoch": 0.9802783153626394, "grad_norm": 0.4065244495868683, "learning_rate": 1.03348485885587e-05, "loss": 0.4622, "step": 31911 }, { "epoch": 0.9803090344975885, "grad_norm": 0.4157405495643616, "learning_rate": 1.0334365581841273e-05, "loss": 0.5253, "step": 31912 }, { "epoch": 0.9803397536325377, "grad_norm": 0.40570205450057983, "learning_rate": 1.0333882574342911e-05, "loss": 0.5911, "step": 31913 }, { "epoch": 0.9803704727674869, "grad_norm": 0.3712575137615204, "learning_rate": 1.0333399566064745e-05, "loss": 0.5294, "step": 31914 }, { "epoch": 0.980401191902436, "grad_norm": 0.33144423365592957, "learning_rate": 1.03329165570079e-05, "loss": 0.4885, "step": 31915 }, { "epoch": 0.9804319110373851, "grad_norm": 0.4524989426136017, "learning_rate": 1.0332433547173506e-05, "loss": 0.6204, "step": 31916 }, { "epoch": 0.9804626301723344, "grad_norm": 0.3597210645675659, "learning_rate": 1.0331950536562687e-05, "loss": 0.551, "step": 31917 }, { "epoch": 0.9804933493072835, "grad_norm": 0.3585183918476105, "learning_rate": 1.0331467525176577e-05, "loss": 0.571, "step": 31918 }, { "epoch": 0.9805240684422327, "grad_norm": 0.38666290044784546, "learning_rate": 1.03309845130163e-05, "loss": 0.5251, "step": 31919 }, { "epoch": 0.9805547875771818, "grad_norm": 0.42903590202331543, "learning_rate": 1.0330501500082985e-05, "loss": 0.5301, "step": 31920 }, { "epoch": 0.980585506712131, "grad_norm": 0.36231669783592224, "learning_rate": 1.0330018486377764e-05, "loss": 0.5129, "step": 31921 }, { "epoch": 0.9806162258470802, "grad_norm": 0.3449871242046356, "learning_rate": 1.032953547190176e-05, "loss": 0.5528, "step": 31922 }, { "epoch": 0.9806469449820293, "grad_norm": 0.36352860927581787, "learning_rate": 1.0329052456656103e-05, "loss": 0.5964, "step": 31923 }, { "epoch": 0.9806776641169784, "grad_norm": 0.38250285387039185, "learning_rate": 1.032856944064192e-05, "loss": 0.5339, "step": 31924 }, { "epoch": 0.9807083832519277, "grad_norm": 0.36722198128700256, "learning_rate": 1.0328086423860342e-05, "loss": 0.5549, "step": 31925 }, { "epoch": 0.9807391023868768, "grad_norm": 0.37398913502693176, "learning_rate": 1.0327603406312495e-05, "loss": 0.5687, "step": 31926 }, { "epoch": 0.9807698215218259, "grad_norm": 0.38242676854133606, "learning_rate": 1.0327120387999507e-05, "loss": 0.5073, "step": 31927 }, { "epoch": 0.9808005406567751, "grad_norm": 0.38359880447387695, "learning_rate": 1.0326637368922508e-05, "loss": 0.5715, "step": 31928 }, { "epoch": 0.9808312597917243, "grad_norm": 0.330453097820282, "learning_rate": 1.032615434908262e-05, "loss": 0.5726, "step": 31929 }, { "epoch": 0.9808619789266734, "grad_norm": 0.3590219020843506, "learning_rate": 1.032567132848098e-05, "loss": 0.5561, "step": 31930 }, { "epoch": 0.9808926980616226, "grad_norm": 0.3764004707336426, "learning_rate": 1.032518830711871e-05, "loss": 0.5522, "step": 31931 }, { "epoch": 0.9809234171965717, "grad_norm": 0.38894134759902954, "learning_rate": 1.032470528499694e-05, "loss": 0.5778, "step": 31932 }, { "epoch": 0.980954136331521, "grad_norm": 0.34807613492012024, "learning_rate": 1.0324222262116799e-05, "loss": 0.5198, "step": 31933 }, { "epoch": 0.9809848554664701, "grad_norm": 0.4533226191997528, "learning_rate": 1.0323739238479415e-05, "loss": 0.5848, "step": 31934 }, { "epoch": 0.9810155746014192, "grad_norm": 0.4015680253505707, "learning_rate": 1.0323256214085913e-05, "loss": 0.5672, "step": 31935 }, { "epoch": 0.9810462937363684, "grad_norm": 0.38349786400794983, "learning_rate": 1.0322773188937424e-05, "loss": 0.5042, "step": 31936 }, { "epoch": 0.9810770128713175, "grad_norm": 0.37603333592414856, "learning_rate": 1.0322290163035076e-05, "loss": 0.5681, "step": 31937 }, { "epoch": 0.9811077320062667, "grad_norm": 0.36798420548439026, "learning_rate": 1.0321807136379998e-05, "loss": 0.5561, "step": 31938 }, { "epoch": 0.9811384511412159, "grad_norm": 0.3314763009548187, "learning_rate": 1.0321324108973316e-05, "loss": 0.4878, "step": 31939 }, { "epoch": 0.981169170276165, "grad_norm": 0.3723415732383728, "learning_rate": 1.0320841080816157e-05, "loss": 0.6027, "step": 31940 }, { "epoch": 0.9811998894111141, "grad_norm": 0.4027707576751709, "learning_rate": 1.0320358051909656e-05, "loss": 0.5659, "step": 31941 }, { "epoch": 0.9812306085460634, "grad_norm": 0.3897800147533417, "learning_rate": 1.0319875022254933e-05, "loss": 0.5208, "step": 31942 }, { "epoch": 0.9812613276810125, "grad_norm": 0.33328354358673096, "learning_rate": 1.031939199185312e-05, "loss": 0.4932, "step": 31943 }, { "epoch": 0.9812920468159617, "grad_norm": 0.41365134716033936, "learning_rate": 1.0318908960705345e-05, "loss": 0.5816, "step": 31944 }, { "epoch": 0.9813227659509108, "grad_norm": 0.4682101607322693, "learning_rate": 1.0318425928812736e-05, "loss": 0.5954, "step": 31945 }, { "epoch": 0.98135348508586, "grad_norm": 0.37160632014274597, "learning_rate": 1.0317942896176423e-05, "loss": 0.496, "step": 31946 }, { "epoch": 0.9813842042208092, "grad_norm": 0.39224159717559814, "learning_rate": 1.031745986279753e-05, "loss": 0.5958, "step": 31947 }, { "epoch": 0.9814149233557583, "grad_norm": 0.37091127038002014, "learning_rate": 1.0316976828677188e-05, "loss": 0.5494, "step": 31948 }, { "epoch": 0.9814456424907074, "grad_norm": 0.40283840894699097, "learning_rate": 1.0316493793816523e-05, "loss": 0.579, "step": 31949 }, { "epoch": 0.9814763616256567, "grad_norm": 0.3956664204597473, "learning_rate": 1.0316010758216665e-05, "loss": 0.515, "step": 31950 }, { "epoch": 0.9815070807606058, "grad_norm": 0.9270488023757935, "learning_rate": 1.0315527721878742e-05, "loss": 0.5547, "step": 31951 }, { "epoch": 0.9815377998955549, "grad_norm": 0.4250369369983673, "learning_rate": 1.0315044684803885e-05, "loss": 0.5423, "step": 31952 }, { "epoch": 0.9815685190305041, "grad_norm": 0.3757081627845764, "learning_rate": 1.0314561646993216e-05, "loss": 0.5271, "step": 31953 }, { "epoch": 0.9815992381654532, "grad_norm": 0.47296592593193054, "learning_rate": 1.0314078608447866e-05, "loss": 0.6027, "step": 31954 }, { "epoch": 0.9816299573004024, "grad_norm": 0.4045969843864441, "learning_rate": 1.0313595569168965e-05, "loss": 0.5139, "step": 31955 }, { "epoch": 0.9816606764353516, "grad_norm": 0.37614503502845764, "learning_rate": 1.031311252915764e-05, "loss": 0.5062, "step": 31956 }, { "epoch": 0.9816913955703007, "grad_norm": 0.35088101029396057, "learning_rate": 1.0312629488415016e-05, "loss": 0.5615, "step": 31957 }, { "epoch": 0.98172211470525, "grad_norm": 0.3802255392074585, "learning_rate": 1.0312146446942223e-05, "loss": 0.5742, "step": 31958 }, { "epoch": 0.9817528338401991, "grad_norm": 0.4125663936138153, "learning_rate": 1.0311663404740395e-05, "loss": 0.5945, "step": 31959 }, { "epoch": 0.9817835529751482, "grad_norm": 0.38442471623420715, "learning_rate": 1.0311180361810653e-05, "loss": 0.6155, "step": 31960 }, { "epoch": 0.9818142721100974, "grad_norm": 0.352629691362381, "learning_rate": 1.0310697318154128e-05, "loss": 0.5541, "step": 31961 }, { "epoch": 0.9818449912450465, "grad_norm": 0.343912273645401, "learning_rate": 1.0310214273771948e-05, "loss": 0.5977, "step": 31962 }, { "epoch": 0.9818757103799957, "grad_norm": 0.45226970314979553, "learning_rate": 1.030973122866524e-05, "loss": 0.5907, "step": 31963 }, { "epoch": 0.9819064295149449, "grad_norm": 0.5472174286842346, "learning_rate": 1.0309248182835134e-05, "loss": 0.5453, "step": 31964 }, { "epoch": 0.981937148649894, "grad_norm": 0.3651614487171173, "learning_rate": 1.0308765136282757e-05, "loss": 0.5668, "step": 31965 }, { "epoch": 0.9819678677848431, "grad_norm": 0.34154245257377625, "learning_rate": 1.0308282089009238e-05, "loss": 0.5468, "step": 31966 }, { "epoch": 0.9819985869197924, "grad_norm": 0.38121286034584045, "learning_rate": 1.0307799041015702e-05, "loss": 0.4131, "step": 31967 }, { "epoch": 0.9820293060547415, "grad_norm": 0.4149737060070038, "learning_rate": 1.0307315992303284e-05, "loss": 0.5296, "step": 31968 }, { "epoch": 0.9820600251896907, "grad_norm": 0.4033730924129486, "learning_rate": 1.0306832942873104e-05, "loss": 0.5404, "step": 31969 }, { "epoch": 0.9820907443246398, "grad_norm": 0.363191157579422, "learning_rate": 1.0306349892726298e-05, "loss": 0.548, "step": 31970 }, { "epoch": 0.982121463459589, "grad_norm": 0.3378194272518158, "learning_rate": 1.0305866841863987e-05, "loss": 0.5347, "step": 31971 }, { "epoch": 0.9821521825945382, "grad_norm": 0.3886933922767639, "learning_rate": 1.0305383790287305e-05, "loss": 0.5143, "step": 31972 }, { "epoch": 0.9821829017294873, "grad_norm": 0.4048117399215698, "learning_rate": 1.0304900737997376e-05, "loss": 0.5565, "step": 31973 }, { "epoch": 0.9822136208644364, "grad_norm": 0.4006229341030121, "learning_rate": 1.0304417684995332e-05, "loss": 0.5531, "step": 31974 }, { "epoch": 0.9822443399993857, "grad_norm": 0.34659543633461, "learning_rate": 1.0303934631282298e-05, "loss": 0.5303, "step": 31975 }, { "epoch": 0.9822750591343348, "grad_norm": 0.39900660514831543, "learning_rate": 1.0303451576859402e-05, "loss": 0.5675, "step": 31976 }, { "epoch": 0.9823057782692839, "grad_norm": 0.5357086658477783, "learning_rate": 1.0302968521727776e-05, "loss": 0.6058, "step": 31977 }, { "epoch": 0.9823364974042331, "grad_norm": 0.4201912581920624, "learning_rate": 1.0302485465888544e-05, "loss": 0.535, "step": 31978 }, { "epoch": 0.9823672165391822, "grad_norm": 0.354784220457077, "learning_rate": 1.0302002409342839e-05, "loss": 0.5712, "step": 31979 }, { "epoch": 0.9823979356741314, "grad_norm": 0.34726110100746155, "learning_rate": 1.0301519352091782e-05, "loss": 0.5362, "step": 31980 }, { "epoch": 0.9824286548090806, "grad_norm": 0.34460628032684326, "learning_rate": 1.0301036294136511e-05, "loss": 0.5846, "step": 31981 }, { "epoch": 0.9824593739440297, "grad_norm": 0.3608395755290985, "learning_rate": 1.0300553235478145e-05, "loss": 0.5144, "step": 31982 }, { "epoch": 0.9824900930789789, "grad_norm": 0.43645769357681274, "learning_rate": 1.0300070176117816e-05, "loss": 0.5517, "step": 31983 }, { "epoch": 0.9825208122139281, "grad_norm": 0.34970420598983765, "learning_rate": 1.0299587116056653e-05, "loss": 0.6426, "step": 31984 }, { "epoch": 0.9825515313488772, "grad_norm": 0.40104398131370544, "learning_rate": 1.0299104055295783e-05, "loss": 0.5173, "step": 31985 }, { "epoch": 0.9825822504838264, "grad_norm": 0.35905003547668457, "learning_rate": 1.0298620993836337e-05, "loss": 0.509, "step": 31986 }, { "epoch": 0.9826129696187755, "grad_norm": 0.3893132209777832, "learning_rate": 1.0298137931679437e-05, "loss": 0.4938, "step": 31987 }, { "epoch": 0.9826436887537247, "grad_norm": 0.36325135827064514, "learning_rate": 1.0297654868826216e-05, "loss": 0.6176, "step": 31988 }, { "epoch": 0.9826744078886739, "grad_norm": 0.46086740493774414, "learning_rate": 1.0297171805277802e-05, "loss": 0.5288, "step": 31989 }, { "epoch": 0.982705127023623, "grad_norm": 0.33921533823013306, "learning_rate": 1.0296688741035324e-05, "loss": 0.5041, "step": 31990 }, { "epoch": 0.9827358461585721, "grad_norm": 0.3616562783718109, "learning_rate": 1.0296205676099905e-05, "loss": 0.5495, "step": 31991 }, { "epoch": 0.9827665652935214, "grad_norm": 0.550632119178772, "learning_rate": 1.029572261047268e-05, "loss": 0.5749, "step": 31992 }, { "epoch": 0.9827972844284705, "grad_norm": 0.39157038927078247, "learning_rate": 1.0295239544154771e-05, "loss": 0.5247, "step": 31993 }, { "epoch": 0.9828280035634197, "grad_norm": 0.44686976075172424, "learning_rate": 1.0294756477147311e-05, "loss": 0.4907, "step": 31994 }, { "epoch": 0.9828587226983688, "grad_norm": 0.3693929612636566, "learning_rate": 1.0294273409451429e-05, "loss": 0.5835, "step": 31995 }, { "epoch": 0.982889441833318, "grad_norm": 0.43999773263931274, "learning_rate": 1.0293790341068249e-05, "loss": 0.5923, "step": 31996 }, { "epoch": 0.9829201609682672, "grad_norm": 0.3644382357597351, "learning_rate": 1.0293307271998903e-05, "loss": 0.5456, "step": 31997 }, { "epoch": 0.9829508801032163, "grad_norm": 0.40638068318367004, "learning_rate": 1.0292824202244515e-05, "loss": 0.5717, "step": 31998 }, { "epoch": 0.9829815992381654, "grad_norm": 0.38299068808555603, "learning_rate": 1.0292341131806216e-05, "loss": 0.4311, "step": 31999 }, { "epoch": 0.9830123183731146, "grad_norm": 0.38791799545288086, "learning_rate": 1.0291858060685136e-05, "loss": 0.5266, "step": 32000 }, { "epoch": 0.9830430375080638, "grad_norm": 0.3584929406642914, "learning_rate": 1.0291374988882397e-05, "loss": 0.5156, "step": 32001 }, { "epoch": 0.9830737566430129, "grad_norm": 0.45656511187553406, "learning_rate": 1.0290891916399135e-05, "loss": 0.5948, "step": 32002 }, { "epoch": 0.9831044757779621, "grad_norm": 0.42374640703201294, "learning_rate": 1.0290408843236472e-05, "loss": 0.5296, "step": 32003 }, { "epoch": 0.9831351949129112, "grad_norm": 0.38516440987586975, "learning_rate": 1.0289925769395542e-05, "loss": 0.5701, "step": 32004 }, { "epoch": 0.9831659140478605, "grad_norm": 0.3793541193008423, "learning_rate": 1.0289442694877468e-05, "loss": 0.5298, "step": 32005 }, { "epoch": 0.9831966331828096, "grad_norm": 0.3695896863937378, "learning_rate": 1.028895961968338e-05, "loss": 0.4745, "step": 32006 }, { "epoch": 0.9832273523177587, "grad_norm": 0.3785282075405121, "learning_rate": 1.0288476543814408e-05, "loss": 0.5769, "step": 32007 }, { "epoch": 0.9832580714527079, "grad_norm": 0.36559629440307617, "learning_rate": 1.0287993467271678e-05, "loss": 0.4692, "step": 32008 }, { "epoch": 0.9832887905876571, "grad_norm": 0.3574185073375702, "learning_rate": 1.0287510390056321e-05, "loss": 0.5358, "step": 32009 }, { "epoch": 0.9833195097226062, "grad_norm": 0.4629267454147339, "learning_rate": 1.028702731216946e-05, "loss": 0.5713, "step": 32010 }, { "epoch": 0.9833502288575554, "grad_norm": 0.3944993019104004, "learning_rate": 1.0286544233612228e-05, "loss": 0.5693, "step": 32011 }, { "epoch": 0.9833809479925045, "grad_norm": 0.36778542399406433, "learning_rate": 1.028606115438575e-05, "loss": 0.5552, "step": 32012 }, { "epoch": 0.9834116671274536, "grad_norm": 0.47068390250205994, "learning_rate": 1.028557807449116e-05, "loss": 0.5262, "step": 32013 }, { "epoch": 0.9834423862624029, "grad_norm": 0.33381718397140503, "learning_rate": 1.0285094993929581e-05, "loss": 0.5308, "step": 32014 }, { "epoch": 0.983473105397352, "grad_norm": 0.6523380279541016, "learning_rate": 1.0284611912702143e-05, "loss": 0.4679, "step": 32015 }, { "epoch": 0.9835038245323011, "grad_norm": 0.37125617265701294, "learning_rate": 1.0284128830809975e-05, "loss": 0.586, "step": 32016 }, { "epoch": 0.9835345436672503, "grad_norm": 0.36677655577659607, "learning_rate": 1.02836457482542e-05, "loss": 0.5381, "step": 32017 }, { "epoch": 0.9835652628021995, "grad_norm": 0.3591811954975128, "learning_rate": 1.0283162665035954e-05, "loss": 0.467, "step": 32018 }, { "epoch": 0.9835959819371487, "grad_norm": 0.36789196729660034, "learning_rate": 1.0282679581156362e-05, "loss": 0.5358, "step": 32019 }, { "epoch": 0.9836267010720978, "grad_norm": 0.38514620065689087, "learning_rate": 1.0282196496616552e-05, "loss": 0.5976, "step": 32020 }, { "epoch": 0.9836574202070469, "grad_norm": 0.47825419902801514, "learning_rate": 1.028171341141765e-05, "loss": 0.5488, "step": 32021 }, { "epoch": 0.9836881393419962, "grad_norm": 0.3956505060195923, "learning_rate": 1.0281230325560792e-05, "loss": 0.5719, "step": 32022 }, { "epoch": 0.9837188584769453, "grad_norm": 0.41109201312065125, "learning_rate": 1.0280747239047095e-05, "loss": 0.5332, "step": 32023 }, { "epoch": 0.9837495776118944, "grad_norm": 0.40462726354599, "learning_rate": 1.0280264151877697e-05, "loss": 0.6142, "step": 32024 }, { "epoch": 0.9837802967468436, "grad_norm": 0.3465260863304138, "learning_rate": 1.027978106405372e-05, "loss": 0.5713, "step": 32025 }, { "epoch": 0.9838110158817928, "grad_norm": 0.3638586699962616, "learning_rate": 1.0279297975576297e-05, "loss": 0.5734, "step": 32026 }, { "epoch": 0.9838417350167419, "grad_norm": 0.7211752533912659, "learning_rate": 1.0278814886446553e-05, "loss": 0.5301, "step": 32027 }, { "epoch": 0.9838724541516911, "grad_norm": 0.3732266128063202, "learning_rate": 1.0278331796665616e-05, "loss": 0.48, "step": 32028 }, { "epoch": 0.9839031732866402, "grad_norm": 0.38954854011535645, "learning_rate": 1.027784870623462e-05, "loss": 0.5304, "step": 32029 }, { "epoch": 0.9839338924215895, "grad_norm": 0.48381757736206055, "learning_rate": 1.0277365615154681e-05, "loss": 0.5213, "step": 32030 }, { "epoch": 0.9839646115565386, "grad_norm": 2.149381399154663, "learning_rate": 1.0276882523426943e-05, "loss": 0.5974, "step": 32031 }, { "epoch": 0.9839953306914877, "grad_norm": 0.3987710475921631, "learning_rate": 1.0276399431052522e-05, "loss": 0.5119, "step": 32032 }, { "epoch": 0.9840260498264369, "grad_norm": 0.3687173128128052, "learning_rate": 1.0275916338032551e-05, "loss": 0.5226, "step": 32033 }, { "epoch": 0.984056768961386, "grad_norm": 0.4041935205459595, "learning_rate": 1.0275433244368162e-05, "loss": 0.4392, "step": 32034 }, { "epoch": 0.9840874880963352, "grad_norm": 0.40408897399902344, "learning_rate": 1.0274950150060476e-05, "loss": 0.5134, "step": 32035 }, { "epoch": 0.9841182072312844, "grad_norm": 0.3398621380329132, "learning_rate": 1.0274467055110628e-05, "loss": 0.5163, "step": 32036 }, { "epoch": 0.9841489263662335, "grad_norm": 0.3419729471206665, "learning_rate": 1.027398395951974e-05, "loss": 0.5034, "step": 32037 }, { "epoch": 0.9841796455011826, "grad_norm": 0.38036489486694336, "learning_rate": 1.0273500863288945e-05, "loss": 0.508, "step": 32038 }, { "epoch": 0.9842103646361319, "grad_norm": 0.41096487641334534, "learning_rate": 1.0273017766419366e-05, "loss": 0.6252, "step": 32039 }, { "epoch": 0.984241083771081, "grad_norm": 0.4117507040500641, "learning_rate": 1.027253466891214e-05, "loss": 0.5264, "step": 32040 }, { "epoch": 0.9842718029060301, "grad_norm": 0.34086135029792786, "learning_rate": 1.0272051570768387e-05, "loss": 0.4791, "step": 32041 }, { "epoch": 0.9843025220409793, "grad_norm": 0.37898099422454834, "learning_rate": 1.0271568471989241e-05, "loss": 0.5345, "step": 32042 }, { "epoch": 0.9843332411759285, "grad_norm": 0.3344566524028778, "learning_rate": 1.0271085372575824e-05, "loss": 0.505, "step": 32043 }, { "epoch": 0.9843639603108777, "grad_norm": 0.40458762645721436, "learning_rate": 1.0270602272529273e-05, "loss": 0.5428, "step": 32044 }, { "epoch": 0.9843946794458268, "grad_norm": 0.3471236526966095, "learning_rate": 1.027011917185071e-05, "loss": 0.5576, "step": 32045 }, { "epoch": 0.9844253985807759, "grad_norm": 0.4376184046268463, "learning_rate": 1.0269636070541262e-05, "loss": 0.5374, "step": 32046 }, { "epoch": 0.9844561177157252, "grad_norm": 0.36838850378990173, "learning_rate": 1.0269152968602064e-05, "loss": 0.5153, "step": 32047 }, { "epoch": 0.9844868368506743, "grad_norm": 0.42821189761161804, "learning_rate": 1.0268669866034235e-05, "loss": 0.5166, "step": 32048 }, { "epoch": 0.9845175559856234, "grad_norm": 0.3771461546421051, "learning_rate": 1.0268186762838912e-05, "loss": 0.5256, "step": 32049 }, { "epoch": 0.9845482751205726, "grad_norm": 0.3456963002681732, "learning_rate": 1.0267703659017223e-05, "loss": 0.6023, "step": 32050 }, { "epoch": 0.9845789942555218, "grad_norm": 0.35603198409080505, "learning_rate": 1.0267220554570289e-05, "loss": 0.5789, "step": 32051 }, { "epoch": 0.9846097133904709, "grad_norm": 0.36765363812446594, "learning_rate": 1.0266737449499244e-05, "loss": 0.4934, "step": 32052 }, { "epoch": 0.9846404325254201, "grad_norm": 0.402659147977829, "learning_rate": 1.0266254343805216e-05, "loss": 0.554, "step": 32053 }, { "epoch": 0.9846711516603692, "grad_norm": 0.3323395252227783, "learning_rate": 1.0265771237489332e-05, "loss": 0.4955, "step": 32054 }, { "epoch": 0.9847018707953185, "grad_norm": 0.37765857577323914, "learning_rate": 1.026528813055272e-05, "loss": 0.5098, "step": 32055 }, { "epoch": 0.9847325899302676, "grad_norm": 0.3569028377532959, "learning_rate": 1.0264805022996512e-05, "loss": 0.5778, "step": 32056 }, { "epoch": 0.9847633090652167, "grad_norm": 0.3930826783180237, "learning_rate": 1.026432191482183e-05, "loss": 0.5697, "step": 32057 }, { "epoch": 0.9847940282001659, "grad_norm": 0.3491280972957611, "learning_rate": 1.0263838806029807e-05, "loss": 0.5269, "step": 32058 }, { "epoch": 0.984824747335115, "grad_norm": 0.3929907977581024, "learning_rate": 1.0263355696621569e-05, "loss": 0.5257, "step": 32059 }, { "epoch": 0.9848554664700642, "grad_norm": 0.3435913920402527, "learning_rate": 1.0262872586598247e-05, "loss": 0.5945, "step": 32060 }, { "epoch": 0.9848861856050134, "grad_norm": 0.3846510052680969, "learning_rate": 1.0262389475960968e-05, "loss": 0.5349, "step": 32061 }, { "epoch": 0.9849169047399625, "grad_norm": 0.3685392439365387, "learning_rate": 1.0261906364710856e-05, "loss": 0.5729, "step": 32062 }, { "epoch": 0.9849476238749116, "grad_norm": 0.4071357250213623, "learning_rate": 1.0261423252849048e-05, "loss": 0.4965, "step": 32063 }, { "epoch": 0.9849783430098609, "grad_norm": 0.354800820350647, "learning_rate": 1.0260940140376664e-05, "loss": 0.534, "step": 32064 }, { "epoch": 0.98500906214481, "grad_norm": 0.42088061571121216, "learning_rate": 1.026045702729484e-05, "loss": 0.5887, "step": 32065 }, { "epoch": 0.9850397812797591, "grad_norm": 0.445123553276062, "learning_rate": 1.0259973913604695e-05, "loss": 0.5011, "step": 32066 }, { "epoch": 0.9850705004147083, "grad_norm": 0.3781382143497467, "learning_rate": 1.0259490799307367e-05, "loss": 0.5315, "step": 32067 }, { "epoch": 0.9851012195496575, "grad_norm": 0.3964947760105133, "learning_rate": 1.025900768440398e-05, "loss": 0.5299, "step": 32068 }, { "epoch": 0.9851319386846067, "grad_norm": 0.4811297655105591, "learning_rate": 1.0258524568895663e-05, "loss": 0.4333, "step": 32069 }, { "epoch": 0.9851626578195558, "grad_norm": 0.356442928314209, "learning_rate": 1.0258041452783542e-05, "loss": 0.4687, "step": 32070 }, { "epoch": 0.9851933769545049, "grad_norm": 0.3659574091434479, "learning_rate": 1.0257558336068746e-05, "loss": 0.5268, "step": 32071 }, { "epoch": 0.9852240960894542, "grad_norm": 0.38766664266586304, "learning_rate": 1.025707521875241e-05, "loss": 0.5855, "step": 32072 }, { "epoch": 0.9852548152244033, "grad_norm": 0.4032134711742401, "learning_rate": 1.0256592100835651e-05, "loss": 0.61, "step": 32073 }, { "epoch": 0.9852855343593524, "grad_norm": 0.3879472017288208, "learning_rate": 1.0256108982319608e-05, "loss": 0.5349, "step": 32074 }, { "epoch": 0.9853162534943016, "grad_norm": 0.3500577211380005, "learning_rate": 1.02556258632054e-05, "loss": 0.5013, "step": 32075 }, { "epoch": 0.9853469726292508, "grad_norm": 0.38323652744293213, "learning_rate": 1.0255142743494165e-05, "loss": 0.5072, "step": 32076 }, { "epoch": 0.9853776917641999, "grad_norm": 0.5666323900222778, "learning_rate": 1.025465962318702e-05, "loss": 0.6288, "step": 32077 }, { "epoch": 0.9854084108991491, "grad_norm": 0.36634477972984314, "learning_rate": 1.0254176502285105e-05, "loss": 0.5124, "step": 32078 }, { "epoch": 0.9854391300340982, "grad_norm": 0.4030751883983612, "learning_rate": 1.0253693380789542e-05, "loss": 0.553, "step": 32079 }, { "epoch": 0.9854698491690475, "grad_norm": 0.3452749252319336, "learning_rate": 1.0253210258701458e-05, "loss": 0.4975, "step": 32080 }, { "epoch": 0.9855005683039966, "grad_norm": 0.3653426468372345, "learning_rate": 1.0252727136021987e-05, "loss": 0.5382, "step": 32081 }, { "epoch": 0.9855312874389457, "grad_norm": 0.39312317967414856, "learning_rate": 1.025224401275225e-05, "loss": 0.5267, "step": 32082 }, { "epoch": 0.9855620065738949, "grad_norm": 0.36229953169822693, "learning_rate": 1.0251760888893383e-05, "loss": 0.5266, "step": 32083 }, { "epoch": 0.985592725708844, "grad_norm": 0.3720245361328125, "learning_rate": 1.0251277764446505e-05, "loss": 0.4537, "step": 32084 }, { "epoch": 0.9856234448437932, "grad_norm": 0.3742936849594116, "learning_rate": 1.0250794639412758e-05, "loss": 0.6031, "step": 32085 }, { "epoch": 0.9856541639787424, "grad_norm": 0.38030070066452026, "learning_rate": 1.0250311513793261e-05, "loss": 0.5803, "step": 32086 }, { "epoch": 0.9856848831136915, "grad_norm": 0.3794907331466675, "learning_rate": 1.024982838758914e-05, "loss": 0.521, "step": 32087 }, { "epoch": 0.9857156022486406, "grad_norm": 0.3630807101726532, "learning_rate": 1.0249345260801531e-05, "loss": 0.5828, "step": 32088 }, { "epoch": 0.9857463213835899, "grad_norm": 0.3708418011665344, "learning_rate": 1.0248862133431556e-05, "loss": 0.5222, "step": 32089 }, { "epoch": 0.985777040518539, "grad_norm": 0.38749948143959045, "learning_rate": 1.0248379005480348e-05, "loss": 0.4762, "step": 32090 }, { "epoch": 0.9858077596534881, "grad_norm": 0.437784880399704, "learning_rate": 1.0247895876949031e-05, "loss": 0.5476, "step": 32091 }, { "epoch": 0.9858384787884373, "grad_norm": 0.38579294085502625, "learning_rate": 1.024741274783874e-05, "loss": 0.5571, "step": 32092 }, { "epoch": 0.9858691979233865, "grad_norm": 0.3489190936088562, "learning_rate": 1.0246929618150599e-05, "loss": 0.4966, "step": 32093 }, { "epoch": 0.9858999170583357, "grad_norm": 0.3747866749763489, "learning_rate": 1.0246446487885734e-05, "loss": 0.5451, "step": 32094 }, { "epoch": 0.9859306361932848, "grad_norm": 0.4080027639865875, "learning_rate": 1.0245963357045277e-05, "loss": 0.5191, "step": 32095 }, { "epoch": 0.9859613553282339, "grad_norm": 0.39926716685295105, "learning_rate": 1.0245480225630355e-05, "loss": 0.5506, "step": 32096 }, { "epoch": 0.9859920744631832, "grad_norm": 0.3828631043434143, "learning_rate": 1.0244997093642099e-05, "loss": 0.4851, "step": 32097 }, { "epoch": 0.9860227935981323, "grad_norm": 0.38472846150398254, "learning_rate": 1.0244513961081633e-05, "loss": 0.5152, "step": 32098 }, { "epoch": 0.9860535127330814, "grad_norm": 0.364616334438324, "learning_rate": 1.0244030827950087e-05, "loss": 0.5801, "step": 32099 }, { "epoch": 0.9860842318680306, "grad_norm": 0.3428393006324768, "learning_rate": 1.024354769424859e-05, "loss": 0.4763, "step": 32100 }, { "epoch": 0.9861149510029797, "grad_norm": 0.44365885853767395, "learning_rate": 1.0243064559978272e-05, "loss": 0.5152, "step": 32101 }, { "epoch": 0.9861456701379289, "grad_norm": 0.38405945897102356, "learning_rate": 1.0242581425140258e-05, "loss": 0.5582, "step": 32102 }, { "epoch": 0.9861763892728781, "grad_norm": 0.3438846468925476, "learning_rate": 1.0242098289735678e-05, "loss": 0.4995, "step": 32103 }, { "epoch": 0.9862071084078272, "grad_norm": 0.3666609525680542, "learning_rate": 1.0241615153765663e-05, "loss": 0.5315, "step": 32104 }, { "epoch": 0.9862378275427764, "grad_norm": 0.4006696939468384, "learning_rate": 1.0241132017231337e-05, "loss": 0.608, "step": 32105 }, { "epoch": 0.9862685466777256, "grad_norm": 0.3731255829334259, "learning_rate": 1.0240648880133832e-05, "loss": 0.5064, "step": 32106 }, { "epoch": 0.9862992658126747, "grad_norm": 0.35332319140434265, "learning_rate": 1.024016574247427e-05, "loss": 0.5341, "step": 32107 }, { "epoch": 0.9863299849476239, "grad_norm": 0.3457493185997009, "learning_rate": 1.0239682604253791e-05, "loss": 0.4618, "step": 32108 }, { "epoch": 0.986360704082573, "grad_norm": 0.38677269220352173, "learning_rate": 1.0239199465473511e-05, "loss": 0.5465, "step": 32109 }, { "epoch": 0.9863914232175222, "grad_norm": 0.3368350863456726, "learning_rate": 1.0238716326134567e-05, "loss": 0.5525, "step": 32110 }, { "epoch": 0.9864221423524714, "grad_norm": 0.38273996114730835, "learning_rate": 1.0238233186238083e-05, "loss": 0.6568, "step": 32111 }, { "epoch": 0.9864528614874205, "grad_norm": 0.3683892786502838, "learning_rate": 1.023775004578519e-05, "loss": 0.5958, "step": 32112 }, { "epoch": 0.9864835806223696, "grad_norm": 0.33915042877197266, "learning_rate": 1.0237266904777016e-05, "loss": 0.5079, "step": 32113 }, { "epoch": 0.9865142997573189, "grad_norm": 0.3398439884185791, "learning_rate": 1.0236783763214685e-05, "loss": 0.4602, "step": 32114 }, { "epoch": 0.986545018892268, "grad_norm": 0.3854321837425232, "learning_rate": 1.0236300621099333e-05, "loss": 0.4961, "step": 32115 }, { "epoch": 0.9865757380272172, "grad_norm": 0.38647252321243286, "learning_rate": 1.0235817478432082e-05, "loss": 0.5158, "step": 32116 }, { "epoch": 0.9866064571621663, "grad_norm": 0.38820168375968933, "learning_rate": 1.0235334335214064e-05, "loss": 0.5551, "step": 32117 }, { "epoch": 0.9866371762971154, "grad_norm": 0.37868139147758484, "learning_rate": 1.0234851191446404e-05, "loss": 0.5112, "step": 32118 }, { "epoch": 0.9866678954320647, "grad_norm": 0.3591846823692322, "learning_rate": 1.0234368047130233e-05, "loss": 0.5837, "step": 32119 }, { "epoch": 0.9866986145670138, "grad_norm": 0.34265029430389404, "learning_rate": 1.0233884902266682e-05, "loss": 0.4133, "step": 32120 }, { "epoch": 0.9867293337019629, "grad_norm": 0.3706163167953491, "learning_rate": 1.0233401756856873e-05, "loss": 0.5111, "step": 32121 }, { "epoch": 0.9867600528369121, "grad_norm": 0.39550280570983887, "learning_rate": 1.0232918610901941e-05, "loss": 0.5908, "step": 32122 }, { "epoch": 0.9867907719718613, "grad_norm": 0.3786122798919678, "learning_rate": 1.0232435464403008e-05, "loss": 0.565, "step": 32123 }, { "epoch": 0.9868214911068104, "grad_norm": 0.3481711447238922, "learning_rate": 1.0231952317361209e-05, "loss": 0.5473, "step": 32124 }, { "epoch": 0.9868522102417596, "grad_norm": 0.3632776141166687, "learning_rate": 1.0231469169777667e-05, "loss": 0.4319, "step": 32125 }, { "epoch": 0.9868829293767087, "grad_norm": 0.4220213294029236, "learning_rate": 1.0230986021653512e-05, "loss": 0.542, "step": 32126 }, { "epoch": 0.9869136485116579, "grad_norm": 0.3723360002040863, "learning_rate": 1.0230502872989874e-05, "loss": 0.618, "step": 32127 }, { "epoch": 0.9869443676466071, "grad_norm": 0.3628038465976715, "learning_rate": 1.0230019723787882e-05, "loss": 0.5021, "step": 32128 }, { "epoch": 0.9869750867815562, "grad_norm": 0.41431644558906555, "learning_rate": 1.0229536574048661e-05, "loss": 0.5136, "step": 32129 }, { "epoch": 0.9870058059165054, "grad_norm": 0.35162433981895447, "learning_rate": 1.0229053423773343e-05, "loss": 0.5911, "step": 32130 }, { "epoch": 0.9870365250514546, "grad_norm": 0.38678860664367676, "learning_rate": 1.0228570272963054e-05, "loss": 0.5687, "step": 32131 }, { "epoch": 0.9870672441864037, "grad_norm": 0.35793063044548035, "learning_rate": 1.022808712161892e-05, "loss": 0.4981, "step": 32132 }, { "epoch": 0.9870979633213529, "grad_norm": 0.40275445580482483, "learning_rate": 1.022760396974208e-05, "loss": 0.5517, "step": 32133 }, { "epoch": 0.987128682456302, "grad_norm": 0.3625172972679138, "learning_rate": 1.0227120817333648e-05, "loss": 0.4709, "step": 32134 }, { "epoch": 0.9871594015912512, "grad_norm": 0.3763721287250519, "learning_rate": 1.0226637664394762e-05, "loss": 0.5407, "step": 32135 }, { "epoch": 0.9871901207262004, "grad_norm": 0.38016101717948914, "learning_rate": 1.0226154510926546e-05, "loss": 0.5463, "step": 32136 }, { "epoch": 0.9872208398611495, "grad_norm": 0.38186562061309814, "learning_rate": 1.0225671356930136e-05, "loss": 0.5242, "step": 32137 }, { "epoch": 0.9872515589960986, "grad_norm": 0.3767383396625519, "learning_rate": 1.0225188202406648e-05, "loss": 0.5733, "step": 32138 }, { "epoch": 0.9872822781310479, "grad_norm": 0.3732231557369232, "learning_rate": 1.022470504735722e-05, "loss": 0.5574, "step": 32139 }, { "epoch": 0.987312997265997, "grad_norm": 0.3482104539871216, "learning_rate": 1.0224221891782979e-05, "loss": 0.5832, "step": 32140 }, { "epoch": 0.9873437164009462, "grad_norm": 0.4270542562007904, "learning_rate": 1.022373873568505e-05, "loss": 0.5141, "step": 32141 }, { "epoch": 0.9873744355358953, "grad_norm": 0.40638014674186707, "learning_rate": 1.0223255579064567e-05, "loss": 0.5385, "step": 32142 }, { "epoch": 0.9874051546708444, "grad_norm": 0.36546358466148376, "learning_rate": 1.0222772421922649e-05, "loss": 0.5154, "step": 32143 }, { "epoch": 0.9874358738057937, "grad_norm": 0.34225887060165405, "learning_rate": 1.0222289264260436e-05, "loss": 0.4871, "step": 32144 }, { "epoch": 0.9874665929407428, "grad_norm": 0.3784179389476776, "learning_rate": 1.022180610607905e-05, "loss": 0.4584, "step": 32145 }, { "epoch": 0.9874973120756919, "grad_norm": 0.3725365698337555, "learning_rate": 1.0221322947379618e-05, "loss": 0.5486, "step": 32146 }, { "epoch": 0.9875280312106411, "grad_norm": 0.32223111391067505, "learning_rate": 1.022083978816327e-05, "loss": 0.5042, "step": 32147 }, { "epoch": 0.9875587503455903, "grad_norm": 0.35299181938171387, "learning_rate": 1.0220356628431139e-05, "loss": 0.5189, "step": 32148 }, { "epoch": 0.9875894694805394, "grad_norm": 0.4264563322067261, "learning_rate": 1.021987346818435e-05, "loss": 0.5719, "step": 32149 }, { "epoch": 0.9876201886154886, "grad_norm": 0.41617056727409363, "learning_rate": 1.0219390307424027e-05, "loss": 0.5824, "step": 32150 }, { "epoch": 0.9876509077504377, "grad_norm": 0.37520840764045715, "learning_rate": 1.0218907146151307e-05, "loss": 0.5427, "step": 32151 }, { "epoch": 0.9876816268853869, "grad_norm": 0.35431089997291565, "learning_rate": 1.021842398436731e-05, "loss": 0.549, "step": 32152 }, { "epoch": 0.9877123460203361, "grad_norm": 0.48377498984336853, "learning_rate": 1.0217940822073172e-05, "loss": 0.5604, "step": 32153 }, { "epoch": 0.9877430651552852, "grad_norm": 0.3397085666656494, "learning_rate": 1.0217457659270014e-05, "loss": 0.4898, "step": 32154 }, { "epoch": 0.9877737842902344, "grad_norm": 0.3797539174556732, "learning_rate": 1.0216974495958973e-05, "loss": 0.5547, "step": 32155 }, { "epoch": 0.9878045034251836, "grad_norm": 0.38696128129959106, "learning_rate": 1.021649133214117e-05, "loss": 0.5423, "step": 32156 }, { "epoch": 0.9878352225601327, "grad_norm": 0.5899645090103149, "learning_rate": 1.0216008167817736e-05, "loss": 0.5291, "step": 32157 }, { "epoch": 0.9878659416950819, "grad_norm": 0.35277801752090454, "learning_rate": 1.0215525002989802e-05, "loss": 0.5592, "step": 32158 }, { "epoch": 0.987896660830031, "grad_norm": 0.37696176767349243, "learning_rate": 1.021504183765849e-05, "loss": 0.592, "step": 32159 }, { "epoch": 0.9879273799649801, "grad_norm": 0.35649850964546204, "learning_rate": 1.0214558671824938e-05, "loss": 0.5608, "step": 32160 }, { "epoch": 0.9879580990999294, "grad_norm": 0.40814509987831116, "learning_rate": 1.0214075505490267e-05, "loss": 0.463, "step": 32161 }, { "epoch": 0.9879888182348785, "grad_norm": 0.3736914098262787, "learning_rate": 1.0213592338655608e-05, "loss": 0.4952, "step": 32162 }, { "epoch": 0.9880195373698276, "grad_norm": 0.4202638268470764, "learning_rate": 1.0213109171322089e-05, "loss": 0.5297, "step": 32163 }, { "epoch": 0.9880502565047768, "grad_norm": 0.47559407353401184, "learning_rate": 1.021262600349084e-05, "loss": 0.5069, "step": 32164 }, { "epoch": 0.988080975639726, "grad_norm": 0.3845932185649872, "learning_rate": 1.0212142835162988e-05, "loss": 0.5424, "step": 32165 }, { "epoch": 0.9881116947746752, "grad_norm": 0.36255648732185364, "learning_rate": 1.021165966633966e-05, "loss": 0.5026, "step": 32166 }, { "epoch": 0.9881424139096243, "grad_norm": 0.33209875226020813, "learning_rate": 1.0211176497021987e-05, "loss": 0.5528, "step": 32167 }, { "epoch": 0.9881731330445734, "grad_norm": 0.35274022817611694, "learning_rate": 1.0210693327211094e-05, "loss": 0.5373, "step": 32168 }, { "epoch": 0.9882038521795227, "grad_norm": 0.41809704899787903, "learning_rate": 1.0210210156908115e-05, "loss": 0.5559, "step": 32169 }, { "epoch": 0.9882345713144718, "grad_norm": 0.363588809967041, "learning_rate": 1.0209726986114174e-05, "loss": 0.4873, "step": 32170 }, { "epoch": 0.9882652904494209, "grad_norm": 0.40022894740104675, "learning_rate": 1.0209243814830403e-05, "loss": 0.5703, "step": 32171 }, { "epoch": 0.9882960095843701, "grad_norm": 0.42741236090660095, "learning_rate": 1.0208760643057925e-05, "loss": 0.5815, "step": 32172 }, { "epoch": 0.9883267287193193, "grad_norm": 0.3297968804836273, "learning_rate": 1.0208277470797875e-05, "loss": 0.4382, "step": 32173 }, { "epoch": 0.9883574478542684, "grad_norm": 0.3636147677898407, "learning_rate": 1.0207794298051376e-05, "loss": 0.5098, "step": 32174 }, { "epoch": 0.9883881669892176, "grad_norm": 0.3582250773906708, "learning_rate": 1.0207311124819561e-05, "loss": 0.5082, "step": 32175 }, { "epoch": 0.9884188861241667, "grad_norm": 0.44586923718452454, "learning_rate": 1.0206827951103555e-05, "loss": 0.4821, "step": 32176 }, { "epoch": 0.9884496052591158, "grad_norm": 0.505623996257782, "learning_rate": 1.0206344776904489e-05, "loss": 0.5454, "step": 32177 }, { "epoch": 0.9884803243940651, "grad_norm": 0.37903133034706116, "learning_rate": 1.020586160222349e-05, "loss": 0.5132, "step": 32178 }, { "epoch": 0.9885110435290142, "grad_norm": 0.355342835187912, "learning_rate": 1.0205378427061685e-05, "loss": 0.5319, "step": 32179 }, { "epoch": 0.9885417626639634, "grad_norm": 0.38622143864631653, "learning_rate": 1.0204895251420208e-05, "loss": 0.5703, "step": 32180 }, { "epoch": 0.9885724817989126, "grad_norm": 0.3711675703525543, "learning_rate": 1.020441207530018e-05, "loss": 0.5019, "step": 32181 }, { "epoch": 0.9886032009338617, "grad_norm": 0.3763747215270996, "learning_rate": 1.0203928898702737e-05, "loss": 0.5945, "step": 32182 }, { "epoch": 0.9886339200688109, "grad_norm": 0.38206368684768677, "learning_rate": 1.0203445721629004e-05, "loss": 0.5102, "step": 32183 }, { "epoch": 0.98866463920376, "grad_norm": 0.3954969346523285, "learning_rate": 1.0202962544080106e-05, "loss": 0.5277, "step": 32184 }, { "epoch": 0.9886953583387091, "grad_norm": 0.3902527987957001, "learning_rate": 1.0202479366057176e-05, "loss": 0.543, "step": 32185 }, { "epoch": 0.9887260774736584, "grad_norm": 0.35028916597366333, "learning_rate": 1.020199618756134e-05, "loss": 0.5554, "step": 32186 }, { "epoch": 0.9887567966086075, "grad_norm": 0.37626126408576965, "learning_rate": 1.0201513008593731e-05, "loss": 0.6022, "step": 32187 }, { "epoch": 0.9887875157435566, "grad_norm": 0.3694612383842468, "learning_rate": 1.020102982915547e-05, "loss": 0.567, "step": 32188 }, { "epoch": 0.9888182348785058, "grad_norm": 0.35958945751190186, "learning_rate": 1.0200546649247695e-05, "loss": 0.5304, "step": 32189 }, { "epoch": 0.988848954013455, "grad_norm": 0.3824270963668823, "learning_rate": 1.0200063468871527e-05, "loss": 0.5463, "step": 32190 }, { "epoch": 0.9888796731484042, "grad_norm": 0.3467811048030853, "learning_rate": 1.0199580288028097e-05, "loss": 0.5632, "step": 32191 }, { "epoch": 0.9889103922833533, "grad_norm": 0.40626856684684753, "learning_rate": 1.0199097106718532e-05, "loss": 0.5807, "step": 32192 }, { "epoch": 0.9889411114183024, "grad_norm": 0.3858722746372223, "learning_rate": 1.0198613924943962e-05, "loss": 0.5767, "step": 32193 }, { "epoch": 0.9889718305532517, "grad_norm": 0.4119257628917694, "learning_rate": 1.0198130742705517e-05, "loss": 0.6101, "step": 32194 }, { "epoch": 0.9890025496882008, "grad_norm": 0.3952065110206604, "learning_rate": 1.0197647560004321e-05, "loss": 0.519, "step": 32195 }, { "epoch": 0.9890332688231499, "grad_norm": 0.3862069249153137, "learning_rate": 1.0197164376841507e-05, "loss": 0.4968, "step": 32196 }, { "epoch": 0.9890639879580991, "grad_norm": 0.41822198033332825, "learning_rate": 1.0196681193218202e-05, "loss": 0.5054, "step": 32197 }, { "epoch": 0.9890947070930483, "grad_norm": 0.3915591239929199, "learning_rate": 1.0196198009135534e-05, "loss": 0.5174, "step": 32198 }, { "epoch": 0.9891254262279974, "grad_norm": 0.5144933462142944, "learning_rate": 1.019571482459463e-05, "loss": 0.6168, "step": 32199 }, { "epoch": 0.9891561453629466, "grad_norm": 0.3800897002220154, "learning_rate": 1.0195231639596625e-05, "loss": 0.531, "step": 32200 }, { "epoch": 0.9891868644978957, "grad_norm": 0.3817237615585327, "learning_rate": 1.019474845414264e-05, "loss": 0.5422, "step": 32201 }, { "epoch": 0.989217583632845, "grad_norm": 0.41069623827934265, "learning_rate": 1.0194265268233807e-05, "loss": 0.5677, "step": 32202 }, { "epoch": 0.9892483027677941, "grad_norm": 0.4282089173793793, "learning_rate": 1.0193782081871255e-05, "loss": 0.5582, "step": 32203 }, { "epoch": 0.9892790219027432, "grad_norm": 0.3866927921772003, "learning_rate": 1.0193298895056108e-05, "loss": 0.5707, "step": 32204 }, { "epoch": 0.9893097410376924, "grad_norm": 0.40817177295684814, "learning_rate": 1.01928157077895e-05, "loss": 0.5744, "step": 32205 }, { "epoch": 0.9893404601726415, "grad_norm": 0.4030611217021942, "learning_rate": 1.0192332520072556e-05, "loss": 0.5965, "step": 32206 }, { "epoch": 0.9893711793075907, "grad_norm": 0.41297513246536255, "learning_rate": 1.019184933190641e-05, "loss": 0.4977, "step": 32207 }, { "epoch": 0.9894018984425399, "grad_norm": 0.39029833674430847, "learning_rate": 1.0191366143292183e-05, "loss": 0.5556, "step": 32208 }, { "epoch": 0.989432617577489, "grad_norm": 0.3773021101951599, "learning_rate": 1.0190882954231008e-05, "loss": 0.5428, "step": 32209 }, { "epoch": 0.9894633367124381, "grad_norm": 0.39592665433883667, "learning_rate": 1.019039976472401e-05, "loss": 0.5394, "step": 32210 }, { "epoch": 0.9894940558473874, "grad_norm": 0.3763681650161743, "learning_rate": 1.0189916574772325e-05, "loss": 0.583, "step": 32211 }, { "epoch": 0.9895247749823365, "grad_norm": 0.4100067615509033, "learning_rate": 1.0189433384377071e-05, "loss": 0.5448, "step": 32212 }, { "epoch": 0.9895554941172856, "grad_norm": 0.355919748544693, "learning_rate": 1.0188950193539387e-05, "loss": 0.557, "step": 32213 }, { "epoch": 0.9895862132522348, "grad_norm": 0.37298139929771423, "learning_rate": 1.0188467002260394e-05, "loss": 0.5742, "step": 32214 }, { "epoch": 0.989616932387184, "grad_norm": 0.3398204445838928, "learning_rate": 1.0187983810541223e-05, "loss": 0.5347, "step": 32215 }, { "epoch": 0.9896476515221332, "grad_norm": 0.355074018239975, "learning_rate": 1.0187500618383004e-05, "loss": 0.511, "step": 32216 }, { "epoch": 0.9896783706570823, "grad_norm": 0.4251198172569275, "learning_rate": 1.0187017425786863e-05, "loss": 0.6844, "step": 32217 }, { "epoch": 0.9897090897920314, "grad_norm": 0.5558260679244995, "learning_rate": 1.0186534232753933e-05, "loss": 0.5837, "step": 32218 }, { "epoch": 0.9897398089269807, "grad_norm": 0.39636632800102234, "learning_rate": 1.0186051039285335e-05, "loss": 0.4636, "step": 32219 }, { "epoch": 0.9897705280619298, "grad_norm": 0.4399622678756714, "learning_rate": 1.0185567845382205e-05, "loss": 0.499, "step": 32220 }, { "epoch": 0.9898012471968789, "grad_norm": 0.45344293117523193, "learning_rate": 1.0185084651045667e-05, "loss": 0.6005, "step": 32221 }, { "epoch": 0.9898319663318281, "grad_norm": 0.36356452107429504, "learning_rate": 1.018460145627685e-05, "loss": 0.4415, "step": 32222 }, { "epoch": 0.9898626854667772, "grad_norm": 0.6256075501441956, "learning_rate": 1.0184118261076885e-05, "loss": 0.4918, "step": 32223 }, { "epoch": 0.9898934046017264, "grad_norm": 0.376596063375473, "learning_rate": 1.0183635065446898e-05, "loss": 0.5648, "step": 32224 }, { "epoch": 0.9899241237366756, "grad_norm": 0.5305349826812744, "learning_rate": 1.0183151869388019e-05, "loss": 0.5808, "step": 32225 }, { "epoch": 0.9899548428716247, "grad_norm": 0.39791011810302734, "learning_rate": 1.0182668672901375e-05, "loss": 0.4937, "step": 32226 }, { "epoch": 0.989985562006574, "grad_norm": 0.3604849576950073, "learning_rate": 1.0182185475988096e-05, "loss": 0.5107, "step": 32227 }, { "epoch": 0.9900162811415231, "grad_norm": 0.38815629482269287, "learning_rate": 1.018170227864931e-05, "loss": 0.5885, "step": 32228 }, { "epoch": 0.9900470002764722, "grad_norm": 0.3641103506088257, "learning_rate": 1.0181219080886146e-05, "loss": 0.5414, "step": 32229 }, { "epoch": 0.9900777194114214, "grad_norm": 0.5917376279830933, "learning_rate": 1.018073588269973e-05, "loss": 0.5704, "step": 32230 }, { "epoch": 0.9901084385463705, "grad_norm": 0.37418481707572937, "learning_rate": 1.0180252684091195e-05, "loss": 0.553, "step": 32231 }, { "epoch": 0.9901391576813197, "grad_norm": 0.42543885111808777, "learning_rate": 1.0179769485061668e-05, "loss": 0.5737, "step": 32232 }, { "epoch": 0.9901698768162689, "grad_norm": 0.4238223433494568, "learning_rate": 1.0179286285612275e-05, "loss": 0.5125, "step": 32233 }, { "epoch": 0.990200595951218, "grad_norm": 0.32704800367355347, "learning_rate": 1.0178803085744149e-05, "loss": 0.4435, "step": 32234 }, { "epoch": 0.9902313150861671, "grad_norm": 0.38485920429229736, "learning_rate": 1.0178319885458415e-05, "loss": 0.5392, "step": 32235 }, { "epoch": 0.9902620342211164, "grad_norm": 1.1051019430160522, "learning_rate": 1.01778366847562e-05, "loss": 0.5761, "step": 32236 }, { "epoch": 0.9902927533560655, "grad_norm": 0.36702483892440796, "learning_rate": 1.0177353483638637e-05, "loss": 0.58, "step": 32237 }, { "epoch": 0.9903234724910146, "grad_norm": 0.3971094489097595, "learning_rate": 1.0176870282106853e-05, "loss": 0.5056, "step": 32238 }, { "epoch": 0.9903541916259638, "grad_norm": 0.4128482937812805, "learning_rate": 1.0176387080161976e-05, "loss": 0.566, "step": 32239 }, { "epoch": 0.990384910760913, "grad_norm": 0.34403273463249207, "learning_rate": 1.0175903877805131e-05, "loss": 0.5203, "step": 32240 }, { "epoch": 0.9904156298958622, "grad_norm": 0.39846155047416687, "learning_rate": 1.0175420675037455e-05, "loss": 0.5189, "step": 32241 }, { "epoch": 0.9904463490308113, "grad_norm": 0.4115917384624481, "learning_rate": 1.0174937471860069e-05, "loss": 0.5265, "step": 32242 }, { "epoch": 0.9904770681657604, "grad_norm": 0.3716435134410858, "learning_rate": 1.0174454268274106e-05, "loss": 0.5351, "step": 32243 }, { "epoch": 0.9905077873007097, "grad_norm": 0.4204607605934143, "learning_rate": 1.017397106428069e-05, "loss": 0.6175, "step": 32244 }, { "epoch": 0.9905385064356588, "grad_norm": 0.3876058757305145, "learning_rate": 1.0173487859880958e-05, "loss": 0.5088, "step": 32245 }, { "epoch": 0.9905692255706079, "grad_norm": 0.3603725731372833, "learning_rate": 1.0173004655076028e-05, "loss": 0.5465, "step": 32246 }, { "epoch": 0.9905999447055571, "grad_norm": 0.36036229133605957, "learning_rate": 1.0172521449867034e-05, "loss": 0.5298, "step": 32247 }, { "epoch": 0.9906306638405062, "grad_norm": 0.3506869673728943, "learning_rate": 1.0172038244255107e-05, "loss": 0.5526, "step": 32248 }, { "epoch": 0.9906613829754554, "grad_norm": 0.33401280641555786, "learning_rate": 1.0171555038241367e-05, "loss": 0.4899, "step": 32249 }, { "epoch": 0.9906921021104046, "grad_norm": 0.39873453974723816, "learning_rate": 1.0171071831826953e-05, "loss": 0.549, "step": 32250 }, { "epoch": 0.9907228212453537, "grad_norm": 0.38574615120887756, "learning_rate": 1.0170588625012987e-05, "loss": 0.5526, "step": 32251 }, { "epoch": 0.9907535403803029, "grad_norm": 0.38286495208740234, "learning_rate": 1.0170105417800602e-05, "loss": 0.5904, "step": 32252 }, { "epoch": 0.9907842595152521, "grad_norm": 0.37456026673316956, "learning_rate": 1.0169622210190923e-05, "loss": 0.6155, "step": 32253 }, { "epoch": 0.9908149786502012, "grad_norm": 0.34495094418525696, "learning_rate": 1.0169139002185078e-05, "loss": 0.5106, "step": 32254 }, { "epoch": 0.9908456977851504, "grad_norm": 0.37490004301071167, "learning_rate": 1.0168655793784199e-05, "loss": 0.5542, "step": 32255 }, { "epoch": 0.9908764169200995, "grad_norm": 0.4356817901134491, "learning_rate": 1.0168172584989409e-05, "loss": 0.5613, "step": 32256 }, { "epoch": 0.9909071360550487, "grad_norm": 0.3393363356590271, "learning_rate": 1.0167689375801843e-05, "loss": 0.5426, "step": 32257 }, { "epoch": 0.9909378551899979, "grad_norm": 0.368733674287796, "learning_rate": 1.0167206166222624e-05, "loss": 0.4837, "step": 32258 }, { "epoch": 0.990968574324947, "grad_norm": 0.41447463631629944, "learning_rate": 1.0166722956252888e-05, "loss": 0.5273, "step": 32259 }, { "epoch": 0.9909992934598961, "grad_norm": 0.38014334440231323, "learning_rate": 1.0166239745893755e-05, "loss": 0.5252, "step": 32260 }, { "epoch": 0.9910300125948454, "grad_norm": 0.907802939414978, "learning_rate": 1.0165756535146361e-05, "loss": 0.4684, "step": 32261 }, { "epoch": 0.9910607317297945, "grad_norm": 0.3526974320411682, "learning_rate": 1.0165273324011829e-05, "loss": 0.5587, "step": 32262 }, { "epoch": 0.9910914508647436, "grad_norm": 0.3383224606513977, "learning_rate": 1.016479011249129e-05, "loss": 0.5372, "step": 32263 }, { "epoch": 0.9911221699996928, "grad_norm": 0.3339794874191284, "learning_rate": 1.0164306900585872e-05, "loss": 0.4884, "step": 32264 }, { "epoch": 0.991152889134642, "grad_norm": 0.3723840117454529, "learning_rate": 1.0163823688296702e-05, "loss": 0.5525, "step": 32265 }, { "epoch": 0.9911836082695912, "grad_norm": 0.3723396062850952, "learning_rate": 1.0163340475624911e-05, "loss": 0.5722, "step": 32266 }, { "epoch": 0.9912143274045403, "grad_norm": 0.3991277813911438, "learning_rate": 1.0162857262571626e-05, "loss": 0.6023, "step": 32267 }, { "epoch": 0.9912450465394894, "grad_norm": 0.4546571671962738, "learning_rate": 1.016237404913798e-05, "loss": 0.5726, "step": 32268 }, { "epoch": 0.9912757656744386, "grad_norm": 0.4115605652332306, "learning_rate": 1.0161890835325095e-05, "loss": 0.5303, "step": 32269 }, { "epoch": 0.9913064848093878, "grad_norm": 0.3842051029205322, "learning_rate": 1.0161407621134105e-05, "loss": 0.4946, "step": 32270 }, { "epoch": 0.9913372039443369, "grad_norm": 0.3567367494106293, "learning_rate": 1.0160924406566135e-05, "loss": 0.5794, "step": 32271 }, { "epoch": 0.9913679230792861, "grad_norm": 0.41901737451553345, "learning_rate": 1.0160441191622314e-05, "loss": 0.5448, "step": 32272 }, { "epoch": 0.9913986422142352, "grad_norm": 0.37941789627075195, "learning_rate": 1.0159957976303773e-05, "loss": 0.5053, "step": 32273 }, { "epoch": 0.9914293613491844, "grad_norm": 0.37065595388412476, "learning_rate": 1.0159474760611638e-05, "loss": 0.5489, "step": 32274 }, { "epoch": 0.9914600804841336, "grad_norm": 0.3861217796802521, "learning_rate": 1.015899154454704e-05, "loss": 0.6546, "step": 32275 }, { "epoch": 0.9914907996190827, "grad_norm": 0.37479111552238464, "learning_rate": 1.0158508328111104e-05, "loss": 0.5228, "step": 32276 }, { "epoch": 0.9915215187540319, "grad_norm": 0.41581717133522034, "learning_rate": 1.0158025111304962e-05, "loss": 0.5759, "step": 32277 }, { "epoch": 0.9915522378889811, "grad_norm": 0.3540689945220947, "learning_rate": 1.015754189412974e-05, "loss": 0.4879, "step": 32278 }, { "epoch": 0.9915829570239302, "grad_norm": 0.3910854756832123, "learning_rate": 1.015705867658657e-05, "loss": 0.5574, "step": 32279 }, { "epoch": 0.9916136761588794, "grad_norm": 0.3529926538467407, "learning_rate": 1.0156575458676576e-05, "loss": 0.509, "step": 32280 }, { "epoch": 0.9916443952938285, "grad_norm": 0.4353283941745758, "learning_rate": 1.0156092240400892e-05, "loss": 0.6398, "step": 32281 }, { "epoch": 0.9916751144287776, "grad_norm": 0.4393685460090637, "learning_rate": 1.0155609021760643e-05, "loss": 0.4854, "step": 32282 }, { "epoch": 0.9917058335637269, "grad_norm": 0.3620607256889343, "learning_rate": 1.0155125802756958e-05, "loss": 0.5846, "step": 32283 }, { "epoch": 0.991736552698676, "grad_norm": 0.39055222272872925, "learning_rate": 1.0154642583390965e-05, "loss": 0.566, "step": 32284 }, { "epoch": 0.9917672718336251, "grad_norm": 0.37977153062820435, "learning_rate": 1.0154159363663792e-05, "loss": 0.5372, "step": 32285 }, { "epoch": 0.9917979909685744, "grad_norm": 0.37354978919029236, "learning_rate": 1.0153676143576572e-05, "loss": 0.5762, "step": 32286 }, { "epoch": 0.9918287101035235, "grad_norm": 0.4053434133529663, "learning_rate": 1.0153192923130429e-05, "loss": 0.5669, "step": 32287 }, { "epoch": 0.9918594292384726, "grad_norm": 0.37165477871894836, "learning_rate": 1.0152709702326492e-05, "loss": 0.5108, "step": 32288 }, { "epoch": 0.9918901483734218, "grad_norm": 0.4061339795589447, "learning_rate": 1.0152226481165894e-05, "loss": 0.5535, "step": 32289 }, { "epoch": 0.9919208675083709, "grad_norm": 0.39246997237205505, "learning_rate": 1.0151743259649758e-05, "loss": 0.6204, "step": 32290 }, { "epoch": 0.9919515866433202, "grad_norm": 0.35129788517951965, "learning_rate": 1.0151260037779217e-05, "loss": 0.4708, "step": 32291 }, { "epoch": 0.9919823057782693, "grad_norm": 0.46255698800086975, "learning_rate": 1.0150776815555396e-05, "loss": 0.5871, "step": 32292 }, { "epoch": 0.9920130249132184, "grad_norm": 0.4030190408229828, "learning_rate": 1.0150293592979426e-05, "loss": 0.5389, "step": 32293 }, { "epoch": 0.9920437440481676, "grad_norm": 0.36360347270965576, "learning_rate": 1.0149810370052433e-05, "loss": 0.5235, "step": 32294 }, { "epoch": 0.9920744631831168, "grad_norm": 0.37075838446617126, "learning_rate": 1.0149327146775551e-05, "loss": 0.5611, "step": 32295 }, { "epoch": 0.9921051823180659, "grad_norm": 0.3758288323879242, "learning_rate": 1.0148843923149902e-05, "loss": 0.586, "step": 32296 }, { "epoch": 0.9921359014530151, "grad_norm": 0.3786045014858246, "learning_rate": 1.014836069917662e-05, "loss": 0.525, "step": 32297 }, { "epoch": 0.9921666205879642, "grad_norm": 0.38423487544059753, "learning_rate": 1.0147877474856832e-05, "loss": 0.5309, "step": 32298 }, { "epoch": 0.9921973397229134, "grad_norm": 0.4114548861980438, "learning_rate": 1.014739425019166e-05, "loss": 0.5445, "step": 32299 }, { "epoch": 0.9922280588578626, "grad_norm": 0.3660309910774231, "learning_rate": 1.0146911025182245e-05, "loss": 0.4923, "step": 32300 }, { "epoch": 0.9922587779928117, "grad_norm": 0.3678315281867981, "learning_rate": 1.0146427799829704e-05, "loss": 0.5438, "step": 32301 }, { "epoch": 0.9922894971277609, "grad_norm": 0.38054749369621277, "learning_rate": 1.0145944574135174e-05, "loss": 0.5454, "step": 32302 }, { "epoch": 0.99232021626271, "grad_norm": 0.39484742283821106, "learning_rate": 1.0145461348099777e-05, "loss": 0.5712, "step": 32303 }, { "epoch": 0.9923509353976592, "grad_norm": 0.35421767830848694, "learning_rate": 1.0144978121724647e-05, "loss": 0.5068, "step": 32304 }, { "epoch": 0.9923816545326084, "grad_norm": 0.3927289843559265, "learning_rate": 1.0144494895010912e-05, "loss": 0.5661, "step": 32305 }, { "epoch": 0.9924123736675575, "grad_norm": 0.44859829545021057, "learning_rate": 1.0144011667959698e-05, "loss": 0.5438, "step": 32306 }, { "epoch": 0.9924430928025066, "grad_norm": 0.3510657846927643, "learning_rate": 1.0143528440572133e-05, "loss": 0.4257, "step": 32307 }, { "epoch": 0.9924738119374559, "grad_norm": 0.35505038499832153, "learning_rate": 1.0143045212849348e-05, "loss": 0.5048, "step": 32308 }, { "epoch": 0.992504531072405, "grad_norm": 0.41280388832092285, "learning_rate": 1.0142561984792472e-05, "loss": 0.5666, "step": 32309 }, { "epoch": 0.9925352502073541, "grad_norm": 0.35539841651916504, "learning_rate": 1.0142078756402631e-05, "loss": 0.5674, "step": 32310 }, { "epoch": 0.9925659693423033, "grad_norm": 0.48130345344543457, "learning_rate": 1.0141595527680956e-05, "loss": 0.6614, "step": 32311 }, { "epoch": 0.9925966884772525, "grad_norm": 0.3733084201812744, "learning_rate": 1.0141112298628575e-05, "loss": 0.6236, "step": 32312 }, { "epoch": 0.9926274076122017, "grad_norm": 0.4213038682937622, "learning_rate": 1.0140629069246617e-05, "loss": 0.4828, "step": 32313 }, { "epoch": 0.9926581267471508, "grad_norm": 0.3847908675670624, "learning_rate": 1.0140145839536209e-05, "loss": 0.5769, "step": 32314 }, { "epoch": 0.9926888458820999, "grad_norm": 0.37023380398750305, "learning_rate": 1.013966260949848e-05, "loss": 0.5766, "step": 32315 }, { "epoch": 0.9927195650170492, "grad_norm": 0.4144892394542694, "learning_rate": 1.0139179379134563e-05, "loss": 0.563, "step": 32316 }, { "epoch": 0.9927502841519983, "grad_norm": 0.4024188220500946, "learning_rate": 1.0138696148445577e-05, "loss": 0.5083, "step": 32317 }, { "epoch": 0.9927810032869474, "grad_norm": 0.3184928596019745, "learning_rate": 1.0138212917432661e-05, "loss": 0.4635, "step": 32318 }, { "epoch": 0.9928117224218966, "grad_norm": 0.46869754791259766, "learning_rate": 1.0137729686096935e-05, "loss": 0.5997, "step": 32319 }, { "epoch": 0.9928424415568458, "grad_norm": 0.36067628860473633, "learning_rate": 1.0137246454439535e-05, "loss": 0.5386, "step": 32320 }, { "epoch": 0.9928731606917949, "grad_norm": 0.3709237575531006, "learning_rate": 1.0136763222461585e-05, "loss": 0.5001, "step": 32321 }, { "epoch": 0.9929038798267441, "grad_norm": 0.399245947599411, "learning_rate": 1.0136279990164215e-05, "loss": 0.5741, "step": 32322 }, { "epoch": 0.9929345989616932, "grad_norm": 0.39587199687957764, "learning_rate": 1.0135796757548554e-05, "loss": 0.4988, "step": 32323 }, { "epoch": 0.9929653180966423, "grad_norm": 0.3928129971027374, "learning_rate": 1.0135313524615729e-05, "loss": 0.5575, "step": 32324 }, { "epoch": 0.9929960372315916, "grad_norm": 0.3955269157886505, "learning_rate": 1.0134830291366873e-05, "loss": 0.5366, "step": 32325 }, { "epoch": 0.9930267563665407, "grad_norm": 0.36662885546684265, "learning_rate": 1.013434705780311e-05, "loss": 0.4428, "step": 32326 }, { "epoch": 0.9930574755014899, "grad_norm": 0.35725805163383484, "learning_rate": 1.013386382392557e-05, "loss": 0.4562, "step": 32327 }, { "epoch": 0.993088194636439, "grad_norm": 1.1828842163085938, "learning_rate": 1.0133380589735377e-05, "loss": 0.5641, "step": 32328 }, { "epoch": 0.9931189137713882, "grad_norm": 0.39458832144737244, "learning_rate": 1.013289735523367e-05, "loss": 0.5201, "step": 32329 }, { "epoch": 0.9931496329063374, "grad_norm": 0.39535757899284363, "learning_rate": 1.013241412042157e-05, "loss": 0.5075, "step": 32330 }, { "epoch": 0.9931803520412865, "grad_norm": 0.4177777171134949, "learning_rate": 1.013193088530021e-05, "loss": 0.5956, "step": 32331 }, { "epoch": 0.9932110711762356, "grad_norm": 0.3796970546245575, "learning_rate": 1.0131447649870712e-05, "loss": 0.6255, "step": 32332 }, { "epoch": 0.9932417903111849, "grad_norm": 0.35397031903266907, "learning_rate": 1.0130964414134211e-05, "loss": 0.5111, "step": 32333 }, { "epoch": 0.993272509446134, "grad_norm": 0.4327811896800995, "learning_rate": 1.0130481178091835e-05, "loss": 0.5119, "step": 32334 }, { "epoch": 0.9933032285810831, "grad_norm": 0.3871498107910156, "learning_rate": 1.0129997941744708e-05, "loss": 0.5825, "step": 32335 }, { "epoch": 0.9933339477160323, "grad_norm": 0.37914830446243286, "learning_rate": 1.0129514705093966e-05, "loss": 0.5263, "step": 32336 }, { "epoch": 0.9933646668509815, "grad_norm": 0.3796376883983612, "learning_rate": 1.0129031468140728e-05, "loss": 0.5359, "step": 32337 }, { "epoch": 0.9933953859859307, "grad_norm": 0.4221058189868927, "learning_rate": 1.0128548230886132e-05, "loss": 0.4277, "step": 32338 }, { "epoch": 0.9934261051208798, "grad_norm": 0.40415066480636597, "learning_rate": 1.0128064993331301e-05, "loss": 0.5451, "step": 32339 }, { "epoch": 0.9934568242558289, "grad_norm": 0.3732110261917114, "learning_rate": 1.0127581755477365e-05, "loss": 0.5467, "step": 32340 }, { "epoch": 0.9934875433907782, "grad_norm": 0.3436453640460968, "learning_rate": 1.0127098517325453e-05, "loss": 0.5944, "step": 32341 }, { "epoch": 0.9935182625257273, "grad_norm": 0.3754141926765442, "learning_rate": 1.0126615278876692e-05, "loss": 0.5628, "step": 32342 }, { "epoch": 0.9935489816606764, "grad_norm": 0.4354453682899475, "learning_rate": 1.0126132040132216e-05, "loss": 0.5705, "step": 32343 }, { "epoch": 0.9935797007956256, "grad_norm": 0.4480322301387787, "learning_rate": 1.0125648801093146e-05, "loss": 0.5747, "step": 32344 }, { "epoch": 0.9936104199305748, "grad_norm": 0.35247376561164856, "learning_rate": 1.012516556176062e-05, "loss": 0.4727, "step": 32345 }, { "epoch": 0.9936411390655239, "grad_norm": 0.37638911604881287, "learning_rate": 1.0124682322135755e-05, "loss": 0.6037, "step": 32346 }, { "epoch": 0.9936718582004731, "grad_norm": 0.3952213227748871, "learning_rate": 1.012419908221969e-05, "loss": 0.5161, "step": 32347 }, { "epoch": 0.9937025773354222, "grad_norm": 0.372242271900177, "learning_rate": 1.0123715842013546e-05, "loss": 0.5272, "step": 32348 }, { "epoch": 0.9937332964703713, "grad_norm": 0.363162100315094, "learning_rate": 1.0123232601518458e-05, "loss": 0.5105, "step": 32349 }, { "epoch": 0.9937640156053206, "grad_norm": 0.383843332529068, "learning_rate": 1.0122749360735552e-05, "loss": 0.5642, "step": 32350 }, { "epoch": 0.9937947347402697, "grad_norm": 0.4598860442638397, "learning_rate": 1.0122266119665953e-05, "loss": 0.5829, "step": 32351 }, { "epoch": 0.9938254538752189, "grad_norm": 0.38057762384414673, "learning_rate": 1.0121782878310795e-05, "loss": 0.4573, "step": 32352 }, { "epoch": 0.993856173010168, "grad_norm": 0.3661222457885742, "learning_rate": 1.0121299636671203e-05, "loss": 0.5417, "step": 32353 }, { "epoch": 0.9938868921451172, "grad_norm": 0.4155339300632477, "learning_rate": 1.012081639474831e-05, "loss": 0.5316, "step": 32354 }, { "epoch": 0.9939176112800664, "grad_norm": 0.3414105772972107, "learning_rate": 1.0120333152543238e-05, "loss": 0.4968, "step": 32355 }, { "epoch": 0.9939483304150155, "grad_norm": 0.3667045831680298, "learning_rate": 1.0119849910057123e-05, "loss": 0.5871, "step": 32356 }, { "epoch": 0.9939790495499646, "grad_norm": 0.3671264052391052, "learning_rate": 1.011936666729109e-05, "loss": 0.6167, "step": 32357 }, { "epoch": 0.9940097686849139, "grad_norm": 0.3853718638420105, "learning_rate": 1.0118883424246264e-05, "loss": 0.5515, "step": 32358 }, { "epoch": 0.994040487819863, "grad_norm": 0.3725956380367279, "learning_rate": 1.0118400180923779e-05, "loss": 0.4856, "step": 32359 }, { "epoch": 0.9940712069548121, "grad_norm": 0.38769960403442383, "learning_rate": 1.0117916937324763e-05, "loss": 0.6163, "step": 32360 }, { "epoch": 0.9941019260897613, "grad_norm": 0.37752413749694824, "learning_rate": 1.0117433693450345e-05, "loss": 0.5497, "step": 32361 }, { "epoch": 0.9941326452247105, "grad_norm": 0.37024715542793274, "learning_rate": 1.011695044930165e-05, "loss": 0.5575, "step": 32362 }, { "epoch": 0.9941633643596597, "grad_norm": 0.41636183857917786, "learning_rate": 1.0116467204879811e-05, "loss": 0.5525, "step": 32363 }, { "epoch": 0.9941940834946088, "grad_norm": 0.4153290092945099, "learning_rate": 1.0115983960185954e-05, "loss": 0.5772, "step": 32364 }, { "epoch": 0.9942248026295579, "grad_norm": 0.3899405300617218, "learning_rate": 1.0115500715221209e-05, "loss": 0.5991, "step": 32365 }, { "epoch": 0.9942555217645072, "grad_norm": 0.3532464802265167, "learning_rate": 1.0115017469986704e-05, "loss": 0.427, "step": 32366 }, { "epoch": 0.9942862408994563, "grad_norm": 0.3651235103607178, "learning_rate": 1.0114534224483568e-05, "loss": 0.4891, "step": 32367 }, { "epoch": 0.9943169600344054, "grad_norm": 0.3908202052116394, "learning_rate": 1.0114050978712928e-05, "loss": 0.5129, "step": 32368 }, { "epoch": 0.9943476791693546, "grad_norm": 0.36422044038772583, "learning_rate": 1.0113567732675916e-05, "loss": 0.57, "step": 32369 }, { "epoch": 0.9943783983043037, "grad_norm": 0.3861384093761444, "learning_rate": 1.0113084486373656e-05, "loss": 0.549, "step": 32370 }, { "epoch": 0.9944091174392529, "grad_norm": 0.4990008473396301, "learning_rate": 1.011260123980728e-05, "loss": 0.6204, "step": 32371 }, { "epoch": 0.9944398365742021, "grad_norm": 0.3760487735271454, "learning_rate": 1.0112117992977918e-05, "loss": 0.5089, "step": 32372 }, { "epoch": 0.9944705557091512, "grad_norm": 0.4104313254356384, "learning_rate": 1.0111634745886694e-05, "loss": 0.5575, "step": 32373 }, { "epoch": 0.9945012748441003, "grad_norm": 0.3898140788078308, "learning_rate": 1.011115149853474e-05, "loss": 0.5268, "step": 32374 }, { "epoch": 0.9945319939790496, "grad_norm": 0.3710586428642273, "learning_rate": 1.0110668250923184e-05, "loss": 0.6008, "step": 32375 }, { "epoch": 0.9945627131139987, "grad_norm": 0.34998446702957153, "learning_rate": 1.0110185003053153e-05, "loss": 0.4335, "step": 32376 }, { "epoch": 0.9945934322489479, "grad_norm": 0.3920132517814636, "learning_rate": 1.010970175492578e-05, "loss": 0.4971, "step": 32377 }, { "epoch": 0.994624151383897, "grad_norm": 0.39042842388153076, "learning_rate": 1.0109218506542188e-05, "loss": 0.6256, "step": 32378 }, { "epoch": 0.9946548705188462, "grad_norm": 0.36674395203590393, "learning_rate": 1.0108735257903512e-05, "loss": 0.5135, "step": 32379 }, { "epoch": 0.9946855896537954, "grad_norm": 0.3584077060222626, "learning_rate": 1.0108252009010875e-05, "loss": 0.5743, "step": 32380 }, { "epoch": 0.9947163087887445, "grad_norm": 0.3683011531829834, "learning_rate": 1.010776875986541e-05, "loss": 0.4456, "step": 32381 }, { "epoch": 0.9947470279236936, "grad_norm": 0.46519792079925537, "learning_rate": 1.0107285510468239e-05, "loss": 0.5445, "step": 32382 }, { "epoch": 0.9947777470586429, "grad_norm": 0.35834458470344543, "learning_rate": 1.0106802260820501e-05, "loss": 0.6097, "step": 32383 }, { "epoch": 0.994808466193592, "grad_norm": 0.38513660430908203, "learning_rate": 1.0106319010923314e-05, "loss": 0.5077, "step": 32384 }, { "epoch": 0.9948391853285411, "grad_norm": 0.37072116136550903, "learning_rate": 1.0105835760777813e-05, "loss": 0.5453, "step": 32385 }, { "epoch": 0.9948699044634903, "grad_norm": 0.35603463649749756, "learning_rate": 1.0105352510385129e-05, "loss": 0.4514, "step": 32386 }, { "epoch": 0.9949006235984394, "grad_norm": 0.41186046600341797, "learning_rate": 1.0104869259746381e-05, "loss": 0.6218, "step": 32387 }, { "epoch": 0.9949313427333887, "grad_norm": 0.34837719798088074, "learning_rate": 1.0104386008862708e-05, "loss": 0.5582, "step": 32388 }, { "epoch": 0.9949620618683378, "grad_norm": 0.38500940799713135, "learning_rate": 1.0103902757735231e-05, "loss": 0.5241, "step": 32389 }, { "epoch": 0.9949927810032869, "grad_norm": 0.37405043840408325, "learning_rate": 1.0103419506365084e-05, "loss": 0.5014, "step": 32390 }, { "epoch": 0.9950235001382361, "grad_norm": 0.38273391127586365, "learning_rate": 1.0102936254753393e-05, "loss": 0.5276, "step": 32391 }, { "epoch": 0.9950542192731853, "grad_norm": 0.384322851896286, "learning_rate": 1.0102453002901286e-05, "loss": 0.5035, "step": 32392 }, { "epoch": 0.9950849384081344, "grad_norm": 0.42898645997047424, "learning_rate": 1.0101969750809892e-05, "loss": 0.4743, "step": 32393 }, { "epoch": 0.9951156575430836, "grad_norm": 0.3572531044483185, "learning_rate": 1.0101486498480344e-05, "loss": 0.6189, "step": 32394 }, { "epoch": 0.9951463766780327, "grad_norm": 0.40767839550971985, "learning_rate": 1.0101003245913765e-05, "loss": 0.6101, "step": 32395 }, { "epoch": 0.9951770958129819, "grad_norm": 0.3994283974170685, "learning_rate": 1.0100519993111283e-05, "loss": 0.5126, "step": 32396 }, { "epoch": 0.9952078149479311, "grad_norm": 0.3825433552265167, "learning_rate": 1.0100036740074035e-05, "loss": 0.507, "step": 32397 }, { "epoch": 0.9952385340828802, "grad_norm": 0.3844190239906311, "learning_rate": 1.0099553486803142e-05, "loss": 0.5274, "step": 32398 }, { "epoch": 0.9952692532178293, "grad_norm": 0.3535843789577484, "learning_rate": 1.0099070233299734e-05, "loss": 0.5386, "step": 32399 }, { "epoch": 0.9952999723527786, "grad_norm": 0.3935088813304901, "learning_rate": 1.0098586979564942e-05, "loss": 0.5934, "step": 32400 }, { "epoch": 0.9953306914877277, "grad_norm": 0.4787008464336395, "learning_rate": 1.0098103725599896e-05, "loss": 0.5969, "step": 32401 }, { "epoch": 0.9953614106226769, "grad_norm": 0.3460431694984436, "learning_rate": 1.0097620471405719e-05, "loss": 0.5492, "step": 32402 }, { "epoch": 0.995392129757626, "grad_norm": 0.36139318346977234, "learning_rate": 1.0097137216983539e-05, "loss": 0.6154, "step": 32403 }, { "epoch": 0.9954228488925752, "grad_norm": 0.3430153429508209, "learning_rate": 1.0096653962334493e-05, "loss": 0.5639, "step": 32404 }, { "epoch": 0.9954535680275244, "grad_norm": 0.3873125910758972, "learning_rate": 1.0096170707459703e-05, "loss": 0.6568, "step": 32405 }, { "epoch": 0.9954842871624735, "grad_norm": 0.3961176872253418, "learning_rate": 1.00956874523603e-05, "loss": 0.5472, "step": 32406 }, { "epoch": 0.9955150062974226, "grad_norm": 0.36364656686782837, "learning_rate": 1.0095204197037411e-05, "loss": 0.5641, "step": 32407 }, { "epoch": 0.9955457254323719, "grad_norm": 0.3990556299686432, "learning_rate": 1.0094720941492169e-05, "loss": 0.5113, "step": 32408 }, { "epoch": 0.995576444567321, "grad_norm": 0.38404542207717896, "learning_rate": 1.0094237685725696e-05, "loss": 0.4317, "step": 32409 }, { "epoch": 0.9956071637022701, "grad_norm": 0.3396654725074768, "learning_rate": 1.0093754429739127e-05, "loss": 0.4906, "step": 32410 }, { "epoch": 0.9956378828372193, "grad_norm": 0.38201209902763367, "learning_rate": 1.0093271173533586e-05, "loss": 0.5592, "step": 32411 }, { "epoch": 0.9956686019721684, "grad_norm": 0.3706047832965851, "learning_rate": 1.0092787917110204e-05, "loss": 0.5022, "step": 32412 }, { "epoch": 0.9956993211071177, "grad_norm": 0.4036979377269745, "learning_rate": 1.0092304660470111e-05, "loss": 0.5294, "step": 32413 }, { "epoch": 0.9957300402420668, "grad_norm": 0.3706468641757965, "learning_rate": 1.0091821403614433e-05, "loss": 0.5269, "step": 32414 }, { "epoch": 0.9957607593770159, "grad_norm": 0.37191516160964966, "learning_rate": 1.0091338146544301e-05, "loss": 0.502, "step": 32415 }, { "epoch": 0.9957914785119651, "grad_norm": 0.35566309094429016, "learning_rate": 1.0090854889260839e-05, "loss": 0.5549, "step": 32416 }, { "epoch": 0.9958221976469143, "grad_norm": 0.4321800470352173, "learning_rate": 1.0090371631765182e-05, "loss": 0.5408, "step": 32417 }, { "epoch": 0.9958529167818634, "grad_norm": 0.4263523817062378, "learning_rate": 1.0089888374058454e-05, "loss": 0.5526, "step": 32418 }, { "epoch": 0.9958836359168126, "grad_norm": 0.38331693410873413, "learning_rate": 1.0089405116141789e-05, "loss": 0.578, "step": 32419 }, { "epoch": 0.9959143550517617, "grad_norm": 0.4290127456188202, "learning_rate": 1.008892185801631e-05, "loss": 0.5626, "step": 32420 }, { "epoch": 0.9959450741867109, "grad_norm": 0.42065030336380005, "learning_rate": 1.0088438599683145e-05, "loss": 0.5539, "step": 32421 }, { "epoch": 0.9959757933216601, "grad_norm": 0.36321550607681274, "learning_rate": 1.008795534114343e-05, "loss": 0.4765, "step": 32422 }, { "epoch": 0.9960065124566092, "grad_norm": 0.3538135588169098, "learning_rate": 1.0087472082398284e-05, "loss": 0.5186, "step": 32423 }, { "epoch": 0.9960372315915584, "grad_norm": 0.34270551800727844, "learning_rate": 1.0086988823448847e-05, "loss": 0.4627, "step": 32424 }, { "epoch": 0.9960679507265076, "grad_norm": 0.3883684575557709, "learning_rate": 1.0086505564296236e-05, "loss": 0.5272, "step": 32425 }, { "epoch": 0.9960986698614567, "grad_norm": 0.34237489104270935, "learning_rate": 1.0086022304941586e-05, "loss": 0.5948, "step": 32426 }, { "epoch": 0.9961293889964059, "grad_norm": 0.3729807138442993, "learning_rate": 1.0085539045386028e-05, "loss": 0.586, "step": 32427 }, { "epoch": 0.996160108131355, "grad_norm": 0.35016974806785583, "learning_rate": 1.0085055785630687e-05, "loss": 0.4917, "step": 32428 }, { "epoch": 0.9961908272663041, "grad_norm": 0.3664548397064209, "learning_rate": 1.008457252567669e-05, "loss": 0.4638, "step": 32429 }, { "epoch": 0.9962215464012534, "grad_norm": 0.38606134057044983, "learning_rate": 1.008408926552517e-05, "loss": 0.516, "step": 32430 }, { "epoch": 0.9962522655362025, "grad_norm": 0.37199342250823975, "learning_rate": 1.0083606005177251e-05, "loss": 0.6031, "step": 32431 }, { "epoch": 0.9962829846711516, "grad_norm": 0.3366070091724396, "learning_rate": 1.0083122744634066e-05, "loss": 0.5244, "step": 32432 }, { "epoch": 0.9963137038061008, "grad_norm": 0.3925579786300659, "learning_rate": 1.0082639483896744e-05, "loss": 0.5405, "step": 32433 }, { "epoch": 0.99634442294105, "grad_norm": 0.4124689996242523, "learning_rate": 1.0082156222966408e-05, "loss": 0.5353, "step": 32434 }, { "epoch": 0.9963751420759991, "grad_norm": 0.48427584767341614, "learning_rate": 1.0081672961844193e-05, "loss": 0.4203, "step": 32435 }, { "epoch": 0.9964058612109483, "grad_norm": 0.3740990161895752, "learning_rate": 1.0081189700531224e-05, "loss": 0.5258, "step": 32436 }, { "epoch": 0.9964365803458974, "grad_norm": 0.4176744222640991, "learning_rate": 1.0080706439028631e-05, "loss": 0.5669, "step": 32437 }, { "epoch": 0.9964672994808467, "grad_norm": 0.4217855632305145, "learning_rate": 1.0080223177337545e-05, "loss": 0.4893, "step": 32438 }, { "epoch": 0.9964980186157958, "grad_norm": 0.37449654936790466, "learning_rate": 1.0079739915459089e-05, "loss": 0.5198, "step": 32439 }, { "epoch": 0.9965287377507449, "grad_norm": 0.4392707347869873, "learning_rate": 1.0079256653394395e-05, "loss": 0.5209, "step": 32440 }, { "epoch": 0.9965594568856941, "grad_norm": 0.37100523710250854, "learning_rate": 1.0078773391144592e-05, "loss": 0.5082, "step": 32441 }, { "epoch": 0.9965901760206433, "grad_norm": 0.3307364881038666, "learning_rate": 1.0078290128710811e-05, "loss": 0.5023, "step": 32442 }, { "epoch": 0.9966208951555924, "grad_norm": 0.3909124732017517, "learning_rate": 1.0077806866094174e-05, "loss": 0.6025, "step": 32443 }, { "epoch": 0.9966516142905416, "grad_norm": 0.38406646251678467, "learning_rate": 1.0077323603295814e-05, "loss": 0.4663, "step": 32444 }, { "epoch": 0.9966823334254907, "grad_norm": 0.3528292179107666, "learning_rate": 1.007684034031686e-05, "loss": 0.5366, "step": 32445 }, { "epoch": 0.9967130525604398, "grad_norm": 0.39282089471817017, "learning_rate": 1.0076357077158443e-05, "loss": 0.4606, "step": 32446 }, { "epoch": 0.9967437716953891, "grad_norm": 0.3376142084598541, "learning_rate": 1.0075873813821684e-05, "loss": 0.5782, "step": 32447 }, { "epoch": 0.9967744908303382, "grad_norm": 0.37088051438331604, "learning_rate": 1.007539055030772e-05, "loss": 0.5582, "step": 32448 }, { "epoch": 0.9968052099652874, "grad_norm": 0.33095285296440125, "learning_rate": 1.0074907286617672e-05, "loss": 0.5358, "step": 32449 }, { "epoch": 0.9968359291002366, "grad_norm": 0.5791329741477966, "learning_rate": 1.0074424022752676e-05, "loss": 0.5896, "step": 32450 }, { "epoch": 0.9968666482351857, "grad_norm": 0.40861427783966064, "learning_rate": 1.0073940758713858e-05, "loss": 0.5954, "step": 32451 }, { "epoch": 0.9968973673701349, "grad_norm": 0.4007718563079834, "learning_rate": 1.0073457494502343e-05, "loss": 0.4915, "step": 32452 }, { "epoch": 0.996928086505084, "grad_norm": 0.41804710030555725, "learning_rate": 1.0072974230119265e-05, "loss": 0.5242, "step": 32453 }, { "epoch": 0.9969588056400331, "grad_norm": 0.37709417939186096, "learning_rate": 1.0072490965565753e-05, "loss": 0.5491, "step": 32454 }, { "epoch": 0.9969895247749824, "grad_norm": 0.3823970556259155, "learning_rate": 1.007200770084293e-05, "loss": 0.6397, "step": 32455 }, { "epoch": 0.9970202439099315, "grad_norm": 0.3760110139846802, "learning_rate": 1.007152443595193e-05, "loss": 0.5314, "step": 32456 }, { "epoch": 0.9970509630448806, "grad_norm": 0.32774215936660767, "learning_rate": 1.0071041170893879e-05, "loss": 0.5118, "step": 32457 }, { "epoch": 0.9970816821798298, "grad_norm": 0.4071796238422394, "learning_rate": 1.0070557905669906e-05, "loss": 0.5618, "step": 32458 }, { "epoch": 0.997112401314779, "grad_norm": 0.3634624481201172, "learning_rate": 1.0070074640281139e-05, "loss": 0.5401, "step": 32459 }, { "epoch": 0.9971431204497281, "grad_norm": 0.4256261885166168, "learning_rate": 1.0069591374728709e-05, "loss": 0.5948, "step": 32460 }, { "epoch": 0.9971738395846773, "grad_norm": 0.37881237268447876, "learning_rate": 1.0069108109013743e-05, "loss": 0.577, "step": 32461 }, { "epoch": 0.9972045587196264, "grad_norm": 0.36442121863365173, "learning_rate": 1.0068624843137371e-05, "loss": 0.5902, "step": 32462 }, { "epoch": 0.9972352778545757, "grad_norm": 0.33963873982429504, "learning_rate": 1.0068141577100718e-05, "loss": 0.5212, "step": 32463 }, { "epoch": 0.9972659969895248, "grad_norm": 0.3513931334018707, "learning_rate": 1.0067658310904919e-05, "loss": 0.5108, "step": 32464 }, { "epoch": 0.9972967161244739, "grad_norm": 0.5843539237976074, "learning_rate": 1.0067175044551098e-05, "loss": 0.6494, "step": 32465 }, { "epoch": 0.9973274352594231, "grad_norm": 0.38987401127815247, "learning_rate": 1.0066691778040388e-05, "loss": 0.6207, "step": 32466 }, { "epoch": 0.9973581543943723, "grad_norm": 0.3788263499736786, "learning_rate": 1.0066208511373911e-05, "loss": 0.5477, "step": 32467 }, { "epoch": 0.9973888735293214, "grad_norm": 0.3795483112335205, "learning_rate": 1.0065725244552797e-05, "loss": 0.5782, "step": 32468 }, { "epoch": 0.9974195926642706, "grad_norm": 0.40256696939468384, "learning_rate": 1.0065241977578182e-05, "loss": 0.5403, "step": 32469 }, { "epoch": 0.9974503117992197, "grad_norm": 0.36141452193260193, "learning_rate": 1.0064758710451187e-05, "loss": 0.5009, "step": 32470 }, { "epoch": 0.9974810309341688, "grad_norm": 0.35513198375701904, "learning_rate": 1.0064275443172947e-05, "loss": 0.5453, "step": 32471 }, { "epoch": 0.9975117500691181, "grad_norm": 0.38908877968788147, "learning_rate": 1.0063792175744585e-05, "loss": 0.5191, "step": 32472 }, { "epoch": 0.9975424692040672, "grad_norm": 0.4156550467014313, "learning_rate": 1.0063308908167231e-05, "loss": 0.4942, "step": 32473 }, { "epoch": 0.9975731883390164, "grad_norm": 0.37242233753204346, "learning_rate": 1.0062825640442018e-05, "loss": 0.5728, "step": 32474 }, { "epoch": 0.9976039074739655, "grad_norm": 0.3724967837333679, "learning_rate": 1.0062342372570066e-05, "loss": 0.5313, "step": 32475 }, { "epoch": 0.9976346266089147, "grad_norm": 0.40584075450897217, "learning_rate": 1.0061859104552515e-05, "loss": 0.5279, "step": 32476 }, { "epoch": 0.9976653457438639, "grad_norm": 0.3794108033180237, "learning_rate": 1.0061375836390481e-05, "loss": 0.579, "step": 32477 }, { "epoch": 0.997696064878813, "grad_norm": 0.32863909006118774, "learning_rate": 1.0060892568085106e-05, "loss": 0.5105, "step": 32478 }, { "epoch": 0.9977267840137621, "grad_norm": 0.37564361095428467, "learning_rate": 1.0060409299637508e-05, "loss": 0.6054, "step": 32479 }, { "epoch": 0.9977575031487114, "grad_norm": 0.3905028998851776, "learning_rate": 1.005992603104882e-05, "loss": 0.5072, "step": 32480 }, { "epoch": 0.9977882222836605, "grad_norm": 0.4089955687522888, "learning_rate": 1.0059442762320173e-05, "loss": 0.5514, "step": 32481 }, { "epoch": 0.9978189414186096, "grad_norm": 0.3737332224845886, "learning_rate": 1.0058959493452692e-05, "loss": 0.538, "step": 32482 }, { "epoch": 0.9978496605535588, "grad_norm": 0.3682406544685364, "learning_rate": 1.0058476224447508e-05, "loss": 0.4806, "step": 32483 }, { "epoch": 0.997880379688508, "grad_norm": 0.364387184381485, "learning_rate": 1.0057992955305746e-05, "loss": 0.4947, "step": 32484 }, { "epoch": 0.9979110988234571, "grad_norm": 0.3368184268474579, "learning_rate": 1.0057509686028541e-05, "loss": 0.5281, "step": 32485 }, { "epoch": 0.9979418179584063, "grad_norm": 0.39162567257881165, "learning_rate": 1.0057026416617012e-05, "loss": 0.6104, "step": 32486 }, { "epoch": 0.9979725370933554, "grad_norm": 0.36300233006477356, "learning_rate": 1.00565431470723e-05, "loss": 0.5777, "step": 32487 }, { "epoch": 0.9980032562283047, "grad_norm": 0.4013594388961792, "learning_rate": 1.0056059877395524e-05, "loss": 0.5289, "step": 32488 }, { "epoch": 0.9980339753632538, "grad_norm": 0.4861108064651489, "learning_rate": 1.0055576607587817e-05, "loss": 0.5988, "step": 32489 }, { "epoch": 0.9980646944982029, "grad_norm": 0.3732290267944336, "learning_rate": 1.005509333765031e-05, "loss": 0.4892, "step": 32490 }, { "epoch": 0.9980954136331521, "grad_norm": 0.33701613545417786, "learning_rate": 1.0054610067584125e-05, "loss": 0.5301, "step": 32491 }, { "epoch": 0.9981261327681012, "grad_norm": 0.33877891302108765, "learning_rate": 1.0054126797390398e-05, "loss": 0.5011, "step": 32492 }, { "epoch": 0.9981568519030504, "grad_norm": 0.3493198752403259, "learning_rate": 1.0053643527070248e-05, "loss": 0.535, "step": 32493 }, { "epoch": 0.9981875710379996, "grad_norm": 0.36749008297920227, "learning_rate": 1.0053160256624816e-05, "loss": 0.4789, "step": 32494 }, { "epoch": 0.9982182901729487, "grad_norm": 0.5626538395881653, "learning_rate": 1.005267698605522e-05, "loss": 0.6395, "step": 32495 }, { "epoch": 0.9982490093078978, "grad_norm": 0.6970357298851013, "learning_rate": 1.0052193715362596e-05, "loss": 0.5199, "step": 32496 }, { "epoch": 0.9982797284428471, "grad_norm": 0.3317001163959503, "learning_rate": 1.0051710444548071e-05, "loss": 0.5811, "step": 32497 }, { "epoch": 0.9983104475777962, "grad_norm": 0.37586510181427, "learning_rate": 1.0051227173612772e-05, "loss": 0.5791, "step": 32498 }, { "epoch": 0.9983411667127454, "grad_norm": 0.34398800134658813, "learning_rate": 1.0050743902557825e-05, "loss": 0.587, "step": 32499 }, { "epoch": 0.9983718858476945, "grad_norm": 0.40245866775512695, "learning_rate": 1.0050260631384366e-05, "loss": 0.5155, "step": 32500 }, { "epoch": 0.9984026049826437, "grad_norm": 0.3699391186237335, "learning_rate": 1.0049777360093518e-05, "loss": 0.579, "step": 32501 }, { "epoch": 0.9984333241175929, "grad_norm": 0.3696046471595764, "learning_rate": 1.0049294088686412e-05, "loss": 0.5373, "step": 32502 }, { "epoch": 0.998464043252542, "grad_norm": 0.3512346148490906, "learning_rate": 1.0048810817164177e-05, "loss": 0.5826, "step": 32503 }, { "epoch": 0.9984947623874911, "grad_norm": 0.3891742527484894, "learning_rate": 1.0048327545527936e-05, "loss": 0.4912, "step": 32504 }, { "epoch": 0.9985254815224404, "grad_norm": 0.3984135091304779, "learning_rate": 1.0047844273778827e-05, "loss": 0.5756, "step": 32505 }, { "epoch": 0.9985562006573895, "grad_norm": 0.4011856019496918, "learning_rate": 1.0047361001917972e-05, "loss": 0.5849, "step": 32506 }, { "epoch": 0.9985869197923386, "grad_norm": 0.37193843722343445, "learning_rate": 1.0046877729946505e-05, "loss": 0.6057, "step": 32507 }, { "epoch": 0.9986176389272878, "grad_norm": 0.3526730537414551, "learning_rate": 1.0046394457865551e-05, "loss": 0.5215, "step": 32508 }, { "epoch": 0.998648358062237, "grad_norm": 0.37684184312820435, "learning_rate": 1.0045911185676239e-05, "loss": 0.5589, "step": 32509 }, { "epoch": 0.9986790771971862, "grad_norm": 0.3467346727848053, "learning_rate": 1.0045427913379699e-05, "loss": 0.5411, "step": 32510 }, { "epoch": 0.9987097963321353, "grad_norm": 0.34298765659332275, "learning_rate": 1.0044944640977056e-05, "loss": 0.4773, "step": 32511 }, { "epoch": 0.9987405154670844, "grad_norm": 0.3588203489780426, "learning_rate": 1.0044461368469447e-05, "loss": 0.5195, "step": 32512 }, { "epoch": 0.9987712346020337, "grad_norm": 0.43060019612312317, "learning_rate": 1.004397809585799e-05, "loss": 0.5538, "step": 32513 }, { "epoch": 0.9988019537369828, "grad_norm": 0.3878839910030365, "learning_rate": 1.0043494823143823e-05, "loss": 0.5343, "step": 32514 }, { "epoch": 0.9988326728719319, "grad_norm": 1.0451518297195435, "learning_rate": 1.0043011550328069e-05, "loss": 0.5226, "step": 32515 }, { "epoch": 0.9988633920068811, "grad_norm": 0.3813377022743225, "learning_rate": 1.0042528277411858e-05, "loss": 0.5208, "step": 32516 }, { "epoch": 0.9988941111418302, "grad_norm": 0.4254327416419983, "learning_rate": 1.0042045004396319e-05, "loss": 0.4916, "step": 32517 }, { "epoch": 0.9989248302767794, "grad_norm": 0.4600476920604706, "learning_rate": 1.0041561731282584e-05, "loss": 0.5148, "step": 32518 }, { "epoch": 0.9989555494117286, "grad_norm": 0.3873882591724396, "learning_rate": 1.0041078458071778e-05, "loss": 0.5337, "step": 32519 }, { "epoch": 0.9989862685466777, "grad_norm": 0.38077062368392944, "learning_rate": 1.0040595184765028e-05, "loss": 0.55, "step": 32520 }, { "epoch": 0.9990169876816268, "grad_norm": 0.35137856006622314, "learning_rate": 1.0040111911363466e-05, "loss": 0.5776, "step": 32521 }, { "epoch": 0.9990477068165761, "grad_norm": 0.4217524230480194, "learning_rate": 1.0039628637868218e-05, "loss": 0.5634, "step": 32522 }, { "epoch": 0.9990784259515252, "grad_norm": 0.3787831664085388, "learning_rate": 1.0039145364280415e-05, "loss": 0.5581, "step": 32523 }, { "epoch": 0.9991091450864744, "grad_norm": 0.39409372210502625, "learning_rate": 1.003866209060119e-05, "loss": 0.4703, "step": 32524 }, { "epoch": 0.9991398642214235, "grad_norm": 0.32204505801200867, "learning_rate": 1.003817881683166e-05, "loss": 0.5143, "step": 32525 }, { "epoch": 0.9991705833563727, "grad_norm": 0.48686981201171875, "learning_rate": 1.0037695542972968e-05, "loss": 0.5339, "step": 32526 }, { "epoch": 0.9992013024913219, "grad_norm": 0.3817749321460724, "learning_rate": 1.0037212269026231e-05, "loss": 0.4925, "step": 32527 }, { "epoch": 0.999232021626271, "grad_norm": 0.3339221179485321, "learning_rate": 1.0036728994992584e-05, "loss": 0.4852, "step": 32528 }, { "epoch": 0.9992627407612201, "grad_norm": 0.35329970717430115, "learning_rate": 1.003624572087315e-05, "loss": 0.5057, "step": 32529 }, { "epoch": 0.9992934598961694, "grad_norm": 0.44805610179901123, "learning_rate": 1.0035762446669066e-05, "loss": 0.5288, "step": 32530 }, { "epoch": 0.9993241790311185, "grad_norm": 0.34205448627471924, "learning_rate": 1.0035279172381453e-05, "loss": 0.5517, "step": 32531 }, { "epoch": 0.9993548981660676, "grad_norm": 0.38832250237464905, "learning_rate": 1.0034795898011446e-05, "loss": 0.5968, "step": 32532 }, { "epoch": 0.9993856173010168, "grad_norm": 0.43230727314949036, "learning_rate": 1.003431262356017e-05, "loss": 0.5221, "step": 32533 }, { "epoch": 0.999416336435966, "grad_norm": 0.3910462260246277, "learning_rate": 1.0033829349028753e-05, "loss": 0.5865, "step": 32534 }, { "epoch": 0.9994470555709152, "grad_norm": 0.3965539336204529, "learning_rate": 1.003334607441833e-05, "loss": 0.5521, "step": 32535 }, { "epoch": 0.9994777747058643, "grad_norm": 0.3760276734828949, "learning_rate": 1.0032862799730019e-05, "loss": 0.5463, "step": 32536 }, { "epoch": 0.9995084938408134, "grad_norm": 0.3505136966705322, "learning_rate": 1.0032379524964957e-05, "loss": 0.4645, "step": 32537 }, { "epoch": 0.9995392129757626, "grad_norm": 0.5779119729995728, "learning_rate": 1.003189625012427e-05, "loss": 0.4729, "step": 32538 }, { "epoch": 0.9995699321107118, "grad_norm": 0.34309831261634827, "learning_rate": 1.0031412975209088e-05, "loss": 0.4785, "step": 32539 }, { "epoch": 0.9996006512456609, "grad_norm": 0.3851175308227539, "learning_rate": 1.0030929700220535e-05, "loss": 0.5392, "step": 32540 }, { "epoch": 0.9996313703806101, "grad_norm": 0.4300593137741089, "learning_rate": 1.0030446425159749e-05, "loss": 0.5697, "step": 32541 }, { "epoch": 0.9996620895155592, "grad_norm": 0.4027702510356903, "learning_rate": 1.002996315002785e-05, "loss": 0.6206, "step": 32542 }, { "epoch": 0.9996928086505084, "grad_norm": 0.38156986236572266, "learning_rate": 1.0029479874825969e-05, "loss": 0.4936, "step": 32543 }, { "epoch": 0.9997235277854576, "grad_norm": 0.37758946418762207, "learning_rate": 1.0028996599555239e-05, "loss": 0.5532, "step": 32544 }, { "epoch": 0.9997542469204067, "grad_norm": 0.37421488761901855, "learning_rate": 1.0028513324216781e-05, "loss": 0.5692, "step": 32545 }, { "epoch": 0.9997849660553558, "grad_norm": 0.36170199513435364, "learning_rate": 1.0028030048811734e-05, "loss": 0.5119, "step": 32546 }, { "epoch": 0.9998156851903051, "grad_norm": 0.4984782934188843, "learning_rate": 1.0027546773341217e-05, "loss": 0.5847, "step": 32547 }, { "epoch": 0.9998464043252542, "grad_norm": 0.4142237901687622, "learning_rate": 1.0027063497806364e-05, "loss": 0.5426, "step": 32548 }, { "epoch": 0.9998771234602034, "grad_norm": 0.3551313579082489, "learning_rate": 1.00265802222083e-05, "loss": 0.4415, "step": 32549 }, { "epoch": 0.9999078425951525, "grad_norm": 0.3464774489402771, "learning_rate": 1.0026096946548158e-05, "loss": 0.535, "step": 32550 }, { "epoch": 0.9999385617301016, "grad_norm": 0.37426653504371643, "learning_rate": 1.0025613670827063e-05, "loss": 0.468, "step": 32551 }, { "epoch": 0.9999692808650509, "grad_norm": 0.5279443860054016, "learning_rate": 1.002513039504615e-05, "loss": 0.4935, "step": 32552 }, { "epoch": 1.0, "grad_norm": 0.40786212682724, "learning_rate": 1.002464711920654e-05, "loss": 0.6122, "step": 32553 }, { "epoch": 1.0, "eval_loss": 0.22940881550312042, "eval_runtime": 174.2855, "eval_samples_per_second": 137.699, "eval_steps_per_second": 17.213, "step": 32553 } ], "logging_steps": 1, "max_steps": 65106, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.120703180377623e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }