{ "best_metric": 0.08554638942253362, "best_model_checkpoint": "./seq2seq_wav2vec2_bart-base_24k-en-voxpopuli/t1_new1_spec/checkpoint-29000", "epoch": 20.0, "eval_steps": 1000, "global_step": 34820, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005743825387708214, "grad_norm": 6.055897235870361, "learning_rate": 5.000000000000001e-07, "loss": 10.9883, "step": 10 }, { "epoch": 0.011487650775416428, "grad_norm": 5.815479755401611, "learning_rate": 1.0000000000000002e-06, "loss": 10.9223, "step": 20 }, { "epoch": 0.01723147616312464, "grad_norm": 5.46067476272583, "learning_rate": 1.5e-06, "loss": 10.8304, "step": 30 }, { "epoch": 0.022975301550832855, "grad_norm": 5.2169952392578125, "learning_rate": 2.0000000000000003e-06, "loss": 10.6553, "step": 40 }, { "epoch": 0.02871912693854107, "grad_norm": 5.73301362991333, "learning_rate": 2.5e-06, "loss": 10.434, "step": 50 }, { "epoch": 0.03446295232624928, "grad_norm": 5.824595928192139, "learning_rate": 3e-06, "loss": 10.0886, "step": 60 }, { "epoch": 0.040206777713957496, "grad_norm": 5.168050765991211, "learning_rate": 3.5000000000000004e-06, "loss": 9.7514, "step": 70 }, { "epoch": 0.04595060310166571, "grad_norm": 4.594773769378662, "learning_rate": 4.000000000000001e-06, "loss": 9.4369, "step": 80 }, { "epoch": 0.051694428489373924, "grad_norm": 4.235531330108643, "learning_rate": 4.5e-06, "loss": 9.1625, "step": 90 }, { "epoch": 0.05743825387708214, "grad_norm": 4.006635665893555, "learning_rate": 5e-06, "loss": 8.9379, "step": 100 }, { "epoch": 0.06318207926479034, "grad_norm": 3.3541603088378906, "learning_rate": 5.500000000000001e-06, "loss": 8.703, "step": 110 }, { "epoch": 0.06892590465249857, "grad_norm": 3.289834499359131, "learning_rate": 6e-06, "loss": 8.5148, "step": 120 }, { "epoch": 0.07466973004020677, "grad_norm": 66.89250946044922, "learning_rate": 6.5000000000000004e-06, "loss": 8.3209, "step": 130 }, { "epoch": 0.08041355542791499, "grad_norm": 3.456878185272217, "learning_rate": 7.000000000000001e-06, "loss": 8.1789, "step": 140 }, { "epoch": 0.0861573808156232, "grad_norm": 2.5259904861450195, "learning_rate": 7.5e-06, "loss": 8.0436, "step": 150 }, { "epoch": 0.09190120620333142, "grad_norm": 2.7311551570892334, "learning_rate": 8.000000000000001e-06, "loss": 7.9073, "step": 160 }, { "epoch": 0.09764503159103963, "grad_norm": 2.792020797729492, "learning_rate": 8.500000000000002e-06, "loss": 7.761, "step": 170 }, { "epoch": 0.10338885697874785, "grad_norm": 2.549994468688965, "learning_rate": 9e-06, "loss": 7.6251, "step": 180 }, { "epoch": 0.10913268236645605, "grad_norm": 2.3393728733062744, "learning_rate": 9.5e-06, "loss": 7.4803, "step": 190 }, { "epoch": 0.11487650775416428, "grad_norm": 2.8744332790374756, "learning_rate": 1e-05, "loss": 7.3186, "step": 200 }, { "epoch": 0.12062033314187248, "grad_norm": 1.95337975025177, "learning_rate": 1.05e-05, "loss": 7.2298, "step": 210 }, { "epoch": 0.1263641585295807, "grad_norm": 2.156261920928955, "learning_rate": 1.1000000000000001e-05, "loss": 7.136, "step": 220 }, { "epoch": 0.13210798391728892, "grad_norm": 2.040726661682129, "learning_rate": 1.1500000000000002e-05, "loss": 7.0287, "step": 230 }, { "epoch": 0.13785180930499713, "grad_norm": 2.147550582885742, "learning_rate": 1.2e-05, "loss": 6.9376, "step": 240 }, { "epoch": 0.14359563469270534, "grad_norm": 2.419684648513794, "learning_rate": 1.25e-05, "loss": 6.8987, "step": 250 }, { "epoch": 0.14933946008041354, "grad_norm": 1.5293253660202026, "learning_rate": 1.3000000000000001e-05, "loss": 6.8369, "step": 260 }, { "epoch": 0.15508328546812178, "grad_norm": 2.4937326908111572, "learning_rate": 1.3500000000000001e-05, "loss": 6.7625, "step": 270 }, { "epoch": 0.16082711085582999, "grad_norm": 2.5087127685546875, "learning_rate": 1.4000000000000001e-05, "loss": 6.7237, "step": 280 }, { "epoch": 0.1665709362435382, "grad_norm": 1.5482823848724365, "learning_rate": 1.45e-05, "loss": 6.7109, "step": 290 }, { "epoch": 0.1723147616312464, "grad_norm": 2.4903125762939453, "learning_rate": 1.5e-05, "loss": 6.6613, "step": 300 }, { "epoch": 0.17805858701895463, "grad_norm": 3.5624024868011475, "learning_rate": 1.55e-05, "loss": 6.6248, "step": 310 }, { "epoch": 0.18380241240666284, "grad_norm": 5.1260666847229, "learning_rate": 1.6000000000000003e-05, "loss": 6.606, "step": 320 }, { "epoch": 0.18954623779437105, "grad_norm": 2.5122170448303223, "learning_rate": 1.65e-05, "loss": 6.6266, "step": 330 }, { "epoch": 0.19529006318207925, "grad_norm": 2.002775192260742, "learning_rate": 1.7000000000000003e-05, "loss": 6.571, "step": 340 }, { "epoch": 0.2010338885697875, "grad_norm": 1.6555070877075195, "learning_rate": 1.75e-05, "loss": 6.5593, "step": 350 }, { "epoch": 0.2067777139574957, "grad_norm": 5.010408401489258, "learning_rate": 1.8e-05, "loss": 6.5288, "step": 360 }, { "epoch": 0.2125215393452039, "grad_norm": 1.9755157232284546, "learning_rate": 1.85e-05, "loss": 6.5213, "step": 370 }, { "epoch": 0.2182653647329121, "grad_norm": 2.2749240398406982, "learning_rate": 1.9e-05, "loss": 6.4629, "step": 380 }, { "epoch": 0.22400919012062034, "grad_norm": 3.620232582092285, "learning_rate": 1.9500000000000003e-05, "loss": 6.4937, "step": 390 }, { "epoch": 0.22975301550832855, "grad_norm": 2.032214641571045, "learning_rate": 2e-05, "loss": 6.444, "step": 400 }, { "epoch": 0.23549684089603676, "grad_norm": 2.468402862548828, "learning_rate": 2.05e-05, "loss": 6.4267, "step": 410 }, { "epoch": 0.24124066628374496, "grad_norm": 2.084977388381958, "learning_rate": 2.1e-05, "loss": 6.4032, "step": 420 }, { "epoch": 0.2469844916714532, "grad_norm": 2.6172053813934326, "learning_rate": 2.15e-05, "loss": 6.3946, "step": 430 }, { "epoch": 0.2527283170591614, "grad_norm": 3.0039024353027344, "learning_rate": 2.2000000000000003e-05, "loss": 6.3789, "step": 440 }, { "epoch": 0.2584721424468696, "grad_norm": 1.6767144203186035, "learning_rate": 2.25e-05, "loss": 6.3923, "step": 450 }, { "epoch": 0.26421596783457785, "grad_norm": 4.074848651885986, "learning_rate": 2.3000000000000003e-05, "loss": 6.3333, "step": 460 }, { "epoch": 0.269959793222286, "grad_norm": 2.885188341140747, "learning_rate": 2.35e-05, "loss": 6.3333, "step": 470 }, { "epoch": 0.27570361860999426, "grad_norm": 2.0869805812835693, "learning_rate": 2.4e-05, "loss": 6.297, "step": 480 }, { "epoch": 0.2814474439977025, "grad_norm": 2.609419345855713, "learning_rate": 2.45e-05, "loss": 6.2393, "step": 490 }, { "epoch": 0.2871912693854107, "grad_norm": 2.56186580657959, "learning_rate": 2.5e-05, "loss": 6.2737, "step": 500 }, { "epoch": 0.2929350947731189, "grad_norm": 1.8970677852630615, "learning_rate": 2.5500000000000003e-05, "loss": 6.2084, "step": 510 }, { "epoch": 0.2986789201608271, "grad_norm": 72.05806732177734, "learning_rate": 2.6000000000000002e-05, "loss": 6.825, "step": 520 }, { "epoch": 0.3044227455485353, "grad_norm": 1.9689005613327026, "learning_rate": 2.6500000000000004e-05, "loss": 6.3931, "step": 530 }, { "epoch": 0.31016657093624356, "grad_norm": 2.1896631717681885, "learning_rate": 2.7000000000000002e-05, "loss": 6.2246, "step": 540 }, { "epoch": 0.31591039632395174, "grad_norm": 4.583789825439453, "learning_rate": 2.7500000000000004e-05, "loss": 6.1815, "step": 550 }, { "epoch": 0.32165422171165997, "grad_norm": 3.7064321041107178, "learning_rate": 2.8000000000000003e-05, "loss": 6.1143, "step": 560 }, { "epoch": 0.3273980470993682, "grad_norm": 1.6995564699172974, "learning_rate": 2.8499999999999998e-05, "loss": 6.119, "step": 570 }, { "epoch": 0.3331418724870764, "grad_norm": 2.3913519382476807, "learning_rate": 2.9e-05, "loss": 6.1435, "step": 580 }, { "epoch": 0.3388856978747846, "grad_norm": 3.105802536010742, "learning_rate": 2.95e-05, "loss": 6.0587, "step": 590 }, { "epoch": 0.3446295232624928, "grad_norm": 1.9124610424041748, "learning_rate": 3e-05, "loss": 6.1034, "step": 600 }, { "epoch": 0.35037334865020103, "grad_norm": 2.707331657409668, "learning_rate": 3.05e-05, "loss": 6.0529, "step": 610 }, { "epoch": 0.35611717403790927, "grad_norm": 2.319321870803833, "learning_rate": 3.1e-05, "loss": 6.0769, "step": 620 }, { "epoch": 0.36186099942561745, "grad_norm": 1.970542073249817, "learning_rate": 3.15e-05, "loss": 6.0737, "step": 630 }, { "epoch": 0.3676048248133257, "grad_norm": 2.0732107162475586, "learning_rate": 3.2000000000000005e-05, "loss": 5.9737, "step": 640 }, { "epoch": 0.3733486502010339, "grad_norm": 4.50523042678833, "learning_rate": 3.2500000000000004e-05, "loss": 5.9883, "step": 650 }, { "epoch": 0.3790924755887421, "grad_norm": 1.6544623374938965, "learning_rate": 3.3e-05, "loss": 5.9738, "step": 660 }, { "epoch": 0.38483630097645033, "grad_norm": 1.7353025674819946, "learning_rate": 3.35e-05, "loss": 5.9409, "step": 670 }, { "epoch": 0.3905801263641585, "grad_norm": 2.4177908897399902, "learning_rate": 3.4000000000000007e-05, "loss": 5.8607, "step": 680 }, { "epoch": 0.39632395175186674, "grad_norm": 2.5148210525512695, "learning_rate": 3.45e-05, "loss": 5.9205, "step": 690 }, { "epoch": 0.402067777139575, "grad_norm": 2.590613842010498, "learning_rate": 3.5e-05, "loss": 5.8985, "step": 700 }, { "epoch": 0.40781160252728316, "grad_norm": 1.9567346572875977, "learning_rate": 3.55e-05, "loss": 5.8444, "step": 710 }, { "epoch": 0.4135554279149914, "grad_norm": 3.8777434825897217, "learning_rate": 3.6e-05, "loss": 5.8868, "step": 720 }, { "epoch": 0.41929925330269957, "grad_norm": 3.3018198013305664, "learning_rate": 3.65e-05, "loss": 5.7993, "step": 730 }, { "epoch": 0.4250430786904078, "grad_norm": 2.752387523651123, "learning_rate": 3.7e-05, "loss": 5.7856, "step": 740 }, { "epoch": 0.43078690407811604, "grad_norm": 2.77730393409729, "learning_rate": 3.7500000000000003e-05, "loss": 5.8193, "step": 750 }, { "epoch": 0.4365307294658242, "grad_norm": 1.8536241054534912, "learning_rate": 3.8e-05, "loss": 5.7811, "step": 760 }, { "epoch": 0.44227455485353245, "grad_norm": 1.8928744792938232, "learning_rate": 3.85e-05, "loss": 5.7732, "step": 770 }, { "epoch": 0.4480183802412407, "grad_norm": 3.0804221630096436, "learning_rate": 3.9000000000000006e-05, "loss": 5.7638, "step": 780 }, { "epoch": 0.45376220562894887, "grad_norm": 2.771130323410034, "learning_rate": 3.9500000000000005e-05, "loss": 5.7104, "step": 790 }, { "epoch": 0.4595060310166571, "grad_norm": 3.3369624614715576, "learning_rate": 4e-05, "loss": 5.6699, "step": 800 }, { "epoch": 0.4652498564043653, "grad_norm": 2.156682252883911, "learning_rate": 4.05e-05, "loss": 5.7005, "step": 810 }, { "epoch": 0.4709936817920735, "grad_norm": 1.8618643283843994, "learning_rate": 4.1e-05, "loss": 5.6653, "step": 820 }, { "epoch": 0.47673750717978175, "grad_norm": 4.0401458740234375, "learning_rate": 4.15e-05, "loss": 5.6899, "step": 830 }, { "epoch": 0.48248133256748993, "grad_norm": 2.14341139793396, "learning_rate": 4.2e-05, "loss": 5.6509, "step": 840 }, { "epoch": 0.48822515795519816, "grad_norm": 2.8897228240966797, "learning_rate": 4.25e-05, "loss": 5.6362, "step": 850 }, { "epoch": 0.4939689833429064, "grad_norm": 2.2010457515716553, "learning_rate": 4.3e-05, "loss": 5.6282, "step": 860 }, { "epoch": 0.4997128087306146, "grad_norm": 2.02427077293396, "learning_rate": 4.35e-05, "loss": 5.6147, "step": 870 }, { "epoch": 0.5054566341183228, "grad_norm": 1.9486312866210938, "learning_rate": 4.4000000000000006e-05, "loss": 5.6123, "step": 880 }, { "epoch": 0.511200459506031, "grad_norm": 1.9781122207641602, "learning_rate": 4.4500000000000004e-05, "loss": 5.6125, "step": 890 }, { "epoch": 0.5169442848937392, "grad_norm": 2.069385528564453, "learning_rate": 4.5e-05, "loss": 5.5844, "step": 900 }, { "epoch": 0.5226881102814475, "grad_norm": 52.123844146728516, "learning_rate": 4.55e-05, "loss": 5.6115, "step": 910 }, { "epoch": 0.5284319356691557, "grad_norm": 1.9142512083053589, "learning_rate": 4.600000000000001e-05, "loss": 5.5356, "step": 920 }, { "epoch": 0.5341757610568638, "grad_norm": 2.6590237617492676, "learning_rate": 4.6500000000000005e-05, "loss": 5.5443, "step": 930 }, { "epoch": 0.539919586444572, "grad_norm": 1.7980990409851074, "learning_rate": 4.7e-05, "loss": 5.5028, "step": 940 }, { "epoch": 0.5456634118322803, "grad_norm": 2.1112303733825684, "learning_rate": 4.75e-05, "loss": 5.4412, "step": 950 }, { "epoch": 0.5514072372199885, "grad_norm": 3.2449238300323486, "learning_rate": 4.8e-05, "loss": 5.4485, "step": 960 }, { "epoch": 0.5571510626076968, "grad_norm": 2.456397771835327, "learning_rate": 4.85e-05, "loss": 5.4416, "step": 970 }, { "epoch": 0.562894887995405, "grad_norm": 2.243213653564453, "learning_rate": 4.9e-05, "loss": 5.421, "step": 980 }, { "epoch": 0.5686387133831131, "grad_norm": 2.1592395305633545, "learning_rate": 4.9500000000000004e-05, "loss": 5.4488, "step": 990 }, { "epoch": 0.5743825387708213, "grad_norm": 1.6209907531738281, "learning_rate": 5e-05, "loss": 5.457, "step": 1000 }, { "epoch": 0.5743825387708213, "eval_loss": 5.736476421356201, "eval_runtime": 168.2098, "eval_samples_per_second": 9.458, "eval_steps_per_second": 0.101, "eval_wer": 1.0291275850378574, "step": 1000 }, { "epoch": 0.5801263641585296, "grad_norm": 2.0220963954925537, "learning_rate": 5.05e-05, "loss": 5.3579, "step": 1010 }, { "epoch": 0.5858701895462378, "grad_norm": 3.0556018352508545, "learning_rate": 5.1000000000000006e-05, "loss": 5.3937, "step": 1020 }, { "epoch": 0.591614014933946, "grad_norm": 2.5268030166625977, "learning_rate": 5.1500000000000005e-05, "loss": 5.3661, "step": 1030 }, { "epoch": 0.5973578403216542, "grad_norm": 6.735248565673828, "learning_rate": 5.2000000000000004e-05, "loss": 5.427, "step": 1040 }, { "epoch": 0.6031016657093624, "grad_norm": 5.201588153839111, "learning_rate": 5.25e-05, "loss": 5.5775, "step": 1050 }, { "epoch": 0.6088454910970706, "grad_norm": 2.0221407413482666, "learning_rate": 5.300000000000001e-05, "loss": 5.3778, "step": 1060 }, { "epoch": 0.6145893164847789, "grad_norm": 1.9622694253921509, "learning_rate": 5.3500000000000006e-05, "loss": 5.3581, "step": 1070 }, { "epoch": 0.6203331418724871, "grad_norm": 1.7550222873687744, "learning_rate": 5.4000000000000005e-05, "loss": 5.3513, "step": 1080 }, { "epoch": 0.6260769672601952, "grad_norm": 2.479619026184082, "learning_rate": 5.45e-05, "loss": 5.2855, "step": 1090 }, { "epoch": 0.6318207926479035, "grad_norm": 2.534801959991455, "learning_rate": 5.500000000000001e-05, "loss": 5.3142, "step": 1100 }, { "epoch": 0.6375646180356117, "grad_norm": 2.064847946166992, "learning_rate": 5.550000000000001e-05, "loss": 5.3408, "step": 1110 }, { "epoch": 0.6433084434233199, "grad_norm": 1.9141936302185059, "learning_rate": 5.6000000000000006e-05, "loss": 5.2582, "step": 1120 }, { "epoch": 0.6490522688110282, "grad_norm": 2.138089179992676, "learning_rate": 5.65e-05, "loss": 5.2712, "step": 1130 }, { "epoch": 0.6547960941987364, "grad_norm": 1.825021743774414, "learning_rate": 5.6999999999999996e-05, "loss": 5.2543, "step": 1140 }, { "epoch": 0.6605399195864445, "grad_norm": 1.7678310871124268, "learning_rate": 5.7499999999999995e-05, "loss": 5.2146, "step": 1150 }, { "epoch": 0.6662837449741528, "grad_norm": 2.457432746887207, "learning_rate": 5.8e-05, "loss": 5.1643, "step": 1160 }, { "epoch": 0.672027570361861, "grad_norm": 2.418137550354004, "learning_rate": 5.85e-05, "loss": 5.2242, "step": 1170 }, { "epoch": 0.6777713957495692, "grad_norm": 1.7979555130004883, "learning_rate": 5.9e-05, "loss": 5.156, "step": 1180 }, { "epoch": 0.6835152211372775, "grad_norm": 2.3807952404022217, "learning_rate": 5.95e-05, "loss": 5.1424, "step": 1190 }, { "epoch": 0.6892590465249856, "grad_norm": 2.2758312225341797, "learning_rate": 6e-05, "loss": 5.2342, "step": 1200 }, { "epoch": 0.6950028719126938, "grad_norm": 1.9458492994308472, "learning_rate": 6.05e-05, "loss": 5.1192, "step": 1210 }, { "epoch": 0.7007466973004021, "grad_norm": 2.064619302749634, "learning_rate": 6.1e-05, "loss": 5.0974, "step": 1220 }, { "epoch": 0.7064905226881103, "grad_norm": 2.2566277980804443, "learning_rate": 6.15e-05, "loss": 5.1274, "step": 1230 }, { "epoch": 0.7122343480758185, "grad_norm": 2.3915159702301025, "learning_rate": 6.2e-05, "loss": 5.1261, "step": 1240 }, { "epoch": 0.7179781734635267, "grad_norm": 2.568120002746582, "learning_rate": 6.25e-05, "loss": 5.122, "step": 1250 }, { "epoch": 0.7237219988512349, "grad_norm": 2.387334108352661, "learning_rate": 6.3e-05, "loss": 5.096, "step": 1260 }, { "epoch": 0.7294658242389431, "grad_norm": 2.991128921508789, "learning_rate": 6.35e-05, "loss": 5.0988, "step": 1270 }, { "epoch": 0.7352096496266514, "grad_norm": 1.900687575340271, "learning_rate": 6.400000000000001e-05, "loss": 5.0034, "step": 1280 }, { "epoch": 0.7409534750143596, "grad_norm": 2.0494322776794434, "learning_rate": 6.450000000000001e-05, "loss": 5.0093, "step": 1290 }, { "epoch": 0.7466973004020678, "grad_norm": 2.711444139480591, "learning_rate": 6.500000000000001e-05, "loss": 5.0134, "step": 1300 }, { "epoch": 0.752441125789776, "grad_norm": 2.2127387523651123, "learning_rate": 6.55e-05, "loss": 4.9642, "step": 1310 }, { "epoch": 0.7581849511774842, "grad_norm": 1.9456676244735718, "learning_rate": 6.6e-05, "loss": 5.0235, "step": 1320 }, { "epoch": 0.7639287765651924, "grad_norm": 1.8089032173156738, "learning_rate": 6.65e-05, "loss": 4.9891, "step": 1330 }, { "epoch": 0.7696726019529007, "grad_norm": 2.4659690856933594, "learning_rate": 6.7e-05, "loss": 4.9657, "step": 1340 }, { "epoch": 0.7754164273406089, "grad_norm": 2.3967230319976807, "learning_rate": 6.750000000000001e-05, "loss": 5.0062, "step": 1350 }, { "epoch": 0.781160252728317, "grad_norm": 2.1226890087127686, "learning_rate": 6.800000000000001e-05, "loss": 4.9346, "step": 1360 }, { "epoch": 0.7869040781160253, "grad_norm": 1.9833396673202515, "learning_rate": 6.850000000000001e-05, "loss": 4.9702, "step": 1370 }, { "epoch": 0.7926479035037335, "grad_norm": 1.912986397743225, "learning_rate": 6.9e-05, "loss": 4.9767, "step": 1380 }, { "epoch": 0.7983917288914417, "grad_norm": 2.553302526473999, "learning_rate": 6.95e-05, "loss": 4.9779, "step": 1390 }, { "epoch": 0.80413555427915, "grad_norm": 2.2005956172943115, "learning_rate": 7e-05, "loss": 4.9189, "step": 1400 }, { "epoch": 0.8098793796668581, "grad_norm": 2.284294366836548, "learning_rate": 7.05e-05, "loss": 4.9111, "step": 1410 }, { "epoch": 0.8156232050545663, "grad_norm": 2.5295588970184326, "learning_rate": 7.1e-05, "loss": 4.9468, "step": 1420 }, { "epoch": 0.8213670304422745, "grad_norm": 2.211534023284912, "learning_rate": 7.15e-05, "loss": 4.9011, "step": 1430 }, { "epoch": 0.8271108558299828, "grad_norm": 2.135462760925293, "learning_rate": 7.2e-05, "loss": 4.8942, "step": 1440 }, { "epoch": 0.832854681217691, "grad_norm": 2.19386887550354, "learning_rate": 7.25e-05, "loss": 4.8801, "step": 1450 }, { "epoch": 0.8385985066053991, "grad_norm": 2.6439120769500732, "learning_rate": 7.3e-05, "loss": 4.8623, "step": 1460 }, { "epoch": 0.8443423319931074, "grad_norm": 2.874725580215454, "learning_rate": 7.35e-05, "loss": 4.8364, "step": 1470 }, { "epoch": 0.8500861573808156, "grad_norm": 1.9559205770492554, "learning_rate": 7.4e-05, "loss": 4.8381, "step": 1480 }, { "epoch": 0.8558299827685238, "grad_norm": 1.822804570198059, "learning_rate": 7.450000000000001e-05, "loss": 4.8564, "step": 1490 }, { "epoch": 0.8615738081562321, "grad_norm": 2.2507824897766113, "learning_rate": 7.500000000000001e-05, "loss": 4.869, "step": 1500 }, { "epoch": 0.8673176335439403, "grad_norm": 2.1126718521118164, "learning_rate": 7.55e-05, "loss": 4.777, "step": 1510 }, { "epoch": 0.8730614589316484, "grad_norm": 2.6732213497161865, "learning_rate": 7.6e-05, "loss": 4.7495, "step": 1520 }, { "epoch": 0.8788052843193567, "grad_norm": 2.125293731689453, "learning_rate": 7.65e-05, "loss": 4.7961, "step": 1530 }, { "epoch": 0.8845491097070649, "grad_norm": 2.2320406436920166, "learning_rate": 7.7e-05, "loss": 4.7528, "step": 1540 }, { "epoch": 0.8902929350947731, "grad_norm": 2.6552276611328125, "learning_rate": 7.75e-05, "loss": 4.7833, "step": 1550 }, { "epoch": 0.8960367604824814, "grad_norm": 2.2076845169067383, "learning_rate": 7.800000000000001e-05, "loss": 4.7246, "step": 1560 }, { "epoch": 0.9017805858701895, "grad_norm": 2.4645378589630127, "learning_rate": 7.850000000000001e-05, "loss": 4.8091, "step": 1570 }, { "epoch": 0.9075244112578977, "grad_norm": 2.4692769050598145, "learning_rate": 7.900000000000001e-05, "loss": 4.7109, "step": 1580 }, { "epoch": 0.913268236645606, "grad_norm": 2.0135834217071533, "learning_rate": 7.950000000000001e-05, "loss": 4.7533, "step": 1590 }, { "epoch": 0.9190120620333142, "grad_norm": 3.3233773708343506, "learning_rate": 8e-05, "loss": 4.7149, "step": 1600 }, { "epoch": 0.9247558874210224, "grad_norm": 2.0522029399871826, "learning_rate": 8.05e-05, "loss": 4.6994, "step": 1610 }, { "epoch": 0.9304997128087306, "grad_norm": 1.9845471382141113, "learning_rate": 8.1e-05, "loss": 4.7063, "step": 1620 }, { "epoch": 0.9362435381964388, "grad_norm": 2.1543734073638916, "learning_rate": 8.15e-05, "loss": 4.7315, "step": 1630 }, { "epoch": 0.941987363584147, "grad_norm": 1.9925730228424072, "learning_rate": 8.2e-05, "loss": 4.6213, "step": 1640 }, { "epoch": 0.9477311889718553, "grad_norm": 2.437191963195801, "learning_rate": 8.25e-05, "loss": 4.7162, "step": 1650 }, { "epoch": 0.9534750143595635, "grad_norm": 2.6762571334838867, "learning_rate": 8.3e-05, "loss": 4.6834, "step": 1660 }, { "epoch": 0.9592188397472717, "grad_norm": 2.2476072311401367, "learning_rate": 8.35e-05, "loss": 4.6793, "step": 1670 }, { "epoch": 0.9649626651349799, "grad_norm": 2.2051050662994385, "learning_rate": 8.4e-05, "loss": 4.6565, "step": 1680 }, { "epoch": 0.9707064905226881, "grad_norm": 2.049757242202759, "learning_rate": 8.450000000000001e-05, "loss": 4.6022, "step": 1690 }, { "epoch": 0.9764503159103963, "grad_norm": 2.2780234813690186, "learning_rate": 8.5e-05, "loss": 4.6523, "step": 1700 }, { "epoch": 0.9821941412981046, "grad_norm": 2.121629476547241, "learning_rate": 8.55e-05, "loss": 4.6201, "step": 1710 }, { "epoch": 0.9879379666858128, "grad_norm": 2.312450885772705, "learning_rate": 8.6e-05, "loss": 4.5953, "step": 1720 }, { "epoch": 0.9936817920735209, "grad_norm": 1.9780007600784302, "learning_rate": 8.65e-05, "loss": 4.5872, "step": 1730 }, { "epoch": 0.9994256174612292, "grad_norm": 2.257361888885498, "learning_rate": 8.7e-05, "loss": 4.514, "step": 1740 }, { "epoch": 1.0051694428489375, "grad_norm": 1.9719204902648926, "learning_rate": 8.75e-05, "loss": 4.5915, "step": 1750 }, { "epoch": 1.0109132682366455, "grad_norm": 2.1014137268066406, "learning_rate": 8.800000000000001e-05, "loss": 4.4911, "step": 1760 }, { "epoch": 1.0166570936243537, "grad_norm": 2.4287660121917725, "learning_rate": 8.850000000000001e-05, "loss": 4.4604, "step": 1770 }, { "epoch": 1.022400919012062, "grad_norm": 2.763195514678955, "learning_rate": 8.900000000000001e-05, "loss": 4.4405, "step": 1780 }, { "epoch": 1.0281447443997702, "grad_norm": 2.200176477432251, "learning_rate": 8.950000000000001e-05, "loss": 4.4641, "step": 1790 }, { "epoch": 1.0338885697874785, "grad_norm": 2.1211001873016357, "learning_rate": 9e-05, "loss": 4.4463, "step": 1800 }, { "epoch": 1.0396323951751867, "grad_norm": 3.6094770431518555, "learning_rate": 9.05e-05, "loss": 4.3736, "step": 1810 }, { "epoch": 1.045376220562895, "grad_norm": 3.214155673980713, "learning_rate": 9.1e-05, "loss": 4.3319, "step": 1820 }, { "epoch": 1.0511200459506032, "grad_norm": 3.0764145851135254, "learning_rate": 9.15e-05, "loss": 4.2536, "step": 1830 }, { "epoch": 1.0568638713383114, "grad_norm": 2.361523389816284, "learning_rate": 9.200000000000001e-05, "loss": 4.1905, "step": 1840 }, { "epoch": 1.0626076967260196, "grad_norm": 3.380676031112671, "learning_rate": 9.250000000000001e-05, "loss": 4.2394, "step": 1850 }, { "epoch": 1.0683515221137276, "grad_norm": 2.218505859375, "learning_rate": 9.300000000000001e-05, "loss": 4.2496, "step": 1860 }, { "epoch": 1.0740953475014359, "grad_norm": 2.547895908355713, "learning_rate": 9.350000000000001e-05, "loss": 4.0718, "step": 1870 }, { "epoch": 1.079839172889144, "grad_norm": 2.4427947998046875, "learning_rate": 9.4e-05, "loss": 4.0825, "step": 1880 }, { "epoch": 1.0855829982768523, "grad_norm": 2.6170310974121094, "learning_rate": 9.449999999999999e-05, "loss": 3.9951, "step": 1890 }, { "epoch": 1.0913268236645606, "grad_norm": 2.4050493240356445, "learning_rate": 9.5e-05, "loss": 3.9702, "step": 1900 }, { "epoch": 1.0970706490522688, "grad_norm": 2.338259696960449, "learning_rate": 9.55e-05, "loss": 3.8448, "step": 1910 }, { "epoch": 1.102814474439977, "grad_norm": 3.197923183441162, "learning_rate": 9.6e-05, "loss": 3.8478, "step": 1920 }, { "epoch": 1.1085582998276853, "grad_norm": 3.978116750717163, "learning_rate": 9.65e-05, "loss": 3.9211, "step": 1930 }, { "epoch": 1.1143021252153935, "grad_norm": 5.024524211883545, "learning_rate": 9.7e-05, "loss": 3.9001, "step": 1940 }, { "epoch": 1.1200459506031017, "grad_norm": 3.330965995788574, "learning_rate": 9.75e-05, "loss": 3.8398, "step": 1950 }, { "epoch": 1.12578977599081, "grad_norm": 2.9635727405548096, "learning_rate": 9.8e-05, "loss": 3.73, "step": 1960 }, { "epoch": 1.1315336013785182, "grad_norm": 4.389112949371338, "learning_rate": 9.850000000000001e-05, "loss": 3.7353, "step": 1970 }, { "epoch": 1.1372774267662262, "grad_norm": 4.017416477203369, "learning_rate": 9.900000000000001e-05, "loss": 3.6872, "step": 1980 }, { "epoch": 1.1430212521539345, "grad_norm": 3.254038095474243, "learning_rate": 9.95e-05, "loss": 3.6201, "step": 1990 }, { "epoch": 1.1487650775416427, "grad_norm": 3.1984143257141113, "learning_rate": 0.0001, "loss": 3.556, "step": 2000 }, { "epoch": 1.1487650775416427, "eval_loss": 3.298543930053711, "eval_runtime": 134.2343, "eval_samples_per_second": 11.852, "eval_steps_per_second": 0.127, "eval_wer": 0.6563736015368968, "step": 2000 }, { "epoch": 1.154508902929351, "grad_norm": 3.599515676498413, "learning_rate": 9.999997709444327e-05, "loss": 3.5628, "step": 2010 }, { "epoch": 1.1602527283170592, "grad_norm": 3.7620060443878174, "learning_rate": 9.999990837779402e-05, "loss": 3.4545, "step": 2020 }, { "epoch": 1.1659965537047674, "grad_norm": 3.044027805328369, "learning_rate": 9.999979385011526e-05, "loss": 3.4293, "step": 2030 }, { "epoch": 1.1717403790924756, "grad_norm": 4.311574459075928, "learning_rate": 9.999963351151187e-05, "loss": 3.4216, "step": 2040 }, { "epoch": 1.1774842044801839, "grad_norm": 3.8230364322662354, "learning_rate": 9.999942736213082e-05, "loss": 3.4031, "step": 2050 }, { "epoch": 1.183228029867892, "grad_norm": 2.7271487712860107, "learning_rate": 9.999917540216097e-05, "loss": 3.3488, "step": 2060 }, { "epoch": 1.1889718552556001, "grad_norm": 3.1351821422576904, "learning_rate": 9.999887763183321e-05, "loss": 3.3062, "step": 2070 }, { "epoch": 1.1947156806433084, "grad_norm": 3.5218021869659424, "learning_rate": 9.999853405142033e-05, "loss": 3.2523, "step": 2080 }, { "epoch": 1.2004595060310166, "grad_norm": 3.355557680130005, "learning_rate": 9.999814466123717e-05, "loss": 3.2321, "step": 2090 }, { "epoch": 1.2062033314187248, "grad_norm": 2.673678398132324, "learning_rate": 9.999770946164053e-05, "loss": 3.2167, "step": 2100 }, { "epoch": 1.211947156806433, "grad_norm": 2.6313750743865967, "learning_rate": 9.999722845302913e-05, "loss": 3.1712, "step": 2110 }, { "epoch": 1.2176909821941413, "grad_norm": 2.699028491973877, "learning_rate": 9.999670163584374e-05, "loss": 3.1336, "step": 2120 }, { "epoch": 1.2234348075818495, "grad_norm": 3.1718649864196777, "learning_rate": 9.999612901056704e-05, "loss": 3.104, "step": 2130 }, { "epoch": 1.2291786329695578, "grad_norm": 3.0105576515197754, "learning_rate": 9.999551057772373e-05, "loss": 3.0972, "step": 2140 }, { "epoch": 1.234922458357266, "grad_norm": 2.405269145965576, "learning_rate": 9.999484633788044e-05, "loss": 3.052, "step": 2150 }, { "epoch": 1.2406662837449742, "grad_norm": 2.575950860977173, "learning_rate": 9.999413629164581e-05, "loss": 3.0088, "step": 2160 }, { "epoch": 1.2464101091326825, "grad_norm": 2.7079427242279053, "learning_rate": 9.999338043967042e-05, "loss": 2.9572, "step": 2170 }, { "epoch": 1.2521539345203907, "grad_norm": 2.3106322288513184, "learning_rate": 9.999257878264685e-05, "loss": 3.0007, "step": 2180 }, { "epoch": 1.2578977599080987, "grad_norm": 2.602318525314331, "learning_rate": 9.999173132130961e-05, "loss": 3.0284, "step": 2190 }, { "epoch": 1.263641585295807, "grad_norm": 2.740706205368042, "learning_rate": 9.999083805643521e-05, "loss": 2.8827, "step": 2200 }, { "epoch": 1.2693854106835152, "grad_norm": 2.7976179122924805, "learning_rate": 9.998989898884213e-05, "loss": 2.8553, "step": 2210 }, { "epoch": 1.2751292360712234, "grad_norm": 2.7452118396759033, "learning_rate": 9.998891411939077e-05, "loss": 2.911, "step": 2220 }, { "epoch": 1.2808730614589316, "grad_norm": 2.7137744426727295, "learning_rate": 9.99878834489836e-05, "loss": 2.8641, "step": 2230 }, { "epoch": 1.2866168868466399, "grad_norm": 2.503614664077759, "learning_rate": 9.998680697856495e-05, "loss": 2.7575, "step": 2240 }, { "epoch": 1.2923607122343481, "grad_norm": 3.2327475547790527, "learning_rate": 9.998568470912115e-05, "loss": 2.7739, "step": 2250 }, { "epoch": 1.2981045376220564, "grad_norm": 3.1317138671875, "learning_rate": 9.99845166416805e-05, "loss": 2.769, "step": 2260 }, { "epoch": 1.3038483630097644, "grad_norm": 2.6516921520233154, "learning_rate": 9.99833027773133e-05, "loss": 2.7254, "step": 2270 }, { "epoch": 1.3095921883974726, "grad_norm": 2.7394845485687256, "learning_rate": 9.998204311713172e-05, "loss": 2.7846, "step": 2280 }, { "epoch": 1.3153360137851808, "grad_norm": 2.830819606781006, "learning_rate": 9.998073766228999e-05, "loss": 2.7072, "step": 2290 }, { "epoch": 1.321079839172889, "grad_norm": 2.734186887741089, "learning_rate": 9.997938641398424e-05, "loss": 2.733, "step": 2300 }, { "epoch": 1.3268236645605973, "grad_norm": 2.7175474166870117, "learning_rate": 9.997798937345256e-05, "loss": 2.6192, "step": 2310 }, { "epoch": 1.3325674899483055, "grad_norm": 3.526029586791992, "learning_rate": 9.997654654197504e-05, "loss": 2.7071, "step": 2320 }, { "epoch": 1.3383113153360138, "grad_norm": 2.367809295654297, "learning_rate": 9.997505792087371e-05, "loss": 2.6703, "step": 2330 }, { "epoch": 1.344055140723722, "grad_norm": 2.6097769737243652, "learning_rate": 9.99735235115125e-05, "loss": 2.6224, "step": 2340 }, { "epoch": 1.3497989661114302, "grad_norm": 3.1089775562286377, "learning_rate": 9.997194331529738e-05, "loss": 2.571, "step": 2350 }, { "epoch": 1.3555427914991385, "grad_norm": 2.848905563354492, "learning_rate": 9.997031733367622e-05, "loss": 2.5765, "step": 2360 }, { "epoch": 1.3612866168868467, "grad_norm": 2.7338247299194336, "learning_rate": 9.996864556813884e-05, "loss": 2.5917, "step": 2370 }, { "epoch": 1.367030442274555, "grad_norm": 2.7184884548187256, "learning_rate": 9.996692802021705e-05, "loss": 2.5485, "step": 2380 }, { "epoch": 1.3727742676622632, "grad_norm": 2.4794275760650635, "learning_rate": 9.99651646914846e-05, "loss": 2.5777, "step": 2390 }, { "epoch": 1.3785180930499714, "grad_norm": 2.6377339363098145, "learning_rate": 9.996335558355711e-05, "loss": 2.4702, "step": 2400 }, { "epoch": 1.3842619184376794, "grad_norm": 2.6016781330108643, "learning_rate": 9.996150069809225e-05, "loss": 2.5105, "step": 2410 }, { "epoch": 1.3900057438253877, "grad_norm": 2.295804738998413, "learning_rate": 9.99596000367896e-05, "loss": 2.4875, "step": 2420 }, { "epoch": 1.395749569213096, "grad_norm": 2.5935721397399902, "learning_rate": 9.995765360139065e-05, "loss": 2.5321, "step": 2430 }, { "epoch": 1.4014933946008041, "grad_norm": 2.610018730163574, "learning_rate": 9.99556613936789e-05, "loss": 2.5063, "step": 2440 }, { "epoch": 1.4072372199885124, "grad_norm": 2.441831588745117, "learning_rate": 9.995362341547968e-05, "loss": 2.4652, "step": 2450 }, { "epoch": 1.4129810453762206, "grad_norm": 2.280978202819824, "learning_rate": 9.995153966866038e-05, "loss": 2.4431, "step": 2460 }, { "epoch": 1.4187248707639288, "grad_norm": 2.8101091384887695, "learning_rate": 9.994941015513024e-05, "loss": 2.4762, "step": 2470 }, { "epoch": 1.424468696151637, "grad_norm": 2.2506802082061768, "learning_rate": 9.994723487684047e-05, "loss": 2.4469, "step": 2480 }, { "epoch": 1.430212521539345, "grad_norm": 2.431871175765991, "learning_rate": 9.994501383578422e-05, "loss": 2.3902, "step": 2490 }, { "epoch": 1.4359563469270533, "grad_norm": 2.6454572677612305, "learning_rate": 9.994274703399656e-05, "loss": 2.3476, "step": 2500 }, { "epoch": 1.4417001723147616, "grad_norm": 2.3911397457122803, "learning_rate": 9.994043447355447e-05, "loss": 2.3861, "step": 2510 }, { "epoch": 1.4474439977024698, "grad_norm": 3.0244767665863037, "learning_rate": 9.99380761565769e-05, "loss": 2.3384, "step": 2520 }, { "epoch": 1.453187823090178, "grad_norm": 2.5775461196899414, "learning_rate": 9.993567208522468e-05, "loss": 2.3815, "step": 2530 }, { "epoch": 1.4589316484778863, "grad_norm": 2.2212674617767334, "learning_rate": 9.993322226170059e-05, "loss": 2.4184, "step": 2540 }, { "epoch": 1.4646754738655945, "grad_norm": 2.528367280960083, "learning_rate": 9.993072668824933e-05, "loss": 2.3448, "step": 2550 }, { "epoch": 1.4704192992533027, "grad_norm": 2.2349610328674316, "learning_rate": 9.99281853671575e-05, "loss": 2.334, "step": 2560 }, { "epoch": 1.476163124641011, "grad_norm": 2.3622207641601562, "learning_rate": 9.992559830075366e-05, "loss": 2.3842, "step": 2570 }, { "epoch": 1.4819069500287192, "grad_norm": 2.2977137565612793, "learning_rate": 9.99229654914082e-05, "loss": 2.3045, "step": 2580 }, { "epoch": 1.4876507754164274, "grad_norm": 1.933606743812561, "learning_rate": 9.992028694153354e-05, "loss": 2.3059, "step": 2590 }, { "epoch": 1.4933946008041357, "grad_norm": 2.331773519515991, "learning_rate": 9.991756265358393e-05, "loss": 2.2804, "step": 2600 }, { "epoch": 1.499138426191844, "grad_norm": 2.951323986053467, "learning_rate": 9.991479263005554e-05, "loss": 2.3432, "step": 2610 }, { "epoch": 1.5048822515795521, "grad_norm": 2.289476156234741, "learning_rate": 9.991197687348648e-05, "loss": 2.281, "step": 2620 }, { "epoch": 1.5106260769672601, "grad_norm": 2.394831895828247, "learning_rate": 9.990911538645669e-05, "loss": 2.2692, "step": 2630 }, { "epoch": 1.5163699023549684, "grad_norm": 2.339203357696533, "learning_rate": 9.99062081715881e-05, "loss": 2.2815, "step": 2640 }, { "epoch": 1.5221137277426766, "grad_norm": 2.315734624862671, "learning_rate": 9.990325523154449e-05, "loss": 2.2323, "step": 2650 }, { "epoch": 1.5278575531303848, "grad_norm": 2.2262301445007324, "learning_rate": 9.990025656903151e-05, "loss": 2.2553, "step": 2660 }, { "epoch": 1.533601378518093, "grad_norm": 2.3424930572509766, "learning_rate": 9.989721218679679e-05, "loss": 2.2106, "step": 2670 }, { "epoch": 1.5393452039058013, "grad_norm": 2.5576043128967285, "learning_rate": 9.989412208762978e-05, "loss": 2.2682, "step": 2680 }, { "epoch": 1.5450890292935093, "grad_norm": 2.107931137084961, "learning_rate": 9.989098627436182e-05, "loss": 2.2304, "step": 2690 }, { "epoch": 1.5508328546812176, "grad_norm": 2.4734134674072266, "learning_rate": 9.988780474986619e-05, "loss": 2.2944, "step": 2700 }, { "epoch": 1.5565766800689258, "grad_norm": 2.3347787857055664, "learning_rate": 9.988457751705799e-05, "loss": 2.2134, "step": 2710 }, { "epoch": 1.562320505456634, "grad_norm": 2.101534366607666, "learning_rate": 9.988130457889425e-05, "loss": 2.2543, "step": 2720 }, { "epoch": 1.5680643308443423, "grad_norm": 2.0245771408081055, "learning_rate": 9.987798593837385e-05, "loss": 2.1982, "step": 2730 }, { "epoch": 1.5738081562320505, "grad_norm": 2.2531967163085938, "learning_rate": 9.987462159853755e-05, "loss": 2.1985, "step": 2740 }, { "epoch": 1.5795519816197587, "grad_norm": 2.476135492324829, "learning_rate": 9.9871211562468e-05, "loss": 2.2326, "step": 2750 }, { "epoch": 1.585295807007467, "grad_norm": 2.1797146797180176, "learning_rate": 9.986775583328971e-05, "loss": 2.2304, "step": 2760 }, { "epoch": 1.5910396323951752, "grad_norm": 2.2493717670440674, "learning_rate": 9.986425441416902e-05, "loss": 2.1488, "step": 2770 }, { "epoch": 1.5967834577828834, "grad_norm": 2.4548563957214355, "learning_rate": 9.986070730831422e-05, "loss": 2.2093, "step": 2780 }, { "epoch": 1.6025272831705917, "grad_norm": 2.227916955947876, "learning_rate": 9.985711451897537e-05, "loss": 2.1303, "step": 2790 }, { "epoch": 1.6082711085583, "grad_norm": 2.5574474334716797, "learning_rate": 9.985347604944443e-05, "loss": 2.1631, "step": 2800 }, { "epoch": 1.6140149339460081, "grad_norm": 1.9754066467285156, "learning_rate": 9.984979190305524e-05, "loss": 2.1326, "step": 2810 }, { "epoch": 1.6197587593337164, "grad_norm": 1.9048519134521484, "learning_rate": 9.984606208318346e-05, "loss": 2.1867, "step": 2820 }, { "epoch": 1.6255025847214246, "grad_norm": 2.357179880142212, "learning_rate": 9.984228659324658e-05, "loss": 2.1223, "step": 2830 }, { "epoch": 1.6312464101091326, "grad_norm": 2.1598963737487793, "learning_rate": 9.983846543670398e-05, "loss": 2.2037, "step": 2840 }, { "epoch": 1.6369902354968409, "grad_norm": 2.0762202739715576, "learning_rate": 9.983459861705686e-05, "loss": 2.1561, "step": 2850 }, { "epoch": 1.642734060884549, "grad_norm": 2.152742862701416, "learning_rate": 9.983068613784825e-05, "loss": 2.1443, "step": 2860 }, { "epoch": 1.6484778862722573, "grad_norm": 2.111618995666504, "learning_rate": 9.982672800266307e-05, "loss": 2.124, "step": 2870 }, { "epoch": 1.6542217116599656, "grad_norm": 2.063375949859619, "learning_rate": 9.982272421512799e-05, "loss": 2.1186, "step": 2880 }, { "epoch": 1.6599655370476738, "grad_norm": 2.5852835178375244, "learning_rate": 9.981867477891157e-05, "loss": 2.1264, "step": 2890 }, { "epoch": 1.6657093624353818, "grad_norm": 2.230330467224121, "learning_rate": 9.981457969772418e-05, "loss": 2.1144, "step": 2900 }, { "epoch": 1.67145318782309, "grad_norm": 1.8338911533355713, "learning_rate": 9.9810438975318e-05, "loss": 2.1473, "step": 2910 }, { "epoch": 1.6771970132107983, "grad_norm": 1.8844740390777588, "learning_rate": 9.980625261548707e-05, "loss": 2.0594, "step": 2920 }, { "epoch": 1.6829408385985065, "grad_norm": 2.140822649002075, "learning_rate": 9.98020206220672e-05, "loss": 2.1078, "step": 2930 }, { "epoch": 1.6886846639862148, "grad_norm": 2.2550313472747803, "learning_rate": 9.979774299893601e-05, "loss": 2.1365, "step": 2940 }, { "epoch": 1.694428489373923, "grad_norm": 2.161696434020996, "learning_rate": 9.979341975001298e-05, "loss": 2.1481, "step": 2950 }, { "epoch": 1.7001723147616312, "grad_norm": 2.343641519546509, "learning_rate": 9.978905087925936e-05, "loss": 2.0848, "step": 2960 }, { "epoch": 1.7059161401493395, "grad_norm": 1.7906761169433594, "learning_rate": 9.97846363906782e-05, "loss": 2.0444, "step": 2970 }, { "epoch": 1.7116599655370477, "grad_norm": 2.2159507274627686, "learning_rate": 9.978017628831435e-05, "loss": 2.063, "step": 2980 }, { "epoch": 1.717403790924756, "grad_norm": 1.9609827995300293, "learning_rate": 9.977567057625448e-05, "loss": 2.0772, "step": 2990 }, { "epoch": 1.7231476163124642, "grad_norm": 2.187344789505005, "learning_rate": 9.977111925862701e-05, "loss": 2.043, "step": 3000 }, { "epoch": 1.7231476163124642, "eval_loss": 1.5376592874526978, "eval_runtime": 118.538, "eval_samples_per_second": 13.422, "eval_steps_per_second": 0.143, "eval_wer": 0.16281500734546275, "step": 3000 }, { "epoch": 1.7288914417001724, "grad_norm": 2.0185186862945557, "learning_rate": 9.976652233960216e-05, "loss": 2.0608, "step": 3010 }, { "epoch": 1.7346352670878806, "grad_norm": 2.0478994846343994, "learning_rate": 9.976187982339198e-05, "loss": 2.0297, "step": 3020 }, { "epoch": 1.7403790924755889, "grad_norm": 1.8286670446395874, "learning_rate": 9.975719171425023e-05, "loss": 2.0616, "step": 3030 }, { "epoch": 1.746122917863297, "grad_norm": 2.078399896621704, "learning_rate": 9.975245801647246e-05, "loss": 2.0619, "step": 3040 }, { "epoch": 1.7518667432510053, "grad_norm": 1.829615831375122, "learning_rate": 9.974767873439603e-05, "loss": 1.9933, "step": 3050 }, { "epoch": 1.7576105686387133, "grad_norm": 2.0238797664642334, "learning_rate": 9.974285387240004e-05, "loss": 2.0745, "step": 3060 }, { "epoch": 1.7633543940264216, "grad_norm": 1.9021639823913574, "learning_rate": 9.973798343490535e-05, "loss": 2.0614, "step": 3070 }, { "epoch": 1.7690982194141298, "grad_norm": 2.020947217941284, "learning_rate": 9.97330674263746e-05, "loss": 1.9981, "step": 3080 }, { "epoch": 1.774842044801838, "grad_norm": 2.1848363876342773, "learning_rate": 9.972810585131218e-05, "loss": 1.9835, "step": 3090 }, { "epoch": 1.7805858701895463, "grad_norm": 1.9264791011810303, "learning_rate": 9.972309871426417e-05, "loss": 2.0374, "step": 3100 }, { "epoch": 1.7863296955772543, "grad_norm": 1.9320201873779297, "learning_rate": 9.971804601981851e-05, "loss": 2.0116, "step": 3110 }, { "epoch": 1.7920735209649625, "grad_norm": 2.040639877319336, "learning_rate": 9.971294777260478e-05, "loss": 2.0172, "step": 3120 }, { "epoch": 1.7978173463526708, "grad_norm": 1.878435730934143, "learning_rate": 9.970780397729437e-05, "loss": 2.039, "step": 3130 }, { "epoch": 1.803561171740379, "grad_norm": 2.0855300426483154, "learning_rate": 9.970261463860037e-05, "loss": 1.9758, "step": 3140 }, { "epoch": 1.8093049971280872, "grad_norm": 1.8565047979354858, "learning_rate": 9.96973797612776e-05, "loss": 2.0312, "step": 3150 }, { "epoch": 1.8150488225157955, "grad_norm": 2.0382068157196045, "learning_rate": 9.96920993501226e-05, "loss": 1.9775, "step": 3160 }, { "epoch": 1.8207926479035037, "grad_norm": 1.9754124879837036, "learning_rate": 9.968677340997366e-05, "loss": 1.9888, "step": 3170 }, { "epoch": 1.826536473291212, "grad_norm": 1.8170818090438843, "learning_rate": 9.968140194571076e-05, "loss": 1.9683, "step": 3180 }, { "epoch": 1.8322802986789202, "grad_norm": 2.2989866733551025, "learning_rate": 9.967598496225562e-05, "loss": 2.0482, "step": 3190 }, { "epoch": 1.8380241240666284, "grad_norm": 2.2369909286499023, "learning_rate": 9.967052246457162e-05, "loss": 2.0328, "step": 3200 }, { "epoch": 1.8437679494543366, "grad_norm": 1.9546477794647217, "learning_rate": 9.966501445766387e-05, "loss": 1.9906, "step": 3210 }, { "epoch": 1.8495117748420449, "grad_norm": 2.0692763328552246, "learning_rate": 9.965946094657922e-05, "loss": 2.0246, "step": 3220 }, { "epoch": 1.855255600229753, "grad_norm": 1.7040553092956543, "learning_rate": 9.965386193640614e-05, "loss": 1.9528, "step": 3230 }, { "epoch": 1.8609994256174613, "grad_norm": 2.292430877685547, "learning_rate": 9.964821743227483e-05, "loss": 1.9443, "step": 3240 }, { "epoch": 1.8667432510051696, "grad_norm": 1.714004397392273, "learning_rate": 9.96425274393572e-05, "loss": 1.9865, "step": 3250 }, { "epoch": 1.8724870763928778, "grad_norm": 1.8055516481399536, "learning_rate": 9.963679196286675e-05, "loss": 1.9863, "step": 3260 }, { "epoch": 1.8782309017805858, "grad_norm": 2.266010046005249, "learning_rate": 9.963101100805877e-05, "loss": 2.0021, "step": 3270 }, { "epoch": 1.883974727168294, "grad_norm": 2.031609058380127, "learning_rate": 9.962518458023014e-05, "loss": 1.9567, "step": 3280 }, { "epoch": 1.8897185525560023, "grad_norm": 1.9392821788787842, "learning_rate": 9.961931268471942e-05, "loss": 1.9666, "step": 3290 }, { "epoch": 1.8954623779437105, "grad_norm": 1.846899151802063, "learning_rate": 9.961339532690685e-05, "loss": 1.9543, "step": 3300 }, { "epoch": 1.9012062033314188, "grad_norm": 1.9800326824188232, "learning_rate": 9.960743251221434e-05, "loss": 1.981, "step": 3310 }, { "epoch": 1.9069500287191268, "grad_norm": 1.947850227355957, "learning_rate": 9.960142424610539e-05, "loss": 1.9292, "step": 3320 }, { "epoch": 1.912693854106835, "grad_norm": 1.733098030090332, "learning_rate": 9.95953705340852e-05, "loss": 1.9522, "step": 3330 }, { "epoch": 1.9184376794945432, "grad_norm": 1.806164264678955, "learning_rate": 9.958927138170058e-05, "loss": 1.9055, "step": 3340 }, { "epoch": 1.9241815048822515, "grad_norm": 2.001389503479004, "learning_rate": 9.958312679454002e-05, "loss": 1.919, "step": 3350 }, { "epoch": 1.9299253302699597, "grad_norm": 2.0079329013824463, "learning_rate": 9.957693677823358e-05, "loss": 1.9778, "step": 3360 }, { "epoch": 1.935669155657668, "grad_norm": 1.8081001043319702, "learning_rate": 9.957070133845297e-05, "loss": 1.949, "step": 3370 }, { "epoch": 1.9414129810453762, "grad_norm": 1.9634689092636108, "learning_rate": 9.956442048091156e-05, "loss": 1.9185, "step": 3380 }, { "epoch": 1.9471568064330844, "grad_norm": 1.979411005973816, "learning_rate": 9.955809421136427e-05, "loss": 1.9137, "step": 3390 }, { "epoch": 1.9529006318207927, "grad_norm": 1.906067132949829, "learning_rate": 9.955172253560765e-05, "loss": 1.9222, "step": 3400 }, { "epoch": 1.9586444572085009, "grad_norm": 1.8121016025543213, "learning_rate": 9.954530545947988e-05, "loss": 1.9434, "step": 3410 }, { "epoch": 1.9643882825962091, "grad_norm": 1.7773370742797852, "learning_rate": 9.953884298886073e-05, "loss": 1.9087, "step": 3420 }, { "epoch": 1.9701321079839174, "grad_norm": 1.8535752296447754, "learning_rate": 9.953233512967155e-05, "loss": 1.9061, "step": 3430 }, { "epoch": 1.9758759333716256, "grad_norm": 1.8436633348464966, "learning_rate": 9.952578188787528e-05, "loss": 1.8902, "step": 3440 }, { "epoch": 1.9816197587593338, "grad_norm": 2.091872453689575, "learning_rate": 9.951918326947642e-05, "loss": 1.9193, "step": 3450 }, { "epoch": 1.987363584147042, "grad_norm": 1.865073561668396, "learning_rate": 9.951253928052112e-05, "loss": 1.9217, "step": 3460 }, { "epoch": 1.9931074095347503, "grad_norm": 1.6477876901626587, "learning_rate": 9.950584992709704e-05, "loss": 1.8917, "step": 3470 }, { "epoch": 1.9988512349224583, "grad_norm": 1.938935399055481, "learning_rate": 9.949911521533341e-05, "loss": 1.8995, "step": 3480 }, { "epoch": 2.0045950603101668, "grad_norm": 1.6785749197006226, "learning_rate": 9.949233515140105e-05, "loss": 1.9109, "step": 3490 }, { "epoch": 2.010338885697875, "grad_norm": 1.6445651054382324, "learning_rate": 9.948550974151228e-05, "loss": 1.8619, "step": 3500 }, { "epoch": 2.016082711085583, "grad_norm": 2.163782835006714, "learning_rate": 9.947863899192105e-05, "loss": 1.863, "step": 3510 }, { "epoch": 2.021826536473291, "grad_norm": 1.6399071216583252, "learning_rate": 9.947172290892277e-05, "loss": 1.8514, "step": 3520 }, { "epoch": 2.0275703618609993, "grad_norm": 1.8980154991149902, "learning_rate": 9.946476149885443e-05, "loss": 1.8628, "step": 3530 }, { "epoch": 2.0333141872487075, "grad_norm": 1.7344228029251099, "learning_rate": 9.945775476809459e-05, "loss": 1.8687, "step": 3540 }, { "epoch": 2.0390580126364157, "grad_norm": 1.8204574584960938, "learning_rate": 9.945070272306326e-05, "loss": 1.8337, "step": 3550 }, { "epoch": 2.044801838024124, "grad_norm": 1.9501166343688965, "learning_rate": 9.9443605370222e-05, "loss": 1.8864, "step": 3560 }, { "epoch": 2.050545663411832, "grad_norm": 1.8548567295074463, "learning_rate": 9.94364627160739e-05, "loss": 1.8264, "step": 3570 }, { "epoch": 2.0562894887995404, "grad_norm": 1.9015278816223145, "learning_rate": 9.942927476716353e-05, "loss": 1.8747, "step": 3580 }, { "epoch": 2.0620333141872487, "grad_norm": 5.407815933227539, "learning_rate": 9.942204153007701e-05, "loss": 1.849, "step": 3590 }, { "epoch": 2.067777139574957, "grad_norm": 2.2696824073791504, "learning_rate": 9.94147630114419e-05, "loss": 1.8654, "step": 3600 }, { "epoch": 2.073520964962665, "grad_norm": 2.291529893875122, "learning_rate": 9.940743921792727e-05, "loss": 1.8262, "step": 3610 }, { "epoch": 2.0792647903503734, "grad_norm": 1.9089547395706177, "learning_rate": 9.940007015624368e-05, "loss": 1.885, "step": 3620 }, { "epoch": 2.0850086157380816, "grad_norm": 1.8163701295852661, "learning_rate": 9.939265583314319e-05, "loss": 1.7987, "step": 3630 }, { "epoch": 2.09075244112579, "grad_norm": 2.1564691066741943, "learning_rate": 9.938519625541929e-05, "loss": 1.8413, "step": 3640 }, { "epoch": 2.096496266513498, "grad_norm": 1.8651676177978516, "learning_rate": 9.937769142990695e-05, "loss": 1.8011, "step": 3650 }, { "epoch": 2.1022400919012063, "grad_norm": 2.2574386596679688, "learning_rate": 9.937014136348261e-05, "loss": 1.8566, "step": 3660 }, { "epoch": 2.1079839172889145, "grad_norm": 1.8635132312774658, "learning_rate": 9.936254606306414e-05, "loss": 1.867, "step": 3670 }, { "epoch": 2.113727742676623, "grad_norm": 1.7160543203353882, "learning_rate": 9.93549055356109e-05, "loss": 1.8858, "step": 3680 }, { "epoch": 2.119471568064331, "grad_norm": 2.180222749710083, "learning_rate": 9.934721978812364e-05, "loss": 1.8533, "step": 3690 }, { "epoch": 2.1252153934520392, "grad_norm": 1.6430362462997437, "learning_rate": 9.933948882764456e-05, "loss": 1.8363, "step": 3700 }, { "epoch": 2.130959218839747, "grad_norm": 2.0018839836120605, "learning_rate": 9.933171266125729e-05, "loss": 1.8417, "step": 3710 }, { "epoch": 2.1367030442274553, "grad_norm": 1.5961920022964478, "learning_rate": 9.932389129608693e-05, "loss": 1.8347, "step": 3720 }, { "epoch": 2.1424468696151635, "grad_norm": 1.927363395690918, "learning_rate": 9.931602473929988e-05, "loss": 1.8379, "step": 3730 }, { "epoch": 2.1481906950028717, "grad_norm": 1.6899296045303345, "learning_rate": 9.930811299810407e-05, "loss": 1.8172, "step": 3740 }, { "epoch": 2.15393452039058, "grad_norm": 1.7401469945907593, "learning_rate": 9.930015607974874e-05, "loss": 1.7898, "step": 3750 }, { "epoch": 2.159678345778288, "grad_norm": 1.5379750728607178, "learning_rate": 9.929215399152457e-05, "loss": 1.8134, "step": 3760 }, { "epoch": 2.1654221711659964, "grad_norm": 2.779557943344116, "learning_rate": 9.92841067407636e-05, "loss": 1.8462, "step": 3770 }, { "epoch": 2.1711659965537047, "grad_norm": 2.0340523719787598, "learning_rate": 9.927601433483932e-05, "loss": 1.8363, "step": 3780 }, { "epoch": 2.176909821941413, "grad_norm": 2.0397660732269287, "learning_rate": 9.92678767811665e-05, "loss": 1.8127, "step": 3790 }, { "epoch": 2.182653647329121, "grad_norm": 1.8008798360824585, "learning_rate": 9.925969408720134e-05, "loss": 1.807, "step": 3800 }, { "epoch": 2.1883974727168294, "grad_norm": 1.5582823753356934, "learning_rate": 9.925146626044138e-05, "loss": 1.8112, "step": 3810 }, { "epoch": 2.1941412981045376, "grad_norm": 1.7653279304504395, "learning_rate": 9.924319330842551e-05, "loss": 1.8477, "step": 3820 }, { "epoch": 2.199885123492246, "grad_norm": 1.6408051252365112, "learning_rate": 9.923487523873397e-05, "loss": 1.7837, "step": 3830 }, { "epoch": 2.205628948879954, "grad_norm": 1.5557013750076294, "learning_rate": 9.922651205898834e-05, "loss": 1.8259, "step": 3840 }, { "epoch": 2.2113727742676623, "grad_norm": 1.7891162633895874, "learning_rate": 9.921810377685153e-05, "loss": 1.8289, "step": 3850 }, { "epoch": 2.2171165996553706, "grad_norm": 1.9197757244110107, "learning_rate": 9.920965040002781e-05, "loss": 1.7728, "step": 3860 }, { "epoch": 2.222860425043079, "grad_norm": 1.8888788223266602, "learning_rate": 9.92011519362627e-05, "loss": 1.7889, "step": 3870 }, { "epoch": 2.228604250430787, "grad_norm": 1.7249999046325684, "learning_rate": 9.919260839334309e-05, "loss": 1.7782, "step": 3880 }, { "epoch": 2.2343480758184953, "grad_norm": 1.797751784324646, "learning_rate": 9.918401977909716e-05, "loss": 1.8266, "step": 3890 }, { "epoch": 2.2400919012062035, "grad_norm": 1.8753774166107178, "learning_rate": 9.917538610139438e-05, "loss": 1.8381, "step": 3900 }, { "epoch": 2.2458357265939117, "grad_norm": 1.7335102558135986, "learning_rate": 9.916670736814552e-05, "loss": 1.7924, "step": 3910 }, { "epoch": 2.25157955198162, "grad_norm": 1.8527804613113403, "learning_rate": 9.91579835873026e-05, "loss": 1.8102, "step": 3920 }, { "epoch": 2.257323377369328, "grad_norm": 1.818963885307312, "learning_rate": 9.914921476685897e-05, "loss": 1.7871, "step": 3930 }, { "epoch": 2.2630672027570364, "grad_norm": 2.0059568881988525, "learning_rate": 9.914040091484921e-05, "loss": 1.8068, "step": 3940 }, { "epoch": 2.2688110281447442, "grad_norm": 1.555553913116455, "learning_rate": 9.913154203934917e-05, "loss": 1.8109, "step": 3950 }, { "epoch": 2.2745548535324525, "grad_norm": 1.7397311925888062, "learning_rate": 9.912263814847596e-05, "loss": 1.8161, "step": 3960 }, { "epoch": 2.2802986789201607, "grad_norm": 1.7555936574935913, "learning_rate": 9.911368925038792e-05, "loss": 1.7862, "step": 3970 }, { "epoch": 2.286042504307869, "grad_norm": 1.83700430393219, "learning_rate": 9.910469535328466e-05, "loss": 1.8232, "step": 3980 }, { "epoch": 2.291786329695577, "grad_norm": 1.6713528633117676, "learning_rate": 9.909565646540698e-05, "loss": 1.8019, "step": 3990 }, { "epoch": 2.2975301550832854, "grad_norm": 1.5632139444351196, "learning_rate": 9.908657259503692e-05, "loss": 1.7877, "step": 4000 }, { "epoch": 2.2975301550832854, "eval_loss": 1.3252075910568237, "eval_runtime": 118.8863, "eval_samples_per_second": 13.383, "eval_steps_per_second": 0.143, "eval_wer": 0.12863035371228387, "step": 4000 }, { "epoch": 2.3032739804709936, "grad_norm": 1.6311732530593872, "learning_rate": 9.907744375049777e-05, "loss": 1.8081, "step": 4010 }, { "epoch": 2.309017805858702, "grad_norm": 1.7291101217269897, "learning_rate": 9.906826994015398e-05, "loss": 1.8197, "step": 4020 }, { "epoch": 2.31476163124641, "grad_norm": 1.5252999067306519, "learning_rate": 9.905905117241123e-05, "loss": 1.8088, "step": 4030 }, { "epoch": 2.3205054566341183, "grad_norm": 1.73078453540802, "learning_rate": 9.904978745571635e-05, "loss": 1.8088, "step": 4040 }, { "epoch": 2.3262492820218266, "grad_norm": 1.774350881576538, "learning_rate": 9.904047879855742e-05, "loss": 1.7625, "step": 4050 }, { "epoch": 2.331993107409535, "grad_norm": 1.5366252660751343, "learning_rate": 9.903112520946365e-05, "loss": 1.7548, "step": 4060 }, { "epoch": 2.337736932797243, "grad_norm": 2.1595780849456787, "learning_rate": 9.902172669700545e-05, "loss": 1.8042, "step": 4070 }, { "epoch": 2.3434807581849513, "grad_norm": 1.7477549314498901, "learning_rate": 9.901228326979439e-05, "loss": 1.782, "step": 4080 }, { "epoch": 2.3492245835726595, "grad_norm": 1.615454912185669, "learning_rate": 9.900279493648313e-05, "loss": 1.7851, "step": 4090 }, { "epoch": 2.3549684089603677, "grad_norm": 1.691811203956604, "learning_rate": 9.899326170576557e-05, "loss": 1.8087, "step": 4100 }, { "epoch": 2.360712234348076, "grad_norm": 1.5970778465270996, "learning_rate": 9.89836835863767e-05, "loss": 1.7835, "step": 4110 }, { "epoch": 2.366456059735784, "grad_norm": 1.557707667350769, "learning_rate": 9.897406058709263e-05, "loss": 1.7658, "step": 4120 }, { "epoch": 2.372199885123492, "grad_norm": 1.424856424331665, "learning_rate": 9.896439271673062e-05, "loss": 1.7659, "step": 4130 }, { "epoch": 2.3779437105112002, "grad_norm": 1.9639933109283447, "learning_rate": 9.895467998414903e-05, "loss": 1.7797, "step": 4140 }, { "epoch": 2.3836875358989085, "grad_norm": 1.7225106954574585, "learning_rate": 9.89449223982473e-05, "loss": 1.8025, "step": 4150 }, { "epoch": 2.3894313612866167, "grad_norm": 2.075155735015869, "learning_rate": 9.893511996796604e-05, "loss": 1.7508, "step": 4160 }, { "epoch": 2.395175186674325, "grad_norm": 1.6480848789215088, "learning_rate": 9.892527270228688e-05, "loss": 1.7818, "step": 4170 }, { "epoch": 2.400919012062033, "grad_norm": 1.5570247173309326, "learning_rate": 9.891538061023254e-05, "loss": 1.767, "step": 4180 }, { "epoch": 2.4066628374497414, "grad_norm": 1.6227962970733643, "learning_rate": 9.890544370086684e-05, "loss": 1.7814, "step": 4190 }, { "epoch": 2.4124066628374496, "grad_norm": 1.8115174770355225, "learning_rate": 9.889546198329469e-05, "loss": 1.7697, "step": 4200 }, { "epoch": 2.418150488225158, "grad_norm": 1.9101393222808838, "learning_rate": 9.888543546666196e-05, "loss": 1.763, "step": 4210 }, { "epoch": 2.423894313612866, "grad_norm": 1.8728480339050293, "learning_rate": 9.887536416015565e-05, "loss": 1.7417, "step": 4220 }, { "epoch": 2.4296381390005743, "grad_norm": 1.5599184036254883, "learning_rate": 9.88652480730038e-05, "loss": 1.7757, "step": 4230 }, { "epoch": 2.4353819643882826, "grad_norm": 1.5323339700698853, "learning_rate": 9.885508721447544e-05, "loss": 1.7791, "step": 4240 }, { "epoch": 2.441125789775991, "grad_norm": 1.7740155458450317, "learning_rate": 9.884488159388062e-05, "loss": 1.742, "step": 4250 }, { "epoch": 2.446869615163699, "grad_norm": 1.5832844972610474, "learning_rate": 9.883463122057046e-05, "loss": 1.8082, "step": 4260 }, { "epoch": 2.4526134405514073, "grad_norm": 1.515741229057312, "learning_rate": 9.882433610393701e-05, "loss": 1.745, "step": 4270 }, { "epoch": 2.4583572659391155, "grad_norm": 1.6691546440124512, "learning_rate": 9.88139962534134e-05, "loss": 1.7197, "step": 4280 }, { "epoch": 2.4641010913268238, "grad_norm": 1.5105769634246826, "learning_rate": 9.880361167847368e-05, "loss": 1.7331, "step": 4290 }, { "epoch": 2.469844916714532, "grad_norm": 1.6371650695800781, "learning_rate": 9.879318238863292e-05, "loss": 1.7437, "step": 4300 }, { "epoch": 2.4755887421022402, "grad_norm": 1.59730064868927, "learning_rate": 9.878270839344712e-05, "loss": 1.7572, "step": 4310 }, { "epoch": 2.4813325674899485, "grad_norm": 1.6025923490524292, "learning_rate": 9.877218970251328e-05, "loss": 1.7431, "step": 4320 }, { "epoch": 2.4870763928776567, "grad_norm": 1.5592079162597656, "learning_rate": 9.876162632546937e-05, "loss": 1.7379, "step": 4330 }, { "epoch": 2.492820218265365, "grad_norm": 1.7612663507461548, "learning_rate": 9.875101827199422e-05, "loss": 1.7847, "step": 4340 }, { "epoch": 2.498564043653073, "grad_norm": 1.7232937812805176, "learning_rate": 9.874036555180768e-05, "loss": 1.788, "step": 4350 }, { "epoch": 2.5043078690407814, "grad_norm": 2.1759378910064697, "learning_rate": 9.872966817467051e-05, "loss": 1.7413, "step": 4360 }, { "epoch": 2.5100516944284896, "grad_norm": 1.8792170286178589, "learning_rate": 9.871892615038436e-05, "loss": 1.7487, "step": 4370 }, { "epoch": 2.5157955198161974, "grad_norm": 1.9813764095306396, "learning_rate": 9.870813948879179e-05, "loss": 1.7151, "step": 4380 }, { "epoch": 2.5215393452039057, "grad_norm": 1.5582070350646973, "learning_rate": 9.869730819977631e-05, "loss": 1.7605, "step": 4390 }, { "epoch": 2.527283170591614, "grad_norm": 1.612741470336914, "learning_rate": 9.868643229326226e-05, "loss": 1.6946, "step": 4400 }, { "epoch": 2.533026995979322, "grad_norm": 1.4968715906143188, "learning_rate": 9.867551177921489e-05, "loss": 1.7403, "step": 4410 }, { "epoch": 2.5387708213670304, "grad_norm": 2.0964488983154297, "learning_rate": 9.86645466676403e-05, "loss": 1.7482, "step": 4420 }, { "epoch": 2.5445146467547386, "grad_norm": 1.4919378757476807, "learning_rate": 9.86535369685855e-05, "loss": 1.7483, "step": 4430 }, { "epoch": 2.550258472142447, "grad_norm": 1.7294507026672363, "learning_rate": 9.864248269213833e-05, "loss": 1.7387, "step": 4440 }, { "epoch": 2.556002297530155, "grad_norm": 1.449524164199829, "learning_rate": 9.863138384842746e-05, "loss": 1.7712, "step": 4450 }, { "epoch": 2.5617461229178633, "grad_norm": 1.7336524724960327, "learning_rate": 9.862024044762239e-05, "loss": 1.7172, "step": 4460 }, { "epoch": 2.5674899483055715, "grad_norm": 1.4938337802886963, "learning_rate": 9.860905249993347e-05, "loss": 1.7152, "step": 4470 }, { "epoch": 2.5732337736932798, "grad_norm": 1.5241613388061523, "learning_rate": 9.859782001561187e-05, "loss": 1.742, "step": 4480 }, { "epoch": 2.578977599080988, "grad_norm": 2.017444133758545, "learning_rate": 9.858654300494957e-05, "loss": 1.7256, "step": 4490 }, { "epoch": 2.5847214244686962, "grad_norm": 1.6224578619003296, "learning_rate": 9.857522147827929e-05, "loss": 1.7367, "step": 4500 }, { "epoch": 2.5904652498564045, "grad_norm": 1.5634706020355225, "learning_rate": 9.856385544597463e-05, "loss": 1.7262, "step": 4510 }, { "epoch": 2.5962090752441127, "grad_norm": 1.6724357604980469, "learning_rate": 9.855244491844989e-05, "loss": 1.7694, "step": 4520 }, { "epoch": 2.601952900631821, "grad_norm": 1.4197584390640259, "learning_rate": 9.854098990616018e-05, "loss": 1.7129, "step": 4530 }, { "epoch": 2.6076967260195287, "grad_norm": 1.4949312210083008, "learning_rate": 9.852949041960138e-05, "loss": 1.7153, "step": 4540 }, { "epoch": 2.613440551407237, "grad_norm": 1.4707062244415283, "learning_rate": 9.851794646931009e-05, "loss": 1.7437, "step": 4550 }, { "epoch": 2.619184376794945, "grad_norm": 1.5612741708755493, "learning_rate": 9.850635806586366e-05, "loss": 1.6803, "step": 4560 }, { "epoch": 2.6249282021826534, "grad_norm": 2.0915966033935547, "learning_rate": 9.849472521988018e-05, "loss": 1.7291, "step": 4570 }, { "epoch": 2.6306720275703617, "grad_norm": 1.6557848453521729, "learning_rate": 9.848304794201845e-05, "loss": 1.732, "step": 4580 }, { "epoch": 2.63641585295807, "grad_norm": 1.4456892013549805, "learning_rate": 9.847132624297799e-05, "loss": 1.694, "step": 4590 }, { "epoch": 2.642159678345778, "grad_norm": 1.5064661502838135, "learning_rate": 9.845956013349902e-05, "loss": 1.7224, "step": 4600 }, { "epoch": 2.6479035037334864, "grad_norm": 1.7379176616668701, "learning_rate": 9.844774962436244e-05, "loss": 1.6923, "step": 4610 }, { "epoch": 2.6536473291211946, "grad_norm": 1.7334299087524414, "learning_rate": 9.843589472638985e-05, "loss": 1.7385, "step": 4620 }, { "epoch": 2.659391154508903, "grad_norm": 2.0103020668029785, "learning_rate": 9.842399545044355e-05, "loss": 1.7113, "step": 4630 }, { "epoch": 2.665134979896611, "grad_norm": 1.9990754127502441, "learning_rate": 9.841205180742638e-05, "loss": 1.7217, "step": 4640 }, { "epoch": 2.6708788052843193, "grad_norm": 1.6340017318725586, "learning_rate": 9.840006380828202e-05, "loss": 1.752, "step": 4650 }, { "epoch": 2.6766226306720275, "grad_norm": 1.720790147781372, "learning_rate": 9.838803146399461e-05, "loss": 1.7218, "step": 4660 }, { "epoch": 2.682366456059736, "grad_norm": 1.9331406354904175, "learning_rate": 9.837595478558906e-05, "loss": 1.6949, "step": 4670 }, { "epoch": 2.688110281447444, "grad_norm": 1.696510672569275, "learning_rate": 9.836383378413082e-05, "loss": 1.7186, "step": 4680 }, { "epoch": 2.6938541068351523, "grad_norm": 1.6908689737319946, "learning_rate": 9.835166847072595e-05, "loss": 1.7275, "step": 4690 }, { "epoch": 2.6995979322228605, "grad_norm": 1.7099285125732422, "learning_rate": 9.833945885652119e-05, "loss": 1.657, "step": 4700 }, { "epoch": 2.7053417576105687, "grad_norm": 1.8270260095596313, "learning_rate": 9.832720495270378e-05, "loss": 1.741, "step": 4710 }, { "epoch": 2.711085582998277, "grad_norm": 1.5068602561950684, "learning_rate": 9.831490677050163e-05, "loss": 1.728, "step": 4720 }, { "epoch": 2.716829408385985, "grad_norm": 1.7136476039886475, "learning_rate": 9.83025643211831e-05, "loss": 1.7005, "step": 4730 }, { "epoch": 2.7225732337736934, "grad_norm": 1.5944297313690186, "learning_rate": 9.829017761605723e-05, "loss": 1.6879, "step": 4740 }, { "epoch": 2.7283170591614017, "grad_norm": 1.8268508911132812, "learning_rate": 9.827774666647357e-05, "loss": 1.707, "step": 4750 }, { "epoch": 2.73406088454911, "grad_norm": 1.5303763151168823, "learning_rate": 9.826527148382218e-05, "loss": 1.7282, "step": 4760 }, { "epoch": 2.739804709936818, "grad_norm": 1.5995979309082031, "learning_rate": 9.825275207953367e-05, "loss": 1.7205, "step": 4770 }, { "epoch": 2.7455485353245264, "grad_norm": 1.5851478576660156, "learning_rate": 9.824018846507919e-05, "loss": 1.7247, "step": 4780 }, { "epoch": 2.7512923607122346, "grad_norm": 1.6493960618972778, "learning_rate": 9.822758065197036e-05, "loss": 1.7032, "step": 4790 }, { "epoch": 2.757036186099943, "grad_norm": 1.381735920906067, "learning_rate": 9.821492865175931e-05, "loss": 1.7158, "step": 4800 }, { "epoch": 2.7627800114876506, "grad_norm": 1.8293508291244507, "learning_rate": 9.82022324760387e-05, "loss": 1.7101, "step": 4810 }, { "epoch": 2.768523836875359, "grad_norm": 1.5041792392730713, "learning_rate": 9.81894921364416e-05, "loss": 1.6721, "step": 4820 }, { "epoch": 2.774267662263067, "grad_norm": 1.661577820777893, "learning_rate": 9.817670764464159e-05, "loss": 1.6853, "step": 4830 }, { "epoch": 2.7800114876507753, "grad_norm": 1.6965444087982178, "learning_rate": 9.816387901235267e-05, "loss": 1.6916, "step": 4840 }, { "epoch": 2.7857553130384836, "grad_norm": 1.4665067195892334, "learning_rate": 9.815100625132935e-05, "loss": 1.7065, "step": 4850 }, { "epoch": 2.791499138426192, "grad_norm": 1.4456219673156738, "learning_rate": 9.813808937336648e-05, "loss": 1.72, "step": 4860 }, { "epoch": 2.7972429638139, "grad_norm": 1.7180407047271729, "learning_rate": 9.812512839029941e-05, "loss": 1.7002, "step": 4870 }, { "epoch": 2.8029867892016083, "grad_norm": 1.5545158386230469, "learning_rate": 9.811212331400388e-05, "loss": 1.6886, "step": 4880 }, { "epoch": 2.8087306145893165, "grad_norm": 1.6606507301330566, "learning_rate": 9.809907415639601e-05, "loss": 1.6949, "step": 4890 }, { "epoch": 2.8144744399770247, "grad_norm": 1.3799747228622437, "learning_rate": 9.808598092943235e-05, "loss": 1.7195, "step": 4900 }, { "epoch": 2.820218265364733, "grad_norm": 1.8167319297790527, "learning_rate": 9.807284364510976e-05, "loss": 1.7192, "step": 4910 }, { "epoch": 2.825962090752441, "grad_norm": 1.4204845428466797, "learning_rate": 9.805966231546558e-05, "loss": 1.6955, "step": 4920 }, { "epoch": 2.8317059161401494, "grad_norm": 1.6783603429794312, "learning_rate": 9.804643695257738e-05, "loss": 1.7277, "step": 4930 }, { "epoch": 2.8374497415278577, "grad_norm": 1.9263339042663574, "learning_rate": 9.803316756856317e-05, "loss": 1.7599, "step": 4940 }, { "epoch": 2.843193566915566, "grad_norm": 1.6194292306900024, "learning_rate": 9.801985417558126e-05, "loss": 1.7167, "step": 4950 }, { "epoch": 2.848937392303274, "grad_norm": 1.6685937643051147, "learning_rate": 9.80064967858303e-05, "loss": 1.7194, "step": 4960 }, { "epoch": 2.854681217690982, "grad_norm": 1.5313128232955933, "learning_rate": 9.79930954115492e-05, "loss": 1.7196, "step": 4970 }, { "epoch": 2.86042504307869, "grad_norm": 1.4977713823318481, "learning_rate": 9.797965006501723e-05, "loss": 1.7082, "step": 4980 }, { "epoch": 2.8661688684663984, "grad_norm": 1.5906270742416382, "learning_rate": 9.796616075855396e-05, "loss": 1.6943, "step": 4990 }, { "epoch": 2.8719126938541066, "grad_norm": 1.3928202390670776, "learning_rate": 9.795262750451918e-05, "loss": 1.6818, "step": 5000 }, { "epoch": 2.8719126938541066, "eval_loss": 1.2614110708236694, "eval_runtime": 119.2784, "eval_samples_per_second": 13.339, "eval_steps_per_second": 0.143, "eval_wer": 0.12450559385241271, "step": 5000 }, { "epoch": 2.877656519241815, "grad_norm": 1.3297574520111084, "learning_rate": 9.793905031531299e-05, "loss": 1.6859, "step": 5010 }, { "epoch": 2.883400344629523, "grad_norm": 1.8638650178909302, "learning_rate": 9.792542920337573e-05, "loss": 1.6615, "step": 5020 }, { "epoch": 2.8891441700172313, "grad_norm": 1.5153930187225342, "learning_rate": 9.791176418118799e-05, "loss": 1.6556, "step": 5030 }, { "epoch": 2.8948879954049396, "grad_norm": 1.579380989074707, "learning_rate": 9.78980552612706e-05, "loss": 1.7046, "step": 5040 }, { "epoch": 2.900631820792648, "grad_norm": 1.3421663045883179, "learning_rate": 9.788430245618461e-05, "loss": 1.7131, "step": 5050 }, { "epoch": 2.906375646180356, "grad_norm": 1.5661914348602295, "learning_rate": 9.787050577853125e-05, "loss": 1.7261, "step": 5060 }, { "epoch": 2.9121194715680643, "grad_norm": 1.6113357543945312, "learning_rate": 9.7856665240952e-05, "loss": 1.6815, "step": 5070 }, { "epoch": 2.9178632969557725, "grad_norm": 1.7786363363265991, "learning_rate": 9.784278085612849e-05, "loss": 1.7075, "step": 5080 }, { "epoch": 2.9236071223434807, "grad_norm": 1.4313446283340454, "learning_rate": 9.782885263678255e-05, "loss": 1.6817, "step": 5090 }, { "epoch": 2.929350947731189, "grad_norm": 1.3281270265579224, "learning_rate": 9.781488059567617e-05, "loss": 1.7082, "step": 5100 }, { "epoch": 2.935094773118897, "grad_norm": 1.3985512256622314, "learning_rate": 9.780086474561148e-05, "loss": 1.6916, "step": 5110 }, { "epoch": 2.9408385985066055, "grad_norm": 1.5001497268676758, "learning_rate": 9.778680509943073e-05, "loss": 1.6988, "step": 5120 }, { "epoch": 2.9465824238943137, "grad_norm": 1.6843388080596924, "learning_rate": 9.777270167001635e-05, "loss": 1.6701, "step": 5130 }, { "epoch": 2.952326249282022, "grad_norm": 1.57426118850708, "learning_rate": 9.775855447029086e-05, "loss": 1.6859, "step": 5140 }, { "epoch": 2.95807007466973, "grad_norm": 1.5246561765670776, "learning_rate": 9.774436351321687e-05, "loss": 1.6775, "step": 5150 }, { "epoch": 2.9638139000574384, "grad_norm": 1.5458987951278687, "learning_rate": 9.773012881179713e-05, "loss": 1.6782, "step": 5160 }, { "epoch": 2.9695577254451466, "grad_norm": 1.5908442735671997, "learning_rate": 9.771585037907443e-05, "loss": 1.7048, "step": 5170 }, { "epoch": 2.975301550832855, "grad_norm": 1.4700584411621094, "learning_rate": 9.770152822813164e-05, "loss": 1.6844, "step": 5180 }, { "epoch": 2.981045376220563, "grad_norm": 1.525244116783142, "learning_rate": 9.768716237209167e-05, "loss": 1.6921, "step": 5190 }, { "epoch": 2.9867892016082713, "grad_norm": 2.005902051925659, "learning_rate": 9.767275282411755e-05, "loss": 1.6903, "step": 5200 }, { "epoch": 2.9925330269959796, "grad_norm": 1.7618675231933594, "learning_rate": 9.765829959741223e-05, "loss": 1.7073, "step": 5210 }, { "epoch": 2.998276852383688, "grad_norm": 1.535569429397583, "learning_rate": 9.764380270521875e-05, "loss": 1.6539, "step": 5220 }, { "epoch": 3.0040206777713956, "grad_norm": 1.5486606359481812, "learning_rate": 9.762926216082019e-05, "loss": 1.6324, "step": 5230 }, { "epoch": 3.009764503159104, "grad_norm": 1.6515790224075317, "learning_rate": 9.761467797753955e-05, "loss": 1.6459, "step": 5240 }, { "epoch": 3.015508328546812, "grad_norm": 1.4425338506698608, "learning_rate": 9.760005016873986e-05, "loss": 1.6255, "step": 5250 }, { "epoch": 3.0212521539345203, "grad_norm": 1.506661057472229, "learning_rate": 9.758537874782413e-05, "loss": 1.6324, "step": 5260 }, { "epoch": 3.0269959793222285, "grad_norm": 1.499016523361206, "learning_rate": 9.757066372823531e-05, "loss": 1.6335, "step": 5270 }, { "epoch": 3.0327398047099368, "grad_norm": 1.554861307144165, "learning_rate": 9.755590512345627e-05, "loss": 1.6821, "step": 5280 }, { "epoch": 3.038483630097645, "grad_norm": 1.625934362411499, "learning_rate": 9.754110294700989e-05, "loss": 1.6039, "step": 5290 }, { "epoch": 3.0442274554853532, "grad_norm": 1.4130136966705322, "learning_rate": 9.752625721245889e-05, "loss": 1.6067, "step": 5300 }, { "epoch": 3.0499712808730615, "grad_norm": 1.5291974544525146, "learning_rate": 9.751136793340599e-05, "loss": 1.6318, "step": 5310 }, { "epoch": 3.0557151062607697, "grad_norm": 1.4778972864151, "learning_rate": 9.749643512349373e-05, "loss": 1.6217, "step": 5320 }, { "epoch": 3.061458931648478, "grad_norm": 1.6106590032577515, "learning_rate": 9.748145879640458e-05, "loss": 1.6374, "step": 5330 }, { "epoch": 3.067202757036186, "grad_norm": 1.4123419523239136, "learning_rate": 9.746643896586086e-05, "loss": 1.637, "step": 5340 }, { "epoch": 3.0729465824238944, "grad_norm": 1.4393341541290283, "learning_rate": 9.745137564562478e-05, "loss": 1.6072, "step": 5350 }, { "epoch": 3.0786904078116026, "grad_norm": 1.5072475671768188, "learning_rate": 9.743626884949838e-05, "loss": 1.645, "step": 5360 }, { "epoch": 3.084434233199311, "grad_norm": 1.4783241748809814, "learning_rate": 9.742111859132349e-05, "loss": 1.6618, "step": 5370 }, { "epoch": 3.090178058587019, "grad_norm": 1.487516164779663, "learning_rate": 9.740592488498184e-05, "loss": 1.6572, "step": 5380 }, { "epoch": 3.0959218839747273, "grad_norm": 1.5002632141113281, "learning_rate": 9.739068774439495e-05, "loss": 1.6485, "step": 5390 }, { "epoch": 3.1016657093624356, "grad_norm": 1.6059160232543945, "learning_rate": 9.737540718352413e-05, "loss": 1.6368, "step": 5400 }, { "epoch": 3.107409534750144, "grad_norm": 1.4255726337432861, "learning_rate": 9.736008321637043e-05, "loss": 1.6145, "step": 5410 }, { "epoch": 3.1131533601378516, "grad_norm": 1.6063114404678345, "learning_rate": 9.734471585697475e-05, "loss": 1.6283, "step": 5420 }, { "epoch": 3.11889718552556, "grad_norm": 1.7552169561386108, "learning_rate": 9.732930511941769e-05, "loss": 1.6381, "step": 5430 }, { "epoch": 3.124641010913268, "grad_norm": 1.7182084321975708, "learning_rate": 9.731385101781962e-05, "loss": 1.6371, "step": 5440 }, { "epoch": 3.1303848363009763, "grad_norm": 1.5302674770355225, "learning_rate": 9.729835356634066e-05, "loss": 1.6204, "step": 5450 }, { "epoch": 3.1361286616886845, "grad_norm": 1.389792799949646, "learning_rate": 9.728281277918061e-05, "loss": 1.6038, "step": 5460 }, { "epoch": 3.1418724870763928, "grad_norm": 1.3397209644317627, "learning_rate": 9.726722867057899e-05, "loss": 1.6023, "step": 5470 }, { "epoch": 3.147616312464101, "grad_norm": 1.5047202110290527, "learning_rate": 9.725160125481504e-05, "loss": 1.6479, "step": 5480 }, { "epoch": 3.1533601378518092, "grad_norm": 1.6639615297317505, "learning_rate": 9.723593054620765e-05, "loss": 1.6422, "step": 5490 }, { "epoch": 3.1591039632395175, "grad_norm": 1.3791922330856323, "learning_rate": 9.72202165591154e-05, "loss": 1.651, "step": 5500 }, { "epoch": 3.1648477886272257, "grad_norm": 1.4571179151535034, "learning_rate": 9.720445930793652e-05, "loss": 1.5932, "step": 5510 }, { "epoch": 3.170591614014934, "grad_norm": 1.3366632461547852, "learning_rate": 9.718865880710886e-05, "loss": 1.6057, "step": 5520 }, { "epoch": 3.176335439402642, "grad_norm": 1.4677654504776, "learning_rate": 9.717281507110991e-05, "loss": 1.6433, "step": 5530 }, { "epoch": 3.1820792647903504, "grad_norm": 1.6696430444717407, "learning_rate": 9.71569281144568e-05, "loss": 1.6128, "step": 5540 }, { "epoch": 3.1878230901780586, "grad_norm": 1.5777435302734375, "learning_rate": 9.714099795170624e-05, "loss": 1.6409, "step": 5550 }, { "epoch": 3.193566915565767, "grad_norm": 1.5568597316741943, "learning_rate": 9.712502459745451e-05, "loss": 1.6638, "step": 5560 }, { "epoch": 3.199310740953475, "grad_norm": 1.3886417150497437, "learning_rate": 9.710900806633751e-05, "loss": 1.6086, "step": 5570 }, { "epoch": 3.2050545663411834, "grad_norm": 1.7217527627944946, "learning_rate": 9.709294837303066e-05, "loss": 1.6181, "step": 5580 }, { "epoch": 3.2107983917288916, "grad_norm": 2.0039329528808594, "learning_rate": 9.707684553224892e-05, "loss": 1.6036, "step": 5590 }, { "epoch": 3.2165422171166, "grad_norm": 1.3699754476547241, "learning_rate": 9.706069955874686e-05, "loss": 1.6169, "step": 5600 }, { "epoch": 3.222286042504308, "grad_norm": 1.5251069068908691, "learning_rate": 9.70445104673185e-05, "loss": 1.6619, "step": 5610 }, { "epoch": 3.2280298678920163, "grad_norm": 1.337915301322937, "learning_rate": 9.702827827279738e-05, "loss": 1.607, "step": 5620 }, { "epoch": 3.2337736932797245, "grad_norm": 1.662143349647522, "learning_rate": 9.701200299005654e-05, "loss": 1.6182, "step": 5630 }, { "epoch": 3.2395175186674323, "grad_norm": 1.390687108039856, "learning_rate": 9.69956846340085e-05, "loss": 1.6406, "step": 5640 }, { "epoch": 3.2452613440551406, "grad_norm": 1.4389570951461792, "learning_rate": 9.697932321960529e-05, "loss": 1.6215, "step": 5650 }, { "epoch": 3.251005169442849, "grad_norm": 1.3212333917617798, "learning_rate": 9.696291876183828e-05, "loss": 1.5844, "step": 5660 }, { "epoch": 3.256748994830557, "grad_norm": 1.5739758014678955, "learning_rate": 9.694647127573842e-05, "loss": 1.6483, "step": 5670 }, { "epoch": 3.2624928202182653, "grad_norm": 1.3461438417434692, "learning_rate": 9.692998077637597e-05, "loss": 1.6015, "step": 5680 }, { "epoch": 3.2682366456059735, "grad_norm": 1.708883285522461, "learning_rate": 9.691344727886066e-05, "loss": 1.5859, "step": 5690 }, { "epoch": 3.2739804709936817, "grad_norm": 1.4918363094329834, "learning_rate": 9.689687079834163e-05, "loss": 1.6333, "step": 5700 }, { "epoch": 3.27972429638139, "grad_norm": 1.4630522727966309, "learning_rate": 9.688025135000734e-05, "loss": 1.6456, "step": 5710 }, { "epoch": 3.285468121769098, "grad_norm": 1.9295337200164795, "learning_rate": 9.686358894908569e-05, "loss": 1.6083, "step": 5720 }, { "epoch": 3.2912119471568064, "grad_norm": 1.5182411670684814, "learning_rate": 9.684688361084389e-05, "loss": 1.6384, "step": 5730 }, { "epoch": 3.2969557725445147, "grad_norm": 1.3987635374069214, "learning_rate": 9.683013535058853e-05, "loss": 1.617, "step": 5740 }, { "epoch": 3.302699597932223, "grad_norm": 1.5364372730255127, "learning_rate": 9.681334418366548e-05, "loss": 1.5873, "step": 5750 }, { "epoch": 3.308443423319931, "grad_norm": 1.5112583637237549, "learning_rate": 9.679651012545997e-05, "loss": 1.6274, "step": 5760 }, { "epoch": 3.3141872487076394, "grad_norm": 1.4777523279190063, "learning_rate": 9.677963319139651e-05, "loss": 1.5907, "step": 5770 }, { "epoch": 3.3199310740953476, "grad_norm": 1.6601784229278564, "learning_rate": 9.676271339693886e-05, "loss": 1.5859, "step": 5780 }, { "epoch": 3.325674899483056, "grad_norm": 1.6540298461914062, "learning_rate": 9.674575075759014e-05, "loss": 1.6485, "step": 5790 }, { "epoch": 3.331418724870764, "grad_norm": 1.3588889837265015, "learning_rate": 9.672874528889263e-05, "loss": 1.6117, "step": 5800 }, { "epoch": 3.3371625502584723, "grad_norm": 1.6457010507583618, "learning_rate": 9.671169700642793e-05, "loss": 1.6357, "step": 5810 }, { "epoch": 3.3429063756461805, "grad_norm": 1.555299162864685, "learning_rate": 9.669460592581684e-05, "loss": 1.6322, "step": 5820 }, { "epoch": 3.3486502010338883, "grad_norm": 1.6192684173583984, "learning_rate": 9.667747206271933e-05, "loss": 1.5684, "step": 5830 }, { "epoch": 3.3543940264215966, "grad_norm": 1.7383484840393066, "learning_rate": 9.666029543283466e-05, "loss": 1.6093, "step": 5840 }, { "epoch": 3.360137851809305, "grad_norm": 1.3070027828216553, "learning_rate": 9.664307605190119e-05, "loss": 1.6204, "step": 5850 }, { "epoch": 3.365881677197013, "grad_norm": 1.4149906635284424, "learning_rate": 9.662581393569653e-05, "loss": 1.6233, "step": 5860 }, { "epoch": 3.3716255025847213, "grad_norm": 1.4312522411346436, "learning_rate": 9.660850910003736e-05, "loss": 1.5876, "step": 5870 }, { "epoch": 3.3773693279724295, "grad_norm": 1.4669331312179565, "learning_rate": 9.659116156077959e-05, "loss": 1.6019, "step": 5880 }, { "epoch": 3.3831131533601377, "grad_norm": 1.3660081624984741, "learning_rate": 9.657377133381819e-05, "loss": 1.5871, "step": 5890 }, { "epoch": 3.388856978747846, "grad_norm": 1.332747459411621, "learning_rate": 9.655633843508728e-05, "loss": 1.5998, "step": 5900 }, { "epoch": 3.394600804135554, "grad_norm": 1.8047428131103516, "learning_rate": 9.653886288056006e-05, "loss": 1.6318, "step": 5910 }, { "epoch": 3.4003446295232624, "grad_norm": 1.4865713119506836, "learning_rate": 9.652134468624882e-05, "loss": 1.6272, "step": 5920 }, { "epoch": 3.4060884549109707, "grad_norm": 1.5586755275726318, "learning_rate": 9.650378386820496e-05, "loss": 1.5735, "step": 5930 }, { "epoch": 3.411832280298679, "grad_norm": 1.378774881362915, "learning_rate": 9.648618044251883e-05, "loss": 1.6079, "step": 5940 }, { "epoch": 3.417576105686387, "grad_norm": 1.3385347127914429, "learning_rate": 9.646853442531996e-05, "loss": 1.5863, "step": 5950 }, { "epoch": 3.4233199310740954, "grad_norm": 1.5953559875488281, "learning_rate": 9.645084583277678e-05, "loss": 1.5968, "step": 5960 }, { "epoch": 3.4290637564618036, "grad_norm": 1.533825397491455, "learning_rate": 9.643311468109682e-05, "loss": 1.6139, "step": 5970 }, { "epoch": 3.434807581849512, "grad_norm": 1.7705518007278442, "learning_rate": 9.641534098652652e-05, "loss": 1.6357, "step": 5980 }, { "epoch": 3.44055140723722, "grad_norm": 1.2516279220581055, "learning_rate": 9.63975247653514e-05, "loss": 1.5709, "step": 5990 }, { "epoch": 3.4462952326249283, "grad_norm": 1.3904739618301392, "learning_rate": 9.637966603389588e-05, "loss": 1.5822, "step": 6000 }, { "epoch": 3.4462952326249283, "eval_loss": 1.2109429836273193, "eval_runtime": 119.8261, "eval_samples_per_second": 13.278, "eval_steps_per_second": 0.142, "eval_wer": 0.11165103401514295, "step": 6000 }, { "epoch": 3.4520390580126366, "grad_norm": 1.3731943368911743, "learning_rate": 9.636176480852331e-05, "loss": 1.6092, "step": 6010 }, { "epoch": 3.457782883400345, "grad_norm": 1.4693683385849, "learning_rate": 9.634382110563606e-05, "loss": 1.5977, "step": 6020 }, { "epoch": 3.463526708788053, "grad_norm": 1.6020904779434204, "learning_rate": 9.632583494167535e-05, "loss": 1.585, "step": 6030 }, { "epoch": 3.4692705341757613, "grad_norm": 1.3191226720809937, "learning_rate": 9.630780633312134e-05, "loss": 1.6078, "step": 6040 }, { "epoch": 3.4750143595634695, "grad_norm": 1.4400807619094849, "learning_rate": 9.628973529649304e-05, "loss": 1.5996, "step": 6050 }, { "epoch": 3.4807581849511777, "grad_norm": 1.3734054565429688, "learning_rate": 9.627162184834841e-05, "loss": 1.6015, "step": 6060 }, { "epoch": 3.4865020103388855, "grad_norm": 1.4475988149642944, "learning_rate": 9.625346600528417e-05, "loss": 1.5986, "step": 6070 }, { "epoch": 3.4922458357265938, "grad_norm": 1.3220233917236328, "learning_rate": 9.623526778393597e-05, "loss": 1.5943, "step": 6080 }, { "epoch": 3.497989661114302, "grad_norm": 1.53400719165802, "learning_rate": 9.621702720097828e-05, "loss": 1.6039, "step": 6090 }, { "epoch": 3.50373348650201, "grad_norm": 1.5094637870788574, "learning_rate": 9.619874427312432e-05, "loss": 1.6181, "step": 6100 }, { "epoch": 3.5094773118897185, "grad_norm": 1.5519062280654907, "learning_rate": 9.618041901712616e-05, "loss": 1.5799, "step": 6110 }, { "epoch": 3.5152211372774267, "grad_norm": 1.5281223058700562, "learning_rate": 9.616205144977469e-05, "loss": 1.5769, "step": 6120 }, { "epoch": 3.520964962665135, "grad_norm": 1.6994590759277344, "learning_rate": 9.614364158789948e-05, "loss": 1.5891, "step": 6130 }, { "epoch": 3.526708788052843, "grad_norm": 1.571830153465271, "learning_rate": 9.612518944836892e-05, "loss": 1.5847, "step": 6140 }, { "epoch": 3.5324526134405514, "grad_norm": 1.4372785091400146, "learning_rate": 9.610669504809012e-05, "loss": 1.5947, "step": 6150 }, { "epoch": 3.5381964388282596, "grad_norm": 1.4658076763153076, "learning_rate": 9.608815840400888e-05, "loss": 1.6079, "step": 6160 }, { "epoch": 3.543940264215968, "grad_norm": 1.314266562461853, "learning_rate": 9.606957953310978e-05, "loss": 1.5642, "step": 6170 }, { "epoch": 3.549684089603676, "grad_norm": 1.5588135719299316, "learning_rate": 9.6050958452416e-05, "loss": 1.6001, "step": 6180 }, { "epoch": 3.5554279149913843, "grad_norm": 1.5007277727127075, "learning_rate": 9.603229517898948e-05, "loss": 1.6064, "step": 6190 }, { "epoch": 3.5611717403790926, "grad_norm": 1.3470954895019531, "learning_rate": 9.601358972993077e-05, "loss": 1.5815, "step": 6200 }, { "epoch": 3.566915565766801, "grad_norm": 1.5417461395263672, "learning_rate": 9.599484212237906e-05, "loss": 1.6023, "step": 6210 }, { "epoch": 3.572659391154509, "grad_norm": 1.2810810804367065, "learning_rate": 9.59760523735122e-05, "loss": 1.5672, "step": 6220 }, { "epoch": 3.5784032165422173, "grad_norm": 7.572571754455566, "learning_rate": 9.595722050054663e-05, "loss": 1.5986, "step": 6230 }, { "epoch": 3.584147041929925, "grad_norm": 1.4589245319366455, "learning_rate": 9.593834652073741e-05, "loss": 1.6105, "step": 6240 }, { "epoch": 3.5898908673176333, "grad_norm": 1.3294093608856201, "learning_rate": 9.591943045137813e-05, "loss": 1.6008, "step": 6250 }, { "epoch": 3.5956346927053415, "grad_norm": 1.3746421337127686, "learning_rate": 9.590047230980104e-05, "loss": 1.5593, "step": 6260 }, { "epoch": 3.6013785180930498, "grad_norm": 1.3553591966629028, "learning_rate": 9.588147211337681e-05, "loss": 1.588, "step": 6270 }, { "epoch": 3.607122343480758, "grad_norm": 1.6245908737182617, "learning_rate": 9.586242987951475e-05, "loss": 1.587, "step": 6280 }, { "epoch": 3.6128661688684662, "grad_norm": 1.3594012260437012, "learning_rate": 9.584334562566268e-05, "loss": 1.5621, "step": 6290 }, { "epoch": 3.6186099942561745, "grad_norm": 1.5187963247299194, "learning_rate": 9.582421936930683e-05, "loss": 1.6121, "step": 6300 }, { "epoch": 3.6243538196438827, "grad_norm": 1.6585781574249268, "learning_rate": 9.580505112797201e-05, "loss": 1.603, "step": 6310 }, { "epoch": 3.630097645031591, "grad_norm": 1.6604756116867065, "learning_rate": 9.57858409192215e-05, "loss": 1.6096, "step": 6320 }, { "epoch": 3.635841470419299, "grad_norm": 1.3212215900421143, "learning_rate": 9.576658876065693e-05, "loss": 1.5692, "step": 6330 }, { "epoch": 3.6415852958070074, "grad_norm": 1.4517269134521484, "learning_rate": 9.574729466991849e-05, "loss": 1.5892, "step": 6340 }, { "epoch": 3.6473291211947156, "grad_norm": 1.3805903196334839, "learning_rate": 9.572795866468472e-05, "loss": 1.5828, "step": 6350 }, { "epoch": 3.653072946582424, "grad_norm": 1.3964784145355225, "learning_rate": 9.57085807626726e-05, "loss": 1.576, "step": 6360 }, { "epoch": 3.658816771970132, "grad_norm": 1.378891110420227, "learning_rate": 9.568916098163747e-05, "loss": 1.6137, "step": 6370 }, { "epoch": 3.6645605973578403, "grad_norm": 1.4654159545898438, "learning_rate": 9.566969933937305e-05, "loss": 1.6009, "step": 6380 }, { "epoch": 3.6703044227455486, "grad_norm": 1.304354190826416, "learning_rate": 9.565019585371144e-05, "loss": 1.5436, "step": 6390 }, { "epoch": 3.676048248133257, "grad_norm": 1.2430413961410522, "learning_rate": 9.563065054252307e-05, "loss": 1.5791, "step": 6400 }, { "epoch": 3.681792073520965, "grad_norm": 1.4539940357208252, "learning_rate": 9.561106342371665e-05, "loss": 1.5863, "step": 6410 }, { "epoch": 3.6875358989086733, "grad_norm": 1.5169475078582764, "learning_rate": 9.559143451523926e-05, "loss": 1.6033, "step": 6420 }, { "epoch": 3.6932797242963815, "grad_norm": 1.4534494876861572, "learning_rate": 9.55717638350762e-05, "loss": 1.5799, "step": 6430 }, { "epoch": 3.6990235496840898, "grad_norm": 1.5747262239456177, "learning_rate": 9.555205140125116e-05, "loss": 1.5943, "step": 6440 }, { "epoch": 3.704767375071798, "grad_norm": 1.3716018199920654, "learning_rate": 9.553229723182594e-05, "loss": 1.6031, "step": 6450 }, { "epoch": 3.710511200459506, "grad_norm": 1.4669586420059204, "learning_rate": 9.55125013449007e-05, "loss": 1.5593, "step": 6460 }, { "epoch": 3.7162550258472145, "grad_norm": 1.4438790082931519, "learning_rate": 9.549266375861376e-05, "loss": 1.586, "step": 6470 }, { "epoch": 3.7219988512349227, "grad_norm": 1.3656120300292969, "learning_rate": 9.547278449114168e-05, "loss": 1.6233, "step": 6480 }, { "epoch": 3.727742676622631, "grad_norm": 1.6877834796905518, "learning_rate": 9.545286356069919e-05, "loss": 1.6324, "step": 6490 }, { "epoch": 3.733486502010339, "grad_norm": 1.4297900199890137, "learning_rate": 9.543290098553919e-05, "loss": 1.5998, "step": 6500 }, { "epoch": 3.739230327398047, "grad_norm": 1.3245452642440796, "learning_rate": 9.541289678395279e-05, "loss": 1.5842, "step": 6510 }, { "epoch": 3.744974152785755, "grad_norm": 1.408369541168213, "learning_rate": 9.539285097426917e-05, "loss": 1.5746, "step": 6520 }, { "epoch": 3.7507179781734634, "grad_norm": 1.5135191679000854, "learning_rate": 9.537276357485566e-05, "loss": 1.5814, "step": 6530 }, { "epoch": 3.7564618035611717, "grad_norm": 1.4139986038208008, "learning_rate": 9.535263460411771e-05, "loss": 1.5794, "step": 6540 }, { "epoch": 3.76220562894888, "grad_norm": 1.3048148155212402, "learning_rate": 9.533246408049887e-05, "loss": 1.6052, "step": 6550 }, { "epoch": 3.767949454336588, "grad_norm": 1.447126030921936, "learning_rate": 9.531225202248074e-05, "loss": 1.5797, "step": 6560 }, { "epoch": 3.7736932797242964, "grad_norm": 1.2924034595489502, "learning_rate": 9.529199844858297e-05, "loss": 1.6002, "step": 6570 }, { "epoch": 3.7794371051120046, "grad_norm": 1.3542078733444214, "learning_rate": 9.527170337736329e-05, "loss": 1.608, "step": 6580 }, { "epoch": 3.785180930499713, "grad_norm": 2.0147647857666016, "learning_rate": 9.525136682741739e-05, "loss": 1.5571, "step": 6590 }, { "epoch": 3.790924755887421, "grad_norm": 1.417090654373169, "learning_rate": 9.523098881737902e-05, "loss": 1.5959, "step": 6600 }, { "epoch": 3.7966685812751293, "grad_norm": 1.436158299446106, "learning_rate": 9.521056936591991e-05, "loss": 1.5826, "step": 6610 }, { "epoch": 3.8024124066628375, "grad_norm": 1.550160527229309, "learning_rate": 9.519010849174972e-05, "loss": 1.6176, "step": 6620 }, { "epoch": 3.8081562320505458, "grad_norm": 1.4246094226837158, "learning_rate": 9.516960621361614e-05, "loss": 1.5892, "step": 6630 }, { "epoch": 3.813900057438254, "grad_norm": 1.5779341459274292, "learning_rate": 9.514906255030472e-05, "loss": 1.5879, "step": 6640 }, { "epoch": 3.8196438828259622, "grad_norm": 1.5506840944290161, "learning_rate": 9.512847752063897e-05, "loss": 1.5975, "step": 6650 }, { "epoch": 3.8253877082136705, "grad_norm": 1.2425625324249268, "learning_rate": 9.51078511434803e-05, "loss": 1.5948, "step": 6660 }, { "epoch": 3.8311315336013783, "grad_norm": 2.2475855350494385, "learning_rate": 9.508718343772803e-05, "loss": 1.5536, "step": 6670 }, { "epoch": 3.8368753589890865, "grad_norm": 1.6249672174453735, "learning_rate": 9.506647442231926e-05, "loss": 1.5815, "step": 6680 }, { "epoch": 3.8426191843767947, "grad_norm": 1.2605689764022827, "learning_rate": 9.504572411622902e-05, "loss": 1.5689, "step": 6690 }, { "epoch": 3.848363009764503, "grad_norm": 1.3918836116790771, "learning_rate": 9.502493253847021e-05, "loss": 1.5606, "step": 6700 }, { "epoch": 3.854106835152211, "grad_norm": 1.6872825622558594, "learning_rate": 9.500409970809339e-05, "loss": 1.6169, "step": 6710 }, { "epoch": 3.8598506605399194, "grad_norm": 1.3307108879089355, "learning_rate": 9.498322564418709e-05, "loss": 1.5665, "step": 6720 }, { "epoch": 3.8655944859276277, "grad_norm": 1.5549707412719727, "learning_rate": 9.496231036587753e-05, "loss": 1.6051, "step": 6730 }, { "epoch": 3.871338311315336, "grad_norm": 1.525460124015808, "learning_rate": 9.49413538923287e-05, "loss": 1.5646, "step": 6740 }, { "epoch": 3.877082136703044, "grad_norm": 1.3676981925964355, "learning_rate": 9.492035624274237e-05, "loss": 1.5807, "step": 6750 }, { "epoch": 3.8828259620907524, "grad_norm": 1.2008260488510132, "learning_rate": 9.4899317436358e-05, "loss": 1.5683, "step": 6760 }, { "epoch": 3.8885697874784606, "grad_norm": 1.322078824043274, "learning_rate": 9.487823749245278e-05, "loss": 1.6136, "step": 6770 }, { "epoch": 3.894313612866169, "grad_norm": 1.2905033826828003, "learning_rate": 9.485711643034158e-05, "loss": 1.5861, "step": 6780 }, { "epoch": 3.900057438253877, "grad_norm": 1.4273862838745117, "learning_rate": 9.483595426937697e-05, "loss": 1.5874, "step": 6790 }, { "epoch": 3.9058012636415853, "grad_norm": 1.2639853954315186, "learning_rate": 9.481475102894917e-05, "loss": 1.5889, "step": 6800 }, { "epoch": 3.9115450890292935, "grad_norm": 1.4719356298446655, "learning_rate": 9.479350672848602e-05, "loss": 1.5176, "step": 6810 }, { "epoch": 3.9172889144170018, "grad_norm": 1.5981605052947998, "learning_rate": 9.477222138745297e-05, "loss": 1.5696, "step": 6820 }, { "epoch": 3.92303273980471, "grad_norm": 1.9092198610305786, "learning_rate": 9.475089502535315e-05, "loss": 1.5699, "step": 6830 }, { "epoch": 3.9287765651924182, "grad_norm": 1.3135465383529663, "learning_rate": 9.472952766172719e-05, "loss": 1.5435, "step": 6840 }, { "epoch": 3.9345203905801265, "grad_norm": 1.4641355276107788, "learning_rate": 9.470811931615334e-05, "loss": 1.5975, "step": 6850 }, { "epoch": 3.9402642159678347, "grad_norm": 1.576891303062439, "learning_rate": 9.468667000824736e-05, "loss": 1.5788, "step": 6860 }, { "epoch": 3.946008041355543, "grad_norm": 1.4915142059326172, "learning_rate": 9.466517975766259e-05, "loss": 1.592, "step": 6870 }, { "epoch": 3.951751866743251, "grad_norm": 1.2781870365142822, "learning_rate": 9.464364858408985e-05, "loss": 1.5932, "step": 6880 }, { "epoch": 3.9574956921309594, "grad_norm": 1.2722479104995728, "learning_rate": 9.462207650725748e-05, "loss": 1.5473, "step": 6890 }, { "epoch": 3.9632395175186677, "grad_norm": 1.3138319253921509, "learning_rate": 9.460046354693126e-05, "loss": 1.5787, "step": 6900 }, { "epoch": 3.968983342906376, "grad_norm": 1.5277183055877686, "learning_rate": 9.457880972291448e-05, "loss": 1.5758, "step": 6910 }, { "epoch": 3.974727168294084, "grad_norm": 1.7289197444915771, "learning_rate": 9.455711505504784e-05, "loss": 1.5825, "step": 6920 }, { "epoch": 3.980470993681792, "grad_norm": 1.3524231910705566, "learning_rate": 9.453537956320948e-05, "loss": 1.6016, "step": 6930 }, { "epoch": 3.9862148190695, "grad_norm": 1.3868252038955688, "learning_rate": 9.45136032673149e-05, "loss": 1.5901, "step": 6940 }, { "epoch": 3.9919586444572084, "grad_norm": 1.3178081512451172, "learning_rate": 9.449178618731707e-05, "loss": 1.5391, "step": 6950 }, { "epoch": 3.9977024698449166, "grad_norm": 1.525253176689148, "learning_rate": 9.446992834320627e-05, "loss": 1.5875, "step": 6960 }, { "epoch": 4.003446295232625, "grad_norm": 1.317173719406128, "learning_rate": 9.444802975501014e-05, "loss": 1.5807, "step": 6970 }, { "epoch": 4.0091901206203335, "grad_norm": 1.2668205499649048, "learning_rate": 9.442609044279364e-05, "loss": 1.5087, "step": 6980 }, { "epoch": 4.014933946008042, "grad_norm": 1.7844841480255127, "learning_rate": 9.440411042665911e-05, "loss": 1.5486, "step": 6990 }, { "epoch": 4.02067777139575, "grad_norm": 1.4545730352401733, "learning_rate": 9.438208972674609e-05, "loss": 1.546, "step": 7000 }, { "epoch": 4.02067777139575, "eval_loss": 1.1895684003829956, "eval_runtime": 123.2458, "eval_samples_per_second": 12.909, "eval_steps_per_second": 0.138, "eval_wer": 0.1069047349983049, "step": 7000 }, { "epoch": 4.026421596783457, "grad_norm": 1.2099732160568237, "learning_rate": 9.436002836323147e-05, "loss": 1.5493, "step": 7010 }, { "epoch": 4.032165422171166, "grad_norm": 1.3350284099578857, "learning_rate": 9.433792635632935e-05, "loss": 1.522, "step": 7020 }, { "epoch": 4.037909247558874, "grad_norm": 1.2830753326416016, "learning_rate": 9.431578372629113e-05, "loss": 1.5382, "step": 7030 }, { "epoch": 4.043653072946582, "grad_norm": 1.2033826112747192, "learning_rate": 9.429360049340538e-05, "loss": 1.5319, "step": 7040 }, { "epoch": 4.04939689833429, "grad_norm": 1.129206657409668, "learning_rate": 9.427137667799785e-05, "loss": 1.5623, "step": 7050 }, { "epoch": 4.0551407237219985, "grad_norm": 1.4639811515808105, "learning_rate": 9.424911230043157e-05, "loss": 1.5216, "step": 7060 }, { "epoch": 4.060884549109707, "grad_norm": 1.346333622932434, "learning_rate": 9.422680738110665e-05, "loss": 1.5519, "step": 7070 }, { "epoch": 4.066628374497415, "grad_norm": 1.3707916736602783, "learning_rate": 9.420446194046039e-05, "loss": 1.5248, "step": 7080 }, { "epoch": 4.072372199885123, "grad_norm": 1.2147454023361206, "learning_rate": 9.418207599896718e-05, "loss": 1.5058, "step": 7090 }, { "epoch": 4.0781160252728315, "grad_norm": 1.6157779693603516, "learning_rate": 9.415964957713857e-05, "loss": 1.5309, "step": 7100 }, { "epoch": 4.08385985066054, "grad_norm": 1.2559071779251099, "learning_rate": 9.413718269552314e-05, "loss": 1.5221, "step": 7110 }, { "epoch": 4.089603676048248, "grad_norm": 1.376235008239746, "learning_rate": 9.41146753747066e-05, "loss": 1.5216, "step": 7120 }, { "epoch": 4.095347501435956, "grad_norm": 1.3287769556045532, "learning_rate": 9.409212763531171e-05, "loss": 1.5425, "step": 7130 }, { "epoch": 4.101091326823664, "grad_norm": 1.2312625646591187, "learning_rate": 9.406953949799822e-05, "loss": 1.5201, "step": 7140 }, { "epoch": 4.106835152211373, "grad_norm": 1.385913372039795, "learning_rate": 9.40469109834629e-05, "loss": 1.527, "step": 7150 }, { "epoch": 4.112578977599081, "grad_norm": 1.3541021347045898, "learning_rate": 9.402424211243957e-05, "loss": 1.4973, "step": 7160 }, { "epoch": 4.118322802986789, "grad_norm": 1.348053216934204, "learning_rate": 9.400153290569899e-05, "loss": 1.5445, "step": 7170 }, { "epoch": 4.124066628374497, "grad_norm": 1.4380687475204468, "learning_rate": 9.397878338404885e-05, "loss": 1.5196, "step": 7180 }, { "epoch": 4.129810453762206, "grad_norm": 1.166715383529663, "learning_rate": 9.395599356833385e-05, "loss": 1.5304, "step": 7190 }, { "epoch": 4.135554279149914, "grad_norm": 1.2858169078826904, "learning_rate": 9.393316347943555e-05, "loss": 1.5401, "step": 7200 }, { "epoch": 4.141298104537622, "grad_norm": 1.3754222393035889, "learning_rate": 9.391029313827242e-05, "loss": 1.5442, "step": 7210 }, { "epoch": 4.14704192992533, "grad_norm": 1.3915207386016846, "learning_rate": 9.388738256579986e-05, "loss": 1.5413, "step": 7220 }, { "epoch": 4.1527857553130385, "grad_norm": 1.2301234006881714, "learning_rate": 9.386443178301006e-05, "loss": 1.5226, "step": 7230 }, { "epoch": 4.158529580700747, "grad_norm": 1.268314242362976, "learning_rate": 9.38414408109321e-05, "loss": 1.5081, "step": 7240 }, { "epoch": 4.164273406088455, "grad_norm": 1.568034052848816, "learning_rate": 9.381840967063189e-05, "loss": 1.5311, "step": 7250 }, { "epoch": 4.170017231476163, "grad_norm": 1.3623104095458984, "learning_rate": 9.379533838321212e-05, "loss": 1.5217, "step": 7260 }, { "epoch": 4.1757610568638714, "grad_norm": 1.3613442182540894, "learning_rate": 9.377222696981227e-05, "loss": 1.5679, "step": 7270 }, { "epoch": 4.18150488225158, "grad_norm": 1.4029641151428223, "learning_rate": 9.374907545160858e-05, "loss": 1.5287, "step": 7280 }, { "epoch": 4.187248707639288, "grad_norm": 1.2211167812347412, "learning_rate": 9.372588384981407e-05, "loss": 1.5474, "step": 7290 }, { "epoch": 4.192992533026996, "grad_norm": 1.4599618911743164, "learning_rate": 9.370265218567845e-05, "loss": 1.5341, "step": 7300 }, { "epoch": 4.198736358414704, "grad_norm": 1.3315317630767822, "learning_rate": 9.367938048048815e-05, "loss": 1.5302, "step": 7310 }, { "epoch": 4.204480183802413, "grad_norm": 1.2903155088424683, "learning_rate": 9.365606875556629e-05, "loss": 1.5303, "step": 7320 }, { "epoch": 4.210224009190121, "grad_norm": 1.3767284154891968, "learning_rate": 9.363271703227268e-05, "loss": 1.5334, "step": 7330 }, { "epoch": 4.215967834577829, "grad_norm": 1.417516827583313, "learning_rate": 9.360932533200375e-05, "loss": 1.5306, "step": 7340 }, { "epoch": 4.221711659965537, "grad_norm": 1.3683193922042847, "learning_rate": 9.358589367619254e-05, "loss": 1.5377, "step": 7350 }, { "epoch": 4.227455485353246, "grad_norm": 1.2844783067703247, "learning_rate": 9.356242208630877e-05, "loss": 1.5137, "step": 7360 }, { "epoch": 4.233199310740954, "grad_norm": 1.2717255353927612, "learning_rate": 9.35389105838587e-05, "loss": 1.544, "step": 7370 }, { "epoch": 4.238943136128662, "grad_norm": 1.327446460723877, "learning_rate": 9.351535919038515e-05, "loss": 1.532, "step": 7380 }, { "epoch": 4.24468696151637, "grad_norm": 1.3114255666732788, "learning_rate": 9.349176792746752e-05, "loss": 1.5064, "step": 7390 }, { "epoch": 4.2504307869040785, "grad_norm": 1.3684145212173462, "learning_rate": 9.346813681672172e-05, "loss": 1.5194, "step": 7400 }, { "epoch": 4.256174612291787, "grad_norm": 1.2469003200531006, "learning_rate": 9.34444658798002e-05, "loss": 1.5076, "step": 7410 }, { "epoch": 4.261918437679494, "grad_norm": 1.403851866722107, "learning_rate": 9.342075513839188e-05, "loss": 1.5186, "step": 7420 }, { "epoch": 4.267662263067203, "grad_norm": 1.2553436756134033, "learning_rate": 9.339700461422216e-05, "loss": 1.523, "step": 7430 }, { "epoch": 4.2734060884549105, "grad_norm": 1.3078651428222656, "learning_rate": 9.337321432905287e-05, "loss": 1.548, "step": 7440 }, { "epoch": 4.279149913842619, "grad_norm": 1.4701987504959106, "learning_rate": 9.33493843046823e-05, "loss": 1.529, "step": 7450 }, { "epoch": 4.284893739230327, "grad_norm": 1.34120774269104, "learning_rate": 9.332551456294516e-05, "loss": 1.5429, "step": 7460 }, { "epoch": 4.290637564618035, "grad_norm": 1.677347183227539, "learning_rate": 9.330160512571248e-05, "loss": 1.5212, "step": 7470 }, { "epoch": 4.2963813900057435, "grad_norm": 1.3228180408477783, "learning_rate": 9.327765601489175e-05, "loss": 1.5764, "step": 7480 }, { "epoch": 4.302125215393452, "grad_norm": 1.332287311553955, "learning_rate": 9.325366725242678e-05, "loss": 1.5116, "step": 7490 }, { "epoch": 4.30786904078116, "grad_norm": 1.3975943326950073, "learning_rate": 9.322963886029772e-05, "loss": 1.5421, "step": 7500 }, { "epoch": 4.313612866168868, "grad_norm": 1.1974446773529053, "learning_rate": 9.320557086052099e-05, "loss": 1.5279, "step": 7510 }, { "epoch": 4.319356691556576, "grad_norm": 1.3052939176559448, "learning_rate": 9.318146327514932e-05, "loss": 1.4998, "step": 7520 }, { "epoch": 4.325100516944285, "grad_norm": 1.2235811948776245, "learning_rate": 9.315731612627174e-05, "loss": 1.5499, "step": 7530 }, { "epoch": 4.330844342331993, "grad_norm": 1.4126347303390503, "learning_rate": 9.313312943601352e-05, "loss": 1.4997, "step": 7540 }, { "epoch": 4.336588167719701, "grad_norm": 1.3158483505249023, "learning_rate": 9.310890322653616e-05, "loss": 1.5437, "step": 7550 }, { "epoch": 4.342331993107409, "grad_norm": 1.573512315750122, "learning_rate": 9.308463752003732e-05, "loss": 1.5319, "step": 7560 }, { "epoch": 4.348075818495118, "grad_norm": 1.3274582624435425, "learning_rate": 9.306033233875094e-05, "loss": 1.4994, "step": 7570 }, { "epoch": 4.353819643882826, "grad_norm": 1.417730689048767, "learning_rate": 9.303598770494705e-05, "loss": 1.4918, "step": 7580 }, { "epoch": 4.359563469270534, "grad_norm": 1.3254122734069824, "learning_rate": 9.301160364093187e-05, "loss": 1.5668, "step": 7590 }, { "epoch": 4.365307294658242, "grad_norm": 1.3042727708816528, "learning_rate": 9.298718016904775e-05, "loss": 1.5268, "step": 7600 }, { "epoch": 4.3710511200459505, "grad_norm": 1.2062476873397827, "learning_rate": 9.296271731167314e-05, "loss": 1.4985, "step": 7610 }, { "epoch": 4.376794945433659, "grad_norm": 1.216174840927124, "learning_rate": 9.293821509122254e-05, "loss": 1.515, "step": 7620 }, { "epoch": 4.382538770821367, "grad_norm": 1.1550283432006836, "learning_rate": 9.291367353014658e-05, "loss": 1.5585, "step": 7630 }, { "epoch": 4.388282596209075, "grad_norm": 1.28323495388031, "learning_rate": 9.288909265093191e-05, "loss": 1.5431, "step": 7640 }, { "epoch": 4.3940264215967835, "grad_norm": 1.310599684715271, "learning_rate": 9.286447247610121e-05, "loss": 1.5384, "step": 7650 }, { "epoch": 4.399770246984492, "grad_norm": 1.2401442527770996, "learning_rate": 9.283981302821312e-05, "loss": 1.5259, "step": 7660 }, { "epoch": 4.4055140723722, "grad_norm": 1.293512225151062, "learning_rate": 9.281511432986239e-05, "loss": 1.5502, "step": 7670 }, { "epoch": 4.411257897759908, "grad_norm": 1.2158663272857666, "learning_rate": 9.279037640367956e-05, "loss": 1.5419, "step": 7680 }, { "epoch": 4.417001723147616, "grad_norm": 2.153297185897827, "learning_rate": 9.276559927233125e-05, "loss": 1.5365, "step": 7690 }, { "epoch": 4.422745548535325, "grad_norm": 1.2500333786010742, "learning_rate": 9.274078295851993e-05, "loss": 1.5219, "step": 7700 }, { "epoch": 4.428489373923033, "grad_norm": 1.3051958084106445, "learning_rate": 9.271592748498403e-05, "loss": 1.5227, "step": 7710 }, { "epoch": 4.434233199310741, "grad_norm": 1.350527048110962, "learning_rate": 9.269103287449779e-05, "loss": 1.4952, "step": 7720 }, { "epoch": 4.439977024698449, "grad_norm": 1.335240364074707, "learning_rate": 9.266609914987136e-05, "loss": 1.5151, "step": 7730 }, { "epoch": 4.445720850086158, "grad_norm": 1.2751095294952393, "learning_rate": 9.264112633395073e-05, "loss": 1.4958, "step": 7740 }, { "epoch": 4.451464675473866, "grad_norm": 1.4575210809707642, "learning_rate": 9.261611444961768e-05, "loss": 1.5303, "step": 7750 }, { "epoch": 4.457208500861574, "grad_norm": 1.7593839168548584, "learning_rate": 9.25910635197898e-05, "loss": 1.5125, "step": 7760 }, { "epoch": 4.462952326249282, "grad_norm": 1.355039119720459, "learning_rate": 9.256597356742047e-05, "loss": 1.5309, "step": 7770 }, { "epoch": 4.4686961516369905, "grad_norm": 1.4801783561706543, "learning_rate": 9.25408446154988e-05, "loss": 1.5244, "step": 7780 }, { "epoch": 4.474439977024699, "grad_norm": 1.375853419303894, "learning_rate": 9.251567668704963e-05, "loss": 1.499, "step": 7790 }, { "epoch": 4.480183802412407, "grad_norm": 1.2229357957839966, "learning_rate": 9.249046980513359e-05, "loss": 1.5368, "step": 7800 }, { "epoch": 4.485927627800115, "grad_norm": 1.390339732170105, "learning_rate": 9.246522399284687e-05, "loss": 1.5217, "step": 7810 }, { "epoch": 4.4916714531878235, "grad_norm": 1.397567629814148, "learning_rate": 9.243993927332145e-05, "loss": 1.4962, "step": 7820 }, { "epoch": 4.497415278575532, "grad_norm": 2.001462459564209, "learning_rate": 9.241461566972489e-05, "loss": 1.5452, "step": 7830 }, { "epoch": 4.50315910396324, "grad_norm": 1.3809707164764404, "learning_rate": 9.23892532052604e-05, "loss": 1.5471, "step": 7840 }, { "epoch": 4.508902929350947, "grad_norm": 1.481889009475708, "learning_rate": 9.236385190316682e-05, "loss": 1.5201, "step": 7850 }, { "epoch": 4.514646754738656, "grad_norm": 1.2754344940185547, "learning_rate": 9.233841178671853e-05, "loss": 1.519, "step": 7860 }, { "epoch": 4.520390580126364, "grad_norm": 1.345361590385437, "learning_rate": 9.23129328792255e-05, "loss": 1.5354, "step": 7870 }, { "epoch": 4.526134405514073, "grad_norm": 1.226788878440857, "learning_rate": 9.228741520403323e-05, "loss": 1.528, "step": 7880 }, { "epoch": 4.53187823090178, "grad_norm": 1.3052645921707153, "learning_rate": 9.226185878452276e-05, "loss": 1.5306, "step": 7890 }, { "epoch": 4.5376220562894884, "grad_norm": 1.3374087810516357, "learning_rate": 9.223626364411063e-05, "loss": 1.5334, "step": 7900 }, { "epoch": 4.543365881677197, "grad_norm": 1.2787437438964844, "learning_rate": 9.221062980624885e-05, "loss": 1.5304, "step": 7910 }, { "epoch": 4.549109707064905, "grad_norm": 1.3969898223876953, "learning_rate": 9.218495729442489e-05, "loss": 1.5238, "step": 7920 }, { "epoch": 4.554853532452613, "grad_norm": 1.4553688764572144, "learning_rate": 9.215924613216163e-05, "loss": 1.4905, "step": 7930 }, { "epoch": 4.560597357840321, "grad_norm": 1.3817580938339233, "learning_rate": 9.213349634301741e-05, "loss": 1.4886, "step": 7940 }, { "epoch": 4.56634118322803, "grad_norm": 1.5474655628204346, "learning_rate": 9.210770795058592e-05, "loss": 1.5139, "step": 7950 }, { "epoch": 4.572085008615738, "grad_norm": 1.5374083518981934, "learning_rate": 9.208188097849626e-05, "loss": 1.5159, "step": 7960 }, { "epoch": 4.577828834003446, "grad_norm": 1.4213935136795044, "learning_rate": 9.205601545041284e-05, "loss": 1.526, "step": 7970 }, { "epoch": 4.583572659391154, "grad_norm": 1.3005527257919312, "learning_rate": 9.203011139003544e-05, "loss": 1.5149, "step": 7980 }, { "epoch": 4.589316484778863, "grad_norm": 1.1757376194000244, "learning_rate": 9.200416882109912e-05, "loss": 1.5414, "step": 7990 }, { "epoch": 4.595060310166571, "grad_norm": 1.1613342761993408, "learning_rate": 9.197818776737423e-05, "loss": 1.5237, "step": 8000 }, { "epoch": 4.595060310166571, "eval_loss": 1.1661320924758911, "eval_runtime": 119.6345, "eval_samples_per_second": 13.299, "eval_steps_per_second": 0.142, "eval_wer": 0.10927788450672392, "step": 8000 }, { "epoch": 4.600804135554279, "grad_norm": 1.372605562210083, "learning_rate": 9.195216825266636e-05, "loss": 1.5137, "step": 8010 }, { "epoch": 4.606547960941987, "grad_norm": 1.2703298330307007, "learning_rate": 9.192611030081637e-05, "loss": 1.5413, "step": 8020 }, { "epoch": 4.6122917863296955, "grad_norm": 1.7039837837219238, "learning_rate": 9.190001393570034e-05, "loss": 1.5077, "step": 8030 }, { "epoch": 4.618035611717404, "grad_norm": 1.2425333261489868, "learning_rate": 9.187387918122953e-05, "loss": 1.5259, "step": 8040 }, { "epoch": 4.623779437105112, "grad_norm": 1.1865575313568115, "learning_rate": 9.184770606135038e-05, "loss": 1.5243, "step": 8050 }, { "epoch": 4.62952326249282, "grad_norm": 1.376383662223816, "learning_rate": 9.182149460004449e-05, "loss": 1.5375, "step": 8060 }, { "epoch": 4.635267087880528, "grad_norm": 1.4462292194366455, "learning_rate": 9.179524482132857e-05, "loss": 1.5209, "step": 8070 }, { "epoch": 4.641010913268237, "grad_norm": 1.3688052892684937, "learning_rate": 9.176895674925448e-05, "loss": 1.5416, "step": 8080 }, { "epoch": 4.646754738655945, "grad_norm": 1.1893608570098877, "learning_rate": 9.17426304079091e-05, "loss": 1.5261, "step": 8090 }, { "epoch": 4.652498564043653, "grad_norm": 1.4037144184112549, "learning_rate": 9.171626582141447e-05, "loss": 1.5167, "step": 8100 }, { "epoch": 4.658242389431361, "grad_norm": 1.2504767179489136, "learning_rate": 9.16898630139276e-05, "loss": 1.5016, "step": 8110 }, { "epoch": 4.66398621481907, "grad_norm": 1.148375153541565, "learning_rate": 9.16634220096405e-05, "loss": 1.4902, "step": 8120 }, { "epoch": 4.669730040206778, "grad_norm": 1.4028209447860718, "learning_rate": 9.163694283278027e-05, "loss": 1.4989, "step": 8130 }, { "epoch": 4.675473865594486, "grad_norm": 1.3027985095977783, "learning_rate": 9.16104255076089e-05, "loss": 1.4865, "step": 8140 }, { "epoch": 4.681217690982194, "grad_norm": 1.4219080209732056, "learning_rate": 9.158387005842341e-05, "loss": 1.5174, "step": 8150 }, { "epoch": 4.6869615163699025, "grad_norm": 1.4299012422561646, "learning_rate": 9.155727650955567e-05, "loss": 1.5335, "step": 8160 }, { "epoch": 4.692705341757611, "grad_norm": 1.2310203313827515, "learning_rate": 9.15306448853725e-05, "loss": 1.5351, "step": 8170 }, { "epoch": 4.698449167145319, "grad_norm": 1.1910754442214966, "learning_rate": 9.150397521027563e-05, "loss": 1.5247, "step": 8180 }, { "epoch": 4.704192992533027, "grad_norm": 1.1212091445922852, "learning_rate": 9.147726750870164e-05, "loss": 1.495, "step": 8190 }, { "epoch": 4.7099368179207355, "grad_norm": 1.280044674873352, "learning_rate": 9.14505218051219e-05, "loss": 1.5141, "step": 8200 }, { "epoch": 4.715680643308444, "grad_norm": 1.3714500665664673, "learning_rate": 9.14237381240427e-05, "loss": 1.5087, "step": 8210 }, { "epoch": 4.721424468696152, "grad_norm": 1.2407679557800293, "learning_rate": 9.139691649000504e-05, "loss": 1.5014, "step": 8220 }, { "epoch": 4.72716829408386, "grad_norm": 1.4980745315551758, "learning_rate": 9.137005692758472e-05, "loss": 1.5039, "step": 8230 }, { "epoch": 4.732912119471568, "grad_norm": 1.3073756694793701, "learning_rate": 9.134315946139233e-05, "loss": 1.5037, "step": 8240 }, { "epoch": 4.738655944859277, "grad_norm": 1.2725275754928589, "learning_rate": 9.131622411607312e-05, "loss": 1.5465, "step": 8250 }, { "epoch": 4.744399770246984, "grad_norm": 1.2008821964263916, "learning_rate": 9.128925091630711e-05, "loss": 1.51, "step": 8260 }, { "epoch": 4.750143595634693, "grad_norm": 1.2691665887832642, "learning_rate": 9.126223988680899e-05, "loss": 1.524, "step": 8270 }, { "epoch": 4.7558874210224005, "grad_norm": 1.2835962772369385, "learning_rate": 9.123519105232808e-05, "loss": 1.5175, "step": 8280 }, { "epoch": 4.76163124641011, "grad_norm": 1.3977302312850952, "learning_rate": 9.12081044376484e-05, "loss": 1.4827, "step": 8290 }, { "epoch": 4.767375071797817, "grad_norm": 1.2746983766555786, "learning_rate": 9.118098006758852e-05, "loss": 1.5177, "step": 8300 }, { "epoch": 4.773118897185525, "grad_norm": 1.5048744678497314, "learning_rate": 9.115381796700164e-05, "loss": 1.5063, "step": 8310 }, { "epoch": 4.778862722573233, "grad_norm": 1.3444232940673828, "learning_rate": 9.112661816077553e-05, "loss": 1.487, "step": 8320 }, { "epoch": 4.784606547960942, "grad_norm": 1.3672760725021362, "learning_rate": 9.10993806738325e-05, "loss": 1.5446, "step": 8330 }, { "epoch": 4.79035037334865, "grad_norm": 1.5493474006652832, "learning_rate": 9.107210553112942e-05, "loss": 1.5136, "step": 8340 }, { "epoch": 4.796094198736358, "grad_norm": 1.2539175748825073, "learning_rate": 9.104479275765758e-05, "loss": 1.4777, "step": 8350 }, { "epoch": 4.801838024124066, "grad_norm": 1.295505166053772, "learning_rate": 9.101744237844284e-05, "loss": 1.5088, "step": 8360 }, { "epoch": 4.807581849511775, "grad_norm": 1.1741442680358887, "learning_rate": 9.099005441854547e-05, "loss": 1.5118, "step": 8370 }, { "epoch": 4.813325674899483, "grad_norm": 1.431107759475708, "learning_rate": 9.096262890306016e-05, "loss": 1.4795, "step": 8380 }, { "epoch": 4.819069500287191, "grad_norm": 1.3822585344314575, "learning_rate": 9.093516585711608e-05, "loss": 1.5179, "step": 8390 }, { "epoch": 4.824813325674899, "grad_norm": 1.3570129871368408, "learning_rate": 9.090766530587672e-05, "loss": 1.4863, "step": 8400 }, { "epoch": 4.8305571510626075, "grad_norm": 1.2391068935394287, "learning_rate": 9.088012727453994e-05, "loss": 1.512, "step": 8410 }, { "epoch": 4.836300976450316, "grad_norm": 1.175000548362732, "learning_rate": 9.085255178833799e-05, "loss": 1.4885, "step": 8420 }, { "epoch": 4.842044801838024, "grad_norm": 1.2359306812286377, "learning_rate": 9.08249388725374e-05, "loss": 1.538, "step": 8430 }, { "epoch": 4.847788627225732, "grad_norm": 1.357232689857483, "learning_rate": 9.079728855243897e-05, "loss": 1.5122, "step": 8440 }, { "epoch": 4.8535324526134405, "grad_norm": 1.2246289253234863, "learning_rate": 9.076960085337786e-05, "loss": 1.5029, "step": 8450 }, { "epoch": 4.859276278001149, "grad_norm": 1.236830472946167, "learning_rate": 9.074187580072337e-05, "loss": 1.519, "step": 8460 }, { "epoch": 4.865020103388857, "grad_norm": 1.2631417512893677, "learning_rate": 9.071411341987915e-05, "loss": 1.5211, "step": 8470 }, { "epoch": 4.870763928776565, "grad_norm": 1.1685912609100342, "learning_rate": 9.06863137362829e-05, "loss": 1.5031, "step": 8480 }, { "epoch": 4.876507754164273, "grad_norm": 1.2414636611938477, "learning_rate": 9.065847677540666e-05, "loss": 1.4698, "step": 8490 }, { "epoch": 4.882251579551982, "grad_norm": 1.229708194732666, "learning_rate": 9.063060256275648e-05, "loss": 1.4631, "step": 8500 }, { "epoch": 4.88799540493969, "grad_norm": 1.30802321434021, "learning_rate": 9.060269112387265e-05, "loss": 1.4841, "step": 8510 }, { "epoch": 4.893739230327398, "grad_norm": 1.9324769973754883, "learning_rate": 9.057474248432956e-05, "loss": 1.5283, "step": 8520 }, { "epoch": 4.899483055715106, "grad_norm": 1.3783663511276245, "learning_rate": 9.054675666973559e-05, "loss": 1.4929, "step": 8530 }, { "epoch": 4.905226881102815, "grad_norm": 1.6306493282318115, "learning_rate": 9.05187337057333e-05, "loss": 1.5085, "step": 8540 }, { "epoch": 4.910970706490523, "grad_norm": 1.2749860286712646, "learning_rate": 9.04906736179992e-05, "loss": 1.5418, "step": 8550 }, { "epoch": 4.916714531878231, "grad_norm": 1.1955726146697998, "learning_rate": 9.046257643224387e-05, "loss": 1.5312, "step": 8560 }, { "epoch": 4.922458357265939, "grad_norm": 1.2583096027374268, "learning_rate": 9.043444217421189e-05, "loss": 1.5012, "step": 8570 }, { "epoch": 4.9282021826536475, "grad_norm": 1.242256999015808, "learning_rate": 9.040627086968172e-05, "loss": 1.535, "step": 8580 }, { "epoch": 4.933946008041356, "grad_norm": 1.1753039360046387, "learning_rate": 9.03780625444659e-05, "loss": 1.4832, "step": 8590 }, { "epoch": 4.939689833429064, "grad_norm": 1.17648184299469, "learning_rate": 9.034981722441077e-05, "loss": 1.5142, "step": 8600 }, { "epoch": 4.945433658816772, "grad_norm": 1.2780722379684448, "learning_rate": 9.032153493539663e-05, "loss": 1.4893, "step": 8610 }, { "epoch": 4.9511774842044805, "grad_norm": 1.3198331594467163, "learning_rate": 9.029321570333764e-05, "loss": 1.5075, "step": 8620 }, { "epoch": 4.956921309592189, "grad_norm": 1.345278263092041, "learning_rate": 9.026485955418181e-05, "loss": 1.5138, "step": 8630 }, { "epoch": 4.962665134979897, "grad_norm": 1.3138835430145264, "learning_rate": 9.023646651391095e-05, "loss": 1.5261, "step": 8640 }, { "epoch": 4.968408960367605, "grad_norm": 1.2407513856887817, "learning_rate": 9.020803660854073e-05, "loss": 1.4957, "step": 8650 }, { "epoch": 4.974152785755313, "grad_norm": 1.1697194576263428, "learning_rate": 9.017956986412055e-05, "loss": 1.5074, "step": 8660 }, { "epoch": 4.979896611143022, "grad_norm": 1.4139670133590698, "learning_rate": 9.01510663067336e-05, "loss": 1.5181, "step": 8670 }, { "epoch": 4.98564043653073, "grad_norm": 1.2401978969573975, "learning_rate": 9.012252596249674e-05, "loss": 1.5136, "step": 8680 }, { "epoch": 4.991384261918437, "grad_norm": 1.3499748706817627, "learning_rate": 9.009394885756059e-05, "loss": 1.5176, "step": 8690 }, { "epoch": 4.997128087306146, "grad_norm": 1.1562694311141968, "learning_rate": 9.006533501810947e-05, "loss": 1.4845, "step": 8700 }, { "epoch": 5.002871912693854, "grad_norm": 1.2447329759597778, "learning_rate": 9.003668447036129e-05, "loss": 1.5066, "step": 8710 }, { "epoch": 5.008615738081562, "grad_norm": 1.1344153881072998, "learning_rate": 9.000799724056765e-05, "loss": 1.4845, "step": 8720 }, { "epoch": 5.01435956346927, "grad_norm": 1.129337191581726, "learning_rate": 8.997927335501376e-05, "loss": 1.4656, "step": 8730 }, { "epoch": 5.020103388856978, "grad_norm": 1.2713044881820679, "learning_rate": 8.995051284001834e-05, "loss": 1.4752, "step": 8740 }, { "epoch": 5.025847214244687, "grad_norm": 1.3411953449249268, "learning_rate": 8.992171572193381e-05, "loss": 1.4662, "step": 8750 }, { "epoch": 5.031591039632395, "grad_norm": 1.35898756980896, "learning_rate": 8.989288202714598e-05, "loss": 1.4515, "step": 8760 }, { "epoch": 5.037334865020103, "grad_norm": 1.3001588582992554, "learning_rate": 8.986401178207429e-05, "loss": 1.4605, "step": 8770 }, { "epoch": 5.043078690407811, "grad_norm": 1.2070764303207397, "learning_rate": 8.98351050131716e-05, "loss": 1.4519, "step": 8780 }, { "epoch": 5.0488225157955195, "grad_norm": 1.3240972757339478, "learning_rate": 8.98061617469243e-05, "loss": 1.4571, "step": 8790 }, { "epoch": 5.054566341183228, "grad_norm": 1.2841193675994873, "learning_rate": 8.977718200985213e-05, "loss": 1.4819, "step": 8800 }, { "epoch": 5.060310166570936, "grad_norm": 1.2023500204086304, "learning_rate": 8.974816582850831e-05, "loss": 1.4946, "step": 8810 }, { "epoch": 5.066053991958644, "grad_norm": 1.251886010169983, "learning_rate": 8.971911322947946e-05, "loss": 1.4704, "step": 8820 }, { "epoch": 5.0717978173463525, "grad_norm": 1.179997444152832, "learning_rate": 8.969002423938555e-05, "loss": 1.4331, "step": 8830 }, { "epoch": 5.077541642734061, "grad_norm": 1.142061471939087, "learning_rate": 8.966089888487988e-05, "loss": 1.4603, "step": 8840 }, { "epoch": 5.083285468121769, "grad_norm": 1.3036853075027466, "learning_rate": 8.963173719264908e-05, "loss": 1.4774, "step": 8850 }, { "epoch": 5.089029293509477, "grad_norm": 1.4967633485794067, "learning_rate": 8.960253918941308e-05, "loss": 1.4803, "step": 8860 }, { "epoch": 5.094773118897185, "grad_norm": 1.2133448123931885, "learning_rate": 8.957330490192507e-05, "loss": 1.4835, "step": 8870 }, { "epoch": 5.100516944284894, "grad_norm": 1.1352540254592896, "learning_rate": 8.954403435697151e-05, "loss": 1.4602, "step": 8880 }, { "epoch": 5.106260769672602, "grad_norm": 1.0886096954345703, "learning_rate": 8.951472758137209e-05, "loss": 1.5046, "step": 8890 }, { "epoch": 5.11200459506031, "grad_norm": 1.2195403575897217, "learning_rate": 8.948538460197962e-05, "loss": 1.4563, "step": 8900 }, { "epoch": 5.117748420448018, "grad_norm": 1.2467718124389648, "learning_rate": 8.945600544568015e-05, "loss": 1.4564, "step": 8910 }, { "epoch": 5.123492245835727, "grad_norm": 1.3505523204803467, "learning_rate": 8.94265901393929e-05, "loss": 1.4427, "step": 8920 }, { "epoch": 5.129236071223435, "grad_norm": 1.338301420211792, "learning_rate": 8.939713871007013e-05, "loss": 1.4999, "step": 8930 }, { "epoch": 5.134979896611143, "grad_norm": 1.2780975103378296, "learning_rate": 8.936765118469727e-05, "loss": 1.4678, "step": 8940 }, { "epoch": 5.140723721998851, "grad_norm": 1.3231487274169922, "learning_rate": 8.933812759029281e-05, "loss": 1.4792, "step": 8950 }, { "epoch": 5.1464675473865595, "grad_norm": 1.2195783853530884, "learning_rate": 8.930856795390825e-05, "loss": 1.489, "step": 8960 }, { "epoch": 5.152211372774268, "grad_norm": 1.1261515617370605, "learning_rate": 8.927897230262813e-05, "loss": 1.4632, "step": 8970 }, { "epoch": 5.157955198161976, "grad_norm": 1.4532493352890015, "learning_rate": 8.924934066357007e-05, "loss": 1.486, "step": 8980 }, { "epoch": 5.163699023549684, "grad_norm": 1.0976425409317017, "learning_rate": 8.921967306388452e-05, "loss": 1.464, "step": 8990 }, { "epoch": 5.1694428489373925, "grad_norm": 1.287765622138977, "learning_rate": 8.918996953075497e-05, "loss": 1.4396, "step": 9000 }, { "epoch": 5.1694428489373925, "eval_loss": 1.147834300994873, "eval_runtime": 121.8641, "eval_samples_per_second": 13.056, "eval_steps_per_second": 0.139, "eval_wer": 0.10227144310091536, "step": 9000 }, { "epoch": 5.175186674325101, "grad_norm": 1.2124603986740112, "learning_rate": 8.916023009139785e-05, "loss": 1.4828, "step": 9010 }, { "epoch": 5.180930499712809, "grad_norm": 1.1968454122543335, "learning_rate": 8.913045477306244e-05, "loss": 1.5114, "step": 9020 }, { "epoch": 5.186674325100517, "grad_norm": 1.147079348564148, "learning_rate": 8.910064360303092e-05, "loss": 1.462, "step": 9030 }, { "epoch": 5.192418150488225, "grad_norm": 1.1202359199523926, "learning_rate": 8.907079660861829e-05, "loss": 1.4653, "step": 9040 }, { "epoch": 5.198161975875934, "grad_norm": 1.093362808227539, "learning_rate": 8.904091381717243e-05, "loss": 1.4727, "step": 9050 }, { "epoch": 5.203905801263642, "grad_norm": 1.1937211751937866, "learning_rate": 8.901099525607397e-05, "loss": 1.4589, "step": 9060 }, { "epoch": 5.20964962665135, "grad_norm": 1.205068588256836, "learning_rate": 8.898104095273633e-05, "loss": 1.4501, "step": 9070 }, { "epoch": 5.215393452039058, "grad_norm": 1.3431919813156128, "learning_rate": 8.895105093460569e-05, "loss": 1.4511, "step": 9080 }, { "epoch": 5.221137277426767, "grad_norm": 1.1750576496124268, "learning_rate": 8.892102522916098e-05, "loss": 1.4637, "step": 9090 }, { "epoch": 5.226881102814475, "grad_norm": 1.217994213104248, "learning_rate": 8.889096386391373e-05, "loss": 1.4643, "step": 9100 }, { "epoch": 5.232624928202183, "grad_norm": 1.383482813835144, "learning_rate": 8.886086686640823e-05, "loss": 1.458, "step": 9110 }, { "epoch": 5.238368753589891, "grad_norm": 1.2347828149795532, "learning_rate": 8.883073426422142e-05, "loss": 1.4705, "step": 9120 }, { "epoch": 5.2441125789775995, "grad_norm": 1.2212175130844116, "learning_rate": 8.880056608496284e-05, "loss": 1.5044, "step": 9130 }, { "epoch": 5.249856404365307, "grad_norm": 1.3333848714828491, "learning_rate": 8.877036235627462e-05, "loss": 1.4615, "step": 9140 }, { "epoch": 5.255600229753015, "grad_norm": 1.2548474073410034, "learning_rate": 8.874012310583146e-05, "loss": 1.4667, "step": 9150 }, { "epoch": 5.261344055140723, "grad_norm": 1.255906581878662, "learning_rate": 8.870984836134064e-05, "loss": 1.4328, "step": 9160 }, { "epoch": 5.267087880528432, "grad_norm": 1.23939049243927, "learning_rate": 8.867953815054195e-05, "loss": 1.4622, "step": 9170 }, { "epoch": 5.27283170591614, "grad_norm": 1.3477449417114258, "learning_rate": 8.864919250120763e-05, "loss": 1.4889, "step": 9180 }, { "epoch": 5.278575531303848, "grad_norm": 1.3194857835769653, "learning_rate": 8.861881144114247e-05, "loss": 1.4736, "step": 9190 }, { "epoch": 5.284319356691556, "grad_norm": 1.2175331115722656, "learning_rate": 8.858839499818364e-05, "loss": 1.4593, "step": 9200 }, { "epoch": 5.2900631820792645, "grad_norm": 1.386627435684204, "learning_rate": 8.855794320020078e-05, "loss": 1.4622, "step": 9210 }, { "epoch": 5.295807007466973, "grad_norm": 1.4545973539352417, "learning_rate": 8.852745607509588e-05, "loss": 1.4881, "step": 9220 }, { "epoch": 5.301550832854681, "grad_norm": 1.6160017251968384, "learning_rate": 8.849693365080332e-05, "loss": 1.4734, "step": 9230 }, { "epoch": 5.307294658242389, "grad_norm": 1.2399158477783203, "learning_rate": 8.846637595528982e-05, "loss": 1.4838, "step": 9240 }, { "epoch": 5.3130384836300975, "grad_norm": 1.3766226768493652, "learning_rate": 8.843578301655444e-05, "loss": 1.4573, "step": 9250 }, { "epoch": 5.318782309017806, "grad_norm": 1.3171476125717163, "learning_rate": 8.84051548626285e-05, "loss": 1.4569, "step": 9260 }, { "epoch": 5.324526134405514, "grad_norm": 1.155517339706421, "learning_rate": 8.83744915215756e-05, "loss": 1.4617, "step": 9270 }, { "epoch": 5.330269959793222, "grad_norm": 1.1997681856155396, "learning_rate": 8.834379302149162e-05, "loss": 1.4437, "step": 9280 }, { "epoch": 5.33601378518093, "grad_norm": 1.3225274085998535, "learning_rate": 8.831305939050454e-05, "loss": 1.4507, "step": 9290 }, { "epoch": 5.341757610568639, "grad_norm": 1.3525100946426392, "learning_rate": 8.828229065677464e-05, "loss": 1.4847, "step": 9300 }, { "epoch": 5.347501435956347, "grad_norm": 1.2089719772338867, "learning_rate": 8.825148684849437e-05, "loss": 1.4506, "step": 9310 }, { "epoch": 5.353245261344055, "grad_norm": 1.1200802326202393, "learning_rate": 8.822064799388821e-05, "loss": 1.4404, "step": 9320 }, { "epoch": 5.358989086731763, "grad_norm": 1.3737341165542603, "learning_rate": 8.818977412121286e-05, "loss": 1.4882, "step": 9330 }, { "epoch": 5.364732912119472, "grad_norm": 1.287752628326416, "learning_rate": 8.815886525875705e-05, "loss": 1.5014, "step": 9340 }, { "epoch": 5.37047673750718, "grad_norm": 1.239037275314331, "learning_rate": 8.812792143484159e-05, "loss": 1.4612, "step": 9350 }, { "epoch": 5.376220562894888, "grad_norm": 1.2316423654556274, "learning_rate": 8.80969426778193e-05, "loss": 1.4474, "step": 9360 }, { "epoch": 5.381964388282596, "grad_norm": 1.22121000289917, "learning_rate": 8.806592901607505e-05, "loss": 1.4939, "step": 9370 }, { "epoch": 5.3877082136703045, "grad_norm": 1.40297269821167, "learning_rate": 8.803488047802567e-05, "loss": 1.4587, "step": 9380 }, { "epoch": 5.393452039058013, "grad_norm": 1.3497315645217896, "learning_rate": 8.800379709211995e-05, "loss": 1.5025, "step": 9390 }, { "epoch": 5.399195864445721, "grad_norm": 1.3170558214187622, "learning_rate": 8.797267888683854e-05, "loss": 1.4991, "step": 9400 }, { "epoch": 5.404939689833429, "grad_norm": 1.3161382675170898, "learning_rate": 8.794152589069413e-05, "loss": 1.4877, "step": 9410 }, { "epoch": 5.410683515221137, "grad_norm": 1.2778904438018799, "learning_rate": 8.79103381322312e-05, "loss": 1.4709, "step": 9420 }, { "epoch": 5.416427340608846, "grad_norm": 1.2192775011062622, "learning_rate": 8.787911564002608e-05, "loss": 1.4702, "step": 9430 }, { "epoch": 5.422171165996554, "grad_norm": 1.2643715143203735, "learning_rate": 8.784785844268696e-05, "loss": 1.4538, "step": 9440 }, { "epoch": 5.427914991384262, "grad_norm": 1.2216124534606934, "learning_rate": 8.78165665688538e-05, "loss": 1.4671, "step": 9450 }, { "epoch": 5.43365881677197, "grad_norm": 1.2254632711410522, "learning_rate": 8.778524004719836e-05, "loss": 1.4473, "step": 9460 }, { "epoch": 5.439402642159679, "grad_norm": 1.1977343559265137, "learning_rate": 8.775387890642412e-05, "loss": 1.4728, "step": 9470 }, { "epoch": 5.445146467547387, "grad_norm": 1.218712568283081, "learning_rate": 8.772248317526627e-05, "loss": 1.4654, "step": 9480 }, { "epoch": 5.450890292935095, "grad_norm": 1.317732572555542, "learning_rate": 8.769105288249179e-05, "loss": 1.4621, "step": 9490 }, { "epoch": 5.456634118322803, "grad_norm": 1.158211588859558, "learning_rate": 8.765958805689916e-05, "loss": 1.4671, "step": 9500 }, { "epoch": 5.4623779437105116, "grad_norm": 1.3367105722427368, "learning_rate": 8.762808872731867e-05, "loss": 1.4548, "step": 9510 }, { "epoch": 5.46812176909822, "grad_norm": 1.3322018384933472, "learning_rate": 8.759655492261211e-05, "loss": 1.4473, "step": 9520 }, { "epoch": 5.473865594485928, "grad_norm": 1.3279736042022705, "learning_rate": 8.756498667167292e-05, "loss": 1.4656, "step": 9530 }, { "epoch": 5.479609419873636, "grad_norm": 1.2099251747131348, "learning_rate": 8.753338400342605e-05, "loss": 1.4811, "step": 9540 }, { "epoch": 5.485353245261344, "grad_norm": 1.3018758296966553, "learning_rate": 8.750174694682805e-05, "loss": 1.4596, "step": 9550 }, { "epoch": 5.491097070649053, "grad_norm": 1.2439451217651367, "learning_rate": 8.747007553086694e-05, "loss": 1.437, "step": 9560 }, { "epoch": 5.49684089603676, "grad_norm": 1.3036242723464966, "learning_rate": 8.743836978456222e-05, "loss": 1.4665, "step": 9570 }, { "epoch": 5.502584721424469, "grad_norm": 1.2429912090301514, "learning_rate": 8.740662973696485e-05, "loss": 1.4649, "step": 9580 }, { "epoch": 5.5083285468121765, "grad_norm": 1.1051344871520996, "learning_rate": 8.737485541715721e-05, "loss": 1.4577, "step": 9590 }, { "epoch": 5.514072372199885, "grad_norm": 1.261716604232788, "learning_rate": 8.734304685425314e-05, "loss": 1.4279, "step": 9600 }, { "epoch": 5.519816197587593, "grad_norm": 1.129029393196106, "learning_rate": 8.731120407739775e-05, "loss": 1.4657, "step": 9610 }, { "epoch": 5.525560022975301, "grad_norm": 1.237560510635376, "learning_rate": 8.727932711576762e-05, "loss": 1.4386, "step": 9620 }, { "epoch": 5.5313038483630095, "grad_norm": 1.0743985176086426, "learning_rate": 8.724741599857055e-05, "loss": 1.4558, "step": 9630 }, { "epoch": 5.537047673750718, "grad_norm": 1.2897517681121826, "learning_rate": 8.72154707550457e-05, "loss": 1.4765, "step": 9640 }, { "epoch": 5.542791499138426, "grad_norm": 1.194259762763977, "learning_rate": 8.718349141446347e-05, "loss": 1.4433, "step": 9650 }, { "epoch": 5.548535324526134, "grad_norm": 1.2468435764312744, "learning_rate": 8.715147800612549e-05, "loss": 1.4738, "step": 9660 }, { "epoch": 5.554279149913842, "grad_norm": 1.1765706539154053, "learning_rate": 8.711943055936468e-05, "loss": 1.4624, "step": 9670 }, { "epoch": 5.560022975301551, "grad_norm": 1.163429856300354, "learning_rate": 8.708734910354504e-05, "loss": 1.4738, "step": 9680 }, { "epoch": 5.565766800689259, "grad_norm": 1.272435188293457, "learning_rate": 8.705523366806177e-05, "loss": 1.4435, "step": 9690 }, { "epoch": 5.571510626076967, "grad_norm": 1.3485329151153564, "learning_rate": 8.702308428234129e-05, "loss": 1.4756, "step": 9700 }, { "epoch": 5.577254451464675, "grad_norm": 1.3449616432189941, "learning_rate": 8.699090097584099e-05, "loss": 1.4625, "step": 9710 }, { "epoch": 5.582998276852384, "grad_norm": 1.6839066743850708, "learning_rate": 8.695868377804944e-05, "loss": 1.4449, "step": 9720 }, { "epoch": 5.588742102240092, "grad_norm": 1.1611164808273315, "learning_rate": 8.692643271848622e-05, "loss": 1.4856, "step": 9730 }, { "epoch": 5.5944859276278, "grad_norm": 1.1526763439178467, "learning_rate": 8.689414782670194e-05, "loss": 1.4642, "step": 9740 }, { "epoch": 5.600229753015508, "grad_norm": 1.2113934755325317, "learning_rate": 8.686182913227824e-05, "loss": 1.4348, "step": 9750 }, { "epoch": 5.6059735784032165, "grad_norm": 1.3929334878921509, "learning_rate": 8.682947666482768e-05, "loss": 1.4566, "step": 9760 }, { "epoch": 5.611717403790925, "grad_norm": 1.1892578601837158, "learning_rate": 8.679709045399381e-05, "loss": 1.4761, "step": 9770 }, { "epoch": 5.617461229178633, "grad_norm": 1.1876999139785767, "learning_rate": 8.676467052945108e-05, "loss": 1.4263, "step": 9780 }, { "epoch": 5.623205054566341, "grad_norm": 1.2544496059417725, "learning_rate": 8.673221692090483e-05, "loss": 1.4428, "step": 9790 }, { "epoch": 5.6289488799540495, "grad_norm": 1.2875301837921143, "learning_rate": 8.669972965809125e-05, "loss": 1.4737, "step": 9800 }, { "epoch": 5.634692705341758, "grad_norm": 1.2570191621780396, "learning_rate": 8.666720877077741e-05, "loss": 1.4829, "step": 9810 }, { "epoch": 5.640436530729466, "grad_norm": 1.2582734823226929, "learning_rate": 8.663465428876113e-05, "loss": 1.4774, "step": 9820 }, { "epoch": 5.646180356117174, "grad_norm": 1.3060370683670044, "learning_rate": 8.660206624187109e-05, "loss": 1.4927, "step": 9830 }, { "epoch": 5.651924181504882, "grad_norm": 1.1538877487182617, "learning_rate": 8.656944465996662e-05, "loss": 1.4535, "step": 9840 }, { "epoch": 5.657668006892591, "grad_norm": 1.2658586502075195, "learning_rate": 8.653678957293787e-05, "loss": 1.4631, "step": 9850 }, { "epoch": 5.663411832280299, "grad_norm": 1.21420156955719, "learning_rate": 8.650410101070564e-05, "loss": 1.4273, "step": 9860 }, { "epoch": 5.669155657668007, "grad_norm": 1.1817564964294434, "learning_rate": 8.647137900322143e-05, "loss": 1.4543, "step": 9870 }, { "epoch": 5.674899483055715, "grad_norm": 1.0846434831619263, "learning_rate": 8.643862358046737e-05, "loss": 1.4904, "step": 9880 }, { "epoch": 5.680643308443424, "grad_norm": 1.5501220226287842, "learning_rate": 8.640583477245618e-05, "loss": 1.4665, "step": 9890 }, { "epoch": 5.686387133831132, "grad_norm": 1.369886040687561, "learning_rate": 8.637301260923124e-05, "loss": 1.4659, "step": 9900 }, { "epoch": 5.69213095921884, "grad_norm": 1.0211889743804932, "learning_rate": 8.634015712086642e-05, "loss": 1.4445, "step": 9910 }, { "epoch": 5.697874784606548, "grad_norm": 1.23423433303833, "learning_rate": 8.630726833746618e-05, "loss": 1.4221, "step": 9920 }, { "epoch": 5.7036186099942565, "grad_norm": 1.1704976558685303, "learning_rate": 8.627434628916544e-05, "loss": 1.4391, "step": 9930 }, { "epoch": 5.709362435381965, "grad_norm": 1.3454113006591797, "learning_rate": 8.624139100612962e-05, "loss": 1.4525, "step": 9940 }, { "epoch": 5.715106260769673, "grad_norm": 1.2955466508865356, "learning_rate": 8.62084025185546e-05, "loss": 1.4619, "step": 9950 }, { "epoch": 5.72085008615738, "grad_norm": 1.1994664669036865, "learning_rate": 8.617538085666673e-05, "loss": 1.4545, "step": 9960 }, { "epoch": 5.7265939115450895, "grad_norm": 1.3065454959869385, "learning_rate": 8.61423260507226e-05, "loss": 1.4407, "step": 9970 }, { "epoch": 5.732337736932797, "grad_norm": 1.3562005758285522, "learning_rate": 8.610923813100936e-05, "loss": 1.4907, "step": 9980 }, { "epoch": 5.738081562320506, "grad_norm": 1.1897697448730469, "learning_rate": 8.607611712784436e-05, "loss": 1.4643, "step": 9990 }, { "epoch": 5.743825387708213, "grad_norm": 1.3620132207870483, "learning_rate": 8.604296307157538e-05, "loss": 1.4343, "step": 10000 }, { "epoch": 5.743825387708213, "eval_loss": 1.1382653713226318, "eval_runtime": 120.4306, "eval_samples_per_second": 13.211, "eval_steps_per_second": 0.141, "eval_wer": 0.10286473047802011, "step": 10000 }, { "epoch": 5.7495692130959215, "grad_norm": 1.3977901935577393, "learning_rate": 8.600977599258038e-05, "loss": 1.4573, "step": 10010 }, { "epoch": 5.75531303848363, "grad_norm": 1.224683403968811, "learning_rate": 8.597655592126762e-05, "loss": 1.4337, "step": 10020 }, { "epoch": 5.761056863871338, "grad_norm": 1.323976755142212, "learning_rate": 8.59433028880756e-05, "loss": 1.4562, "step": 10030 }, { "epoch": 5.766800689259046, "grad_norm": 1.2467987537384033, "learning_rate": 8.591001692347301e-05, "loss": 1.463, "step": 10040 }, { "epoch": 5.772544514646754, "grad_norm": 1.1050121784210205, "learning_rate": 8.587669805795872e-05, "loss": 1.4462, "step": 10050 }, { "epoch": 5.778288340034463, "grad_norm": 1.2316280603408813, "learning_rate": 8.584334632206174e-05, "loss": 1.446, "step": 10060 }, { "epoch": 5.784032165422171, "grad_norm": 1.0639480352401733, "learning_rate": 8.580996174634122e-05, "loss": 1.4658, "step": 10070 }, { "epoch": 5.789775990809879, "grad_norm": 1.4012728929519653, "learning_rate": 8.577654436138634e-05, "loss": 1.4572, "step": 10080 }, { "epoch": 5.795519816197587, "grad_norm": 1.4036791324615479, "learning_rate": 8.574309419781643e-05, "loss": 1.4567, "step": 10090 }, { "epoch": 5.801263641585296, "grad_norm": 1.2204209566116333, "learning_rate": 8.570961128628076e-05, "loss": 1.4584, "step": 10100 }, { "epoch": 5.807007466973004, "grad_norm": 1.4119248390197754, "learning_rate": 8.56760956574587e-05, "loss": 1.4424, "step": 10110 }, { "epoch": 5.812751292360712, "grad_norm": 1.2645256519317627, "learning_rate": 8.564254734205954e-05, "loss": 1.4515, "step": 10120 }, { "epoch": 5.81849511774842, "grad_norm": 1.137039303779602, "learning_rate": 8.560896637082251e-05, "loss": 1.4475, "step": 10130 }, { "epoch": 5.8242389431361286, "grad_norm": 1.1953868865966797, "learning_rate": 8.55753527745168e-05, "loss": 1.4444, "step": 10140 }, { "epoch": 5.829982768523837, "grad_norm": 1.4050496816635132, "learning_rate": 8.554170658394145e-05, "loss": 1.4576, "step": 10150 }, { "epoch": 5.835726593911545, "grad_norm": 1.4435936212539673, "learning_rate": 8.550802782992541e-05, "loss": 1.4685, "step": 10160 }, { "epoch": 5.841470419299253, "grad_norm": 1.091422200202942, "learning_rate": 8.547431654332745e-05, "loss": 1.4528, "step": 10170 }, { "epoch": 5.8472142446869615, "grad_norm": 1.2685961723327637, "learning_rate": 8.544057275503616e-05, "loss": 1.4494, "step": 10180 }, { "epoch": 5.85295807007467, "grad_norm": 1.1865488290786743, "learning_rate": 8.540679649596985e-05, "loss": 1.4853, "step": 10190 }, { "epoch": 5.858701895462378, "grad_norm": 1.3077943325042725, "learning_rate": 8.537298779707667e-05, "loss": 1.4276, "step": 10200 }, { "epoch": 5.864445720850086, "grad_norm": 1.2340067625045776, "learning_rate": 8.533914668933444e-05, "loss": 1.465, "step": 10210 }, { "epoch": 5.870189546237794, "grad_norm": 1.1962890625, "learning_rate": 8.530527320375069e-05, "loss": 1.4741, "step": 10220 }, { "epoch": 5.875933371625503, "grad_norm": 1.254611849784851, "learning_rate": 8.52713673713626e-05, "loss": 1.4647, "step": 10230 }, { "epoch": 5.881677197013211, "grad_norm": 1.3268686532974243, "learning_rate": 8.523742922323701e-05, "loss": 1.4641, "step": 10240 }, { "epoch": 5.887421022400919, "grad_norm": 1.215364933013916, "learning_rate": 8.520345879047035e-05, "loss": 1.448, "step": 10250 }, { "epoch": 5.893164847788627, "grad_norm": 1.1393647193908691, "learning_rate": 8.516945610418869e-05, "loss": 1.4329, "step": 10260 }, { "epoch": 5.898908673176336, "grad_norm": 1.0588808059692383, "learning_rate": 8.513542119554755e-05, "loss": 1.4765, "step": 10270 }, { "epoch": 5.904652498564044, "grad_norm": 1.3944430351257324, "learning_rate": 8.510135409573205e-05, "loss": 1.421, "step": 10280 }, { "epoch": 5.910396323951752, "grad_norm": 1.1634774208068848, "learning_rate": 8.506725483595678e-05, "loss": 1.4632, "step": 10290 }, { "epoch": 5.91614014933946, "grad_norm": 1.356818437576294, "learning_rate": 8.503312344746583e-05, "loss": 1.4433, "step": 10300 }, { "epoch": 5.9218839747271685, "grad_norm": 1.168566346168518, "learning_rate": 8.499895996153268e-05, "loss": 1.4328, "step": 10310 }, { "epoch": 5.927627800114877, "grad_norm": 1.2512634992599487, "learning_rate": 8.496476440946026e-05, "loss": 1.4738, "step": 10320 }, { "epoch": 5.933371625502585, "grad_norm": 1.315414309501648, "learning_rate": 8.493053682258084e-05, "loss": 1.4182, "step": 10330 }, { "epoch": 5.939115450890293, "grad_norm": 1.2204861640930176, "learning_rate": 8.489627723225607e-05, "loss": 1.4607, "step": 10340 }, { "epoch": 5.9448592762780015, "grad_norm": 1.2707440853118896, "learning_rate": 8.486198566987691e-05, "loss": 1.4294, "step": 10350 }, { "epoch": 5.95060310166571, "grad_norm": 1.1873972415924072, "learning_rate": 8.482766216686365e-05, "loss": 1.4328, "step": 10360 }, { "epoch": 5.956346927053418, "grad_norm": 1.069666862487793, "learning_rate": 8.479330675466583e-05, "loss": 1.4807, "step": 10370 }, { "epoch": 5.962090752441126, "grad_norm": 1.1547396183013916, "learning_rate": 8.475891946476217e-05, "loss": 1.4427, "step": 10380 }, { "epoch": 5.9678345778288335, "grad_norm": 1.1067239046096802, "learning_rate": 8.472450032866066e-05, "loss": 1.4285, "step": 10390 }, { "epoch": 5.973578403216543, "grad_norm": 1.1606744527816772, "learning_rate": 8.469004937789849e-05, "loss": 1.4354, "step": 10400 }, { "epoch": 5.97932222860425, "grad_norm": 1.0931557416915894, "learning_rate": 8.465556664404193e-05, "loss": 1.4345, "step": 10410 }, { "epoch": 5.985066053991959, "grad_norm": 1.1738940477371216, "learning_rate": 8.462105215868646e-05, "loss": 1.4549, "step": 10420 }, { "epoch": 5.9908098793796665, "grad_norm": 1.1976697444915771, "learning_rate": 8.458650595345652e-05, "loss": 1.4613, "step": 10430 }, { "epoch": 5.996553704767375, "grad_norm": 1.172865867614746, "learning_rate": 8.455192806000574e-05, "loss": 1.4685, "step": 10440 }, { "epoch": 6.002297530155083, "grad_norm": 1.1299928426742554, "learning_rate": 8.451731851001676e-05, "loss": 1.483, "step": 10450 }, { "epoch": 6.008041355542791, "grad_norm": 1.1668400764465332, "learning_rate": 8.448267733520117e-05, "loss": 1.4196, "step": 10460 }, { "epoch": 6.013785180930499, "grad_norm": 1.397894263267517, "learning_rate": 8.444800456729961e-05, "loss": 1.4252, "step": 10470 }, { "epoch": 6.019529006318208, "grad_norm": 1.1697758436203003, "learning_rate": 8.441330023808161e-05, "loss": 1.4269, "step": 10480 }, { "epoch": 6.025272831705916, "grad_norm": 1.0655994415283203, "learning_rate": 8.437856437934561e-05, "loss": 1.423, "step": 10490 }, { "epoch": 6.031016657093624, "grad_norm": 1.3008092641830444, "learning_rate": 8.4343797022919e-05, "loss": 1.41, "step": 10500 }, { "epoch": 6.036760482481332, "grad_norm": 1.2477436065673828, "learning_rate": 8.430899820065802e-05, "loss": 1.4023, "step": 10510 }, { "epoch": 6.042504307869041, "grad_norm": 1.0990097522735596, "learning_rate": 8.427416794444768e-05, "loss": 1.4463, "step": 10520 }, { "epoch": 6.048248133256749, "grad_norm": 1.4908597469329834, "learning_rate": 8.423930628620186e-05, "loss": 1.4233, "step": 10530 }, { "epoch": 6.053991958644457, "grad_norm": 1.2926387786865234, "learning_rate": 8.420441325786316e-05, "loss": 1.4017, "step": 10540 }, { "epoch": 6.059735784032165, "grad_norm": 1.110458254814148, "learning_rate": 8.416948889140296e-05, "loss": 1.4223, "step": 10550 }, { "epoch": 6.0654796094198735, "grad_norm": 1.1542221307754517, "learning_rate": 8.413453321882134e-05, "loss": 1.4059, "step": 10560 }, { "epoch": 6.071223434807582, "grad_norm": 1.1031684875488281, "learning_rate": 8.409954627214707e-05, "loss": 1.4111, "step": 10570 }, { "epoch": 6.07696726019529, "grad_norm": 1.4636880159378052, "learning_rate": 8.40645280834376e-05, "loss": 1.4165, "step": 10580 }, { "epoch": 6.082711085582998, "grad_norm": 1.1508665084838867, "learning_rate": 8.402947868477893e-05, "loss": 1.4144, "step": 10590 }, { "epoch": 6.0884549109707065, "grad_norm": 1.1256190538406372, "learning_rate": 8.399439810828574e-05, "loss": 1.4309, "step": 10600 }, { "epoch": 6.094198736358415, "grad_norm": 1.2350083589553833, "learning_rate": 8.395928638610121e-05, "loss": 1.3977, "step": 10610 }, { "epoch": 6.099942561746123, "grad_norm": 1.1814467906951904, "learning_rate": 8.392414355039712e-05, "loss": 1.4505, "step": 10620 }, { "epoch": 6.105686387133831, "grad_norm": 1.2776685953140259, "learning_rate": 8.388896963337372e-05, "loss": 1.4291, "step": 10630 }, { "epoch": 6.111430212521539, "grad_norm": 1.068184733390808, "learning_rate": 8.385376466725975e-05, "loss": 1.4199, "step": 10640 }, { "epoch": 6.117174037909248, "grad_norm": 1.158353328704834, "learning_rate": 8.381852868431238e-05, "loss": 1.4041, "step": 10650 }, { "epoch": 6.122917863296956, "grad_norm": 1.1678544282913208, "learning_rate": 8.378326171681724e-05, "loss": 1.3901, "step": 10660 }, { "epoch": 6.128661688684664, "grad_norm": 1.1794697046279907, "learning_rate": 8.374796379708832e-05, "loss": 1.4185, "step": 10670 }, { "epoch": 6.134405514072372, "grad_norm": 1.1062124967575073, "learning_rate": 8.371263495746797e-05, "loss": 1.3941, "step": 10680 }, { "epoch": 6.140149339460081, "grad_norm": 1.2089911699295044, "learning_rate": 8.367727523032688e-05, "loss": 1.4202, "step": 10690 }, { "epoch": 6.145893164847789, "grad_norm": 1.4723756313323975, "learning_rate": 8.364188464806404e-05, "loss": 1.403, "step": 10700 }, { "epoch": 6.151636990235497, "grad_norm": 1.1218116283416748, "learning_rate": 8.36064632431067e-05, "loss": 1.43, "step": 10710 }, { "epoch": 6.157380815623205, "grad_norm": 1.353092074394226, "learning_rate": 8.357101104791038e-05, "loss": 1.4248, "step": 10720 }, { "epoch": 6.1631246410109135, "grad_norm": 1.1634867191314697, "learning_rate": 8.35355280949588e-05, "loss": 1.4219, "step": 10730 }, { "epoch": 6.168868466398622, "grad_norm": 1.0126991271972656, "learning_rate": 8.350001441676385e-05, "loss": 1.3907, "step": 10740 }, { "epoch": 6.17461229178633, "grad_norm": 1.130642056465149, "learning_rate": 8.346447004586557e-05, "loss": 1.3904, "step": 10750 }, { "epoch": 6.180356117174038, "grad_norm": 1.7694042921066284, "learning_rate": 8.342889501483213e-05, "loss": 1.4444, "step": 10760 }, { "epoch": 6.1860999425617464, "grad_norm": 1.057158350944519, "learning_rate": 8.339328935625982e-05, "loss": 1.4119, "step": 10770 }, { "epoch": 6.191843767949455, "grad_norm": 1.1768707036972046, "learning_rate": 8.335765310277295e-05, "loss": 1.4404, "step": 10780 }, { "epoch": 6.197587593337163, "grad_norm": 1.106454849243164, "learning_rate": 8.33219862870239e-05, "loss": 1.4085, "step": 10790 }, { "epoch": 6.203331418724871, "grad_norm": 1.2325435876846313, "learning_rate": 8.328628894169297e-05, "loss": 1.4331, "step": 10800 }, { "epoch": 6.209075244112579, "grad_norm": 1.2451276779174805, "learning_rate": 8.32505610994886e-05, "loss": 1.4176, "step": 10810 }, { "epoch": 6.214819069500288, "grad_norm": 1.141993761062622, "learning_rate": 8.3214802793147e-05, "loss": 1.4404, "step": 10820 }, { "epoch": 6.220562894887996, "grad_norm": 1.2254890203475952, "learning_rate": 8.31790140554324e-05, "loss": 1.4119, "step": 10830 }, { "epoch": 6.226306720275703, "grad_norm": 1.1228500604629517, "learning_rate": 8.314319491913685e-05, "loss": 1.4186, "step": 10840 }, { "epoch": 6.232050545663411, "grad_norm": 1.1133283376693726, "learning_rate": 8.310734541708029e-05, "loss": 1.4129, "step": 10850 }, { "epoch": 6.23779437105112, "grad_norm": 1.098901391029358, "learning_rate": 8.307146558211048e-05, "loss": 1.4241, "step": 10860 }, { "epoch": 6.243538196438828, "grad_norm": 1.1529247760772705, "learning_rate": 8.303555544710295e-05, "loss": 1.4037, "step": 10870 }, { "epoch": 6.249282021826536, "grad_norm": 1.2168315649032593, "learning_rate": 8.299961504496106e-05, "loss": 1.3878, "step": 10880 }, { "epoch": 6.255025847214244, "grad_norm": 1.1943061351776123, "learning_rate": 8.296364440861581e-05, "loss": 1.4157, "step": 10890 }, { "epoch": 6.260769672601953, "grad_norm": 1.1176701784133911, "learning_rate": 8.292764357102595e-05, "loss": 1.4302, "step": 10900 }, { "epoch": 6.266513497989661, "grad_norm": 1.469723105430603, "learning_rate": 8.289161256517789e-05, "loss": 1.4156, "step": 10910 }, { "epoch": 6.272257323377369, "grad_norm": 1.1445305347442627, "learning_rate": 8.285555142408572e-05, "loss": 1.4337, "step": 10920 }, { "epoch": 6.278001148765077, "grad_norm": 1.1990079879760742, "learning_rate": 8.28194601807911e-05, "loss": 1.4228, "step": 10930 }, { "epoch": 6.2837449741527855, "grad_norm": 1.1120957136154175, "learning_rate": 8.27833388683633e-05, "loss": 1.3974, "step": 10940 }, { "epoch": 6.289488799540494, "grad_norm": 1.1598010063171387, "learning_rate": 8.274718751989909e-05, "loss": 1.4367, "step": 10950 }, { "epoch": 6.295232624928202, "grad_norm": 1.0702592134475708, "learning_rate": 8.271100616852279e-05, "loss": 1.4, "step": 10960 }, { "epoch": 6.30097645031591, "grad_norm": 1.271758794784546, "learning_rate": 8.267479484738628e-05, "loss": 1.4331, "step": 10970 }, { "epoch": 6.3067202757036185, "grad_norm": 1.1642522811889648, "learning_rate": 8.263855358966878e-05, "loss": 1.4403, "step": 10980 }, { "epoch": 6.312464101091327, "grad_norm": 1.2690401077270508, "learning_rate": 8.2602282428577e-05, "loss": 1.4128, "step": 10990 }, { "epoch": 6.318207926479035, "grad_norm": 1.0879255533218384, "learning_rate": 8.256598139734511e-05, "loss": 1.4103, "step": 11000 }, { "epoch": 6.318207926479035, "eval_loss": 1.1180330514907837, "eval_runtime": 119.9171, "eval_samples_per_second": 13.267, "eval_steps_per_second": 0.142, "eval_wer": 0.09922025087580517, "step": 11000 }, { "epoch": 6.323951751866743, "grad_norm": 1.2267507314682007, "learning_rate": 8.252965052923452e-05, "loss": 1.4365, "step": 11010 }, { "epoch": 6.329695577254451, "grad_norm": 1.1544227600097656, "learning_rate": 8.249328985753406e-05, "loss": 1.4419, "step": 11020 }, { "epoch": 6.33543940264216, "grad_norm": 1.0912106037139893, "learning_rate": 8.245689941555986e-05, "loss": 1.4236, "step": 11030 }, { "epoch": 6.341183228029868, "grad_norm": 1.1619361639022827, "learning_rate": 8.242047923665531e-05, "loss": 1.4162, "step": 11040 }, { "epoch": 6.346927053417576, "grad_norm": 1.1370247602462769, "learning_rate": 8.23840293541911e-05, "loss": 1.4325, "step": 11050 }, { "epoch": 6.352670878805284, "grad_norm": 1.1341171264648438, "learning_rate": 8.234754980156504e-05, "loss": 1.4121, "step": 11060 }, { "epoch": 6.358414704192993, "grad_norm": 1.2911267280578613, "learning_rate": 8.231104061220219e-05, "loss": 1.416, "step": 11070 }, { "epoch": 6.364158529580701, "grad_norm": 1.4824491739273071, "learning_rate": 8.227450181955477e-05, "loss": 1.3958, "step": 11080 }, { "epoch": 6.369902354968409, "grad_norm": 1.3235267400741577, "learning_rate": 8.223793345710213e-05, "loss": 1.3999, "step": 11090 }, { "epoch": 6.375646180356117, "grad_norm": 1.1889561414718628, "learning_rate": 8.220133555835065e-05, "loss": 1.4185, "step": 11100 }, { "epoch": 6.3813900057438255, "grad_norm": 1.161799669265747, "learning_rate": 8.216470815683384e-05, "loss": 1.3844, "step": 11110 }, { "epoch": 6.387133831131534, "grad_norm": 1.1641936302185059, "learning_rate": 8.212805128611222e-05, "loss": 1.4384, "step": 11120 }, { "epoch": 6.392877656519242, "grad_norm": 1.2904438972473145, "learning_rate": 8.209136497977328e-05, "loss": 1.4244, "step": 11130 }, { "epoch": 6.39862148190695, "grad_norm": 1.2467349767684937, "learning_rate": 8.205464927143155e-05, "loss": 1.4232, "step": 11140 }, { "epoch": 6.4043653072946585, "grad_norm": 1.1668075323104858, "learning_rate": 8.201790419472845e-05, "loss": 1.4344, "step": 11150 }, { "epoch": 6.410109132682367, "grad_norm": 1.1435247659683228, "learning_rate": 8.198112978333232e-05, "loss": 1.3691, "step": 11160 }, { "epoch": 6.415852958070075, "grad_norm": 1.2412710189819336, "learning_rate": 8.194432607093836e-05, "loss": 1.4401, "step": 11170 }, { "epoch": 6.421596783457783, "grad_norm": 1.3402605056762695, "learning_rate": 8.190749309126869e-05, "loss": 1.4273, "step": 11180 }, { "epoch": 6.427340608845491, "grad_norm": 1.2125691175460815, "learning_rate": 8.187063087807213e-05, "loss": 1.4214, "step": 11190 }, { "epoch": 6.4330844342332, "grad_norm": 1.201837420463562, "learning_rate": 8.183373946512439e-05, "loss": 1.4333, "step": 11200 }, { "epoch": 6.438828259620908, "grad_norm": 1.1503769159317017, "learning_rate": 8.179681888622788e-05, "loss": 1.409, "step": 11210 }, { "epoch": 6.444572085008616, "grad_norm": 1.1592082977294922, "learning_rate": 8.175986917521176e-05, "loss": 1.4002, "step": 11220 }, { "epoch": 6.450315910396324, "grad_norm": 1.326252818107605, "learning_rate": 8.172289036593186e-05, "loss": 1.3916, "step": 11230 }, { "epoch": 6.456059735784033, "grad_norm": 1.2033997774124146, "learning_rate": 8.168588249227067e-05, "loss": 1.4068, "step": 11240 }, { "epoch": 6.46180356117174, "grad_norm": 1.1641250848770142, "learning_rate": 8.164884558813734e-05, "loss": 1.4369, "step": 11250 }, { "epoch": 6.467547386559449, "grad_norm": 1.1913131475448608, "learning_rate": 8.161177968746763e-05, "loss": 1.4381, "step": 11260 }, { "epoch": 6.473291211947156, "grad_norm": 1.339672327041626, "learning_rate": 8.157468482422378e-05, "loss": 1.4194, "step": 11270 }, { "epoch": 6.479035037334865, "grad_norm": 1.17433762550354, "learning_rate": 8.153756103239467e-05, "loss": 1.3916, "step": 11280 }, { "epoch": 6.484778862722573, "grad_norm": 1.17121422290802, "learning_rate": 8.150040834599564e-05, "loss": 1.4308, "step": 11290 }, { "epoch": 6.490522688110281, "grad_norm": 1.2462800741195679, "learning_rate": 8.146322679906851e-05, "loss": 1.4122, "step": 11300 }, { "epoch": 6.496266513497989, "grad_norm": 1.1157385110855103, "learning_rate": 8.142601642568155e-05, "loss": 1.406, "step": 11310 }, { "epoch": 6.502010338885698, "grad_norm": 1.2299013137817383, "learning_rate": 8.138877725992942e-05, "loss": 1.4004, "step": 11320 }, { "epoch": 6.507754164273406, "grad_norm": 1.2796170711517334, "learning_rate": 8.135150933593319e-05, "loss": 1.3878, "step": 11330 }, { "epoch": 6.513497989661114, "grad_norm": 1.1334229707717896, "learning_rate": 8.131421268784027e-05, "loss": 1.3973, "step": 11340 }, { "epoch": 6.519241815048822, "grad_norm": 1.0533747673034668, "learning_rate": 8.12768873498244e-05, "loss": 1.4155, "step": 11350 }, { "epoch": 6.5249856404365305, "grad_norm": 1.1573117971420288, "learning_rate": 8.123953335608556e-05, "loss": 1.385, "step": 11360 }, { "epoch": 6.530729465824239, "grad_norm": 1.2326871156692505, "learning_rate": 8.120215074085007e-05, "loss": 1.4163, "step": 11370 }, { "epoch": 6.536473291211947, "grad_norm": 1.2118451595306396, "learning_rate": 8.116473953837037e-05, "loss": 1.4123, "step": 11380 }, { "epoch": 6.542217116599655, "grad_norm": 1.196045994758606, "learning_rate": 8.112729978292522e-05, "loss": 1.4277, "step": 11390 }, { "epoch": 6.5479609419873634, "grad_norm": 1.2165313959121704, "learning_rate": 8.108983150881941e-05, "loss": 1.4388, "step": 11400 }, { "epoch": 6.553704767375072, "grad_norm": 1.069848656654358, "learning_rate": 8.105233475038396e-05, "loss": 1.3921, "step": 11410 }, { "epoch": 6.55944859276278, "grad_norm": 1.116459608078003, "learning_rate": 8.101480954197593e-05, "loss": 1.3839, "step": 11420 }, { "epoch": 6.565192418150488, "grad_norm": 1.1783957481384277, "learning_rate": 8.09772559179785e-05, "loss": 1.3942, "step": 11430 }, { "epoch": 6.570936243538196, "grad_norm": 1.275415301322937, "learning_rate": 8.093967391280083e-05, "loss": 1.4167, "step": 11440 }, { "epoch": 6.576680068925905, "grad_norm": 1.173251748085022, "learning_rate": 8.090206356087812e-05, "loss": 1.4299, "step": 11450 }, { "epoch": 6.582423894313613, "grad_norm": 1.3185877799987793, "learning_rate": 8.086442489667155e-05, "loss": 1.4118, "step": 11460 }, { "epoch": 6.588167719701321, "grad_norm": 1.053466796875, "learning_rate": 8.082675795466821e-05, "loss": 1.4127, "step": 11470 }, { "epoch": 6.593911545089029, "grad_norm": 1.1003645658493042, "learning_rate": 8.078906276938113e-05, "loss": 1.4135, "step": 11480 }, { "epoch": 6.599655370476738, "grad_norm": 1.1019959449768066, "learning_rate": 8.075133937534918e-05, "loss": 1.4269, "step": 11490 }, { "epoch": 6.605399195864446, "grad_norm": 1.1494642496109009, "learning_rate": 8.071358780713712e-05, "loss": 1.4661, "step": 11500 }, { "epoch": 6.611143021252154, "grad_norm": 1.0500520467758179, "learning_rate": 8.067580809933553e-05, "loss": 1.4105, "step": 11510 }, { "epoch": 6.616886846639862, "grad_norm": 1.1637636423110962, "learning_rate": 8.063800028656069e-05, "loss": 1.4501, "step": 11520 }, { "epoch": 6.6226306720275705, "grad_norm": 1.1359279155731201, "learning_rate": 8.060016440345477e-05, "loss": 1.4036, "step": 11530 }, { "epoch": 6.628374497415279, "grad_norm": 1.131584644317627, "learning_rate": 8.056230048468549e-05, "loss": 1.3899, "step": 11540 }, { "epoch": 6.634118322802987, "grad_norm": 1.4944665431976318, "learning_rate": 8.052440856494642e-05, "loss": 1.4055, "step": 11550 }, { "epoch": 6.639862148190695, "grad_norm": 1.0443183183670044, "learning_rate": 8.04864886789567e-05, "loss": 1.3999, "step": 11560 }, { "epoch": 6.645605973578403, "grad_norm": 1.1013708114624023, "learning_rate": 8.044854086146111e-05, "loss": 1.3856, "step": 11570 }, { "epoch": 6.651349798966112, "grad_norm": 1.0219786167144775, "learning_rate": 8.041056514723002e-05, "loss": 1.4015, "step": 11580 }, { "epoch": 6.65709362435382, "grad_norm": 1.1121494770050049, "learning_rate": 8.037256157105937e-05, "loss": 1.4039, "step": 11590 }, { "epoch": 6.662837449741528, "grad_norm": 1.2445120811462402, "learning_rate": 8.033453016777061e-05, "loss": 1.4328, "step": 11600 }, { "epoch": 6.668581275129236, "grad_norm": 1.2349894046783447, "learning_rate": 8.029647097221074e-05, "loss": 1.4049, "step": 11610 }, { "epoch": 6.674325100516945, "grad_norm": 1.0118918418884277, "learning_rate": 8.025838401925214e-05, "loss": 1.4108, "step": 11620 }, { "epoch": 6.680068925904653, "grad_norm": 1.2544103860855103, "learning_rate": 8.022026934379267e-05, "loss": 1.4349, "step": 11630 }, { "epoch": 6.685812751292361, "grad_norm": 1.3501166105270386, "learning_rate": 8.018212698075562e-05, "loss": 1.3846, "step": 11640 }, { "epoch": 6.691556576680069, "grad_norm": 1.0783281326293945, "learning_rate": 8.014395696508962e-05, "loss": 1.4359, "step": 11650 }, { "epoch": 6.697300402067777, "grad_norm": 1.0917038917541504, "learning_rate": 8.010575933176861e-05, "loss": 1.4088, "step": 11660 }, { "epoch": 6.703044227455486, "grad_norm": 1.0962666273117065, "learning_rate": 8.006753411579188e-05, "loss": 1.3895, "step": 11670 }, { "epoch": 6.708788052843193, "grad_norm": 1.1106805801391602, "learning_rate": 8.0029281352184e-05, "loss": 1.437, "step": 11680 }, { "epoch": 6.714531878230902, "grad_norm": 1.1627246141433716, "learning_rate": 7.999100107599468e-05, "loss": 1.4392, "step": 11690 }, { "epoch": 6.72027570361861, "grad_norm": 1.0740851163864136, "learning_rate": 7.9952693322299e-05, "loss": 1.4458, "step": 11700 }, { "epoch": 6.726019529006318, "grad_norm": 1.2067056894302368, "learning_rate": 7.991435812619708e-05, "loss": 1.4079, "step": 11710 }, { "epoch": 6.731763354394026, "grad_norm": 1.2740793228149414, "learning_rate": 7.987599552281427e-05, "loss": 1.4369, "step": 11720 }, { "epoch": 6.737507179781734, "grad_norm": 1.2172223329544067, "learning_rate": 7.983760554730097e-05, "loss": 1.3924, "step": 11730 }, { "epoch": 6.7432510051694425, "grad_norm": 1.2037878036499023, "learning_rate": 7.97991882348327e-05, "loss": 1.4303, "step": 11740 }, { "epoch": 6.748994830557151, "grad_norm": 1.1402994394302368, "learning_rate": 7.976074362061002e-05, "loss": 1.3968, "step": 11750 }, { "epoch": 6.754738655944859, "grad_norm": 1.1885806322097778, "learning_rate": 7.97222717398585e-05, "loss": 1.3877, "step": 11760 }, { "epoch": 6.760482481332567, "grad_norm": 1.2236276865005493, "learning_rate": 7.968377262782869e-05, "loss": 1.4241, "step": 11770 }, { "epoch": 6.7662263067202755, "grad_norm": 1.0645301342010498, "learning_rate": 7.964524631979613e-05, "loss": 1.4052, "step": 11780 }, { "epoch": 6.771970132107984, "grad_norm": 1.2329813241958618, "learning_rate": 7.96066928510612e-05, "loss": 1.4396, "step": 11790 }, { "epoch": 6.777713957495692, "grad_norm": 1.2357081174850464, "learning_rate": 7.956811225694923e-05, "loss": 1.4165, "step": 11800 }, { "epoch": 6.7834577828834, "grad_norm": 1.0434062480926514, "learning_rate": 7.95295045728104e-05, "loss": 1.3868, "step": 11810 }, { "epoch": 6.789201608271108, "grad_norm": 1.1681947708129883, "learning_rate": 7.94908698340197e-05, "loss": 1.4041, "step": 11820 }, { "epoch": 6.794945433658817, "grad_norm": 1.2732667922973633, "learning_rate": 7.94522080759769e-05, "loss": 1.4259, "step": 11830 }, { "epoch": 6.800689259046525, "grad_norm": 1.2895413637161255, "learning_rate": 7.941351933410653e-05, "loss": 1.459, "step": 11840 }, { "epoch": 6.806433084434233, "grad_norm": 1.2393548488616943, "learning_rate": 7.937480364385786e-05, "loss": 1.4179, "step": 11850 }, { "epoch": 6.812176909821941, "grad_norm": 1.0824437141418457, "learning_rate": 7.93360610407048e-05, "loss": 1.4153, "step": 11860 }, { "epoch": 6.81792073520965, "grad_norm": 1.2340294122695923, "learning_rate": 7.929729156014603e-05, "loss": 1.4088, "step": 11870 }, { "epoch": 6.823664560597358, "grad_norm": 1.272808313369751, "learning_rate": 7.925849523770473e-05, "loss": 1.3998, "step": 11880 }, { "epoch": 6.829408385985066, "grad_norm": 1.286903738975525, "learning_rate": 7.921967210892876e-05, "loss": 1.4248, "step": 11890 }, { "epoch": 6.835152211372774, "grad_norm": 1.2210613489151, "learning_rate": 7.918082220939052e-05, "loss": 1.4137, "step": 11900 }, { "epoch": 6.8408960367604825, "grad_norm": 1.1565394401550293, "learning_rate": 7.914194557468692e-05, "loss": 1.4255, "step": 11910 }, { "epoch": 6.846639862148191, "grad_norm": 1.2321792840957642, "learning_rate": 7.910304224043937e-05, "loss": 1.4136, "step": 11920 }, { "epoch": 6.852383687535899, "grad_norm": 1.2548294067382812, "learning_rate": 7.906411224229376e-05, "loss": 1.3967, "step": 11930 }, { "epoch": 6.858127512923607, "grad_norm": 1.1759604215621948, "learning_rate": 7.902515561592043e-05, "loss": 1.4115, "step": 11940 }, { "epoch": 6.8638713383113155, "grad_norm": 1.1248717308044434, "learning_rate": 7.898617239701406e-05, "loss": 1.4013, "step": 11950 }, { "epoch": 6.869615163699024, "grad_norm": 1.103611707687378, "learning_rate": 7.894716262129374e-05, "loss": 1.4334, "step": 11960 }, { "epoch": 6.875358989086732, "grad_norm": 1.149695873260498, "learning_rate": 7.89081263245029e-05, "loss": 1.4218, "step": 11970 }, { "epoch": 6.88110281447444, "grad_norm": 1.1804563999176025, "learning_rate": 7.886906354240922e-05, "loss": 1.389, "step": 11980 }, { "epoch": 6.886846639862148, "grad_norm": 1.1929295063018799, "learning_rate": 7.88299743108047e-05, "loss": 1.4092, "step": 11990 }, { "epoch": 6.892590465249857, "grad_norm": 1.173554539680481, "learning_rate": 7.879085866550556e-05, "loss": 1.4197, "step": 12000 }, { "epoch": 6.892590465249857, "eval_loss": 1.1182571649551392, "eval_runtime": 121.6129, "eval_samples_per_second": 13.082, "eval_steps_per_second": 0.14, "eval_wer": 0.09721437450559385, "step": 12000 }, { "epoch": 6.898334290637565, "grad_norm": 1.0430973768234253, "learning_rate": 7.87517166423522e-05, "loss": 1.4079, "step": 12010 }, { "epoch": 6.904078116025273, "grad_norm": 1.3011510372161865, "learning_rate": 7.871254827720923e-05, "loss": 1.4069, "step": 12020 }, { "epoch": 6.909821941412981, "grad_norm": 1.1980189085006714, "learning_rate": 7.867335360596533e-05, "loss": 1.4106, "step": 12030 }, { "epoch": 6.91556576680069, "grad_norm": 1.1535495519638062, "learning_rate": 7.86341326645334e-05, "loss": 1.4081, "step": 12040 }, { "epoch": 6.921309592188398, "grad_norm": 1.1565262079238892, "learning_rate": 7.859488548885025e-05, "loss": 1.4032, "step": 12050 }, { "epoch": 6.927053417576106, "grad_norm": 1.3801511526107788, "learning_rate": 7.855561211487689e-05, "loss": 1.4218, "step": 12060 }, { "epoch": 6.932797242963814, "grad_norm": 1.2260046005249023, "learning_rate": 7.851631257859821e-05, "loss": 1.4043, "step": 12070 }, { "epoch": 6.9385410683515225, "grad_norm": 1.1848559379577637, "learning_rate": 7.847698691602313e-05, "loss": 1.4156, "step": 12080 }, { "epoch": 6.94428489373923, "grad_norm": 1.0401930809020996, "learning_rate": 7.843763516318452e-05, "loss": 1.4102, "step": 12090 }, { "epoch": 6.950028719126939, "grad_norm": 1.0696467161178589, "learning_rate": 7.839825735613912e-05, "loss": 1.4025, "step": 12100 }, { "epoch": 6.955772544514646, "grad_norm": 1.1536978483200073, "learning_rate": 7.835885353096754e-05, "loss": 1.4244, "step": 12110 }, { "epoch": 6.9615163699023554, "grad_norm": 1.219382405281067, "learning_rate": 7.831942372377428e-05, "loss": 1.4064, "step": 12120 }, { "epoch": 6.967260195290063, "grad_norm": 1.1497808694839478, "learning_rate": 7.827996797068761e-05, "loss": 1.393, "step": 12130 }, { "epoch": 6.973004020677771, "grad_norm": 1.14622163772583, "learning_rate": 7.824048630785957e-05, "loss": 1.406, "step": 12140 }, { "epoch": 6.978747846065479, "grad_norm": 1.0015634298324585, "learning_rate": 7.820097877146592e-05, "loss": 1.4164, "step": 12150 }, { "epoch": 6.9844916714531875, "grad_norm": 1.023869276046753, "learning_rate": 7.81614453977062e-05, "loss": 1.4072, "step": 12160 }, { "epoch": 6.990235496840896, "grad_norm": 1.4680800437927246, "learning_rate": 7.812188622280356e-05, "loss": 1.4124, "step": 12170 }, { "epoch": 6.995979322228604, "grad_norm": 1.2094202041625977, "learning_rate": 7.80823012830048e-05, "loss": 1.4156, "step": 12180 }, { "epoch": 7.001723147616312, "grad_norm": 1.3978065252304077, "learning_rate": 7.804269061458034e-05, "loss": 1.3972, "step": 12190 }, { "epoch": 7.00746697300402, "grad_norm": 1.075047254562378, "learning_rate": 7.80030542538242e-05, "loss": 1.3765, "step": 12200 }, { "epoch": 7.013210798391729, "grad_norm": 1.1227922439575195, "learning_rate": 7.796339223705387e-05, "loss": 1.3724, "step": 12210 }, { "epoch": 7.018954623779437, "grad_norm": 1.2202383279800415, "learning_rate": 7.792370460061042e-05, "loss": 1.4039, "step": 12220 }, { "epoch": 7.024698449167145, "grad_norm": 1.3242013454437256, "learning_rate": 7.788399138085833e-05, "loss": 1.3884, "step": 12230 }, { "epoch": 7.030442274554853, "grad_norm": 1.0280619859695435, "learning_rate": 7.784425261418559e-05, "loss": 1.3754, "step": 12240 }, { "epoch": 7.036186099942562, "grad_norm": 1.0936064720153809, "learning_rate": 7.780448833700355e-05, "loss": 1.3697, "step": 12250 }, { "epoch": 7.04192992533027, "grad_norm": 1.0650871992111206, "learning_rate": 7.776469858574696e-05, "loss": 1.378, "step": 12260 }, { "epoch": 7.047673750717978, "grad_norm": 1.0535706281661987, "learning_rate": 7.772488339687388e-05, "loss": 1.3513, "step": 12270 }, { "epoch": 7.053417576105686, "grad_norm": 1.1385856866836548, "learning_rate": 7.768504280686572e-05, "loss": 1.3958, "step": 12280 }, { "epoch": 7.0591614014933945, "grad_norm": 1.122355341911316, "learning_rate": 7.764517685222711e-05, "loss": 1.3945, "step": 12290 }, { "epoch": 7.064905226881103, "grad_norm": 1.0148357152938843, "learning_rate": 7.760528556948596e-05, "loss": 1.3657, "step": 12300 }, { "epoch": 7.070649052268811, "grad_norm": 1.0328540802001953, "learning_rate": 7.756536899519342e-05, "loss": 1.3798, "step": 12310 }, { "epoch": 7.076392877656519, "grad_norm": 1.5095750093460083, "learning_rate": 7.752542716592373e-05, "loss": 1.3685, "step": 12320 }, { "epoch": 7.0821367030442275, "grad_norm": 1.110021948814392, "learning_rate": 7.74854601182743e-05, "loss": 1.3828, "step": 12330 }, { "epoch": 7.087880528431936, "grad_norm": 1.125283122062683, "learning_rate": 7.744546788886571e-05, "loss": 1.3941, "step": 12340 }, { "epoch": 7.093624353819644, "grad_norm": 1.1747839450836182, "learning_rate": 7.740545051434153e-05, "loss": 1.388, "step": 12350 }, { "epoch": 7.099368179207352, "grad_norm": 1.0587904453277588, "learning_rate": 7.736540803136842e-05, "loss": 1.4007, "step": 12360 }, { "epoch": 7.10511200459506, "grad_norm": 1.113619089126587, "learning_rate": 7.732534047663602e-05, "loss": 1.3636, "step": 12370 }, { "epoch": 7.110855829982769, "grad_norm": 1.1534483432769775, "learning_rate": 7.728524788685693e-05, "loss": 1.3919, "step": 12380 }, { "epoch": 7.116599655370477, "grad_norm": 1.146600604057312, "learning_rate": 7.724513029876675e-05, "loss": 1.3698, "step": 12390 }, { "epoch": 7.122343480758185, "grad_norm": 1.0434560775756836, "learning_rate": 7.720498774912392e-05, "loss": 1.3593, "step": 12400 }, { "epoch": 7.128087306145893, "grad_norm": 1.1574029922485352, "learning_rate": 7.716482027470979e-05, "loss": 1.3682, "step": 12410 }, { "epoch": 7.133831131533602, "grad_norm": 1.0996589660644531, "learning_rate": 7.712462791232853e-05, "loss": 1.3859, "step": 12420 }, { "epoch": 7.13957495692131, "grad_norm": 1.1369915008544922, "learning_rate": 7.708441069880713e-05, "loss": 1.3605, "step": 12430 }, { "epoch": 7.145318782309018, "grad_norm": 1.1563830375671387, "learning_rate": 7.704416867099529e-05, "loss": 1.3888, "step": 12440 }, { "epoch": 7.151062607696726, "grad_norm": 1.1287845373153687, "learning_rate": 7.700390186576557e-05, "loss": 1.3853, "step": 12450 }, { "epoch": 7.1568064330844345, "grad_norm": 1.137179970741272, "learning_rate": 7.696361032001312e-05, "loss": 1.358, "step": 12460 }, { "epoch": 7.162550258472143, "grad_norm": 1.1719515323638916, "learning_rate": 7.692329407065577e-05, "loss": 1.4013, "step": 12470 }, { "epoch": 7.168294083859851, "grad_norm": 1.154930591583252, "learning_rate": 7.688295315463408e-05, "loss": 1.396, "step": 12480 }, { "epoch": 7.174037909247559, "grad_norm": 1.2370989322662354, "learning_rate": 7.684258760891108e-05, "loss": 1.3715, "step": 12490 }, { "epoch": 7.1797817346352675, "grad_norm": 1.2117834091186523, "learning_rate": 7.680219747047246e-05, "loss": 1.3602, "step": 12500 }, { "epoch": 7.185525560022976, "grad_norm": 1.1284922361373901, "learning_rate": 7.67617827763264e-05, "loss": 1.3726, "step": 12510 }, { "epoch": 7.191269385410684, "grad_norm": 1.17840576171875, "learning_rate": 7.672134356350363e-05, "loss": 1.3888, "step": 12520 }, { "epoch": 7.197013210798392, "grad_norm": 1.1904903650283813, "learning_rate": 7.668087986905727e-05, "loss": 1.3726, "step": 12530 }, { "epoch": 7.2027570361860995, "grad_norm": 1.0724328756332397, "learning_rate": 7.664039173006294e-05, "loss": 1.3864, "step": 12540 }, { "epoch": 7.208500861573808, "grad_norm": 1.023146629333496, "learning_rate": 7.65998791836186e-05, "loss": 1.3768, "step": 12550 }, { "epoch": 7.214244686961516, "grad_norm": 1.0206445455551147, "learning_rate": 7.655934226684462e-05, "loss": 1.3517, "step": 12560 }, { "epoch": 7.219988512349224, "grad_norm": 1.1982409954071045, "learning_rate": 7.65187810168837e-05, "loss": 1.3862, "step": 12570 }, { "epoch": 7.2257323377369325, "grad_norm": 1.196985125541687, "learning_rate": 7.64781954709008e-05, "loss": 1.3635, "step": 12580 }, { "epoch": 7.231476163124641, "grad_norm": 1.1955093145370483, "learning_rate": 7.643758566608315e-05, "loss": 1.3917, "step": 12590 }, { "epoch": 7.237219988512349, "grad_norm": 1.1634180545806885, "learning_rate": 7.639695163964022e-05, "loss": 1.3564, "step": 12600 }, { "epoch": 7.242963813900057, "grad_norm": 1.094857931137085, "learning_rate": 7.63562934288037e-05, "loss": 1.3526, "step": 12610 }, { "epoch": 7.248707639287765, "grad_norm": 1.1216139793395996, "learning_rate": 7.631561107082742e-05, "loss": 1.3768, "step": 12620 }, { "epoch": 7.254451464675474, "grad_norm": 1.0748705863952637, "learning_rate": 7.627490460298727e-05, "loss": 1.3686, "step": 12630 }, { "epoch": 7.260195290063182, "grad_norm": 1.1522833108901978, "learning_rate": 7.62341740625813e-05, "loss": 1.3797, "step": 12640 }, { "epoch": 7.26593911545089, "grad_norm": 1.073476791381836, "learning_rate": 7.619341948692963e-05, "loss": 1.3928, "step": 12650 }, { "epoch": 7.271682940838598, "grad_norm": 1.4424747228622437, "learning_rate": 7.615264091337439e-05, "loss": 1.4032, "step": 12660 }, { "epoch": 7.277426766226307, "grad_norm": 1.2498866319656372, "learning_rate": 7.611183837927965e-05, "loss": 1.3965, "step": 12670 }, { "epoch": 7.283170591614015, "grad_norm": 1.1749467849731445, "learning_rate": 7.607101192203147e-05, "loss": 1.3824, "step": 12680 }, { "epoch": 7.288914417001723, "grad_norm": 1.092786431312561, "learning_rate": 7.603016157903784e-05, "loss": 1.3659, "step": 12690 }, { "epoch": 7.294658242389431, "grad_norm": 1.1371842622756958, "learning_rate": 7.598928738772864e-05, "loss": 1.3611, "step": 12700 }, { "epoch": 7.3004020677771395, "grad_norm": 1.2139581441879272, "learning_rate": 7.594838938555556e-05, "loss": 1.3588, "step": 12710 }, { "epoch": 7.306145893164848, "grad_norm": 1.2846897840499878, "learning_rate": 7.590746760999217e-05, "loss": 1.3669, "step": 12720 }, { "epoch": 7.311889718552556, "grad_norm": 1.1145058870315552, "learning_rate": 7.586652209853375e-05, "loss": 1.3883, "step": 12730 }, { "epoch": 7.317633543940264, "grad_norm": 1.1779024600982666, "learning_rate": 7.582555288869739e-05, "loss": 1.383, "step": 12740 }, { "epoch": 7.3233773693279725, "grad_norm": 1.118118405342102, "learning_rate": 7.578456001802186e-05, "loss": 1.3913, "step": 12750 }, { "epoch": 7.329121194715681, "grad_norm": 1.2149940729141235, "learning_rate": 7.574354352406761e-05, "loss": 1.3578, "step": 12760 }, { "epoch": 7.334865020103389, "grad_norm": 1.2460076808929443, "learning_rate": 7.570250344441676e-05, "loss": 1.3992, "step": 12770 }, { "epoch": 7.340608845491097, "grad_norm": 1.1166988611221313, "learning_rate": 7.566143981667302e-05, "loss": 1.37, "step": 12780 }, { "epoch": 7.346352670878805, "grad_norm": 1.0964570045471191, "learning_rate": 7.562035267846168e-05, "loss": 1.3553, "step": 12790 }, { "epoch": 7.352096496266514, "grad_norm": 1.0531352758407593, "learning_rate": 7.557924206742957e-05, "loss": 1.3603, "step": 12800 }, { "epoch": 7.357840321654222, "grad_norm": 1.1781939268112183, "learning_rate": 7.553810802124503e-05, "loss": 1.3553, "step": 12810 }, { "epoch": 7.36358414704193, "grad_norm": 1.0957376956939697, "learning_rate": 7.549695057759787e-05, "loss": 1.3904, "step": 12820 }, { "epoch": 7.369327972429638, "grad_norm": 1.3186931610107422, "learning_rate": 7.545576977419938e-05, "loss": 1.395, "step": 12830 }, { "epoch": 7.375071797817347, "grad_norm": 1.1461344957351685, "learning_rate": 7.541456564878216e-05, "loss": 1.3983, "step": 12840 }, { "epoch": 7.380815623205055, "grad_norm": 1.138440728187561, "learning_rate": 7.537333823910026e-05, "loss": 1.3802, "step": 12850 }, { "epoch": 7.386559448592763, "grad_norm": 1.1572725772857666, "learning_rate": 7.533208758292906e-05, "loss": 1.3855, "step": 12860 }, { "epoch": 7.392303273980471, "grad_norm": 1.086646556854248, "learning_rate": 7.529081371806518e-05, "loss": 1.3845, "step": 12870 }, { "epoch": 7.3980470993681795, "grad_norm": 1.1743797063827515, "learning_rate": 7.524951668232659e-05, "loss": 1.3992, "step": 12880 }, { "epoch": 7.403790924755888, "grad_norm": 1.0357474088668823, "learning_rate": 7.52081965135524e-05, "loss": 1.3683, "step": 12890 }, { "epoch": 7.409534750143596, "grad_norm": 1.1882617473602295, "learning_rate": 7.516685324960299e-05, "loss": 1.3967, "step": 12900 }, { "epoch": 7.415278575531304, "grad_norm": 1.2242915630340576, "learning_rate": 7.512548692835985e-05, "loss": 1.3553, "step": 12910 }, { "epoch": 7.421022400919012, "grad_norm": 1.2356122732162476, "learning_rate": 7.508409758772564e-05, "loss": 1.3942, "step": 12920 }, { "epoch": 7.426766226306721, "grad_norm": 1.1368290185928345, "learning_rate": 7.50426852656241e-05, "loss": 1.3956, "step": 12930 }, { "epoch": 7.432510051694429, "grad_norm": 1.145615816116333, "learning_rate": 7.500125e-05, "loss": 1.3913, "step": 12940 }, { "epoch": 7.438253877082136, "grad_norm": 1.1399472951889038, "learning_rate": 7.495979182881917e-05, "loss": 1.366, "step": 12950 }, { "epoch": 7.443997702469845, "grad_norm": 1.1986440420150757, "learning_rate": 7.491831079006838e-05, "loss": 1.386, "step": 12960 }, { "epoch": 7.449741527857553, "grad_norm": 1.173773169517517, "learning_rate": 7.48768069217554e-05, "loss": 1.3902, "step": 12970 }, { "epoch": 7.455485353245261, "grad_norm": 1.2537205219268799, "learning_rate": 7.48352802619089e-05, "loss": 1.3534, "step": 12980 }, { "epoch": 7.461229178632969, "grad_norm": 1.123275637626648, "learning_rate": 7.479373084857845e-05, "loss": 1.3846, "step": 12990 }, { "epoch": 7.466973004020677, "grad_norm": 1.2009204626083374, "learning_rate": 7.475215871983441e-05, "loss": 1.3701, "step": 13000 }, { "epoch": 7.466973004020677, "eval_loss": 1.1157002449035645, "eval_runtime": 120.6065, "eval_samples_per_second": 13.192, "eval_steps_per_second": 0.141, "eval_wer": 0.10080235054808453, "step": 13000 }, { "epoch": 7.472716829408386, "grad_norm": 1.2804807424545288, "learning_rate": 7.471056391376801e-05, "loss": 1.3736, "step": 13010 }, { "epoch": 7.478460654796094, "grad_norm": 1.0862783193588257, "learning_rate": 7.466894646849128e-05, "loss": 1.3777, "step": 13020 }, { "epoch": 7.484204480183802, "grad_norm": 1.2627243995666504, "learning_rate": 7.46273064221369e-05, "loss": 1.4001, "step": 13030 }, { "epoch": 7.48994830557151, "grad_norm": 1.0793190002441406, "learning_rate": 7.458564381285838e-05, "loss": 1.3703, "step": 13040 }, { "epoch": 7.495692130959219, "grad_norm": 1.1711695194244385, "learning_rate": 7.454395867882977e-05, "loss": 1.3924, "step": 13050 }, { "epoch": 7.501435956346927, "grad_norm": 1.0621047019958496, "learning_rate": 7.450225105824585e-05, "loss": 1.3855, "step": 13060 }, { "epoch": 7.507179781734635, "grad_norm": 1.0710511207580566, "learning_rate": 7.446052098932203e-05, "loss": 1.3673, "step": 13070 }, { "epoch": 7.512923607122343, "grad_norm": 1.332257628440857, "learning_rate": 7.441876851029417e-05, "loss": 1.3707, "step": 13080 }, { "epoch": 7.5186674325100515, "grad_norm": 1.1637886762619019, "learning_rate": 7.437699365941878e-05, "loss": 1.4041, "step": 13090 }, { "epoch": 7.52441125789776, "grad_norm": 1.047039270401001, "learning_rate": 7.43351964749728e-05, "loss": 1.3718, "step": 13100 }, { "epoch": 7.530155083285468, "grad_norm": 1.0901583433151245, "learning_rate": 7.429337699525366e-05, "loss": 1.369, "step": 13110 }, { "epoch": 7.535898908673176, "grad_norm": 1.2097355127334595, "learning_rate": 7.425153525857924e-05, "loss": 1.3677, "step": 13120 }, { "epoch": 7.5416427340608845, "grad_norm": 1.2786442041397095, "learning_rate": 7.420967130328776e-05, "loss": 1.3569, "step": 13130 }, { "epoch": 7.547386559448593, "grad_norm": 1.0091259479522705, "learning_rate": 7.416778516773783e-05, "loss": 1.3675, "step": 13140 }, { "epoch": 7.553130384836301, "grad_norm": 1.1223548650741577, "learning_rate": 7.412587689030837e-05, "loss": 1.3816, "step": 13150 }, { "epoch": 7.558874210224009, "grad_norm": 1.2186803817749023, "learning_rate": 7.408394650939861e-05, "loss": 1.3847, "step": 13160 }, { "epoch": 7.564618035611717, "grad_norm": 1.1721726655960083, "learning_rate": 7.404199406342803e-05, "loss": 1.3412, "step": 13170 }, { "epoch": 7.570361860999426, "grad_norm": 1.2012450695037842, "learning_rate": 7.400001959083631e-05, "loss": 1.3685, "step": 13180 }, { "epoch": 7.576105686387134, "grad_norm": 1.3168073892593384, "learning_rate": 7.395802313008331e-05, "loss": 1.3896, "step": 13190 }, { "epoch": 7.581849511774842, "grad_norm": 1.3617326021194458, "learning_rate": 7.391600471964904e-05, "loss": 1.3849, "step": 13200 }, { "epoch": 7.58759333716255, "grad_norm": 1.1132413148880005, "learning_rate": 7.387396439803367e-05, "loss": 1.3531, "step": 13210 }, { "epoch": 7.593337162550259, "grad_norm": 1.1169902086257935, "learning_rate": 7.383190220375736e-05, "loss": 1.3932, "step": 13220 }, { "epoch": 7.599080987937967, "grad_norm": 1.064626693725586, "learning_rate": 7.378981817536036e-05, "loss": 1.3648, "step": 13230 }, { "epoch": 7.604824813325675, "grad_norm": 1.138749122619629, "learning_rate": 7.374771235140295e-05, "loss": 1.3655, "step": 13240 }, { "epoch": 7.610568638713383, "grad_norm": 1.0636377334594727, "learning_rate": 7.370558477046531e-05, "loss": 1.3762, "step": 13250 }, { "epoch": 7.6163124641010915, "grad_norm": 1.1054062843322754, "learning_rate": 7.366343547114764e-05, "loss": 1.3622, "step": 13260 }, { "epoch": 7.6220562894888, "grad_norm": 1.1325266361236572, "learning_rate": 7.362126449206999e-05, "loss": 1.3972, "step": 13270 }, { "epoch": 7.627800114876508, "grad_norm": 1.0639208555221558, "learning_rate": 7.357907187187227e-05, "loss": 1.35, "step": 13280 }, { "epoch": 7.633543940264216, "grad_norm": 1.4457550048828125, "learning_rate": 7.353685764921423e-05, "loss": 1.3779, "step": 13290 }, { "epoch": 7.6392877656519245, "grad_norm": 1.1473275423049927, "learning_rate": 7.349462186277542e-05, "loss": 1.3814, "step": 13300 }, { "epoch": 7.645031591039633, "grad_norm": 1.0643682479858398, "learning_rate": 7.345236455125515e-05, "loss": 1.3857, "step": 13310 }, { "epoch": 7.650775416427341, "grad_norm": 1.2766255140304565, "learning_rate": 7.341008575337244e-05, "loss": 1.372, "step": 13320 }, { "epoch": 7.656519241815049, "grad_norm": 1.0395917892456055, "learning_rate": 7.336778550786598e-05, "loss": 1.378, "step": 13330 }, { "epoch": 7.662263067202757, "grad_norm": 1.2142328023910522, "learning_rate": 7.332546385349418e-05, "loss": 1.3766, "step": 13340 }, { "epoch": 7.668006892590466, "grad_norm": 1.2160910367965698, "learning_rate": 7.328312082903499e-05, "loss": 1.391, "step": 13350 }, { "epoch": 7.673750717978173, "grad_norm": 1.0703709125518799, "learning_rate": 7.324075647328599e-05, "loss": 1.3907, "step": 13360 }, { "epoch": 7.679494543365882, "grad_norm": 1.2247953414916992, "learning_rate": 7.319837082506426e-05, "loss": 1.3909, "step": 13370 }, { "epoch": 7.6852383687535895, "grad_norm": 1.2029789686203003, "learning_rate": 7.315596392320645e-05, "loss": 1.374, "step": 13380 }, { "epoch": 7.690982194141299, "grad_norm": 1.1630489826202393, "learning_rate": 7.31135358065686e-05, "loss": 1.3517, "step": 13390 }, { "epoch": 7.696726019529006, "grad_norm": 1.2949284315109253, "learning_rate": 7.30710865140263e-05, "loss": 1.3885, "step": 13400 }, { "epoch": 7.702469844916714, "grad_norm": 1.1581209897994995, "learning_rate": 7.302861608447447e-05, "loss": 1.3584, "step": 13410 }, { "epoch": 7.708213670304422, "grad_norm": 1.0207194089889526, "learning_rate": 7.298612455682737e-05, "loss": 1.3798, "step": 13420 }, { "epoch": 7.713957495692131, "grad_norm": 1.156020164489746, "learning_rate": 7.294361197001866e-05, "loss": 1.3852, "step": 13430 }, { "epoch": 7.719701321079839, "grad_norm": 1.155132532119751, "learning_rate": 7.290107836300125e-05, "loss": 1.3597, "step": 13440 }, { "epoch": 7.725445146467547, "grad_norm": 1.0250098705291748, "learning_rate": 7.285852377474736e-05, "loss": 1.3889, "step": 13450 }, { "epoch": 7.731188971855255, "grad_norm": 1.0279853343963623, "learning_rate": 7.281594824424838e-05, "loss": 1.3801, "step": 13460 }, { "epoch": 7.736932797242964, "grad_norm": 1.0755505561828613, "learning_rate": 7.277335181051489e-05, "loss": 1.3749, "step": 13470 }, { "epoch": 7.742676622630672, "grad_norm": 1.2912722826004028, "learning_rate": 7.273073451257667e-05, "loss": 1.3713, "step": 13480 }, { "epoch": 7.74842044801838, "grad_norm": 1.7359085083007812, "learning_rate": 7.268809638948258e-05, "loss": 1.3702, "step": 13490 }, { "epoch": 7.754164273406088, "grad_norm": 1.0690027475357056, "learning_rate": 7.264543748030055e-05, "loss": 1.3821, "step": 13500 }, { "epoch": 7.7599080987937965, "grad_norm": 1.088979959487915, "learning_rate": 7.260275782411763e-05, "loss": 1.3624, "step": 13510 }, { "epoch": 7.765651924181505, "grad_norm": 1.086045742034912, "learning_rate": 7.25600574600398e-05, "loss": 1.3799, "step": 13520 }, { "epoch": 7.771395749569213, "grad_norm": 1.0964921712875366, "learning_rate": 7.251733642719202e-05, "loss": 1.3943, "step": 13530 }, { "epoch": 7.777139574956921, "grad_norm": 1.1099073886871338, "learning_rate": 7.247459476471823e-05, "loss": 1.3528, "step": 13540 }, { "epoch": 7.782883400344629, "grad_norm": 1.200293779373169, "learning_rate": 7.243183251178124e-05, "loss": 1.3774, "step": 13550 }, { "epoch": 7.788627225732338, "grad_norm": 1.1068882942199707, "learning_rate": 7.238904970756276e-05, "loss": 1.3762, "step": 13560 }, { "epoch": 7.794371051120046, "grad_norm": 1.117372989654541, "learning_rate": 7.234624639126328e-05, "loss": 1.3914, "step": 13570 }, { "epoch": 7.800114876507754, "grad_norm": 1.1574249267578125, "learning_rate": 7.230342260210213e-05, "loss": 1.3968, "step": 13580 }, { "epoch": 7.805858701895462, "grad_norm": 1.0184029340744019, "learning_rate": 7.226057837931738e-05, "loss": 1.3752, "step": 13590 }, { "epoch": 7.811602527283171, "grad_norm": 1.1285433769226074, "learning_rate": 7.221771376216582e-05, "loss": 1.3646, "step": 13600 }, { "epoch": 7.817346352670879, "grad_norm": 1.1836575269699097, "learning_rate": 7.217482878992293e-05, "loss": 1.3772, "step": 13610 }, { "epoch": 7.823090178058587, "grad_norm": 1.0164090394973755, "learning_rate": 7.213192350188281e-05, "loss": 1.3827, "step": 13620 }, { "epoch": 7.828834003446295, "grad_norm": 1.0922777652740479, "learning_rate": 7.208899793735828e-05, "loss": 1.3673, "step": 13630 }, { "epoch": 7.8345778288340036, "grad_norm": 1.2530337572097778, "learning_rate": 7.20460521356806e-05, "loss": 1.407, "step": 13640 }, { "epoch": 7.840321654221712, "grad_norm": 1.0919013023376465, "learning_rate": 7.200308613619968e-05, "loss": 1.3871, "step": 13650 }, { "epoch": 7.84606547960942, "grad_norm": 1.3356585502624512, "learning_rate": 7.196009997828384e-05, "loss": 1.3814, "step": 13660 }, { "epoch": 7.851809304997128, "grad_norm": 1.0867161750793457, "learning_rate": 7.191709370131999e-05, "loss": 1.3478, "step": 13670 }, { "epoch": 7.8575531303848365, "grad_norm": 1.2120997905731201, "learning_rate": 7.187406734471337e-05, "loss": 1.3885, "step": 13680 }, { "epoch": 7.863296955772545, "grad_norm": 1.1963104009628296, "learning_rate": 7.183102094788767e-05, "loss": 1.3799, "step": 13690 }, { "epoch": 7.869040781160253, "grad_norm": 1.1579991579055786, "learning_rate": 7.178795455028491e-05, "loss": 1.4079, "step": 13700 }, { "epoch": 7.874784606547961, "grad_norm": 1.0706915855407715, "learning_rate": 7.174486819136546e-05, "loss": 1.3736, "step": 13710 }, { "epoch": 7.880528431935669, "grad_norm": 1.2165038585662842, "learning_rate": 7.170176191060802e-05, "loss": 1.389, "step": 13720 }, { "epoch": 7.886272257323378, "grad_norm": 1.141743779182434, "learning_rate": 7.165863574750946e-05, "loss": 1.3772, "step": 13730 }, { "epoch": 7.892016082711086, "grad_norm": 1.0168917179107666, "learning_rate": 7.161548974158489e-05, "loss": 1.3818, "step": 13740 }, { "epoch": 7.897759908098794, "grad_norm": 1.2216793298721313, "learning_rate": 7.157232393236765e-05, "loss": 1.3879, "step": 13750 }, { "epoch": 7.903503733486502, "grad_norm": 1.294527292251587, "learning_rate": 7.152913835940916e-05, "loss": 1.3891, "step": 13760 }, { "epoch": 7.909247558874211, "grad_norm": 1.1675856113433838, "learning_rate": 7.148593306227904e-05, "loss": 1.3709, "step": 13770 }, { "epoch": 7.914991384261919, "grad_norm": 0.9908486008644104, "learning_rate": 7.144270808056487e-05, "loss": 1.3917, "step": 13780 }, { "epoch": 7.920735209649626, "grad_norm": 1.1059294939041138, "learning_rate": 7.139946345387235e-05, "loss": 1.3791, "step": 13790 }, { "epoch": 7.926479035037335, "grad_norm": 1.0860515832901, "learning_rate": 7.135619922182513e-05, "loss": 1.365, "step": 13800 }, { "epoch": 7.932222860425043, "grad_norm": 1.4398434162139893, "learning_rate": 7.131291542406486e-05, "loss": 1.3658, "step": 13810 }, { "epoch": 7.937966685812752, "grad_norm": 0.9924653172492981, "learning_rate": 7.12696121002511e-05, "loss": 1.3943, "step": 13820 }, { "epoch": 7.943710511200459, "grad_norm": 1.16031014919281, "learning_rate": 7.122628929006133e-05, "loss": 1.3712, "step": 13830 }, { "epoch": 7.949454336588167, "grad_norm": 1.2708386182785034, "learning_rate": 7.118294703319081e-05, "loss": 1.3931, "step": 13840 }, { "epoch": 7.955198161975876, "grad_norm": 1.0046013593673706, "learning_rate": 7.113958536935267e-05, "loss": 1.3886, "step": 13850 }, { "epoch": 7.960941987363584, "grad_norm": 1.137477993965149, "learning_rate": 7.109620433827785e-05, "loss": 1.3734, "step": 13860 }, { "epoch": 7.966685812751292, "grad_norm": 1.0881842374801636, "learning_rate": 7.1052803979715e-05, "loss": 1.3964, "step": 13870 }, { "epoch": 7.972429638139, "grad_norm": 1.184719204902649, "learning_rate": 7.100938433343048e-05, "loss": 1.3708, "step": 13880 }, { "epoch": 7.9781734635267085, "grad_norm": 1.147592306137085, "learning_rate": 7.09659454392083e-05, "loss": 1.3708, "step": 13890 }, { "epoch": 7.983917288914417, "grad_norm": 1.2086807489395142, "learning_rate": 7.092248733685015e-05, "loss": 1.3791, "step": 13900 }, { "epoch": 7.989661114302125, "grad_norm": 1.5116052627563477, "learning_rate": 7.087901006617531e-05, "loss": 1.385, "step": 13910 }, { "epoch": 7.995404939689833, "grad_norm": 1.0756481885910034, "learning_rate": 7.083551366702063e-05, "loss": 1.3655, "step": 13920 }, { "epoch": 8.001148765077541, "grad_norm": 1.1795367002487183, "learning_rate": 7.079199817924044e-05, "loss": 1.3561, "step": 13930 }, { "epoch": 8.00689259046525, "grad_norm": 1.1214485168457031, "learning_rate": 7.074846364270659e-05, "loss": 1.3371, "step": 13940 }, { "epoch": 8.012636415852958, "grad_norm": 1.11283278465271, "learning_rate": 7.070491009730841e-05, "loss": 1.3646, "step": 13950 }, { "epoch": 8.018380241240667, "grad_norm": 1.0217725038528442, "learning_rate": 7.066133758295262e-05, "loss": 1.3587, "step": 13960 }, { "epoch": 8.024124066628374, "grad_norm": 1.0658364295959473, "learning_rate": 7.061774613956331e-05, "loss": 1.3336, "step": 13970 }, { "epoch": 8.029867892016084, "grad_norm": 1.2622003555297852, "learning_rate": 7.057413580708195e-05, "loss": 1.34, "step": 13980 }, { "epoch": 8.03561171740379, "grad_norm": 1.1085586547851562, "learning_rate": 7.053050662546728e-05, "loss": 1.3455, "step": 13990 }, { "epoch": 8.0413555427915, "grad_norm": 1.1799372434616089, "learning_rate": 7.048685863469532e-05, "loss": 1.3599, "step": 14000 }, { "epoch": 8.0413555427915, "eval_loss": 1.1006301641464233, "eval_runtime": 121.4589, "eval_samples_per_second": 13.099, "eval_steps_per_second": 0.14, "eval_wer": 0.09602779975138434, "step": 14000 }, { "epoch": 8.047099368179207, "grad_norm": 1.1422802209854126, "learning_rate": 7.044319187475934e-05, "loss": 1.3331, "step": 14010 }, { "epoch": 8.052843193566915, "grad_norm": 1.1073253154754639, "learning_rate": 7.03995063856698e-05, "loss": 1.345, "step": 14020 }, { "epoch": 8.058587018954624, "grad_norm": 1.159033179283142, "learning_rate": 7.035580220745434e-05, "loss": 1.3403, "step": 14030 }, { "epoch": 8.064330844342331, "grad_norm": 1.0331177711486816, "learning_rate": 7.031207938015765e-05, "loss": 1.3428, "step": 14040 }, { "epoch": 8.07007466973004, "grad_norm": 1.0810799598693848, "learning_rate": 7.026833794384161e-05, "loss": 1.3228, "step": 14050 }, { "epoch": 8.075818495117748, "grad_norm": 1.124673843383789, "learning_rate": 7.022457793858509e-05, "loss": 1.3551, "step": 14060 }, { "epoch": 8.081562320505457, "grad_norm": 1.1602224111557007, "learning_rate": 7.018079940448397e-05, "loss": 1.3648, "step": 14070 }, { "epoch": 8.087306145893164, "grad_norm": 1.1123594045639038, "learning_rate": 7.013700238165113e-05, "loss": 1.329, "step": 14080 }, { "epoch": 8.093049971280873, "grad_norm": 1.149477481842041, "learning_rate": 7.00931869102164e-05, "loss": 1.3298, "step": 14090 }, { "epoch": 8.09879379666858, "grad_norm": 1.2626850605010986, "learning_rate": 7.004935303032648e-05, "loss": 1.341, "step": 14100 }, { "epoch": 8.10453762205629, "grad_norm": 1.1603119373321533, "learning_rate": 7.000550078214498e-05, "loss": 1.3471, "step": 14110 }, { "epoch": 8.110281447443997, "grad_norm": 1.1090253591537476, "learning_rate": 6.996163020585227e-05, "loss": 1.3594, "step": 14120 }, { "epoch": 8.116025272831706, "grad_norm": 1.1094486713409424, "learning_rate": 6.99177413416456e-05, "loss": 1.3416, "step": 14130 }, { "epoch": 8.121769098219414, "grad_norm": 1.0558894872665405, "learning_rate": 6.987383422973893e-05, "loss": 1.3412, "step": 14140 }, { "epoch": 8.127512923607123, "grad_norm": 1.117684006690979, "learning_rate": 6.982990891036292e-05, "loss": 1.3558, "step": 14150 }, { "epoch": 8.13325674899483, "grad_norm": 1.1211860179901123, "learning_rate": 6.978596542376496e-05, "loss": 1.3567, "step": 14160 }, { "epoch": 8.139000574382539, "grad_norm": 1.022854208946228, "learning_rate": 6.974200381020905e-05, "loss": 1.3672, "step": 14170 }, { "epoch": 8.144744399770246, "grad_norm": 1.045206904411316, "learning_rate": 6.969802410997584e-05, "loss": 1.3518, "step": 14180 }, { "epoch": 8.150488225157956, "grad_norm": 1.0243618488311768, "learning_rate": 6.965402636336251e-05, "loss": 1.3489, "step": 14190 }, { "epoch": 8.156232050545663, "grad_norm": 1.1970548629760742, "learning_rate": 6.961001061068279e-05, "loss": 1.3567, "step": 14200 }, { "epoch": 8.161975875933372, "grad_norm": 1.0209884643554688, "learning_rate": 6.95659768922669e-05, "loss": 1.3351, "step": 14210 }, { "epoch": 8.16771970132108, "grad_norm": 1.0605586767196655, "learning_rate": 6.952192524846152e-05, "loss": 1.3436, "step": 14220 }, { "epoch": 8.173463526708789, "grad_norm": 1.2847466468811035, "learning_rate": 6.94778557196298e-05, "loss": 1.366, "step": 14230 }, { "epoch": 8.179207352096496, "grad_norm": 1.096878170967102, "learning_rate": 6.943376834615123e-05, "loss": 1.3628, "step": 14240 }, { "epoch": 8.184951177484205, "grad_norm": 1.145160436630249, "learning_rate": 6.938966316842168e-05, "loss": 1.3599, "step": 14250 }, { "epoch": 8.190695002871912, "grad_norm": 1.1982372999191284, "learning_rate": 6.934554022685325e-05, "loss": 1.354, "step": 14260 }, { "epoch": 8.196438828259621, "grad_norm": 1.0458102226257324, "learning_rate": 6.930139956187446e-05, "loss": 1.3514, "step": 14270 }, { "epoch": 8.202182653647329, "grad_norm": 1.0677944421768188, "learning_rate": 6.925724121392997e-05, "loss": 1.3171, "step": 14280 }, { "epoch": 8.207926479035038, "grad_norm": 1.1336259841918945, "learning_rate": 6.921306522348064e-05, "loss": 1.3336, "step": 14290 }, { "epoch": 8.213670304422745, "grad_norm": 1.539734959602356, "learning_rate": 6.916887163100357e-05, "loss": 1.338, "step": 14300 }, { "epoch": 8.219414129810454, "grad_norm": 1.1061655282974243, "learning_rate": 6.912466047699186e-05, "loss": 1.3749, "step": 14310 }, { "epoch": 8.225157955198162, "grad_norm": 1.137969732284546, "learning_rate": 6.908043180195485e-05, "loss": 1.3426, "step": 14320 }, { "epoch": 8.23090178058587, "grad_norm": 1.2803560495376587, "learning_rate": 6.903618564641784e-05, "loss": 1.3476, "step": 14330 }, { "epoch": 8.236645605973578, "grad_norm": 1.003831148147583, "learning_rate": 6.899192205092215e-05, "loss": 1.3461, "step": 14340 }, { "epoch": 8.242389431361287, "grad_norm": 1.07589590549469, "learning_rate": 6.894764105602513e-05, "loss": 1.3623, "step": 14350 }, { "epoch": 8.248133256748995, "grad_norm": 1.1557279825210571, "learning_rate": 6.890334270230005e-05, "loss": 1.3212, "step": 14360 }, { "epoch": 8.253877082136704, "grad_norm": 1.0825499296188354, "learning_rate": 6.885902703033602e-05, "loss": 1.3437, "step": 14370 }, { "epoch": 8.259620907524411, "grad_norm": 1.1142388582229614, "learning_rate": 6.881469408073814e-05, "loss": 1.3402, "step": 14380 }, { "epoch": 8.26536473291212, "grad_norm": 1.1542168855667114, "learning_rate": 6.877034389412724e-05, "loss": 1.3475, "step": 14390 }, { "epoch": 8.271108558299828, "grad_norm": 1.1278069019317627, "learning_rate": 6.872597651114e-05, "loss": 1.3437, "step": 14400 }, { "epoch": 8.276852383687537, "grad_norm": 1.069445013999939, "learning_rate": 6.868159197242884e-05, "loss": 1.3486, "step": 14410 }, { "epoch": 8.282596209075244, "grad_norm": 1.2677907943725586, "learning_rate": 6.863719031866186e-05, "loss": 1.3313, "step": 14420 }, { "epoch": 8.288340034462951, "grad_norm": 1.1928232908248901, "learning_rate": 6.85927715905229e-05, "loss": 1.3499, "step": 14430 }, { "epoch": 8.29408385985066, "grad_norm": 1.2636762857437134, "learning_rate": 6.854833582871145e-05, "loss": 1.3455, "step": 14440 }, { "epoch": 8.29982768523837, "grad_norm": 1.0178658962249756, "learning_rate": 6.850388307394255e-05, "loss": 1.3351, "step": 14450 }, { "epoch": 8.305571510626077, "grad_norm": 1.004676342010498, "learning_rate": 6.845941336694684e-05, "loss": 1.3419, "step": 14460 }, { "epoch": 8.311315336013784, "grad_norm": 0.9737274050712585, "learning_rate": 6.84149267484705e-05, "loss": 1.3074, "step": 14470 }, { "epoch": 8.317059161401493, "grad_norm": 1.2828425168991089, "learning_rate": 6.83704232592752e-05, "loss": 1.3335, "step": 14480 }, { "epoch": 8.3228029867892, "grad_norm": 1.1972343921661377, "learning_rate": 6.832590294013806e-05, "loss": 1.3407, "step": 14490 }, { "epoch": 8.32854681217691, "grad_norm": 1.0773992538452148, "learning_rate": 6.828136583185162e-05, "loss": 1.3352, "step": 14500 }, { "epoch": 8.334290637564617, "grad_norm": 1.2189594507217407, "learning_rate": 6.823681197522385e-05, "loss": 1.368, "step": 14510 }, { "epoch": 8.340034462952326, "grad_norm": 1.0958688259124756, "learning_rate": 6.819224141107798e-05, "loss": 1.3692, "step": 14520 }, { "epoch": 8.345778288340034, "grad_norm": 1.1439151763916016, "learning_rate": 6.814765418025264e-05, "loss": 1.3238, "step": 14530 }, { "epoch": 8.351522113727743, "grad_norm": 1.1483707427978516, "learning_rate": 6.810305032360163e-05, "loss": 1.3634, "step": 14540 }, { "epoch": 8.35726593911545, "grad_norm": 1.0480304956436157, "learning_rate": 6.805842988199407e-05, "loss": 1.364, "step": 14550 }, { "epoch": 8.36300976450316, "grad_norm": 1.1300170421600342, "learning_rate": 6.80137928963143e-05, "loss": 1.3467, "step": 14560 }, { "epoch": 8.368753589890867, "grad_norm": 0.9751035571098328, "learning_rate": 6.796913940746166e-05, "loss": 1.3347, "step": 14570 }, { "epoch": 8.374497415278576, "grad_norm": 1.0773341655731201, "learning_rate": 6.79244694563508e-05, "loss": 1.3369, "step": 14580 }, { "epoch": 8.380241240666283, "grad_norm": 1.176775574684143, "learning_rate": 6.78797830839113e-05, "loss": 1.3142, "step": 14590 }, { "epoch": 8.385985066053992, "grad_norm": 1.1013033390045166, "learning_rate": 6.783508033108794e-05, "loss": 1.3319, "step": 14600 }, { "epoch": 8.3917288914417, "grad_norm": 1.0472511053085327, "learning_rate": 6.779036123884038e-05, "loss": 1.3534, "step": 14610 }, { "epoch": 8.397472716829409, "grad_norm": 1.1459550857543945, "learning_rate": 6.774562584814328e-05, "loss": 1.3646, "step": 14620 }, { "epoch": 8.403216542217116, "grad_norm": 1.0484139919281006, "learning_rate": 6.770087419998629e-05, "loss": 1.3316, "step": 14630 }, { "epoch": 8.408960367604825, "grad_norm": 1.025768518447876, "learning_rate": 6.765610633537389e-05, "loss": 1.3517, "step": 14640 }, { "epoch": 8.414704192992533, "grad_norm": 1.126641035079956, "learning_rate": 6.761132229532544e-05, "loss": 1.3695, "step": 14650 }, { "epoch": 8.420448018380242, "grad_norm": 1.1499069929122925, "learning_rate": 6.756652212087516e-05, "loss": 1.3459, "step": 14660 }, { "epoch": 8.426191843767949, "grad_norm": 1.1583006381988525, "learning_rate": 6.7521705853072e-05, "loss": 1.3364, "step": 14670 }, { "epoch": 8.431935669155658, "grad_norm": 1.2050310373306274, "learning_rate": 6.747687353297966e-05, "loss": 1.3484, "step": 14680 }, { "epoch": 8.437679494543366, "grad_norm": 1.1018375158309937, "learning_rate": 6.74320252016766e-05, "loss": 1.3483, "step": 14690 }, { "epoch": 8.443423319931075, "grad_norm": 1.1013386249542236, "learning_rate": 6.738716090025588e-05, "loss": 1.3399, "step": 14700 }, { "epoch": 8.449167145318782, "grad_norm": 1.1075743436813354, "learning_rate": 6.734228066982524e-05, "loss": 1.3791, "step": 14710 }, { "epoch": 8.454910970706491, "grad_norm": 1.0828742980957031, "learning_rate": 6.729738455150701e-05, "loss": 1.329, "step": 14720 }, { "epoch": 8.460654796094198, "grad_norm": 1.0898609161376953, "learning_rate": 6.725247258643807e-05, "loss": 1.364, "step": 14730 }, { "epoch": 8.466398621481908, "grad_norm": 1.008550763130188, "learning_rate": 6.72075448157698e-05, "loss": 1.3632, "step": 14740 }, { "epoch": 8.472142446869615, "grad_norm": 1.004242181777954, "learning_rate": 6.716260128066811e-05, "loss": 1.3539, "step": 14750 }, { "epoch": 8.477886272257324, "grad_norm": 1.013573169708252, "learning_rate": 6.711764202231331e-05, "loss": 1.3234, "step": 14760 }, { "epoch": 8.483630097645031, "grad_norm": 1.096097469329834, "learning_rate": 6.707266708190013e-05, "loss": 1.3556, "step": 14770 }, { "epoch": 8.48937392303274, "grad_norm": 1.1716398000717163, "learning_rate": 6.702767650063769e-05, "loss": 1.3345, "step": 14780 }, { "epoch": 8.495117748420448, "grad_norm": 1.0597960948944092, "learning_rate": 6.69826703197494e-05, "loss": 1.3155, "step": 14790 }, { "epoch": 8.500861573808157, "grad_norm": 1.250192642211914, "learning_rate": 6.693764858047302e-05, "loss": 1.3633, "step": 14800 }, { "epoch": 8.506605399195864, "grad_norm": 1.2764009237289429, "learning_rate": 6.68926113240605e-05, "loss": 1.3477, "step": 14810 }, { "epoch": 8.512349224583573, "grad_norm": 1.1136800050735474, "learning_rate": 6.684755859177808e-05, "loss": 1.3344, "step": 14820 }, { "epoch": 8.51809304997128, "grad_norm": 1.1659847497940063, "learning_rate": 6.680249042490608e-05, "loss": 1.3517, "step": 14830 }, { "epoch": 8.523836875358988, "grad_norm": 1.0530874729156494, "learning_rate": 6.675740686473907e-05, "loss": 1.3518, "step": 14840 }, { "epoch": 8.529580700746697, "grad_norm": 1.1067551374435425, "learning_rate": 6.671230795258567e-05, "loss": 1.3405, "step": 14850 }, { "epoch": 8.535324526134406, "grad_norm": 1.2300156354904175, "learning_rate": 6.666719372976855e-05, "loss": 1.3534, "step": 14860 }, { "epoch": 8.541068351522114, "grad_norm": 1.2539186477661133, "learning_rate": 6.662206423762446e-05, "loss": 1.3445, "step": 14870 }, { "epoch": 8.546812176909821, "grad_norm": 1.1507915258407593, "learning_rate": 6.657691951750411e-05, "loss": 1.3546, "step": 14880 }, { "epoch": 8.55255600229753, "grad_norm": 1.2795406579971313, "learning_rate": 6.653175961077215e-05, "loss": 1.3366, "step": 14890 }, { "epoch": 8.558299827685238, "grad_norm": 1.1173717975616455, "learning_rate": 6.648658455880719e-05, "loss": 1.346, "step": 14900 }, { "epoch": 8.564043653072947, "grad_norm": 4.2965850830078125, "learning_rate": 6.644139440300167e-05, "loss": 1.353, "step": 14910 }, { "epoch": 8.569787478460654, "grad_norm": 1.0641264915466309, "learning_rate": 6.639618918476186e-05, "loss": 1.3363, "step": 14920 }, { "epoch": 8.575531303848363, "grad_norm": 1.0559968948364258, "learning_rate": 6.635096894550791e-05, "loss": 1.3534, "step": 14930 }, { "epoch": 8.58127512923607, "grad_norm": 1.0902478694915771, "learning_rate": 6.630573372667365e-05, "loss": 1.341, "step": 14940 }, { "epoch": 8.58701895462378, "grad_norm": 1.1054389476776123, "learning_rate": 6.626048356970668e-05, "loss": 1.354, "step": 14950 }, { "epoch": 8.592762780011487, "grad_norm": 1.1799063682556152, "learning_rate": 6.621521851606825e-05, "loss": 1.3476, "step": 14960 }, { "epoch": 8.598506605399196, "grad_norm": 1.1583868265151978, "learning_rate": 6.616993860723331e-05, "loss": 1.3392, "step": 14970 }, { "epoch": 8.604250430786903, "grad_norm": 1.185685157775879, "learning_rate": 6.61246438846904e-05, "loss": 1.3383, "step": 14980 }, { "epoch": 8.609994256174613, "grad_norm": 1.0549464225769043, "learning_rate": 6.607933438994163e-05, "loss": 1.3372, "step": 14990 }, { "epoch": 8.61573808156232, "grad_norm": 1.0891073942184448, "learning_rate": 6.60340101645026e-05, "loss": 1.3717, "step": 15000 }, { "epoch": 8.61573808156232, "eval_loss": 1.0906370878219604, "eval_runtime": 121.5334, "eval_samples_per_second": 13.091, "eval_steps_per_second": 0.14, "eval_wer": 0.09260933438806644, "step": 15000 }, { "epoch": 8.621481906950029, "grad_norm": 1.081456184387207, "learning_rate": 6.59886712499025e-05, "loss": 1.358, "step": 15010 }, { "epoch": 8.627225732337736, "grad_norm": 1.0776183605194092, "learning_rate": 6.594331768768391e-05, "loss": 1.3478, "step": 15020 }, { "epoch": 8.632969557725445, "grad_norm": 1.1232571601867676, "learning_rate": 6.589794951940287e-05, "loss": 1.3478, "step": 15030 }, { "epoch": 8.638713383113153, "grad_norm": 1.1927143335342407, "learning_rate": 6.585256678662874e-05, "loss": 1.349, "step": 15040 }, { "epoch": 8.644457208500862, "grad_norm": 1.1092239618301392, "learning_rate": 6.580716953094431e-05, "loss": 1.3498, "step": 15050 }, { "epoch": 8.65020103388857, "grad_norm": 1.1302359104156494, "learning_rate": 6.576175779394563e-05, "loss": 1.3656, "step": 15060 }, { "epoch": 8.655944859276278, "grad_norm": 1.0673636198043823, "learning_rate": 6.571633161724201e-05, "loss": 1.326, "step": 15070 }, { "epoch": 8.661688684663986, "grad_norm": 1.1380208730697632, "learning_rate": 6.567089104245603e-05, "loss": 1.3209, "step": 15080 }, { "epoch": 8.667432510051695, "grad_norm": 1.0629384517669678, "learning_rate": 6.562543611122342e-05, "loss": 1.3368, "step": 15090 }, { "epoch": 8.673176335439402, "grad_norm": 1.0566953420639038, "learning_rate": 6.557996686519308e-05, "loss": 1.3451, "step": 15100 }, { "epoch": 8.678920160827111, "grad_norm": 0.9778392314910889, "learning_rate": 6.553448334602705e-05, "loss": 1.3317, "step": 15110 }, { "epoch": 8.684663986214819, "grad_norm": 1.080062985420227, "learning_rate": 6.54889855954004e-05, "loss": 1.3245, "step": 15120 }, { "epoch": 8.690407811602528, "grad_norm": 1.2787530422210693, "learning_rate": 6.544347365500129e-05, "loss": 1.3575, "step": 15130 }, { "epoch": 8.696151636990235, "grad_norm": 1.3267161846160889, "learning_rate": 6.539794756653084e-05, "loss": 1.3801, "step": 15140 }, { "epoch": 8.701895462377944, "grad_norm": 1.0440847873687744, "learning_rate": 6.535240737170315e-05, "loss": 1.3417, "step": 15150 }, { "epoch": 8.707639287765652, "grad_norm": 1.0616346597671509, "learning_rate": 6.530685311224528e-05, "loss": 1.3575, "step": 15160 }, { "epoch": 8.71338311315336, "grad_norm": 1.0661760568618774, "learning_rate": 6.52612848298971e-05, "loss": 1.3503, "step": 15170 }, { "epoch": 8.719126938541068, "grad_norm": 0.9735297560691833, "learning_rate": 6.52157025664114e-05, "loss": 1.3353, "step": 15180 }, { "epoch": 8.724870763928777, "grad_norm": 1.21027410030365, "learning_rate": 6.517010636355375e-05, "loss": 1.3693, "step": 15190 }, { "epoch": 8.730614589316485, "grad_norm": 1.1968728303909302, "learning_rate": 6.512449626310249e-05, "loss": 1.3519, "step": 15200 }, { "epoch": 8.736358414704194, "grad_norm": 1.0177395343780518, "learning_rate": 6.50788723068487e-05, "loss": 1.3419, "step": 15210 }, { "epoch": 8.742102240091901, "grad_norm": 1.2030179500579834, "learning_rate": 6.503323453659617e-05, "loss": 1.3628, "step": 15220 }, { "epoch": 8.74784606547961, "grad_norm": 1.3232479095458984, "learning_rate": 6.49875829941613e-05, "loss": 1.3451, "step": 15230 }, { "epoch": 8.753589890867318, "grad_norm": 1.21495521068573, "learning_rate": 6.494191772137317e-05, "loss": 1.3345, "step": 15240 }, { "epoch": 8.759333716255027, "grad_norm": 1.1220831871032715, "learning_rate": 6.489623876007341e-05, "loss": 1.3606, "step": 15250 }, { "epoch": 8.765077541642734, "grad_norm": 1.1405380964279175, "learning_rate": 6.485054615211617e-05, "loss": 1.3659, "step": 15260 }, { "epoch": 8.770821367030443, "grad_norm": 1.3863025903701782, "learning_rate": 6.480483993936815e-05, "loss": 1.3354, "step": 15270 }, { "epoch": 8.77656519241815, "grad_norm": 1.0974985361099243, "learning_rate": 6.475912016370849e-05, "loss": 1.3262, "step": 15280 }, { "epoch": 8.782309017805858, "grad_norm": 1.0552657842636108, "learning_rate": 6.471338686702874e-05, "loss": 1.3383, "step": 15290 }, { "epoch": 8.788052843193567, "grad_norm": 1.0709670782089233, "learning_rate": 6.466764009123292e-05, "loss": 1.3478, "step": 15300 }, { "epoch": 8.793796668581276, "grad_norm": 1.1263865232467651, "learning_rate": 6.462187987823726e-05, "loss": 1.3274, "step": 15310 }, { "epoch": 8.799540493968983, "grad_norm": 1.0840257406234741, "learning_rate": 6.457610626997044e-05, "loss": 1.3373, "step": 15320 }, { "epoch": 8.80528431935669, "grad_norm": 1.1156831979751587, "learning_rate": 6.453031930837334e-05, "loss": 1.3546, "step": 15330 }, { "epoch": 8.8110281447444, "grad_norm": 1.123816728591919, "learning_rate": 6.44845190353991e-05, "loss": 1.34, "step": 15340 }, { "epoch": 8.816771970132107, "grad_norm": 1.0983755588531494, "learning_rate": 6.443870549301304e-05, "loss": 1.3571, "step": 15350 }, { "epoch": 8.822515795519816, "grad_norm": 0.9589661955833435, "learning_rate": 6.439287872319264e-05, "loss": 1.331, "step": 15360 }, { "epoch": 8.828259620907524, "grad_norm": 1.1686326265335083, "learning_rate": 6.43470387679275e-05, "loss": 1.3397, "step": 15370 }, { "epoch": 8.834003446295233, "grad_norm": 1.1462116241455078, "learning_rate": 6.430118566921932e-05, "loss": 1.3641, "step": 15380 }, { "epoch": 8.83974727168294, "grad_norm": 1.0272228717803955, "learning_rate": 6.425531946908183e-05, "loss": 1.3476, "step": 15390 }, { "epoch": 8.84549109707065, "grad_norm": 1.0231941938400269, "learning_rate": 6.42094402095408e-05, "loss": 1.3416, "step": 15400 }, { "epoch": 8.851234922458357, "grad_norm": 1.0671708583831787, "learning_rate": 6.416354793263388e-05, "loss": 1.3402, "step": 15410 }, { "epoch": 8.856978747846066, "grad_norm": 1.0501748323440552, "learning_rate": 6.411764268041069e-05, "loss": 1.3456, "step": 15420 }, { "epoch": 8.862722573233773, "grad_norm": 1.1197153329849243, "learning_rate": 6.40717244949328e-05, "loss": 1.3593, "step": 15430 }, { "epoch": 8.868466398621482, "grad_norm": 1.3269212245941162, "learning_rate": 6.402579341827354e-05, "loss": 1.3132, "step": 15440 }, { "epoch": 8.87421022400919, "grad_norm": 1.0807411670684814, "learning_rate": 6.397984949251812e-05, "loss": 1.3615, "step": 15450 }, { "epoch": 8.879954049396899, "grad_norm": 1.0193594694137573, "learning_rate": 6.393389275976345e-05, "loss": 1.3204, "step": 15460 }, { "epoch": 8.885697874784606, "grad_norm": 1.0807517766952515, "learning_rate": 6.388792326211825e-05, "loss": 1.3773, "step": 15470 }, { "epoch": 8.891441700172315, "grad_norm": 1.0580588579177856, "learning_rate": 6.38419410417029e-05, "loss": 1.3398, "step": 15480 }, { "epoch": 8.897185525560023, "grad_norm": 1.0967005491256714, "learning_rate": 6.379594614064942e-05, "loss": 1.3324, "step": 15490 }, { "epoch": 8.902929350947732, "grad_norm": 1.040083646774292, "learning_rate": 6.37499386011015e-05, "loss": 1.3513, "step": 15500 }, { "epoch": 8.908673176335439, "grad_norm": 0.984352707862854, "learning_rate": 6.370391846521436e-05, "loss": 1.3378, "step": 15510 }, { "epoch": 8.914417001723148, "grad_norm": 1.171065330505371, "learning_rate": 6.365788577515481e-05, "loss": 1.3446, "step": 15520 }, { "epoch": 8.920160827110855, "grad_norm": 1.0822973251342773, "learning_rate": 6.361184057310107e-05, "loss": 1.3647, "step": 15530 }, { "epoch": 8.925904652498565, "grad_norm": 1.2326501607894897, "learning_rate": 6.356578290124296e-05, "loss": 1.3636, "step": 15540 }, { "epoch": 8.931648477886272, "grad_norm": 1.1476212739944458, "learning_rate": 6.35197128017816e-05, "loss": 1.3619, "step": 15550 }, { "epoch": 8.937392303273981, "grad_norm": 1.0694681406021118, "learning_rate": 6.347363031692961e-05, "loss": 1.3472, "step": 15560 }, { "epoch": 8.943136128661688, "grad_norm": 1.1286289691925049, "learning_rate": 6.342753548891085e-05, "loss": 1.3603, "step": 15570 }, { "epoch": 8.948879954049398, "grad_norm": 1.201790452003479, "learning_rate": 6.338142835996055e-05, "loss": 1.3251, "step": 15580 }, { "epoch": 8.954623779437105, "grad_norm": 1.1758419275283813, "learning_rate": 6.333530897232523e-05, "loss": 1.3625, "step": 15590 }, { "epoch": 8.960367604824814, "grad_norm": 1.2090002298355103, "learning_rate": 6.328917736826257e-05, "loss": 1.3263, "step": 15600 }, { "epoch": 8.966111430212521, "grad_norm": 1.0079736709594727, "learning_rate": 6.324303359004152e-05, "loss": 1.3249, "step": 15610 }, { "epoch": 8.97185525560023, "grad_norm": 1.1410163640975952, "learning_rate": 6.319687767994212e-05, "loss": 1.361, "step": 15620 }, { "epoch": 8.977599080987938, "grad_norm": 1.176900029182434, "learning_rate": 6.31507096802556e-05, "loss": 1.3425, "step": 15630 }, { "epoch": 8.983342906375647, "grad_norm": 1.163103461265564, "learning_rate": 6.31045296332842e-05, "loss": 1.3184, "step": 15640 }, { "epoch": 8.989086731763354, "grad_norm": 1.1719297170639038, "learning_rate": 6.305833758134121e-05, "loss": 1.3408, "step": 15650 }, { "epoch": 8.994830557151063, "grad_norm": 1.1819090843200684, "learning_rate": 6.301213356675095e-05, "loss": 1.3299, "step": 15660 }, { "epoch": 9.00057438253877, "grad_norm": 1.0912621021270752, "learning_rate": 6.296591763184867e-05, "loss": 1.3424, "step": 15670 }, { "epoch": 9.00631820792648, "grad_norm": 1.0241864919662476, "learning_rate": 6.291968981898058e-05, "loss": 1.298, "step": 15680 }, { "epoch": 9.012062033314187, "grad_norm": 1.1241670846939087, "learning_rate": 6.287345017050372e-05, "loss": 1.305, "step": 15690 }, { "epoch": 9.017805858701896, "grad_norm": 1.0144832134246826, "learning_rate": 6.282719872878604e-05, "loss": 1.3046, "step": 15700 }, { "epoch": 9.023549684089604, "grad_norm": 1.1686733961105347, "learning_rate": 6.278093553620623e-05, "loss": 1.3245, "step": 15710 }, { "epoch": 9.029293509477311, "grad_norm": 1.0735207796096802, "learning_rate": 6.273466063515377e-05, "loss": 1.3124, "step": 15720 }, { "epoch": 9.03503733486502, "grad_norm": 1.058270812034607, "learning_rate": 6.26883740680289e-05, "loss": 1.3206, "step": 15730 }, { "epoch": 9.040781160252727, "grad_norm": 1.0202562808990479, "learning_rate": 6.264207587724253e-05, "loss": 1.3239, "step": 15740 }, { "epoch": 9.046524985640437, "grad_norm": 1.0882097482681274, "learning_rate": 6.259576610521618e-05, "loss": 1.3089, "step": 15750 }, { "epoch": 9.052268811028144, "grad_norm": 1.1244345903396606, "learning_rate": 6.254944479438206e-05, "loss": 1.2978, "step": 15760 }, { "epoch": 9.058012636415853, "grad_norm": 1.029341459274292, "learning_rate": 6.250311198718288e-05, "loss": 1.3062, "step": 15770 }, { "epoch": 9.06375646180356, "grad_norm": 1.1646206378936768, "learning_rate": 6.245676772607191e-05, "loss": 1.3204, "step": 15780 }, { "epoch": 9.06950028719127, "grad_norm": 1.0287197828292847, "learning_rate": 6.241041205351293e-05, "loss": 1.2877, "step": 15790 }, { "epoch": 9.075244112578977, "grad_norm": 0.9335780739784241, "learning_rate": 6.236404501198013e-05, "loss": 1.3241, "step": 15800 }, { "epoch": 9.080987937966686, "grad_norm": 1.033789873123169, "learning_rate": 6.231766664395821e-05, "loss": 1.2982, "step": 15810 }, { "epoch": 9.086731763354393, "grad_norm": 1.0893192291259766, "learning_rate": 6.227127699194215e-05, "loss": 1.3223, "step": 15820 }, { "epoch": 9.092475588742102, "grad_norm": 1.1011155843734741, "learning_rate": 6.22248760984373e-05, "loss": 1.3455, "step": 15830 }, { "epoch": 9.09821941412981, "grad_norm": 1.0940264463424683, "learning_rate": 6.217846400595933e-05, "loss": 1.336, "step": 15840 }, { "epoch": 9.103963239517519, "grad_norm": 0.983381986618042, "learning_rate": 6.213204075703418e-05, "loss": 1.3088, "step": 15850 }, { "epoch": 9.109707064905226, "grad_norm": 1.0434706211090088, "learning_rate": 6.208560639419796e-05, "loss": 1.3096, "step": 15860 }, { "epoch": 9.115450890292935, "grad_norm": 1.1008431911468506, "learning_rate": 6.203916095999702e-05, "loss": 1.3303, "step": 15870 }, { "epoch": 9.121194715680643, "grad_norm": 1.129802942276001, "learning_rate": 6.19927044969878e-05, "loss": 1.2985, "step": 15880 }, { "epoch": 9.126938541068352, "grad_norm": 1.1601494550704956, "learning_rate": 6.194623704773689e-05, "loss": 1.3373, "step": 15890 }, { "epoch": 9.13268236645606, "grad_norm": 1.0910149812698364, "learning_rate": 6.189975865482093e-05, "loss": 1.3125, "step": 15900 }, { "epoch": 9.138426191843768, "grad_norm": 1.0536905527114868, "learning_rate": 6.185326936082659e-05, "loss": 1.3087, "step": 15910 }, { "epoch": 9.144170017231476, "grad_norm": 1.0987095832824707, "learning_rate": 6.180676920835054e-05, "loss": 1.3248, "step": 15920 }, { "epoch": 9.149913842619185, "grad_norm": 1.0822519063949585, "learning_rate": 6.176025823999935e-05, "loss": 1.3066, "step": 15930 }, { "epoch": 9.155657668006892, "grad_norm": 1.00260329246521, "learning_rate": 6.171373649838955e-05, "loss": 1.3176, "step": 15940 }, { "epoch": 9.161401493394601, "grad_norm": 1.4799902439117432, "learning_rate": 6.166720402614756e-05, "loss": 1.3082, "step": 15950 }, { "epoch": 9.167145318782309, "grad_norm": 0.9770128726959229, "learning_rate": 6.162066086590955e-05, "loss": 1.3387, "step": 15960 }, { "epoch": 9.172889144170018, "grad_norm": 1.1276997327804565, "learning_rate": 6.157410706032156e-05, "loss": 1.3504, "step": 15970 }, { "epoch": 9.178632969557725, "grad_norm": 1.053182601928711, "learning_rate": 6.152754265203936e-05, "loss": 1.2998, "step": 15980 }, { "epoch": 9.184376794945434, "grad_norm": 1.073697805404663, "learning_rate": 6.148096768372841e-05, "loss": 1.3045, "step": 15990 }, { "epoch": 9.190120620333142, "grad_norm": 1.1023629903793335, "learning_rate": 6.143438219806388e-05, "loss": 1.3087, "step": 16000 }, { "epoch": 9.190120620333142, "eval_loss": 1.0896512269973755, "eval_runtime": 121.0884, "eval_samples_per_second": 13.139, "eval_steps_per_second": 0.14, "eval_wer": 0.09258108260820432, "step": 16000 }, { "epoch": 9.19586444572085, "grad_norm": 1.108096957206726, "learning_rate": 6.138778623773057e-05, "loss": 1.3235, "step": 16010 }, { "epoch": 9.201608271108558, "grad_norm": 1.04118013381958, "learning_rate": 6.134117984542286e-05, "loss": 1.3386, "step": 16020 }, { "epoch": 9.207352096496267, "grad_norm": 1.0881047248840332, "learning_rate": 6.12945630638447e-05, "loss": 1.3198, "step": 16030 }, { "epoch": 9.213095921883975, "grad_norm": 1.1101092100143433, "learning_rate": 6.124793593570957e-05, "loss": 1.3238, "step": 16040 }, { "epoch": 9.218839747271684, "grad_norm": 1.0963994264602661, "learning_rate": 6.12012985037404e-05, "loss": 1.3178, "step": 16050 }, { "epoch": 9.224583572659391, "grad_norm": 1.1826673746109009, "learning_rate": 6.11546508106696e-05, "loss": 1.2936, "step": 16060 }, { "epoch": 9.2303273980471, "grad_norm": 1.0260616540908813, "learning_rate": 6.110799289923895e-05, "loss": 1.3138, "step": 16070 }, { "epoch": 9.236071223434807, "grad_norm": 1.1229928731918335, "learning_rate": 6.106132481219962e-05, "loss": 1.3185, "step": 16080 }, { "epoch": 9.241815048822517, "grad_norm": 1.058148741722107, "learning_rate": 6.1014646592312064e-05, "loss": 1.3232, "step": 16090 }, { "epoch": 9.247558874210224, "grad_norm": 1.1946823596954346, "learning_rate": 6.096795828234606e-05, "loss": 1.3195, "step": 16100 }, { "epoch": 9.253302699597933, "grad_norm": 1.06914484500885, "learning_rate": 6.092125992508062e-05, "loss": 1.3149, "step": 16110 }, { "epoch": 9.25904652498564, "grad_norm": 1.1394798755645752, "learning_rate": 6.087455156330394e-05, "loss": 1.3232, "step": 16120 }, { "epoch": 9.26479035037335, "grad_norm": 1.1990212202072144, "learning_rate": 6.0827833239813436e-05, "loss": 1.3059, "step": 16130 }, { "epoch": 9.270534175761057, "grad_norm": 1.0347788333892822, "learning_rate": 6.0781104997415594e-05, "loss": 1.3116, "step": 16140 }, { "epoch": 9.276278001148764, "grad_norm": 1.038059115409851, "learning_rate": 6.073436687892601e-05, "loss": 1.2974, "step": 16150 }, { "epoch": 9.282021826536473, "grad_norm": 1.1207138299942017, "learning_rate": 6.068761892716933e-05, "loss": 1.3026, "step": 16160 }, { "epoch": 9.28776565192418, "grad_norm": 1.12576425075531, "learning_rate": 6.0640861184979206e-05, "loss": 1.2916, "step": 16170 }, { "epoch": 9.29350947731189, "grad_norm": 1.0274579524993896, "learning_rate": 6.059409369519827e-05, "loss": 1.3431, "step": 16180 }, { "epoch": 9.299253302699597, "grad_norm": 1.06902277469635, "learning_rate": 6.05473165006781e-05, "loss": 1.31, "step": 16190 }, { "epoch": 9.304997128087306, "grad_norm": 1.0216704607009888, "learning_rate": 6.0500529644279125e-05, "loss": 1.3188, "step": 16200 }, { "epoch": 9.310740953475014, "grad_norm": 1.0098819732666016, "learning_rate": 6.045373316887063e-05, "loss": 1.3026, "step": 16210 }, { "epoch": 9.316484778862723, "grad_norm": 1.030670404434204, "learning_rate": 6.0406927117330766e-05, "loss": 1.3212, "step": 16220 }, { "epoch": 9.32222860425043, "grad_norm": 1.0391238927841187, "learning_rate": 6.0360111532546414e-05, "loss": 1.2987, "step": 16230 }, { "epoch": 9.32797242963814, "grad_norm": 1.1110267639160156, "learning_rate": 6.0313286457413207e-05, "loss": 1.2991, "step": 16240 }, { "epoch": 9.333716255025847, "grad_norm": 1.060103416442871, "learning_rate": 6.026645193483544e-05, "loss": 1.3128, "step": 16250 }, { "epoch": 9.339460080413556, "grad_norm": 1.2157009840011597, "learning_rate": 6.021960800772612e-05, "loss": 1.3369, "step": 16260 }, { "epoch": 9.345203905801263, "grad_norm": 1.030556321144104, "learning_rate": 6.017275471900682e-05, "loss": 1.2989, "step": 16270 }, { "epoch": 9.350947731188972, "grad_norm": 0.9577980041503906, "learning_rate": 6.012589211160774e-05, "loss": 1.3054, "step": 16280 }, { "epoch": 9.35669155657668, "grad_norm": 1.1688345670700073, "learning_rate": 6.0079020228467574e-05, "loss": 1.3202, "step": 16290 }, { "epoch": 9.362435381964389, "grad_norm": 1.1184086799621582, "learning_rate": 6.0032139112533515e-05, "loss": 1.3199, "step": 16300 }, { "epoch": 9.368179207352096, "grad_norm": 0.9758406281471252, "learning_rate": 5.9985248806761275e-05, "loss": 1.2917, "step": 16310 }, { "epoch": 9.373923032739805, "grad_norm": 1.0295311212539673, "learning_rate": 5.99383493541149e-05, "loss": 1.3275, "step": 16320 }, { "epoch": 9.379666858127512, "grad_norm": 1.1117675304412842, "learning_rate": 5.98914407975669e-05, "loss": 1.3239, "step": 16330 }, { "epoch": 9.385410683515222, "grad_norm": 1.0382176637649536, "learning_rate": 5.984452318009808e-05, "loss": 1.3161, "step": 16340 }, { "epoch": 9.391154508902929, "grad_norm": 1.0064224004745483, "learning_rate": 5.979759654469752e-05, "loss": 1.3038, "step": 16350 }, { "epoch": 9.396898334290638, "grad_norm": 1.1555595397949219, "learning_rate": 5.975066093436265e-05, "loss": 1.3321, "step": 16360 }, { "epoch": 9.402642159678345, "grad_norm": 1.2141364812850952, "learning_rate": 5.970371639209902e-05, "loss": 1.3184, "step": 16370 }, { "epoch": 9.408385985066055, "grad_norm": 1.0710093975067139, "learning_rate": 5.965676296092047e-05, "loss": 1.318, "step": 16380 }, { "epoch": 9.414129810453762, "grad_norm": 1.0603036880493164, "learning_rate": 5.9609800683848885e-05, "loss": 1.3227, "step": 16390 }, { "epoch": 9.419873635841471, "grad_norm": 1.0112988948822021, "learning_rate": 5.9562829603914316e-05, "loss": 1.3256, "step": 16400 }, { "epoch": 9.425617461229178, "grad_norm": 1.0558658838272095, "learning_rate": 5.9515849764154884e-05, "loss": 1.3068, "step": 16410 }, { "epoch": 9.431361286616887, "grad_norm": 1.081594705581665, "learning_rate": 5.946886120761669e-05, "loss": 1.3372, "step": 16420 }, { "epoch": 9.437105112004595, "grad_norm": 1.1181234121322632, "learning_rate": 5.9421863977353865e-05, "loss": 1.3136, "step": 16430 }, { "epoch": 9.442848937392304, "grad_norm": 0.9570739269256592, "learning_rate": 5.937485811642846e-05, "loss": 1.3003, "step": 16440 }, { "epoch": 9.448592762780011, "grad_norm": 1.1212276220321655, "learning_rate": 5.9327843667910445e-05, "loss": 1.3098, "step": 16450 }, { "epoch": 9.45433658816772, "grad_norm": 1.1140167713165283, "learning_rate": 5.9280820674877666e-05, "loss": 1.2948, "step": 16460 }, { "epoch": 9.460080413555428, "grad_norm": 1.0425423383712769, "learning_rate": 5.923378918041579e-05, "loss": 1.3161, "step": 16470 }, { "epoch": 9.465824238943137, "grad_norm": 1.1352252960205078, "learning_rate": 5.9186749227618266e-05, "loss": 1.3243, "step": 16480 }, { "epoch": 9.471568064330844, "grad_norm": 1.108464002609253, "learning_rate": 5.91397008595863e-05, "loss": 1.33, "step": 16490 }, { "epoch": 9.477311889718553, "grad_norm": 0.9899983406066895, "learning_rate": 5.909264411942885e-05, "loss": 1.3253, "step": 16500 }, { "epoch": 9.48305571510626, "grad_norm": 1.1405996084213257, "learning_rate": 5.9045579050262446e-05, "loss": 1.3319, "step": 16510 }, { "epoch": 9.48879954049397, "grad_norm": 1.0669441223144531, "learning_rate": 5.8998505695211346e-05, "loss": 1.2855, "step": 16520 }, { "epoch": 9.494543365881677, "grad_norm": 1.0184197425842285, "learning_rate": 5.895142409740735e-05, "loss": 1.3356, "step": 16530 }, { "epoch": 9.500287191269386, "grad_norm": 1.1126103401184082, "learning_rate": 5.8904334299989814e-05, "loss": 1.3243, "step": 16540 }, { "epoch": 9.506031016657094, "grad_norm": 1.1308655738830566, "learning_rate": 5.8857236346105646e-05, "loss": 1.3266, "step": 16550 }, { "epoch": 9.511774842044801, "grad_norm": 1.1014912128448486, "learning_rate": 5.881013027890917e-05, "loss": 1.3201, "step": 16560 }, { "epoch": 9.51751866743251, "grad_norm": 1.0707571506500244, "learning_rate": 5.876301614156219e-05, "loss": 1.3177, "step": 16570 }, { "epoch": 9.52326249282022, "grad_norm": 1.2664343118667603, "learning_rate": 5.871589397723385e-05, "loss": 1.3086, "step": 16580 }, { "epoch": 9.529006318207927, "grad_norm": 1.0274556875228882, "learning_rate": 5.866876382910074e-05, "loss": 1.3111, "step": 16590 }, { "epoch": 9.534750143595634, "grad_norm": 1.0448927879333496, "learning_rate": 5.862162574034668e-05, "loss": 1.3215, "step": 16600 }, { "epoch": 9.540493968983343, "grad_norm": 1.0731481313705444, "learning_rate": 5.8574479754162814e-05, "loss": 1.3249, "step": 16610 }, { "epoch": 9.54623779437105, "grad_norm": 1.2758575677871704, "learning_rate": 5.852732591374748e-05, "loss": 1.3026, "step": 16620 }, { "epoch": 9.55198161975876, "grad_norm": 1.0362504720687866, "learning_rate": 5.848016426230623e-05, "loss": 1.3131, "step": 16630 }, { "epoch": 9.557725445146467, "grad_norm": 1.0237983465194702, "learning_rate": 5.84329948430518e-05, "loss": 1.3221, "step": 16640 }, { "epoch": 9.563469270534176, "grad_norm": 0.9935582876205444, "learning_rate": 5.838581769920404e-05, "loss": 1.3122, "step": 16650 }, { "epoch": 9.569213095921883, "grad_norm": 1.293875813484192, "learning_rate": 5.833863287398983e-05, "loss": 1.3158, "step": 16660 }, { "epoch": 9.574956921309592, "grad_norm": 1.0581437349319458, "learning_rate": 5.829144041064313e-05, "loss": 1.3239, "step": 16670 }, { "epoch": 9.5807007466973, "grad_norm": 1.1205697059631348, "learning_rate": 5.824424035240489e-05, "loss": 1.2931, "step": 16680 }, { "epoch": 9.586444572085009, "grad_norm": 1.2615162134170532, "learning_rate": 5.819703274252302e-05, "loss": 1.3294, "step": 16690 }, { "epoch": 9.592188397472716, "grad_norm": 1.0731533765792847, "learning_rate": 5.8149817624252335e-05, "loss": 1.3256, "step": 16700 }, { "epoch": 9.597932222860425, "grad_norm": 1.0383588075637817, "learning_rate": 5.8102595040854555e-05, "loss": 1.3066, "step": 16710 }, { "epoch": 9.603676048248133, "grad_norm": 0.9928858280181885, "learning_rate": 5.805536503559822e-05, "loss": 1.3356, "step": 16720 }, { "epoch": 9.609419873635842, "grad_norm": 1.0577837228775024, "learning_rate": 5.800812765175867e-05, "loss": 1.3039, "step": 16730 }, { "epoch": 9.61516369902355, "grad_norm": 1.1943581104278564, "learning_rate": 5.7960882932618024e-05, "loss": 1.3196, "step": 16740 }, { "epoch": 9.620907524411258, "grad_norm": 1.1729068756103516, "learning_rate": 5.79136309214651e-05, "loss": 1.3294, "step": 16750 }, { "epoch": 9.626651349798966, "grad_norm": 0.959618091583252, "learning_rate": 5.786637166159541e-05, "loss": 1.3197, "step": 16760 }, { "epoch": 9.632395175186675, "grad_norm": 1.0103988647460938, "learning_rate": 5.7819105196311104e-05, "loss": 1.3049, "step": 16770 }, { "epoch": 9.638139000574382, "grad_norm": 1.177199363708496, "learning_rate": 5.777183156892094e-05, "loss": 1.3141, "step": 16780 }, { "epoch": 9.643882825962091, "grad_norm": 1.1029537916183472, "learning_rate": 5.772455082274024e-05, "loss": 1.3247, "step": 16790 }, { "epoch": 9.649626651349799, "grad_norm": 1.056839108467102, "learning_rate": 5.767726300109083e-05, "loss": 1.3269, "step": 16800 }, { "epoch": 9.655370476737508, "grad_norm": 1.2015800476074219, "learning_rate": 5.7629968147301037e-05, "loss": 1.3033, "step": 16810 }, { "epoch": 9.661114302125215, "grad_norm": 1.1833492517471313, "learning_rate": 5.758266630470562e-05, "loss": 1.3177, "step": 16820 }, { "epoch": 9.666858127512924, "grad_norm": 1.003337025642395, "learning_rate": 5.7535357516645775e-05, "loss": 1.2967, "step": 16830 }, { "epoch": 9.672601952900632, "grad_norm": 1.2526954412460327, "learning_rate": 5.7488041826468994e-05, "loss": 1.319, "step": 16840 }, { "epoch": 9.67834577828834, "grad_norm": 1.2134490013122559, "learning_rate": 5.744071927752915e-05, "loss": 1.3122, "step": 16850 }, { "epoch": 9.684089603676048, "grad_norm": 1.1099965572357178, "learning_rate": 5.739338991318639e-05, "loss": 1.3121, "step": 16860 }, { "epoch": 9.689833429063757, "grad_norm": 1.0549049377441406, "learning_rate": 5.734605377680711e-05, "loss": 1.3161, "step": 16870 }, { "epoch": 9.695577254451464, "grad_norm": 1.1269898414611816, "learning_rate": 5.7298710911763864e-05, "loss": 1.3262, "step": 16880 }, { "epoch": 9.701321079839174, "grad_norm": 1.0524771213531494, "learning_rate": 5.725136136143545e-05, "loss": 1.2991, "step": 16890 }, { "epoch": 9.707064905226881, "grad_norm": 0.9965651631355286, "learning_rate": 5.7204005169206734e-05, "loss": 1.3168, "step": 16900 }, { "epoch": 9.71280873061459, "grad_norm": 1.0607653856277466, "learning_rate": 5.715664237846866e-05, "loss": 1.3158, "step": 16910 }, { "epoch": 9.718552556002297, "grad_norm": 1.0534794330596924, "learning_rate": 5.7109273032618295e-05, "loss": 1.321, "step": 16920 }, { "epoch": 9.724296381390007, "grad_norm": 2.33206844329834, "learning_rate": 5.70618971750586e-05, "loss": 1.325, "step": 16930 }, { "epoch": 9.730040206777714, "grad_norm": 1.0653855800628662, "learning_rate": 5.70145148491986e-05, "loss": 1.3116, "step": 16940 }, { "epoch": 9.735784032165423, "grad_norm": 1.2570234537124634, "learning_rate": 5.69671260984532e-05, "loss": 1.2963, "step": 16950 }, { "epoch": 9.74152785755313, "grad_norm": 1.1149219274520874, "learning_rate": 5.691973096624318e-05, "loss": 1.303, "step": 16960 }, { "epoch": 9.747271682940838, "grad_norm": 1.0425065755844116, "learning_rate": 5.687232949599521e-05, "loss": 1.3001, "step": 16970 }, { "epoch": 9.753015508328547, "grad_norm": 1.1205841302871704, "learning_rate": 5.6824921731141746e-05, "loss": 1.3261, "step": 16980 }, { "epoch": 9.758759333716256, "grad_norm": 1.1084097623825073, "learning_rate": 5.677750771512098e-05, "loss": 1.3308, "step": 16990 }, { "epoch": 9.764503159103963, "grad_norm": 1.1576192378997803, "learning_rate": 5.673008749137688e-05, "loss": 1.3177, "step": 17000 }, { "epoch": 9.764503159103963, "eval_loss": 1.08290433883667, "eval_runtime": 122.388, "eval_samples_per_second": 13.0, "eval_steps_per_second": 0.139, "eval_wer": 0.092298564809583, "step": 17000 }, { "epoch": 9.77024698449167, "grad_norm": 1.1223355531692505, "learning_rate": 5.6682661103359106e-05, "loss": 1.3237, "step": 17010 }, { "epoch": 9.77599080987938, "grad_norm": 1.0730124711990356, "learning_rate": 5.6635228594522904e-05, "loss": 1.3023, "step": 17020 }, { "epoch": 9.781734635267087, "grad_norm": 1.0638706684112549, "learning_rate": 5.6587790008329214e-05, "loss": 1.3234, "step": 17030 }, { "epoch": 9.787478460654796, "grad_norm": 1.168921709060669, "learning_rate": 5.654034538824445e-05, "loss": 1.3079, "step": 17040 }, { "epoch": 9.793222286042504, "grad_norm": 0.980303168296814, "learning_rate": 5.6492894777740624e-05, "loss": 1.3221, "step": 17050 }, { "epoch": 9.798966111430213, "grad_norm": 1.1931145191192627, "learning_rate": 5.644543822029522e-05, "loss": 1.3316, "step": 17060 }, { "epoch": 9.80470993681792, "grad_norm": 1.020251989364624, "learning_rate": 5.6397975759391176e-05, "loss": 1.3134, "step": 17070 }, { "epoch": 9.81045376220563, "grad_norm": 1.1040542125701904, "learning_rate": 5.635050743851681e-05, "loss": 1.3166, "step": 17080 }, { "epoch": 9.816197587593336, "grad_norm": 1.0645848512649536, "learning_rate": 5.630303330116582e-05, "loss": 1.3324, "step": 17090 }, { "epoch": 9.821941412981046, "grad_norm": 1.0187854766845703, "learning_rate": 5.625555339083728e-05, "loss": 1.3228, "step": 17100 }, { "epoch": 9.827685238368753, "grad_norm": 1.0625351667404175, "learning_rate": 5.620806775103549e-05, "loss": 1.3207, "step": 17110 }, { "epoch": 9.833429063756462, "grad_norm": 1.060349464416504, "learning_rate": 5.616057642527003e-05, "loss": 1.3142, "step": 17120 }, { "epoch": 9.83917288914417, "grad_norm": 1.0784462690353394, "learning_rate": 5.6113079457055704e-05, "loss": 1.3071, "step": 17130 }, { "epoch": 9.844916714531879, "grad_norm": 1.173680067062378, "learning_rate": 5.6065576889912433e-05, "loss": 1.3011, "step": 17140 }, { "epoch": 9.850660539919586, "grad_norm": 1.1100621223449707, "learning_rate": 5.6018068767365315e-05, "loss": 1.3288, "step": 17150 }, { "epoch": 9.856404365307295, "grad_norm": 0.999447226524353, "learning_rate": 5.5970555132944544e-05, "loss": 1.3177, "step": 17160 }, { "epoch": 9.862148190695002, "grad_norm": 1.0702630281448364, "learning_rate": 5.592303603018534e-05, "loss": 1.3088, "step": 17170 }, { "epoch": 9.867892016082711, "grad_norm": 1.0947574377059937, "learning_rate": 5.587551150262794e-05, "loss": 1.3184, "step": 17180 }, { "epoch": 9.873635841470419, "grad_norm": 1.118959665298462, "learning_rate": 5.5827981593817546e-05, "loss": 1.316, "step": 17190 }, { "epoch": 9.879379666858128, "grad_norm": 1.065979242324829, "learning_rate": 5.5780446347304296e-05, "loss": 1.304, "step": 17200 }, { "epoch": 9.885123492245835, "grad_norm": 0.9777162671089172, "learning_rate": 5.5732905806643235e-05, "loss": 1.3318, "step": 17210 }, { "epoch": 9.890867317633544, "grad_norm": 1.2175260782241821, "learning_rate": 5.5685360015394205e-05, "loss": 1.3454, "step": 17220 }, { "epoch": 9.896611143021252, "grad_norm": 1.1098296642303467, "learning_rate": 5.563780901712195e-05, "loss": 1.319, "step": 17230 }, { "epoch": 9.902354968408961, "grad_norm": 1.0876413583755493, "learning_rate": 5.559025285539588e-05, "loss": 1.318, "step": 17240 }, { "epoch": 9.908098793796668, "grad_norm": 1.0536487102508545, "learning_rate": 5.554269157379023e-05, "loss": 1.3073, "step": 17250 }, { "epoch": 9.913842619184377, "grad_norm": 1.0888110399246216, "learning_rate": 5.549512521588385e-05, "loss": 1.302, "step": 17260 }, { "epoch": 9.919586444572085, "grad_norm": 1.1653012037277222, "learning_rate": 5.54475538252603e-05, "loss": 1.301, "step": 17270 }, { "epoch": 9.925330269959794, "grad_norm": 1.109753131866455, "learning_rate": 5.539997744550772e-05, "loss": 1.3128, "step": 17280 }, { "epoch": 9.931074095347501, "grad_norm": 1.1051433086395264, "learning_rate": 5.535239612021883e-05, "loss": 1.3564, "step": 17290 }, { "epoch": 9.93681792073521, "grad_norm": 1.4998070001602173, "learning_rate": 5.530480989299087e-05, "loss": 1.2993, "step": 17300 }, { "epoch": 9.942561746122918, "grad_norm": 1.062514305114746, "learning_rate": 5.5257218807425605e-05, "loss": 1.2905, "step": 17310 }, { "epoch": 9.948305571510627, "grad_norm": 1.0827242136001587, "learning_rate": 5.52096229071292e-05, "loss": 1.3256, "step": 17320 }, { "epoch": 9.954049396898334, "grad_norm": 1.0938199758529663, "learning_rate": 5.516202223571225e-05, "loss": 1.3071, "step": 17330 }, { "epoch": 9.959793222286043, "grad_norm": 1.115787148475647, "learning_rate": 5.5114416836789784e-05, "loss": 1.3136, "step": 17340 }, { "epoch": 9.96553704767375, "grad_norm": 1.2033324241638184, "learning_rate": 5.506680675398107e-05, "loss": 1.307, "step": 17350 }, { "epoch": 9.97128087306146, "grad_norm": 1.0258671045303345, "learning_rate": 5.5019192030909704e-05, "loss": 1.2977, "step": 17360 }, { "epoch": 9.977024698449167, "grad_norm": 1.2147308588027954, "learning_rate": 5.497157271120355e-05, "loss": 1.3375, "step": 17370 }, { "epoch": 9.982768523836876, "grad_norm": 1.1705200672149658, "learning_rate": 5.492394883849467e-05, "loss": 1.307, "step": 17380 }, { "epoch": 9.988512349224584, "grad_norm": 1.082763433456421, "learning_rate": 5.4876320456419295e-05, "loss": 1.3192, "step": 17390 }, { "epoch": 9.994256174612293, "grad_norm": 1.194931149482727, "learning_rate": 5.4828687608617815e-05, "loss": 1.3158, "step": 17400 }, { "epoch": 10.0, "grad_norm": 4.827094078063965, "learning_rate": 5.478105033873464e-05, "loss": 1.3096, "step": 17410 }, { "epoch": 10.005743825387707, "grad_norm": 0.9479926824569702, "learning_rate": 5.47334086904183e-05, "loss": 1.2795, "step": 17420 }, { "epoch": 10.011487650775416, "grad_norm": 1.0475188493728638, "learning_rate": 5.4685762707321334e-05, "loss": 1.2963, "step": 17430 }, { "epoch": 10.017231476163124, "grad_norm": 1.038112998008728, "learning_rate": 5.463811243310023e-05, "loss": 1.2875, "step": 17440 }, { "epoch": 10.022975301550833, "grad_norm": 1.0162469148635864, "learning_rate": 5.459045791141541e-05, "loss": 1.2788, "step": 17450 }, { "epoch": 10.02871912693854, "grad_norm": 1.058996319770813, "learning_rate": 5.454279918593117e-05, "loss": 1.2707, "step": 17460 }, { "epoch": 10.03446295232625, "grad_norm": 1.2741267681121826, "learning_rate": 5.4495136300315705e-05, "loss": 1.2925, "step": 17470 }, { "epoch": 10.040206777713957, "grad_norm": 1.1402926445007324, "learning_rate": 5.4447469298241004e-05, "loss": 1.2853, "step": 17480 }, { "epoch": 10.045950603101666, "grad_norm": 0.9862801432609558, "learning_rate": 5.439979822338279e-05, "loss": 1.2848, "step": 17490 }, { "epoch": 10.051694428489373, "grad_norm": 1.089688777923584, "learning_rate": 5.4352123119420594e-05, "loss": 1.3036, "step": 17500 }, { "epoch": 10.057438253877082, "grad_norm": 1.063913106918335, "learning_rate": 5.430444403003752e-05, "loss": 1.2985, "step": 17510 }, { "epoch": 10.06318207926479, "grad_norm": 1.0474162101745605, "learning_rate": 5.425676099892045e-05, "loss": 1.292, "step": 17520 }, { "epoch": 10.068925904652499, "grad_norm": 0.9664213061332703, "learning_rate": 5.4209074069759815e-05, "loss": 1.2663, "step": 17530 }, { "epoch": 10.074669730040206, "grad_norm": 1.0256978273391724, "learning_rate": 5.41613832862496e-05, "loss": 1.2909, "step": 17540 }, { "epoch": 10.080413555427915, "grad_norm": 1.0831928253173828, "learning_rate": 5.4113688692087396e-05, "loss": 1.3059, "step": 17550 }, { "epoch": 10.086157380815623, "grad_norm": 1.110141634941101, "learning_rate": 5.4065990330974194e-05, "loss": 1.2849, "step": 17560 }, { "epoch": 10.091901206203332, "grad_norm": 0.9420299530029297, "learning_rate": 5.40182882466145e-05, "loss": 1.2839, "step": 17570 }, { "epoch": 10.097645031591039, "grad_norm": 0.9652169942855835, "learning_rate": 5.3970582482716215e-05, "loss": 1.2735, "step": 17580 }, { "epoch": 10.103388856978748, "grad_norm": 0.9281041622161865, "learning_rate": 5.392287308299058e-05, "loss": 1.2751, "step": 17590 }, { "epoch": 10.109132682366456, "grad_norm": 1.0035607814788818, "learning_rate": 5.387516009115223e-05, "loss": 1.2781, "step": 17600 }, { "epoch": 10.114876507754165, "grad_norm": 1.1126405000686646, "learning_rate": 5.382744355091904e-05, "loss": 1.3006, "step": 17610 }, { "epoch": 10.120620333141872, "grad_norm": 1.0376912355422974, "learning_rate": 5.3779723506012156e-05, "loss": 1.2762, "step": 17620 }, { "epoch": 10.126364158529581, "grad_norm": 1.0233867168426514, "learning_rate": 5.373200000015592e-05, "loss": 1.2871, "step": 17630 }, { "epoch": 10.132107983917289, "grad_norm": 1.0640654563903809, "learning_rate": 5.3684273077077874e-05, "loss": 1.3048, "step": 17640 }, { "epoch": 10.137851809304998, "grad_norm": 0.9658321142196655, "learning_rate": 5.363654278050868e-05, "loss": 1.2697, "step": 17650 }, { "epoch": 10.143595634692705, "grad_norm": 1.0131279230117798, "learning_rate": 5.358880915418206e-05, "loss": 1.2801, "step": 17660 }, { "epoch": 10.149339460080414, "grad_norm": 1.0024292469024658, "learning_rate": 5.354107224183483e-05, "loss": 1.277, "step": 17670 }, { "epoch": 10.155083285468121, "grad_norm": 0.9937605261802673, "learning_rate": 5.3493332087206805e-05, "loss": 1.3, "step": 17680 }, { "epoch": 10.16082711085583, "grad_norm": 0.9856476783752441, "learning_rate": 5.344558873404073e-05, "loss": 1.2954, "step": 17690 }, { "epoch": 10.166570936243538, "grad_norm": 1.0053565502166748, "learning_rate": 5.339784222608235e-05, "loss": 1.2769, "step": 17700 }, { "epoch": 10.172314761631247, "grad_norm": 1.2905499935150146, "learning_rate": 5.3350092607080284e-05, "loss": 1.3221, "step": 17710 }, { "epoch": 10.178058587018954, "grad_norm": 1.130355954170227, "learning_rate": 5.330233992078593e-05, "loss": 1.2944, "step": 17720 }, { "epoch": 10.183802412406664, "grad_norm": 1.1259716749191284, "learning_rate": 5.325458421095358e-05, "loss": 1.3174, "step": 17730 }, { "epoch": 10.18954623779437, "grad_norm": 1.1254315376281738, "learning_rate": 5.320682552134028e-05, "loss": 1.287, "step": 17740 }, { "epoch": 10.19529006318208, "grad_norm": 1.1372402906417847, "learning_rate": 5.315906389570574e-05, "loss": 1.2957, "step": 17750 }, { "epoch": 10.201033888569787, "grad_norm": 1.0196157693862915, "learning_rate": 5.31112993778125e-05, "loss": 1.2749, "step": 17760 }, { "epoch": 10.206777713957496, "grad_norm": 1.139103651046753, "learning_rate": 5.306353201142558e-05, "loss": 1.2902, "step": 17770 }, { "epoch": 10.212521539345204, "grad_norm": 1.0304359197616577, "learning_rate": 5.3015761840312725e-05, "loss": 1.3036, "step": 17780 }, { "epoch": 10.218265364732913, "grad_norm": 1.0686463117599487, "learning_rate": 5.296798890824423e-05, "loss": 1.2837, "step": 17790 }, { "epoch": 10.22400919012062, "grad_norm": 1.070369005203247, "learning_rate": 5.292021325899289e-05, "loss": 1.2797, "step": 17800 }, { "epoch": 10.22975301550833, "grad_norm": 1.1561923027038574, "learning_rate": 5.2872434936334023e-05, "loss": 1.2914, "step": 17810 }, { "epoch": 10.235496840896037, "grad_norm": 1.128672480583191, "learning_rate": 5.282465398404538e-05, "loss": 1.3241, "step": 17820 }, { "epoch": 10.241240666283744, "grad_norm": 1.048311471939087, "learning_rate": 5.27768704459071e-05, "loss": 1.2763, "step": 17830 }, { "epoch": 10.246984491671453, "grad_norm": 1.0134596824645996, "learning_rate": 5.272908436570173e-05, "loss": 1.3103, "step": 17840 }, { "epoch": 10.25272831705916, "grad_norm": 0.9723391532897949, "learning_rate": 5.2681295787214145e-05, "loss": 1.2743, "step": 17850 }, { "epoch": 10.25847214244687, "grad_norm": 1.0700024366378784, "learning_rate": 5.263350475423149e-05, "loss": 1.2834, "step": 17860 }, { "epoch": 10.264215967834577, "grad_norm": 1.0992634296417236, "learning_rate": 5.258571131054312e-05, "loss": 1.3079, "step": 17870 }, { "epoch": 10.269959793222286, "grad_norm": 1.1849727630615234, "learning_rate": 5.2537915499940684e-05, "loss": 1.2946, "step": 17880 }, { "epoch": 10.275703618609993, "grad_norm": 1.0340335369110107, "learning_rate": 5.249011736621795e-05, "loss": 1.2716, "step": 17890 }, { "epoch": 10.281447443997703, "grad_norm": 0.9856624007225037, "learning_rate": 5.2442316953170826e-05, "loss": 1.2789, "step": 17900 }, { "epoch": 10.28719126938541, "grad_norm": 1.1626960039138794, "learning_rate": 5.2394514304597296e-05, "loss": 1.2809, "step": 17910 }, { "epoch": 10.292935094773119, "grad_norm": 1.1043546199798584, "learning_rate": 5.234670946429739e-05, "loss": 1.3024, "step": 17920 }, { "epoch": 10.298678920160826, "grad_norm": 1.1999893188476562, "learning_rate": 5.2298902476073195e-05, "loss": 1.2967, "step": 17930 }, { "epoch": 10.304422745548536, "grad_norm": 0.993698000907898, "learning_rate": 5.22510933837287e-05, "loss": 1.297, "step": 17940 }, { "epoch": 10.310166570936243, "grad_norm": 1.0862151384353638, "learning_rate": 5.220328223106985e-05, "loss": 1.2756, "step": 17950 }, { "epoch": 10.315910396323952, "grad_norm": 1.1533913612365723, "learning_rate": 5.215546906190448e-05, "loss": 1.31, "step": 17960 }, { "epoch": 10.32165422171166, "grad_norm": 0.9844196438789368, "learning_rate": 5.2107653920042275e-05, "loss": 1.2727, "step": 17970 }, { "epoch": 10.327398047099368, "grad_norm": 1.1508187055587769, "learning_rate": 5.205983684929473e-05, "loss": 1.296, "step": 17980 }, { "epoch": 10.333141872487076, "grad_norm": 1.0531405210494995, "learning_rate": 5.2012017893475096e-05, "loss": 1.2864, "step": 17990 }, { "epoch": 10.338885697874785, "grad_norm": 1.0768994092941284, "learning_rate": 5.196419709639835e-05, "loss": 1.2712, "step": 18000 }, { "epoch": 10.338885697874785, "eval_loss": 1.0807828903198242, "eval_runtime": 121.4371, "eval_samples_per_second": 13.101, "eval_steps_per_second": 0.14, "eval_wer": 0.08964289750254266, "step": 18000 }, { "epoch": 10.344629523262492, "grad_norm": 1.0628842115402222, "learning_rate": 5.191637450188117e-05, "loss": 1.2943, "step": 18010 }, { "epoch": 10.350373348650201, "grad_norm": 1.0401692390441895, "learning_rate": 5.186855015374186e-05, "loss": 1.2962, "step": 18020 }, { "epoch": 10.356117174037909, "grad_norm": 1.1449860334396362, "learning_rate": 5.1820724095800364e-05, "loss": 1.299, "step": 18030 }, { "epoch": 10.361860999425618, "grad_norm": 1.089267373085022, "learning_rate": 5.1772896371878156e-05, "loss": 1.2903, "step": 18040 }, { "epoch": 10.367604824813325, "grad_norm": 1.0510478019714355, "learning_rate": 5.172506702579826e-05, "loss": 1.3014, "step": 18050 }, { "epoch": 10.373348650201034, "grad_norm": 1.0240811109542847, "learning_rate": 5.167723610138516e-05, "loss": 1.295, "step": 18060 }, { "epoch": 10.379092475588742, "grad_norm": 0.9672908782958984, "learning_rate": 5.162940364246485e-05, "loss": 1.2831, "step": 18070 }, { "epoch": 10.38483630097645, "grad_norm": 1.013809323310852, "learning_rate": 5.1581569692864626e-05, "loss": 1.2897, "step": 18080 }, { "epoch": 10.390580126364158, "grad_norm": 0.9808719158172607, "learning_rate": 5.1533734296413275e-05, "loss": 1.271, "step": 18090 }, { "epoch": 10.396323951751867, "grad_norm": 0.8859448432922363, "learning_rate": 5.148589749694079e-05, "loss": 1.2862, "step": 18100 }, { "epoch": 10.402067777139575, "grad_norm": 1.126128077507019, "learning_rate": 5.143805933827853e-05, "loss": 1.2773, "step": 18110 }, { "epoch": 10.407811602527284, "grad_norm": 1.0561703443527222, "learning_rate": 5.1390219864259056e-05, "loss": 1.2862, "step": 18120 }, { "epoch": 10.413555427914991, "grad_norm": 1.0264431238174438, "learning_rate": 5.134237911871619e-05, "loss": 1.2947, "step": 18130 }, { "epoch": 10.4192992533027, "grad_norm": 1.0245630741119385, "learning_rate": 5.129453714548483e-05, "loss": 1.2942, "step": 18140 }, { "epoch": 10.425043078690408, "grad_norm": 0.9584933519363403, "learning_rate": 5.124669398840107e-05, "loss": 1.2655, "step": 18150 }, { "epoch": 10.430786904078117, "grad_norm": 1.1901710033416748, "learning_rate": 5.1198849691302066e-05, "loss": 1.2945, "step": 18160 }, { "epoch": 10.436530729465824, "grad_norm": 1.0407531261444092, "learning_rate": 5.115100429802604e-05, "loss": 1.2935, "step": 18170 }, { "epoch": 10.442274554853533, "grad_norm": 1.032425880432129, "learning_rate": 5.110315785241219e-05, "loss": 1.2774, "step": 18180 }, { "epoch": 10.44801838024124, "grad_norm": 1.0361748933792114, "learning_rate": 5.105531039830066e-05, "loss": 1.2649, "step": 18190 }, { "epoch": 10.45376220562895, "grad_norm": 0.9903116822242737, "learning_rate": 5.1007461979532565e-05, "loss": 1.3001, "step": 18200 }, { "epoch": 10.459506031016657, "grad_norm": 1.1704838275909424, "learning_rate": 5.095961263994987e-05, "loss": 1.292, "step": 18210 }, { "epoch": 10.465249856404366, "grad_norm": 1.066001534461975, "learning_rate": 5.0911762423395435e-05, "loss": 1.3025, "step": 18220 }, { "epoch": 10.470993681792073, "grad_norm": 0.9677980542182922, "learning_rate": 5.086391137371288e-05, "loss": 1.3034, "step": 18230 }, { "epoch": 10.476737507179783, "grad_norm": 1.0306897163391113, "learning_rate": 5.081605953474654e-05, "loss": 1.2585, "step": 18240 }, { "epoch": 10.48248133256749, "grad_norm": 1.119706392288208, "learning_rate": 5.076820695034158e-05, "loss": 1.2699, "step": 18250 }, { "epoch": 10.488225157955199, "grad_norm": 1.152043342590332, "learning_rate": 5.0720353664343764e-05, "loss": 1.2961, "step": 18260 }, { "epoch": 10.493968983342906, "grad_norm": 1.1953966617584229, "learning_rate": 5.067249972059956e-05, "loss": 1.2912, "step": 18270 }, { "epoch": 10.499712808730614, "grad_norm": 1.023740291595459, "learning_rate": 5.062464516295602e-05, "loss": 1.3215, "step": 18280 }, { "epoch": 10.505456634118323, "grad_norm": 1.0794180631637573, "learning_rate": 5.05767900352607e-05, "loss": 1.313, "step": 18290 }, { "epoch": 10.51120045950603, "grad_norm": 1.0531030893325806, "learning_rate": 5.0528934381361734e-05, "loss": 1.3052, "step": 18300 }, { "epoch": 10.51694428489374, "grad_norm": 1.1103686094284058, "learning_rate": 5.0481078245107774e-05, "loss": 1.3096, "step": 18310 }, { "epoch": 10.522688110281447, "grad_norm": 1.1137804985046387, "learning_rate": 5.043322167034783e-05, "loss": 1.2954, "step": 18320 }, { "epoch": 10.528431935669156, "grad_norm": 1.0472280979156494, "learning_rate": 5.038536470093136e-05, "loss": 1.3134, "step": 18330 }, { "epoch": 10.534175761056863, "grad_norm": 1.1429625749588013, "learning_rate": 5.0337507380708204e-05, "loss": 1.3071, "step": 18340 }, { "epoch": 10.539919586444572, "grad_norm": 1.0990170240402222, "learning_rate": 5.0289649753528466e-05, "loss": 1.2762, "step": 18350 }, { "epoch": 10.54566341183228, "grad_norm": 1.0834749937057495, "learning_rate": 5.024179186324257e-05, "loss": 1.2941, "step": 18360 }, { "epoch": 10.551407237219989, "grad_norm": 1.0038423538208008, "learning_rate": 5.019393375370118e-05, "loss": 1.2776, "step": 18370 }, { "epoch": 10.557151062607696, "grad_norm": 1.0771673917770386, "learning_rate": 5.014607546875516e-05, "loss": 1.3033, "step": 18380 }, { "epoch": 10.562894887995405, "grad_norm": 1.086932897567749, "learning_rate": 5.0098217052255516e-05, "loss": 1.2922, "step": 18390 }, { "epoch": 10.568638713383113, "grad_norm": 1.043712854385376, "learning_rate": 5.0050358548053386e-05, "loss": 1.3058, "step": 18400 }, { "epoch": 10.574382538770822, "grad_norm": 1.0468264818191528, "learning_rate": 5.000250000000001e-05, "loss": 1.2685, "step": 18410 }, { "epoch": 10.580126364158529, "grad_norm": 1.06698477268219, "learning_rate": 4.995464145194663e-05, "loss": 1.2945, "step": 18420 }, { "epoch": 10.585870189546238, "grad_norm": 1.0319600105285645, "learning_rate": 4.990678294774449e-05, "loss": 1.2843, "step": 18430 }, { "epoch": 10.591614014933945, "grad_norm": 1.0435912609100342, "learning_rate": 4.985892453124485e-05, "loss": 1.2953, "step": 18440 }, { "epoch": 10.597357840321655, "grad_norm": 1.0339910984039307, "learning_rate": 4.981106624629881e-05, "loss": 1.2885, "step": 18450 }, { "epoch": 10.603101665709362, "grad_norm": 1.0914109945297241, "learning_rate": 4.9763208136757434e-05, "loss": 1.2898, "step": 18460 }, { "epoch": 10.608845491097071, "grad_norm": 0.9846189618110657, "learning_rate": 4.9715350246471556e-05, "loss": 1.3032, "step": 18470 }, { "epoch": 10.614589316484778, "grad_norm": 1.0420949459075928, "learning_rate": 4.9667492619291805e-05, "loss": 1.2943, "step": 18480 }, { "epoch": 10.620333141872488, "grad_norm": 1.0710291862487793, "learning_rate": 4.961963529906864e-05, "loss": 1.3075, "step": 18490 }, { "epoch": 10.626076967260195, "grad_norm": 1.0390212535858154, "learning_rate": 4.957177832965218e-05, "loss": 1.2741, "step": 18500 }, { "epoch": 10.631820792647904, "grad_norm": 0.9851287007331848, "learning_rate": 4.952392175489224e-05, "loss": 1.2932, "step": 18510 }, { "epoch": 10.637564618035611, "grad_norm": 1.0226706266403198, "learning_rate": 4.9476065618638275e-05, "loss": 1.2831, "step": 18520 }, { "epoch": 10.64330844342332, "grad_norm": 1.0406054258346558, "learning_rate": 4.9428209964739316e-05, "loss": 1.2672, "step": 18530 }, { "epoch": 10.649052268811028, "grad_norm": 1.4530843496322632, "learning_rate": 4.9380354837044e-05, "loss": 1.2883, "step": 18540 }, { "epoch": 10.654796094198737, "grad_norm": 1.0020498037338257, "learning_rate": 4.9332500279400434e-05, "loss": 1.275, "step": 18550 }, { "epoch": 10.660539919586444, "grad_norm": 0.9655764698982239, "learning_rate": 4.928464633565624e-05, "loss": 1.3016, "step": 18560 }, { "epoch": 10.666283744974153, "grad_norm": 1.118111491203308, "learning_rate": 4.9236793049658435e-05, "loss": 1.2816, "step": 18570 }, { "epoch": 10.67202757036186, "grad_norm": 1.094192624092102, "learning_rate": 4.918894046525346e-05, "loss": 1.3043, "step": 18580 }, { "epoch": 10.67777139574957, "grad_norm": 1.0270777940750122, "learning_rate": 4.914108862628715e-05, "loss": 1.2824, "step": 18590 }, { "epoch": 10.683515221137277, "grad_norm": 1.0722541809082031, "learning_rate": 4.9093237576604554e-05, "loss": 1.2807, "step": 18600 }, { "epoch": 10.689259046524986, "grad_norm": 1.1079844236373901, "learning_rate": 4.904538736005013e-05, "loss": 1.2903, "step": 18610 }, { "epoch": 10.695002871912694, "grad_norm": 1.170013427734375, "learning_rate": 4.899753802046745e-05, "loss": 1.2783, "step": 18620 }, { "epoch": 10.700746697300403, "grad_norm": 1.0115078687667847, "learning_rate": 4.894968960169935e-05, "loss": 1.2949, "step": 18630 }, { "epoch": 10.70649052268811, "grad_norm": 1.1240731477737427, "learning_rate": 4.890184214758784e-05, "loss": 1.3023, "step": 18640 }, { "epoch": 10.71223434807582, "grad_norm": 1.054719090461731, "learning_rate": 4.885399570197396e-05, "loss": 1.2627, "step": 18650 }, { "epoch": 10.717978173463527, "grad_norm": 0.9929307699203491, "learning_rate": 4.880615030869794e-05, "loss": 1.2749, "step": 18660 }, { "epoch": 10.723721998851236, "grad_norm": 1.1769680976867676, "learning_rate": 4.875830601159893e-05, "loss": 1.2699, "step": 18670 }, { "epoch": 10.729465824238943, "grad_norm": 1.0919102430343628, "learning_rate": 4.871046285451518e-05, "loss": 1.2846, "step": 18680 }, { "epoch": 10.73520964962665, "grad_norm": 1.0322463512420654, "learning_rate": 4.866262088128384e-05, "loss": 1.2847, "step": 18690 }, { "epoch": 10.74095347501436, "grad_norm": 1.151832103729248, "learning_rate": 4.8614780135740946e-05, "loss": 1.282, "step": 18700 }, { "epoch": 10.746697300402069, "grad_norm": 1.1855812072753906, "learning_rate": 4.8566940661721485e-05, "loss": 1.2751, "step": 18710 }, { "epoch": 10.752441125789776, "grad_norm": 1.104540467262268, "learning_rate": 4.8519102503059217e-05, "loss": 1.2831, "step": 18720 }, { "epoch": 10.758184951177483, "grad_norm": 1.1379268169403076, "learning_rate": 4.847126570358674e-05, "loss": 1.2763, "step": 18730 }, { "epoch": 10.763928776565193, "grad_norm": 0.9269735217094421, "learning_rate": 4.842343030713538e-05, "loss": 1.298, "step": 18740 }, { "epoch": 10.7696726019529, "grad_norm": 1.0377757549285889, "learning_rate": 4.837559635753517e-05, "loss": 1.3023, "step": 18750 }, { "epoch": 10.775416427340609, "grad_norm": 1.1565649509429932, "learning_rate": 4.832776389861484e-05, "loss": 1.2845, "step": 18760 }, { "epoch": 10.781160252728316, "grad_norm": 1.0077704191207886, "learning_rate": 4.827993297420175e-05, "loss": 1.2905, "step": 18770 }, { "epoch": 10.786904078116025, "grad_norm": 1.0438063144683838, "learning_rate": 4.823210362812186e-05, "loss": 1.2833, "step": 18780 }, { "epoch": 10.792647903503733, "grad_norm": 1.056535243988037, "learning_rate": 4.818427590419966e-05, "loss": 1.289, "step": 18790 }, { "epoch": 10.798391728891442, "grad_norm": 1.2826303243637085, "learning_rate": 4.813644984625814e-05, "loss": 1.2719, "step": 18800 }, { "epoch": 10.80413555427915, "grad_norm": 1.0525768995285034, "learning_rate": 4.808862549811885e-05, "loss": 1.3185, "step": 18810 }, { "epoch": 10.809879379666858, "grad_norm": 1.1545324325561523, "learning_rate": 4.8040802903601644e-05, "loss": 1.2904, "step": 18820 }, { "epoch": 10.815623205054566, "grad_norm": 1.062300682067871, "learning_rate": 4.799298210652491e-05, "loss": 1.289, "step": 18830 }, { "epoch": 10.821367030442275, "grad_norm": 1.1326003074645996, "learning_rate": 4.794516315070528e-05, "loss": 1.2812, "step": 18840 }, { "epoch": 10.827110855829982, "grad_norm": 1.0018856525421143, "learning_rate": 4.789734607995772e-05, "loss": 1.2771, "step": 18850 }, { "epoch": 10.832854681217691, "grad_norm": 0.9617106318473816, "learning_rate": 4.784953093809552e-05, "loss": 1.2662, "step": 18860 }, { "epoch": 10.838598506605399, "grad_norm": 1.0559762716293335, "learning_rate": 4.7801717768930147e-05, "loss": 1.2947, "step": 18870 }, { "epoch": 10.844342331993108, "grad_norm": 1.2240887880325317, "learning_rate": 4.775390661627131e-05, "loss": 1.3064, "step": 18880 }, { "epoch": 10.850086157380815, "grad_norm": 1.0106921195983887, "learning_rate": 4.770609752392682e-05, "loss": 1.2955, "step": 18890 }, { "epoch": 10.855829982768524, "grad_norm": 1.1305118799209595, "learning_rate": 4.765829053570261e-05, "loss": 1.2796, "step": 18900 }, { "epoch": 10.861573808156232, "grad_norm": 1.0738410949707031, "learning_rate": 4.761048569540272e-05, "loss": 1.2908, "step": 18910 }, { "epoch": 10.86731763354394, "grad_norm": 1.0190367698669434, "learning_rate": 4.756268304682918e-05, "loss": 1.2839, "step": 18920 }, { "epoch": 10.873061458931648, "grad_norm": 1.0953458547592163, "learning_rate": 4.751488263378206e-05, "loss": 1.3063, "step": 18930 }, { "epoch": 10.878805284319357, "grad_norm": 1.0812684297561646, "learning_rate": 4.7467084500059325e-05, "loss": 1.3067, "step": 18940 }, { "epoch": 10.884549109707065, "grad_norm": 0.9577709436416626, "learning_rate": 4.741928868945688e-05, "loss": 1.2789, "step": 18950 }, { "epoch": 10.890292935094774, "grad_norm": 1.0217535495758057, "learning_rate": 4.737149524576854e-05, "loss": 1.2778, "step": 18960 }, { "epoch": 10.896036760482481, "grad_norm": 1.0530942678451538, "learning_rate": 4.732370421278586e-05, "loss": 1.3059, "step": 18970 }, { "epoch": 10.90178058587019, "grad_norm": 1.083634614944458, "learning_rate": 4.727591563429827e-05, "loss": 1.2822, "step": 18980 }, { "epoch": 10.907524411257898, "grad_norm": 1.1242833137512207, "learning_rate": 4.722812955409291e-05, "loss": 1.3101, "step": 18990 }, { "epoch": 10.913268236645607, "grad_norm": 1.0768630504608154, "learning_rate": 4.718034601595463e-05, "loss": 1.2885, "step": 19000 }, { "epoch": 10.913268236645607, "eval_loss": 1.0733562707901, "eval_runtime": 122.5192, "eval_samples_per_second": 12.986, "eval_steps_per_second": 0.139, "eval_wer": 0.08780653181150412, "step": 19000 }, { "epoch": 10.919012062033314, "grad_norm": 0.9930522441864014, "learning_rate": 4.7132565063665986e-05, "loss": 1.3027, "step": 19010 }, { "epoch": 10.924755887421023, "grad_norm": 1.1161531209945679, "learning_rate": 4.708478674100711e-05, "loss": 1.2967, "step": 19020 }, { "epoch": 10.93049971280873, "grad_norm": 0.9924213290214539, "learning_rate": 4.7037011091755786e-05, "loss": 1.2659, "step": 19030 }, { "epoch": 10.93624353819644, "grad_norm": 1.1249287128448486, "learning_rate": 4.698923815968729e-05, "loss": 1.2732, "step": 19040 }, { "epoch": 10.941987363584147, "grad_norm": 1.1544798612594604, "learning_rate": 4.694146798857443e-05, "loss": 1.3079, "step": 19050 }, { "epoch": 10.947731188971856, "grad_norm": 1.0112615823745728, "learning_rate": 4.689370062218754e-05, "loss": 1.3011, "step": 19060 }, { "epoch": 10.953475014359563, "grad_norm": 0.9321224689483643, "learning_rate": 4.6845936104294255e-05, "loss": 1.2896, "step": 19070 }, { "epoch": 10.959218839747273, "grad_norm": 1.0098748207092285, "learning_rate": 4.679817447865974e-05, "loss": 1.2999, "step": 19080 }, { "epoch": 10.96496266513498, "grad_norm": 1.181365728378296, "learning_rate": 4.675041578904643e-05, "loss": 1.2966, "step": 19090 }, { "epoch": 10.970706490522687, "grad_norm": 1.0301467180252075, "learning_rate": 4.670266007921408e-05, "loss": 1.291, "step": 19100 }, { "epoch": 10.976450315910396, "grad_norm": 1.0525802373886108, "learning_rate": 4.6654907392919745e-05, "loss": 1.2809, "step": 19110 }, { "epoch": 10.982194141298105, "grad_norm": 1.0678201913833618, "learning_rate": 4.6607157773917645e-05, "loss": 1.308, "step": 19120 }, { "epoch": 10.987937966685813, "grad_norm": 1.1410713195800781, "learning_rate": 4.655941126595927e-05, "loss": 1.2723, "step": 19130 }, { "epoch": 10.99368179207352, "grad_norm": 1.0476981401443481, "learning_rate": 4.65116679127932e-05, "loss": 1.3089, "step": 19140 }, { "epoch": 10.99942561746123, "grad_norm": 1.0620732307434082, "learning_rate": 4.646392775816518e-05, "loss": 1.2846, "step": 19150 }, { "epoch": 11.005169442848937, "grad_norm": 1.0830312967300415, "learning_rate": 4.641619084581796e-05, "loss": 1.2921, "step": 19160 }, { "epoch": 11.010913268236646, "grad_norm": 1.0160865783691406, "learning_rate": 4.6368457219491326e-05, "loss": 1.2696, "step": 19170 }, { "epoch": 11.016657093624353, "grad_norm": 1.0626208782196045, "learning_rate": 4.632072692292213e-05, "loss": 1.2595, "step": 19180 }, { "epoch": 11.022400919012062, "grad_norm": 1.0124868154525757, "learning_rate": 4.627299999984407e-05, "loss": 1.2524, "step": 19190 }, { "epoch": 11.02814474439977, "grad_norm": 1.0652151107788086, "learning_rate": 4.622527649398786e-05, "loss": 1.2954, "step": 19200 }, { "epoch": 11.033888569787479, "grad_norm": 1.0326727628707886, "learning_rate": 4.617755644908098e-05, "loss": 1.2602, "step": 19210 }, { "epoch": 11.039632395175186, "grad_norm": 1.0618664026260376, "learning_rate": 4.612983990884778e-05, "loss": 1.2696, "step": 19220 }, { "epoch": 11.045376220562895, "grad_norm": 1.0242348909378052, "learning_rate": 4.6082126917009424e-05, "loss": 1.2636, "step": 19230 }, { "epoch": 11.051120045950602, "grad_norm": 1.355651617050171, "learning_rate": 4.6034417517283794e-05, "loss": 1.2933, "step": 19240 }, { "epoch": 11.056863871338312, "grad_norm": 1.0043624639511108, "learning_rate": 4.5986711753385515e-05, "loss": 1.2764, "step": 19250 }, { "epoch": 11.062607696726019, "grad_norm": 0.9619847536087036, "learning_rate": 4.5939009669025815e-05, "loss": 1.259, "step": 19260 }, { "epoch": 11.068351522113728, "grad_norm": 1.0290327072143555, "learning_rate": 4.589131130791262e-05, "loss": 1.2575, "step": 19270 }, { "epoch": 11.074095347501435, "grad_norm": 0.9619156718254089, "learning_rate": 4.58436167137504e-05, "loss": 1.2646, "step": 19280 }, { "epoch": 11.079839172889145, "grad_norm": 1.065199851989746, "learning_rate": 4.5795925930240194e-05, "loss": 1.2738, "step": 19290 }, { "epoch": 11.085582998276852, "grad_norm": 0.998838484287262, "learning_rate": 4.574823900107957e-05, "loss": 1.2645, "step": 19300 }, { "epoch": 11.091326823664561, "grad_norm": 0.9314670562744141, "learning_rate": 4.57005559699625e-05, "loss": 1.2913, "step": 19310 }, { "epoch": 11.097070649052268, "grad_norm": 1.0544768571853638, "learning_rate": 4.565287688057943e-05, "loss": 1.2673, "step": 19320 }, { "epoch": 11.102814474439977, "grad_norm": 1.2092013359069824, "learning_rate": 4.560520177661722e-05, "loss": 1.2603, "step": 19330 }, { "epoch": 11.108558299827685, "grad_norm": 0.9822429418563843, "learning_rate": 4.5557530701759e-05, "loss": 1.2619, "step": 19340 }, { "epoch": 11.114302125215394, "grad_norm": 0.957553505897522, "learning_rate": 4.55098636996843e-05, "loss": 1.258, "step": 19350 }, { "epoch": 11.120045950603101, "grad_norm": 1.008302927017212, "learning_rate": 4.546220081406884e-05, "loss": 1.2871, "step": 19360 }, { "epoch": 11.12578977599081, "grad_norm": 1.1175718307495117, "learning_rate": 4.54145420885846e-05, "loss": 1.2832, "step": 19370 }, { "epoch": 11.131533601378518, "grad_norm": 1.0406595468521118, "learning_rate": 4.5366887566899784e-05, "loss": 1.28, "step": 19380 }, { "epoch": 11.137277426766227, "grad_norm": 1.2113124132156372, "learning_rate": 4.531923729267867e-05, "loss": 1.2587, "step": 19390 }, { "epoch": 11.143021252153934, "grad_norm": 1.0352063179016113, "learning_rate": 4.527159130958171e-05, "loss": 1.2896, "step": 19400 }, { "epoch": 11.148765077541643, "grad_norm": 0.9835383296012878, "learning_rate": 4.522394966126539e-05, "loss": 1.2741, "step": 19410 }, { "epoch": 11.15450890292935, "grad_norm": 1.019024133682251, "learning_rate": 4.517631239138221e-05, "loss": 1.2715, "step": 19420 }, { "epoch": 11.16025272831706, "grad_norm": 1.0540754795074463, "learning_rate": 4.5128679543580714e-05, "loss": 1.2764, "step": 19430 }, { "epoch": 11.165996553704767, "grad_norm": 1.0139588117599487, "learning_rate": 4.508105116150534e-05, "loss": 1.2353, "step": 19440 }, { "epoch": 11.171740379092476, "grad_norm": 1.0660016536712646, "learning_rate": 4.503342728879646e-05, "loss": 1.2355, "step": 19450 }, { "epoch": 11.177484204480184, "grad_norm": 1.1476292610168457, "learning_rate": 4.498580796909032e-05, "loss": 1.2813, "step": 19460 }, { "epoch": 11.183228029867893, "grad_norm": 1.0248353481292725, "learning_rate": 4.493819324601894e-05, "loss": 1.2755, "step": 19470 }, { "epoch": 11.1889718552556, "grad_norm": 1.1075738668441772, "learning_rate": 4.489058316321023e-05, "loss": 1.2735, "step": 19480 }, { "epoch": 11.19471568064331, "grad_norm": 0.964785635471344, "learning_rate": 4.484297776428775e-05, "loss": 1.2811, "step": 19490 }, { "epoch": 11.200459506031017, "grad_norm": 1.1051472425460815, "learning_rate": 4.479537709287081e-05, "loss": 1.2592, "step": 19500 }, { "epoch": 11.206203331418726, "grad_norm": 1.0212862491607666, "learning_rate": 4.47477811925744e-05, "loss": 1.2654, "step": 19510 }, { "epoch": 11.211947156806433, "grad_norm": 1.014697551727295, "learning_rate": 4.470019010700913e-05, "loss": 1.2636, "step": 19520 }, { "epoch": 11.217690982194142, "grad_norm": 1.1704610586166382, "learning_rate": 4.465260387978119e-05, "loss": 1.2522, "step": 19530 }, { "epoch": 11.22343480758185, "grad_norm": 1.0169048309326172, "learning_rate": 4.460502255449229e-05, "loss": 1.2695, "step": 19540 }, { "epoch": 11.229178632969557, "grad_norm": 1.0568783283233643, "learning_rate": 4.4557446174739706e-05, "loss": 1.2708, "step": 19550 }, { "epoch": 11.234922458357266, "grad_norm": 0.9714581966400146, "learning_rate": 4.450987478411615e-05, "loss": 1.267, "step": 19560 }, { "epoch": 11.240666283744973, "grad_norm": 1.0905554294586182, "learning_rate": 4.446230842620979e-05, "loss": 1.269, "step": 19570 }, { "epoch": 11.246410109132682, "grad_norm": 0.9190165400505066, "learning_rate": 4.441474714460414e-05, "loss": 1.2724, "step": 19580 }, { "epoch": 11.25215393452039, "grad_norm": 0.9596851468086243, "learning_rate": 4.436719098287807e-05, "loss": 1.2668, "step": 19590 }, { "epoch": 11.257897759908099, "grad_norm": 1.049805998802185, "learning_rate": 4.4319639984605804e-05, "loss": 1.2646, "step": 19600 }, { "epoch": 11.263641585295806, "grad_norm": 0.9149695038795471, "learning_rate": 4.4272094193356774e-05, "loss": 1.2618, "step": 19610 }, { "epoch": 11.269385410683515, "grad_norm": 1.0188547372817993, "learning_rate": 4.422455365269571e-05, "loss": 1.2783, "step": 19620 }, { "epoch": 11.275129236071223, "grad_norm": 1.0329830646514893, "learning_rate": 4.4177018406182476e-05, "loss": 1.2696, "step": 19630 }, { "epoch": 11.280873061458932, "grad_norm": 1.037073016166687, "learning_rate": 4.412948849737207e-05, "loss": 1.2757, "step": 19640 }, { "epoch": 11.28661688684664, "grad_norm": 1.082959532737732, "learning_rate": 4.4081963969814664e-05, "loss": 1.2834, "step": 19650 }, { "epoch": 11.292360712234348, "grad_norm": 0.9733410477638245, "learning_rate": 4.4034444867055444e-05, "loss": 1.2608, "step": 19660 }, { "epoch": 11.298104537622056, "grad_norm": 1.0009406805038452, "learning_rate": 4.3986931232634694e-05, "loss": 1.2909, "step": 19670 }, { "epoch": 11.303848363009765, "grad_norm": 1.2611254453659058, "learning_rate": 4.393942311008759e-05, "loss": 1.2793, "step": 19680 }, { "epoch": 11.309592188397472, "grad_norm": 0.9886844158172607, "learning_rate": 4.389192054294432e-05, "loss": 1.2888, "step": 19690 }, { "epoch": 11.315336013785181, "grad_norm": 1.0744023323059082, "learning_rate": 4.384442357472998e-05, "loss": 1.2902, "step": 19700 }, { "epoch": 11.321079839172889, "grad_norm": 0.9251049757003784, "learning_rate": 4.379693224896451e-05, "loss": 1.2435, "step": 19710 }, { "epoch": 11.326823664560598, "grad_norm": 1.0934780836105347, "learning_rate": 4.3749446609162735e-05, "loss": 1.2776, "step": 19720 }, { "epoch": 11.332567489948305, "grad_norm": 1.0327459573745728, "learning_rate": 4.370196669883419e-05, "loss": 1.2671, "step": 19730 }, { "epoch": 11.338311315336014, "grad_norm": 1.0435962677001953, "learning_rate": 4.3654492561483204e-05, "loss": 1.2641, "step": 19740 }, { "epoch": 11.344055140723722, "grad_norm": 1.0751525163650513, "learning_rate": 4.3607024240608847e-05, "loss": 1.2648, "step": 19750 }, { "epoch": 11.34979896611143, "grad_norm": 0.9920499324798584, "learning_rate": 4.355956177970478e-05, "loss": 1.246, "step": 19760 }, { "epoch": 11.355542791499138, "grad_norm": 1.0982859134674072, "learning_rate": 4.3512105222259385e-05, "loss": 1.2819, "step": 19770 }, { "epoch": 11.361286616886847, "grad_norm": 1.01777184009552, "learning_rate": 4.3464654611755565e-05, "loss": 1.2724, "step": 19780 }, { "epoch": 11.367030442274555, "grad_norm": 1.0396219491958618, "learning_rate": 4.3417209991670795e-05, "loss": 1.2936, "step": 19790 }, { "epoch": 11.372774267662264, "grad_norm": 1.1243517398834229, "learning_rate": 4.33697714054771e-05, "loss": 1.2694, "step": 19800 }, { "epoch": 11.378518093049971, "grad_norm": 1.0075806379318237, "learning_rate": 4.3322338896640896e-05, "loss": 1.2923, "step": 19810 }, { "epoch": 11.38426191843768, "grad_norm": 1.1269917488098145, "learning_rate": 4.3274912508623126e-05, "loss": 1.2762, "step": 19820 }, { "epoch": 11.390005743825387, "grad_norm": 0.9697692394256592, "learning_rate": 4.322749228487904e-05, "loss": 1.2582, "step": 19830 }, { "epoch": 11.395749569213097, "grad_norm": 0.9485560059547424, "learning_rate": 4.318007826885827e-05, "loss": 1.2735, "step": 19840 }, { "epoch": 11.401493394600804, "grad_norm": 1.1051335334777832, "learning_rate": 4.313267050400481e-05, "loss": 1.267, "step": 19850 }, { "epoch": 11.407237219988513, "grad_norm": 0.9663533568382263, "learning_rate": 4.308526903375683e-05, "loss": 1.2638, "step": 19860 }, { "epoch": 11.41298104537622, "grad_norm": 1.0794001817703247, "learning_rate": 4.303787390154682e-05, "loss": 1.2788, "step": 19870 }, { "epoch": 11.41872487076393, "grad_norm": 1.1780107021331787, "learning_rate": 4.299048515080142e-05, "loss": 1.2578, "step": 19880 }, { "epoch": 11.424468696151637, "grad_norm": 1.0294619798660278, "learning_rate": 4.2943102824941404e-05, "loss": 1.2525, "step": 19890 }, { "epoch": 11.430212521539346, "grad_norm": 1.087461233139038, "learning_rate": 4.2895726967381734e-05, "loss": 1.2589, "step": 19900 }, { "epoch": 11.435956346927053, "grad_norm": 1.0082402229309082, "learning_rate": 4.284835762153134e-05, "loss": 1.2509, "step": 19910 }, { "epoch": 11.441700172314762, "grad_norm": 1.0569061040878296, "learning_rate": 4.2800994830793275e-05, "loss": 1.2729, "step": 19920 }, { "epoch": 11.44744399770247, "grad_norm": 1.0966882705688477, "learning_rate": 4.2753638638564546e-05, "loss": 1.2779, "step": 19930 }, { "epoch": 11.453187823090179, "grad_norm": 1.1267974376678467, "learning_rate": 4.270628908823613e-05, "loss": 1.2776, "step": 19940 }, { "epoch": 11.458931648477886, "grad_norm": 1.0483267307281494, "learning_rate": 4.265894622319292e-05, "loss": 1.2706, "step": 19950 }, { "epoch": 11.464675473865594, "grad_norm": 1.0437610149383545, "learning_rate": 4.261161008681361e-05, "loss": 1.2705, "step": 19960 }, { "epoch": 11.470419299253303, "grad_norm": 1.0062497854232788, "learning_rate": 4.2564280722470864e-05, "loss": 1.256, "step": 19970 }, { "epoch": 11.47616312464101, "grad_norm": 1.0850303173065186, "learning_rate": 4.2516958173531015e-05, "loss": 1.2766, "step": 19980 }, { "epoch": 11.48190695002872, "grad_norm": 1.088343858718872, "learning_rate": 4.246964248335424e-05, "loss": 1.2812, "step": 19990 }, { "epoch": 11.487650775416427, "grad_norm": 1.090111494064331, "learning_rate": 4.2422333695294393e-05, "loss": 1.2673, "step": 20000 }, { "epoch": 11.487650775416427, "eval_loss": 1.0746465921401978, "eval_runtime": 122.6086, "eval_samples_per_second": 12.976, "eval_steps_per_second": 0.139, "eval_wer": 0.09046219911854447, "step": 20000 }, { "epoch": 11.493394600804136, "grad_norm": 1.1436728239059448, "learning_rate": 4.237503185269897e-05, "loss": 1.2641, "step": 20010 }, { "epoch": 11.499138426191843, "grad_norm": 1.0940887928009033, "learning_rate": 4.232773699890918e-05, "loss": 1.2571, "step": 20020 }, { "epoch": 11.504882251579552, "grad_norm": 1.0334457159042358, "learning_rate": 4.2280449177259754e-05, "loss": 1.2703, "step": 20030 }, { "epoch": 11.51062607696726, "grad_norm": 1.08735990524292, "learning_rate": 4.223316843107906e-05, "loss": 1.2603, "step": 20040 }, { "epoch": 11.516369902354969, "grad_norm": 1.0585899353027344, "learning_rate": 4.2185894803688905e-05, "loss": 1.2599, "step": 20050 }, { "epoch": 11.522113727742676, "grad_norm": 1.1298428773880005, "learning_rate": 4.2138628338404604e-05, "loss": 1.272, "step": 20060 }, { "epoch": 11.527857553130385, "grad_norm": 1.005755066871643, "learning_rate": 4.209136907853491e-05, "loss": 1.2648, "step": 20070 }, { "epoch": 11.533601378518092, "grad_norm": 1.0285767316818237, "learning_rate": 4.204411706738198e-05, "loss": 1.2523, "step": 20080 }, { "epoch": 11.539345203905802, "grad_norm": 1.0364145040512085, "learning_rate": 4.199687234824134e-05, "loss": 1.2879, "step": 20090 }, { "epoch": 11.545089029293509, "grad_norm": 1.1432249546051025, "learning_rate": 4.19496349644018e-05, "loss": 1.2695, "step": 20100 }, { "epoch": 11.550832854681218, "grad_norm": 0.9667734503746033, "learning_rate": 4.190240495914544e-05, "loss": 1.2802, "step": 20110 }, { "epoch": 11.556576680068925, "grad_norm": 1.0737906694412231, "learning_rate": 4.185518237574767e-05, "loss": 1.27, "step": 20120 }, { "epoch": 11.562320505456634, "grad_norm": 1.069962978363037, "learning_rate": 4.1807967257476976e-05, "loss": 1.2665, "step": 20130 }, { "epoch": 11.568064330844342, "grad_norm": 1.0386557579040527, "learning_rate": 4.176075964759511e-05, "loss": 1.2585, "step": 20140 }, { "epoch": 11.573808156232051, "grad_norm": 1.1467912197113037, "learning_rate": 4.171355958935688e-05, "loss": 1.2653, "step": 20150 }, { "epoch": 11.579551981619758, "grad_norm": 0.9918843507766724, "learning_rate": 4.166636712601017e-05, "loss": 1.2779, "step": 20160 }, { "epoch": 11.585295807007467, "grad_norm": 1.0522713661193848, "learning_rate": 4.1619182300795976e-05, "loss": 1.2759, "step": 20170 }, { "epoch": 11.591039632395175, "grad_norm": 1.0904958248138428, "learning_rate": 4.15720051569482e-05, "loss": 1.3041, "step": 20180 }, { "epoch": 11.596783457782884, "grad_norm": 0.9734220504760742, "learning_rate": 4.152483573769379e-05, "loss": 1.2525, "step": 20190 }, { "epoch": 11.602527283170591, "grad_norm": 1.0670104026794434, "learning_rate": 4.147767408625255e-05, "loss": 1.2648, "step": 20200 }, { "epoch": 11.6082711085583, "grad_norm": 1.2152177095413208, "learning_rate": 4.14305202458372e-05, "loss": 1.2592, "step": 20210 }, { "epoch": 11.614014933946008, "grad_norm": 1.0259360074996948, "learning_rate": 4.138337425965333e-05, "loss": 1.2687, "step": 20220 }, { "epoch": 11.619758759333717, "grad_norm": 1.0761845111846924, "learning_rate": 4.1336236170899256e-05, "loss": 1.2642, "step": 20230 }, { "epoch": 11.625502584721424, "grad_norm": 1.1895610094070435, "learning_rate": 4.128910602276615e-05, "loss": 1.266, "step": 20240 }, { "epoch": 11.631246410109133, "grad_norm": 0.9918289184570312, "learning_rate": 4.1241983858437835e-05, "loss": 1.2939, "step": 20250 }, { "epoch": 11.63699023549684, "grad_norm": 1.0784008502960205, "learning_rate": 4.119486972109084e-05, "loss": 1.2459, "step": 20260 }, { "epoch": 11.64273406088455, "grad_norm": 1.0903466939926147, "learning_rate": 4.1147763653894376e-05, "loss": 1.277, "step": 20270 }, { "epoch": 11.648477886272257, "grad_norm": 1.1010406017303467, "learning_rate": 4.110066570001019e-05, "loss": 1.2705, "step": 20280 }, { "epoch": 11.654221711659966, "grad_norm": 1.1080422401428223, "learning_rate": 4.105357590259266e-05, "loss": 1.279, "step": 20290 }, { "epoch": 11.659965537047674, "grad_norm": 1.0251054763793945, "learning_rate": 4.1006494304788677e-05, "loss": 1.2675, "step": 20300 }, { "epoch": 11.665709362435383, "grad_norm": 1.0210870504379272, "learning_rate": 4.0959420949737557e-05, "loss": 1.2487, "step": 20310 }, { "epoch": 11.67145318782309, "grad_norm": 1.1191749572753906, "learning_rate": 4.091235588057118e-05, "loss": 1.247, "step": 20320 }, { "epoch": 11.6771970132108, "grad_norm": 0.9252220392227173, "learning_rate": 4.0865299140413696e-05, "loss": 1.2611, "step": 20330 }, { "epoch": 11.682940838598507, "grad_norm": 0.9974046349525452, "learning_rate": 4.0818250772381736e-05, "loss": 1.2701, "step": 20340 }, { "epoch": 11.688684663986216, "grad_norm": 1.0279533863067627, "learning_rate": 4.0771210819584236e-05, "loss": 1.2798, "step": 20350 }, { "epoch": 11.694428489373923, "grad_norm": 1.0447250604629517, "learning_rate": 4.072417932512235e-05, "loss": 1.2677, "step": 20360 }, { "epoch": 11.70017231476163, "grad_norm": 1.0690468549728394, "learning_rate": 4.067715633208958e-05, "loss": 1.269, "step": 20370 }, { "epoch": 11.70591614014934, "grad_norm": 1.03852379322052, "learning_rate": 4.063014188357156e-05, "loss": 1.2773, "step": 20380 }, { "epoch": 11.711659965537049, "grad_norm": 0.9885318279266357, "learning_rate": 4.058313602264615e-05, "loss": 1.2545, "step": 20390 }, { "epoch": 11.717403790924756, "grad_norm": 1.1901969909667969, "learning_rate": 4.0536138792383314e-05, "loss": 1.2563, "step": 20400 }, { "epoch": 11.723147616312463, "grad_norm": 1.0043365955352783, "learning_rate": 4.048915023584513e-05, "loss": 1.2607, "step": 20410 }, { "epoch": 11.728891441700172, "grad_norm": 1.0531787872314453, "learning_rate": 4.0442170396085686e-05, "loss": 1.2648, "step": 20420 }, { "epoch": 11.73463526708788, "grad_norm": 0.9562181234359741, "learning_rate": 4.039519931615113e-05, "loss": 1.2594, "step": 20430 }, { "epoch": 11.740379092475589, "grad_norm": 1.0916298627853394, "learning_rate": 4.0348237039079555e-05, "loss": 1.2742, "step": 20440 }, { "epoch": 11.746122917863296, "grad_norm": 1.01168692111969, "learning_rate": 4.030128360790098e-05, "loss": 1.2738, "step": 20450 }, { "epoch": 11.751866743251005, "grad_norm": 0.9795570969581604, "learning_rate": 4.0254339065637374e-05, "loss": 1.2458, "step": 20460 }, { "epoch": 11.757610568638713, "grad_norm": 1.1094186305999756, "learning_rate": 4.0207403455302495e-05, "loss": 1.264, "step": 20470 }, { "epoch": 11.763354394026422, "grad_norm": 0.9983965158462524, "learning_rate": 4.016047681990194e-05, "loss": 1.2719, "step": 20480 }, { "epoch": 11.76909821941413, "grad_norm": 0.9829633235931396, "learning_rate": 4.011355920243312e-05, "loss": 1.2586, "step": 20490 }, { "epoch": 11.774842044801838, "grad_norm": 1.0315426588058472, "learning_rate": 4.0066650645885096e-05, "loss": 1.2617, "step": 20500 }, { "epoch": 11.780585870189546, "grad_norm": 1.0671780109405518, "learning_rate": 4.001975119323875e-05, "loss": 1.2666, "step": 20510 }, { "epoch": 11.786329695577255, "grad_norm": 1.0147048234939575, "learning_rate": 3.997286088746649e-05, "loss": 1.2689, "step": 20520 }, { "epoch": 11.792073520964962, "grad_norm": 1.1218231916427612, "learning_rate": 3.9925979771532435e-05, "loss": 1.291, "step": 20530 }, { "epoch": 11.797817346352671, "grad_norm": 1.0123099088668823, "learning_rate": 3.987910788839227e-05, "loss": 1.2581, "step": 20540 }, { "epoch": 11.803561171740379, "grad_norm": 0.9395419359207153, "learning_rate": 3.9832245280993176e-05, "loss": 1.2696, "step": 20550 }, { "epoch": 11.809304997128088, "grad_norm": 1.050680160522461, "learning_rate": 3.978539199227389e-05, "loss": 1.2605, "step": 20560 }, { "epoch": 11.815048822515795, "grad_norm": 1.0552202463150024, "learning_rate": 3.9738548065164566e-05, "loss": 1.2545, "step": 20570 }, { "epoch": 11.820792647903504, "grad_norm": 0.9876143336296082, "learning_rate": 3.96917135425868e-05, "loss": 1.2548, "step": 20580 }, { "epoch": 11.826536473291211, "grad_norm": 1.0114952325820923, "learning_rate": 3.9644888467453595e-05, "loss": 1.2905, "step": 20590 }, { "epoch": 11.83228029867892, "grad_norm": 0.9893816709518433, "learning_rate": 3.9598072882669236e-05, "loss": 1.2846, "step": 20600 }, { "epoch": 11.838024124066628, "grad_norm": 1.0897454023361206, "learning_rate": 3.955126683112938e-05, "loss": 1.2834, "step": 20610 }, { "epoch": 11.843767949454337, "grad_norm": 0.9684361219406128, "learning_rate": 3.9504470355720904e-05, "loss": 1.2619, "step": 20620 }, { "epoch": 11.849511774842044, "grad_norm": 0.9694860577583313, "learning_rate": 3.9457683499321904e-05, "loss": 1.2656, "step": 20630 }, { "epoch": 11.855255600229754, "grad_norm": 0.9525081515312195, "learning_rate": 3.941090630480174e-05, "loss": 1.2877, "step": 20640 }, { "epoch": 11.860999425617461, "grad_norm": 1.1364809274673462, "learning_rate": 3.93641388150208e-05, "loss": 1.2406, "step": 20650 }, { "epoch": 11.86674325100517, "grad_norm": 1.0357623100280762, "learning_rate": 3.931738107283068e-05, "loss": 1.2507, "step": 20660 }, { "epoch": 11.872487076392877, "grad_norm": 1.1002920866012573, "learning_rate": 3.9270633121074015e-05, "loss": 1.2399, "step": 20670 }, { "epoch": 11.878230901780586, "grad_norm": 1.0811396837234497, "learning_rate": 3.9223895002584415e-05, "loss": 1.2959, "step": 20680 }, { "epoch": 11.883974727168294, "grad_norm": 1.0222771167755127, "learning_rate": 3.917716676018657e-05, "loss": 1.2736, "step": 20690 }, { "epoch": 11.889718552556003, "grad_norm": 0.9716038107872009, "learning_rate": 3.9130448436696054e-05, "loss": 1.2598, "step": 20700 }, { "epoch": 11.89546237794371, "grad_norm": 1.0875701904296875, "learning_rate": 3.908374007491939e-05, "loss": 1.2634, "step": 20710 }, { "epoch": 11.90120620333142, "grad_norm": 1.042626142501831, "learning_rate": 3.903704171765396e-05, "loss": 1.2687, "step": 20720 }, { "epoch": 11.906950028719127, "grad_norm": 1.0035465955734253, "learning_rate": 3.8990353407687945e-05, "loss": 1.2497, "step": 20730 }, { "epoch": 11.912693854106836, "grad_norm": 1.020595908164978, "learning_rate": 3.894367518780041e-05, "loss": 1.2557, "step": 20740 }, { "epoch": 11.918437679494543, "grad_norm": 1.0916239023208618, "learning_rate": 3.8897007100761064e-05, "loss": 1.269, "step": 20750 }, { "epoch": 11.924181504882252, "grad_norm": 0.9847072958946228, "learning_rate": 3.885034918933041e-05, "loss": 1.2558, "step": 20760 }, { "epoch": 11.92992533026996, "grad_norm": 1.1077895164489746, "learning_rate": 3.880370149625962e-05, "loss": 1.2706, "step": 20770 }, { "epoch": 11.935669155657669, "grad_norm": 1.0282268524169922, "learning_rate": 3.875706406429045e-05, "loss": 1.2598, "step": 20780 }, { "epoch": 11.941412981045376, "grad_norm": 0.8964557647705078, "learning_rate": 3.871043693615533e-05, "loss": 1.2559, "step": 20790 }, { "epoch": 11.947156806433085, "grad_norm": 1.002661943435669, "learning_rate": 3.866382015457715e-05, "loss": 1.2509, "step": 20800 }, { "epoch": 11.952900631820793, "grad_norm": 1.0154294967651367, "learning_rate": 3.861721376226944e-05, "loss": 1.271, "step": 20810 }, { "epoch": 11.9586444572085, "grad_norm": 1.0177925825119019, "learning_rate": 3.857061780193611e-05, "loss": 1.2833, "step": 20820 }, { "epoch": 11.96438828259621, "grad_norm": 1.0291526317596436, "learning_rate": 3.85240323162716e-05, "loss": 1.2673, "step": 20830 }, { "epoch": 11.970132107983916, "grad_norm": 1.1617499589920044, "learning_rate": 3.8477457347960655e-05, "loss": 1.2793, "step": 20840 }, { "epoch": 11.975875933371626, "grad_norm": 1.1649653911590576, "learning_rate": 3.843089293967843e-05, "loss": 1.2904, "step": 20850 }, { "epoch": 11.981619758759333, "grad_norm": 0.9970369935035706, "learning_rate": 3.8384339134090456e-05, "loss": 1.272, "step": 20860 }, { "epoch": 11.987363584147042, "grad_norm": 0.9548753499984741, "learning_rate": 3.833779597385244e-05, "loss": 1.2604, "step": 20870 }, { "epoch": 11.99310740953475, "grad_norm": 0.8901769518852234, "learning_rate": 3.829126350161045e-05, "loss": 1.2799, "step": 20880 }, { "epoch": 11.998851234922459, "grad_norm": 1.0081822872161865, "learning_rate": 3.824474176000066e-05, "loss": 1.274, "step": 20890 }, { "epoch": 12.004595060310166, "grad_norm": 1.00437331199646, "learning_rate": 3.819823079164947e-05, "loss": 1.2532, "step": 20900 }, { "epoch": 12.010338885697875, "grad_norm": 1.0935298204421997, "learning_rate": 3.815173063917342e-05, "loss": 1.2589, "step": 20910 }, { "epoch": 12.016082711085582, "grad_norm": 0.9490810632705688, "learning_rate": 3.810524134517907e-05, "loss": 1.2528, "step": 20920 }, { "epoch": 12.021826536473291, "grad_norm": 1.1048473119735718, "learning_rate": 3.805876295226312e-05, "loss": 1.2409, "step": 20930 }, { "epoch": 12.027570361860999, "grad_norm": 1.0223900079727173, "learning_rate": 3.801229550301222e-05, "loss": 1.2417, "step": 20940 }, { "epoch": 12.033314187248708, "grad_norm": 1.0157376527786255, "learning_rate": 3.7965839040002996e-05, "loss": 1.2647, "step": 20950 }, { "epoch": 12.039058012636415, "grad_norm": 0.988128662109375, "learning_rate": 3.791939360580205e-05, "loss": 1.2497, "step": 20960 }, { "epoch": 12.044801838024124, "grad_norm": 1.0185871124267578, "learning_rate": 3.787295924296582e-05, "loss": 1.2489, "step": 20970 }, { "epoch": 12.050545663411832, "grad_norm": 1.0104808807373047, "learning_rate": 3.7826535994040676e-05, "loss": 1.2421, "step": 20980 }, { "epoch": 12.05628948879954, "grad_norm": 1.0342261791229248, "learning_rate": 3.7780123901562717e-05, "loss": 1.2414, "step": 20990 }, { "epoch": 12.062033314187248, "grad_norm": 1.0249075889587402, "learning_rate": 3.773372300805786e-05, "loss": 1.2371, "step": 21000 }, { "epoch": 12.062033314187248, "eval_loss": 1.0713833570480347, "eval_runtime": 121.2057, "eval_samples_per_second": 13.126, "eval_steps_per_second": 0.14, "eval_wer": 0.08786303537122839, "step": 21000 }, { "epoch": 12.067777139574957, "grad_norm": 1.0830078125, "learning_rate": 3.7687333356041806e-05, "loss": 1.2443, "step": 21010 }, { "epoch": 12.073520964962665, "grad_norm": 1.0379953384399414, "learning_rate": 3.764095498801987e-05, "loss": 1.2488, "step": 21020 }, { "epoch": 12.079264790350374, "grad_norm": 1.0052080154418945, "learning_rate": 3.759458794648709e-05, "loss": 1.2353, "step": 21030 }, { "epoch": 12.085008615738081, "grad_norm": 1.0315632820129395, "learning_rate": 3.754823227392811e-05, "loss": 1.2563, "step": 21040 }, { "epoch": 12.09075244112579, "grad_norm": 1.0419727563858032, "learning_rate": 3.750188801281713e-05, "loss": 1.2543, "step": 21050 }, { "epoch": 12.096496266513498, "grad_norm": 1.0818511247634888, "learning_rate": 3.745555520561795e-05, "loss": 1.2618, "step": 21060 }, { "epoch": 12.102240091901207, "grad_norm": 1.0677670240402222, "learning_rate": 3.7409233894783804e-05, "loss": 1.2356, "step": 21070 }, { "epoch": 12.107983917288914, "grad_norm": 1.043628215789795, "learning_rate": 3.736292412275747e-05, "loss": 1.26, "step": 21080 }, { "epoch": 12.113727742676623, "grad_norm": 1.0647454261779785, "learning_rate": 3.7316625931971103e-05, "loss": 1.2465, "step": 21090 }, { "epoch": 12.11947156806433, "grad_norm": 0.9494752883911133, "learning_rate": 3.727033936484623e-05, "loss": 1.2527, "step": 21100 }, { "epoch": 12.12521539345204, "grad_norm": 0.9505246877670288, "learning_rate": 3.7224064463793795e-05, "loss": 1.2481, "step": 21110 }, { "epoch": 12.130959218839747, "grad_norm": 0.9982222318649292, "learning_rate": 3.717780127121398e-05, "loss": 1.2433, "step": 21120 }, { "epoch": 12.136703044227456, "grad_norm": 0.9857678413391113, "learning_rate": 3.7131549829496285e-05, "loss": 1.2582, "step": 21130 }, { "epoch": 12.142446869615164, "grad_norm": 1.1183850765228271, "learning_rate": 3.708531018101945e-05, "loss": 1.2443, "step": 21140 }, { "epoch": 12.148190695002873, "grad_norm": 0.9338358044624329, "learning_rate": 3.703908236815134e-05, "loss": 1.2535, "step": 21150 }, { "epoch": 12.15393452039058, "grad_norm": 0.8844596743583679, "learning_rate": 3.699286643324908e-05, "loss": 1.2226, "step": 21160 }, { "epoch": 12.159678345778289, "grad_norm": 1.0605865716934204, "learning_rate": 3.694666241865881e-05, "loss": 1.2602, "step": 21170 }, { "epoch": 12.165422171165996, "grad_norm": 0.9888694882392883, "learning_rate": 3.6900470366715814e-05, "loss": 1.2394, "step": 21180 }, { "epoch": 12.171165996553706, "grad_norm": 0.93047034740448, "learning_rate": 3.685429031974442e-05, "loss": 1.2525, "step": 21190 }, { "epoch": 12.176909821941413, "grad_norm": 1.066928505897522, "learning_rate": 3.6808122320057875e-05, "loss": 1.249, "step": 21200 }, { "epoch": 12.182653647329122, "grad_norm": 1.0635950565338135, "learning_rate": 3.676196640995849e-05, "loss": 1.2517, "step": 21210 }, { "epoch": 12.18839747271683, "grad_norm": 1.070138692855835, "learning_rate": 3.671582263173743e-05, "loss": 1.2491, "step": 21220 }, { "epoch": 12.194141298104537, "grad_norm": 0.9687944054603577, "learning_rate": 3.666969102767478e-05, "loss": 1.2481, "step": 21230 }, { "epoch": 12.199885123492246, "grad_norm": 1.0232404470443726, "learning_rate": 3.662357164003944e-05, "loss": 1.228, "step": 21240 }, { "epoch": 12.205628948879953, "grad_norm": 0.9780818819999695, "learning_rate": 3.657746451108915e-05, "loss": 1.2202, "step": 21250 }, { "epoch": 12.211372774267662, "grad_norm": 0.9533660411834717, "learning_rate": 3.65313696830704e-05, "loss": 1.251, "step": 21260 }, { "epoch": 12.21711659965537, "grad_norm": 1.0098894834518433, "learning_rate": 3.648528719821838e-05, "loss": 1.243, "step": 21270 }, { "epoch": 12.222860425043079, "grad_norm": 1.0104782581329346, "learning_rate": 3.643921709875706e-05, "loss": 1.2453, "step": 21280 }, { "epoch": 12.228604250430786, "grad_norm": 1.0211265087127686, "learning_rate": 3.6393159426898924e-05, "loss": 1.2481, "step": 21290 }, { "epoch": 12.234348075818495, "grad_norm": 1.142769455909729, "learning_rate": 3.6347114224845216e-05, "loss": 1.2746, "step": 21300 }, { "epoch": 12.240091901206203, "grad_norm": 0.9032977819442749, "learning_rate": 3.630108153478565e-05, "loss": 1.2523, "step": 21310 }, { "epoch": 12.245835726593912, "grad_norm": 1.1085121631622314, "learning_rate": 3.6255061398898496e-05, "loss": 1.2579, "step": 21320 }, { "epoch": 12.251579551981619, "grad_norm": 1.1852970123291016, "learning_rate": 3.6209053859350585e-05, "loss": 1.2448, "step": 21330 }, { "epoch": 12.257323377369328, "grad_norm": 0.975518524646759, "learning_rate": 3.6163058958297106e-05, "loss": 1.251, "step": 21340 }, { "epoch": 12.263067202757036, "grad_norm": 1.1111377477645874, "learning_rate": 3.611707673788177e-05, "loss": 1.2638, "step": 21350 }, { "epoch": 12.268811028144745, "grad_norm": 1.0246323347091675, "learning_rate": 3.607110724023656e-05, "loss": 1.2595, "step": 21360 }, { "epoch": 12.274554853532452, "grad_norm": 1.0392694473266602, "learning_rate": 3.602515050748189e-05, "loss": 1.2645, "step": 21370 }, { "epoch": 12.280298678920161, "grad_norm": 0.9927871227264404, "learning_rate": 3.597920658172647e-05, "loss": 1.2368, "step": 21380 }, { "epoch": 12.286042504307868, "grad_norm": 0.8740590810775757, "learning_rate": 3.59332755050672e-05, "loss": 1.2691, "step": 21390 }, { "epoch": 12.291786329695578, "grad_norm": 1.0087106227874756, "learning_rate": 3.588735731958932e-05, "loss": 1.2567, "step": 21400 }, { "epoch": 12.297530155083285, "grad_norm": 1.054331660270691, "learning_rate": 3.5841452067366144e-05, "loss": 1.2333, "step": 21410 }, { "epoch": 12.303273980470994, "grad_norm": 1.0606539249420166, "learning_rate": 3.579555979045921e-05, "loss": 1.2509, "step": 21420 }, { "epoch": 12.309017805858701, "grad_norm": 1.0268588066101074, "learning_rate": 3.5749680530918164e-05, "loss": 1.2487, "step": 21430 }, { "epoch": 12.31476163124641, "grad_norm": 1.0296322107315063, "learning_rate": 3.570381433078068e-05, "loss": 1.2434, "step": 21440 }, { "epoch": 12.320505456634118, "grad_norm": 1.135090708732605, "learning_rate": 3.565796123207251e-05, "loss": 1.258, "step": 21450 }, { "epoch": 12.326249282021827, "grad_norm": 1.1086962223052979, "learning_rate": 3.561212127680739e-05, "loss": 1.2385, "step": 21460 }, { "epoch": 12.331993107409534, "grad_norm": 1.06435227394104, "learning_rate": 3.556629450698697e-05, "loss": 1.2219, "step": 21470 }, { "epoch": 12.337736932797243, "grad_norm": 1.0489434003829956, "learning_rate": 3.552048096460091e-05, "loss": 1.2543, "step": 21480 }, { "epoch": 12.34348075818495, "grad_norm": 1.029554009437561, "learning_rate": 3.547468069162665e-05, "loss": 1.2493, "step": 21490 }, { "epoch": 12.34922458357266, "grad_norm": 1.0354934930801392, "learning_rate": 3.542889373002956e-05, "loss": 1.2605, "step": 21500 }, { "epoch": 12.354968408960367, "grad_norm": 1.0155259370803833, "learning_rate": 3.5383120121762746e-05, "loss": 1.2454, "step": 21510 }, { "epoch": 12.360712234348076, "grad_norm": 1.1830785274505615, "learning_rate": 3.53373599087671e-05, "loss": 1.2403, "step": 21520 }, { "epoch": 12.366456059735784, "grad_norm": 1.0363010168075562, "learning_rate": 3.5291613132971266e-05, "loss": 1.2652, "step": 21530 }, { "epoch": 12.372199885123493, "grad_norm": 1.0140489339828491, "learning_rate": 3.5245879836291516e-05, "loss": 1.2413, "step": 21540 }, { "epoch": 12.3779437105112, "grad_norm": 1.0690045356750488, "learning_rate": 3.520016006063186e-05, "loss": 1.2383, "step": 21550 }, { "epoch": 12.38368753589891, "grad_norm": 1.2001897096633911, "learning_rate": 3.515445384788386e-05, "loss": 1.2908, "step": 21560 }, { "epoch": 12.389431361286617, "grad_norm": 0.9489787817001343, "learning_rate": 3.51087612399266e-05, "loss": 1.2463, "step": 21570 }, { "epoch": 12.395175186674326, "grad_norm": 1.0735028982162476, "learning_rate": 3.5063082278626843e-05, "loss": 1.2617, "step": 21580 }, { "epoch": 12.400919012062033, "grad_norm": 0.9994289875030518, "learning_rate": 3.50174170058387e-05, "loss": 1.2558, "step": 21590 }, { "epoch": 12.406662837449742, "grad_norm": 0.9879858493804932, "learning_rate": 3.4971765463403845e-05, "loss": 1.2406, "step": 21600 }, { "epoch": 12.41240666283745, "grad_norm": 1.0495525598526, "learning_rate": 3.4926127693151304e-05, "loss": 1.2361, "step": 21610 }, { "epoch": 12.418150488225159, "grad_norm": 1.0661427974700928, "learning_rate": 3.488050373689751e-05, "loss": 1.2708, "step": 21620 }, { "epoch": 12.423894313612866, "grad_norm": 0.9698820114135742, "learning_rate": 3.4834893636446254e-05, "loss": 1.2548, "step": 21630 }, { "epoch": 12.429638139000575, "grad_norm": 1.2068321704864502, "learning_rate": 3.478929743358859e-05, "loss": 1.251, "step": 21640 }, { "epoch": 12.435381964388283, "grad_norm": 1.1306241750717163, "learning_rate": 3.47437151701029e-05, "loss": 1.2482, "step": 21650 }, { "epoch": 12.441125789775992, "grad_norm": 0.9876848459243774, "learning_rate": 3.4698146887754725e-05, "loss": 1.2236, "step": 21660 }, { "epoch": 12.446869615163699, "grad_norm": 1.0889356136322021, "learning_rate": 3.465259262829685e-05, "loss": 1.2404, "step": 21670 }, { "epoch": 12.452613440551406, "grad_norm": 0.9695348739624023, "learning_rate": 3.4607052433469177e-05, "loss": 1.2334, "step": 21680 }, { "epoch": 12.458357265939116, "grad_norm": 1.0182578563690186, "learning_rate": 3.456152634499871e-05, "loss": 1.2517, "step": 21690 }, { "epoch": 12.464101091326823, "grad_norm": 0.9320023655891418, "learning_rate": 3.45160144045996e-05, "loss": 1.2448, "step": 21700 }, { "epoch": 12.469844916714532, "grad_norm": 0.9913382530212402, "learning_rate": 3.447051665397295e-05, "loss": 1.2461, "step": 21710 }, { "epoch": 12.47558874210224, "grad_norm": 1.0610382556915283, "learning_rate": 3.442503313480693e-05, "loss": 1.2474, "step": 21720 }, { "epoch": 12.481332567489948, "grad_norm": 0.9985247254371643, "learning_rate": 3.437956388877659e-05, "loss": 1.2703, "step": 21730 }, { "epoch": 12.487076392877656, "grad_norm": 0.9466493725776672, "learning_rate": 3.433410895754396e-05, "loss": 1.2527, "step": 21740 }, { "epoch": 12.492820218265365, "grad_norm": 1.041614294052124, "learning_rate": 3.428866838275799e-05, "loss": 1.2589, "step": 21750 }, { "epoch": 12.498564043653072, "grad_norm": 1.0434848070144653, "learning_rate": 3.424324220605437e-05, "loss": 1.2622, "step": 21760 }, { "epoch": 12.504307869040781, "grad_norm": 1.0748556852340698, "learning_rate": 3.41978304690557e-05, "loss": 1.2521, "step": 21770 }, { "epoch": 12.510051694428489, "grad_norm": 0.997688889503479, "learning_rate": 3.415243321337127e-05, "loss": 1.2472, "step": 21780 }, { "epoch": 12.515795519816198, "grad_norm": 1.0677061080932617, "learning_rate": 3.4107050480597144e-05, "loss": 1.2282, "step": 21790 }, { "epoch": 12.521539345203905, "grad_norm": 0.9597499370574951, "learning_rate": 3.4061682312316095e-05, "loss": 1.2496, "step": 21800 }, { "epoch": 12.527283170591614, "grad_norm": 0.95613032579422, "learning_rate": 3.40163287500975e-05, "loss": 1.2565, "step": 21810 }, { "epoch": 12.533026995979322, "grad_norm": 1.0700112581253052, "learning_rate": 3.397098983549739e-05, "loss": 1.2407, "step": 21820 }, { "epoch": 12.53877082136703, "grad_norm": 0.8729975819587708, "learning_rate": 3.3925665610058394e-05, "loss": 1.241, "step": 21830 }, { "epoch": 12.544514646754738, "grad_norm": 1.0821452140808105, "learning_rate": 3.388035611530959e-05, "loss": 1.251, "step": 21840 }, { "epoch": 12.550258472142447, "grad_norm": 0.9793508648872375, "learning_rate": 3.3835061392766695e-05, "loss": 1.2518, "step": 21850 }, { "epoch": 12.556002297530155, "grad_norm": 1.0446723699569702, "learning_rate": 3.378978148393176e-05, "loss": 1.2391, "step": 21860 }, { "epoch": 12.561746122917864, "grad_norm": 0.9936966300010681, "learning_rate": 3.374451643029334e-05, "loss": 1.2475, "step": 21870 }, { "epoch": 12.567489948305571, "grad_norm": 0.998653769493103, "learning_rate": 3.3699266273326376e-05, "loss": 1.2539, "step": 21880 }, { "epoch": 12.57323377369328, "grad_norm": 0.9928255677223206, "learning_rate": 3.36540310544921e-05, "loss": 1.2484, "step": 21890 }, { "epoch": 12.578977599080988, "grad_norm": 0.9713891744613647, "learning_rate": 3.360881081523815e-05, "loss": 1.2189, "step": 21900 }, { "epoch": 12.584721424468697, "grad_norm": 1.2316539287567139, "learning_rate": 3.3563605596998354e-05, "loss": 1.2607, "step": 21910 }, { "epoch": 12.590465249856404, "grad_norm": 1.0364990234375, "learning_rate": 3.351841544119281e-05, "loss": 1.2658, "step": 21920 }, { "epoch": 12.596209075244113, "grad_norm": 1.0314924716949463, "learning_rate": 3.3473240389227854e-05, "loss": 1.2657, "step": 21930 }, { "epoch": 12.60195290063182, "grad_norm": 1.1862787008285522, "learning_rate": 3.342808048249589e-05, "loss": 1.2526, "step": 21940 }, { "epoch": 12.60769672601953, "grad_norm": 0.9992510676383972, "learning_rate": 3.338293576237555e-05, "loss": 1.2587, "step": 21950 }, { "epoch": 12.613440551407237, "grad_norm": 1.094550609588623, "learning_rate": 3.3337806270231456e-05, "loss": 1.2811, "step": 21960 }, { "epoch": 12.619184376794946, "grad_norm": 0.9854142069816589, "learning_rate": 3.329269204741435e-05, "loss": 1.2506, "step": 21970 }, { "epoch": 12.624928202182653, "grad_norm": 0.9363583326339722, "learning_rate": 3.3247593135260954e-05, "loss": 1.2441, "step": 21980 }, { "epoch": 12.630672027570363, "grad_norm": 0.9072157740592957, "learning_rate": 3.320250957509393e-05, "loss": 1.2374, "step": 21990 }, { "epoch": 12.63641585295807, "grad_norm": 0.997549295425415, "learning_rate": 3.3157441408221946e-05, "loss": 1.2427, "step": 22000 }, { "epoch": 12.63641585295807, "eval_loss": 1.0694881677627563, "eval_runtime": 121.1056, "eval_samples_per_second": 13.137, "eval_steps_per_second": 0.14, "eval_wer": 0.0892191208046107, "step": 22000 }, { "epoch": 12.642159678345779, "grad_norm": 1.0122162103652954, "learning_rate": 3.3112388675939494e-05, "loss": 1.2408, "step": 22010 }, { "epoch": 12.647903503733486, "grad_norm": 1.0155773162841797, "learning_rate": 3.306735141952698e-05, "loss": 1.2548, "step": 22020 }, { "epoch": 12.653647329121195, "grad_norm": 1.0692771673202515, "learning_rate": 3.3022329680250605e-05, "loss": 1.2448, "step": 22030 }, { "epoch": 12.659391154508903, "grad_norm": 0.9947674870491028, "learning_rate": 3.2977323499362314e-05, "loss": 1.2686, "step": 22040 }, { "epoch": 12.665134979896612, "grad_norm": 0.9661487936973572, "learning_rate": 3.2932332918099876e-05, "loss": 1.2434, "step": 22050 }, { "epoch": 12.67087880528432, "grad_norm": 1.0560882091522217, "learning_rate": 3.288735797768669e-05, "loss": 1.2377, "step": 22060 }, { "epoch": 12.676622630672028, "grad_norm": 1.0522441864013672, "learning_rate": 3.2842398719331906e-05, "loss": 1.2435, "step": 22070 }, { "epoch": 12.682366456059736, "grad_norm": 0.9267446398735046, "learning_rate": 3.279745518423022e-05, "loss": 1.2457, "step": 22080 }, { "epoch": 12.688110281447443, "grad_norm": 1.0738468170166016, "learning_rate": 3.275252741356195e-05, "loss": 1.2522, "step": 22090 }, { "epoch": 12.693854106835152, "grad_norm": 1.0103540420532227, "learning_rate": 3.2707615448492995e-05, "loss": 1.255, "step": 22100 }, { "epoch": 12.69959793222286, "grad_norm": 1.005452275276184, "learning_rate": 3.266271933017476e-05, "loss": 1.2417, "step": 22110 }, { "epoch": 12.705341757610569, "grad_norm": 1.0360212326049805, "learning_rate": 3.261783909974413e-05, "loss": 1.2403, "step": 22120 }, { "epoch": 12.711085582998276, "grad_norm": 0.938593327999115, "learning_rate": 3.2572974798323406e-05, "loss": 1.2483, "step": 22130 }, { "epoch": 12.716829408385985, "grad_norm": 1.0119835138320923, "learning_rate": 3.2528126467020346e-05, "loss": 1.2416, "step": 22140 }, { "epoch": 12.722573233773693, "grad_norm": 0.9499661922454834, "learning_rate": 3.2483294146928014e-05, "loss": 1.2364, "step": 22150 }, { "epoch": 12.728317059161402, "grad_norm": 0.9927830100059509, "learning_rate": 3.243847787912484e-05, "loss": 1.2329, "step": 22160 }, { "epoch": 12.734060884549109, "grad_norm": 1.033819556236267, "learning_rate": 3.239367770467456e-05, "loss": 1.2488, "step": 22170 }, { "epoch": 12.739804709936818, "grad_norm": 1.0417524576187134, "learning_rate": 3.2348893664626115e-05, "loss": 1.237, "step": 22180 }, { "epoch": 12.745548535324525, "grad_norm": 0.9829577207565308, "learning_rate": 3.230412580001371e-05, "loss": 1.2343, "step": 22190 }, { "epoch": 12.751292360712235, "grad_norm": 1.0233218669891357, "learning_rate": 3.2259374151856724e-05, "loss": 1.2463, "step": 22200 }, { "epoch": 12.757036186099942, "grad_norm": 1.0613200664520264, "learning_rate": 3.2214638761159635e-05, "loss": 1.2692, "step": 22210 }, { "epoch": 12.762780011487651, "grad_norm": 0.987410843372345, "learning_rate": 3.2169919668912066e-05, "loss": 1.2563, "step": 22220 }, { "epoch": 12.768523836875358, "grad_norm": 0.9725896120071411, "learning_rate": 3.212521691608868e-05, "loss": 1.2236, "step": 22230 }, { "epoch": 12.774267662263068, "grad_norm": 1.0240734815597534, "learning_rate": 3.208053054364922e-05, "loss": 1.2574, "step": 22240 }, { "epoch": 12.780011487650775, "grad_norm": 1.0398136377334595, "learning_rate": 3.203586059253836e-05, "loss": 1.2579, "step": 22250 }, { "epoch": 12.785755313038484, "grad_norm": 1.025903582572937, "learning_rate": 3.199120710368573e-05, "loss": 1.2539, "step": 22260 }, { "epoch": 12.791499138426191, "grad_norm": 0.9504820108413696, "learning_rate": 3.194657011800593e-05, "loss": 1.2507, "step": 22270 }, { "epoch": 12.7972429638139, "grad_norm": 1.1003867387771606, "learning_rate": 3.190194967639838e-05, "loss": 1.253, "step": 22280 }, { "epoch": 12.802986789201608, "grad_norm": 1.0308908224105835, "learning_rate": 3.185734581974739e-05, "loss": 1.2495, "step": 22290 }, { "epoch": 12.808730614589317, "grad_norm": 0.9987902641296387, "learning_rate": 3.1812758588922045e-05, "loss": 1.235, "step": 22300 }, { "epoch": 12.814474439977024, "grad_norm": 0.9599257707595825, "learning_rate": 3.176818802477617e-05, "loss": 1.251, "step": 22310 }, { "epoch": 12.820218265364733, "grad_norm": 1.0296530723571777, "learning_rate": 3.172363416814839e-05, "loss": 1.2369, "step": 22320 }, { "epoch": 12.82596209075244, "grad_norm": 0.9570682048797607, "learning_rate": 3.167909705986196e-05, "loss": 1.2493, "step": 22330 }, { "epoch": 12.83170591614015, "grad_norm": 1.0246185064315796, "learning_rate": 3.163457674072482e-05, "loss": 1.2749, "step": 22340 }, { "epoch": 12.837449741527857, "grad_norm": 1.042843222618103, "learning_rate": 3.1590073251529524e-05, "loss": 1.2472, "step": 22350 }, { "epoch": 12.843193566915566, "grad_norm": 0.9533725380897522, "learning_rate": 3.1545586633053173e-05, "loss": 1.2397, "step": 22360 }, { "epoch": 12.848937392303274, "grad_norm": 1.073738694190979, "learning_rate": 3.150111692605746e-05, "loss": 1.2495, "step": 22370 }, { "epoch": 12.854681217690983, "grad_norm": 0.970797598361969, "learning_rate": 3.1456664171288556e-05, "loss": 1.253, "step": 22380 }, { "epoch": 12.86042504307869, "grad_norm": 1.119827389717102, "learning_rate": 3.141222840947709e-05, "loss": 1.2473, "step": 22390 }, { "epoch": 12.8661688684664, "grad_norm": 1.013615369796753, "learning_rate": 3.136780968133816e-05, "loss": 1.257, "step": 22400 }, { "epoch": 12.871912693854107, "grad_norm": 1.1204568147659302, "learning_rate": 3.1323408027571174e-05, "loss": 1.2395, "step": 22410 }, { "epoch": 12.877656519241816, "grad_norm": 1.0731607675552368, "learning_rate": 3.127902348886e-05, "loss": 1.2319, "step": 22420 }, { "epoch": 12.883400344629523, "grad_norm": 1.1322605609893799, "learning_rate": 3.123465610587274e-05, "loss": 1.2648, "step": 22430 }, { "epoch": 12.889144170017232, "grad_norm": 1.0746358633041382, "learning_rate": 3.1190305919261865e-05, "loss": 1.2615, "step": 22440 }, { "epoch": 12.89488799540494, "grad_norm": 1.0055949687957764, "learning_rate": 3.114597296966399e-05, "loss": 1.2632, "step": 22450 }, { "epoch": 12.900631820792649, "grad_norm": 1.0397506952285767, "learning_rate": 3.110165729769997e-05, "loss": 1.2579, "step": 22460 }, { "epoch": 12.906375646180356, "grad_norm": 1.0291892290115356, "learning_rate": 3.105735894397487e-05, "loss": 1.2484, "step": 22470 }, { "epoch": 12.912119471568065, "grad_norm": 1.0710965394973755, "learning_rate": 3.101307794907784e-05, "loss": 1.2496, "step": 22480 }, { "epoch": 12.917863296955773, "grad_norm": 1.2363033294677734, "learning_rate": 3.096881435358217e-05, "loss": 1.2731, "step": 22490 }, { "epoch": 12.92360712234348, "grad_norm": 1.0314826965332031, "learning_rate": 3.0924568198045164e-05, "loss": 1.236, "step": 22500 }, { "epoch": 12.929350947731189, "grad_norm": 1.1028001308441162, "learning_rate": 3.088033952300814e-05, "loss": 1.2726, "step": 22510 }, { "epoch": 12.935094773118898, "grad_norm": 0.9568919539451599, "learning_rate": 3.083612836899646e-05, "loss": 1.2563, "step": 22520 }, { "epoch": 12.940838598506605, "grad_norm": 1.0045106410980225, "learning_rate": 3.079193477651936e-05, "loss": 1.2386, "step": 22530 }, { "epoch": 12.946582423894313, "grad_norm": 0.9620433449745178, "learning_rate": 3.0747758786070044e-05, "loss": 1.2591, "step": 22540 }, { "epoch": 12.952326249282022, "grad_norm": 1.0270787477493286, "learning_rate": 3.070360043812553e-05, "loss": 1.2382, "step": 22550 }, { "epoch": 12.95807007466973, "grad_norm": 1.0438264608383179, "learning_rate": 3.0659459773146746e-05, "loss": 1.2453, "step": 22560 }, { "epoch": 12.963813900057438, "grad_norm": 1.002175211906433, "learning_rate": 3.0615336831578347e-05, "loss": 1.2453, "step": 22570 }, { "epoch": 12.969557725445146, "grad_norm": 1.0425161123275757, "learning_rate": 3.057123165384876e-05, "loss": 1.2472, "step": 22580 }, { "epoch": 12.975301550832855, "grad_norm": 0.9981757998466492, "learning_rate": 3.052714428037021e-05, "loss": 1.2487, "step": 22590 }, { "epoch": 12.981045376220562, "grad_norm": 1.0290584564208984, "learning_rate": 3.0483074751538482e-05, "loss": 1.2571, "step": 22600 }, { "epoch": 12.986789201608271, "grad_norm": 0.9339661598205566, "learning_rate": 3.043902310773312e-05, "loss": 1.2563, "step": 22610 }, { "epoch": 12.992533026995979, "grad_norm": 1.0874369144439697, "learning_rate": 3.039498938931724e-05, "loss": 1.2517, "step": 22620 }, { "epoch": 12.998276852383688, "grad_norm": 1.0075799226760864, "learning_rate": 3.03509736366375e-05, "loss": 1.2597, "step": 22630 }, { "epoch": 13.004020677771395, "grad_norm": 0.9720813632011414, "learning_rate": 3.030697589002417e-05, "loss": 1.2482, "step": 22640 }, { "epoch": 13.009764503159104, "grad_norm": 0.9604555368423462, "learning_rate": 3.026299618979095e-05, "loss": 1.2316, "step": 22650 }, { "epoch": 13.015508328546812, "grad_norm": 1.0735442638397217, "learning_rate": 3.0219034576235043e-05, "loss": 1.2197, "step": 22660 }, { "epoch": 13.02125215393452, "grad_norm": 1.1345727443695068, "learning_rate": 3.0175091089637093e-05, "loss": 1.2381, "step": 22670 }, { "epoch": 13.026995979322228, "grad_norm": 1.0326781272888184, "learning_rate": 3.0131165770261087e-05, "loss": 1.2299, "step": 22680 }, { "epoch": 13.032739804709937, "grad_norm": 1.0122668743133545, "learning_rate": 3.008725865835441e-05, "loss": 1.2204, "step": 22690 }, { "epoch": 13.038483630097645, "grad_norm": 0.9963854551315308, "learning_rate": 3.004336979414773e-05, "loss": 1.2183, "step": 22700 }, { "epoch": 13.044227455485354, "grad_norm": 0.9391648173332214, "learning_rate": 2.9999499217855038e-05, "loss": 1.2172, "step": 22710 }, { "epoch": 13.049971280873061, "grad_norm": 0.9927622675895691, "learning_rate": 2.9955646969673527e-05, "loss": 1.2265, "step": 22720 }, { "epoch": 13.05571510626077, "grad_norm": 1.011696219444275, "learning_rate": 2.99118130897836e-05, "loss": 1.2267, "step": 22730 }, { "epoch": 13.061458931648477, "grad_norm": 1.04385244846344, "learning_rate": 2.986799761834888e-05, "loss": 1.2418, "step": 22740 }, { "epoch": 13.067202757036187, "grad_norm": 1.0134224891662598, "learning_rate": 2.982420059551604e-05, "loss": 1.2238, "step": 22750 }, { "epoch": 13.072946582423894, "grad_norm": 0.9780691266059875, "learning_rate": 2.978042206141492e-05, "loss": 1.2041, "step": 22760 }, { "epoch": 13.078690407811603, "grad_norm": 1.0055365562438965, "learning_rate": 2.9736662056158405e-05, "loss": 1.2422, "step": 22770 }, { "epoch": 13.08443423319931, "grad_norm": 0.9709728956222534, "learning_rate": 2.9692920619842353e-05, "loss": 1.2419, "step": 22780 }, { "epoch": 13.09017805858702, "grad_norm": 1.033838152885437, "learning_rate": 2.9649197792545675e-05, "loss": 1.2198, "step": 22790 }, { "epoch": 13.095921883974727, "grad_norm": 0.9667700529098511, "learning_rate": 2.960549361433019e-05, "loss": 1.231, "step": 22800 }, { "epoch": 13.101665709362436, "grad_norm": 0.9930551052093506, "learning_rate": 2.9561808125240663e-05, "loss": 1.2333, "step": 22810 }, { "epoch": 13.107409534750143, "grad_norm": 1.0348711013793945, "learning_rate": 2.9518141365304704e-05, "loss": 1.2274, "step": 22820 }, { "epoch": 13.113153360137852, "grad_norm": 1.003509521484375, "learning_rate": 2.9474493374532743e-05, "loss": 1.2124, "step": 22830 }, { "epoch": 13.11889718552556, "grad_norm": 1.1533737182617188, "learning_rate": 2.943086419291806e-05, "loss": 1.2189, "step": 22840 }, { "epoch": 13.124641010913269, "grad_norm": 0.988194465637207, "learning_rate": 2.9387253860436685e-05, "loss": 1.2327, "step": 22850 }, { "epoch": 13.130384836300976, "grad_norm": 1.0287445783615112, "learning_rate": 2.9343662417047396e-05, "loss": 1.236, "step": 22860 }, { "epoch": 13.136128661688685, "grad_norm": 0.9780846238136292, "learning_rate": 2.930008990269161e-05, "loss": 1.2272, "step": 22870 }, { "epoch": 13.141872487076393, "grad_norm": 0.9711022973060608, "learning_rate": 2.9256536357293424e-05, "loss": 1.2257, "step": 22880 }, { "epoch": 13.147616312464102, "grad_norm": 1.114785075187683, "learning_rate": 2.9213001820759583e-05, "loss": 1.2346, "step": 22890 }, { "epoch": 13.15336013785181, "grad_norm": 1.0094363689422607, "learning_rate": 2.916948633297939e-05, "loss": 1.2381, "step": 22900 }, { "epoch": 13.159103963239518, "grad_norm": 1.045957088470459, "learning_rate": 2.912598993382468e-05, "loss": 1.2226, "step": 22910 }, { "epoch": 13.164847788627226, "grad_norm": 1.175858974456787, "learning_rate": 2.908251266314985e-05, "loss": 1.2424, "step": 22920 }, { "epoch": 13.170591614014935, "grad_norm": 0.9187557697296143, "learning_rate": 2.90390545607917e-05, "loss": 1.235, "step": 22930 }, { "epoch": 13.176335439402642, "grad_norm": 0.9896097183227539, "learning_rate": 2.8995615666569544e-05, "loss": 1.2335, "step": 22940 }, { "epoch": 13.18207926479035, "grad_norm": 0.9177210927009583, "learning_rate": 2.8952196020285e-05, "loss": 1.2168, "step": 22950 }, { "epoch": 13.187823090178059, "grad_norm": 0.9632211327552795, "learning_rate": 2.8908795661722155e-05, "loss": 1.2454, "step": 22960 }, { "epoch": 13.193566915565766, "grad_norm": 1.0591082572937012, "learning_rate": 2.8865414630647323e-05, "loss": 1.2199, "step": 22970 }, { "epoch": 13.199310740953475, "grad_norm": 1.0240976810455322, "learning_rate": 2.8822052966809215e-05, "loss": 1.2242, "step": 22980 }, { "epoch": 13.205054566341182, "grad_norm": 0.9828415513038635, "learning_rate": 2.8778710709938707e-05, "loss": 1.2325, "step": 22990 }, { "epoch": 13.210798391728892, "grad_norm": 0.9983562231063843, "learning_rate": 2.87353878997489e-05, "loss": 1.2471, "step": 23000 }, { "epoch": 13.210798391728892, "eval_loss": 1.0588030815124512, "eval_runtime": 122.0845, "eval_samples_per_second": 13.032, "eval_steps_per_second": 0.139, "eval_wer": 0.08557464120239575, "step": 23000 }, { "epoch": 13.216542217116599, "grad_norm": 0.9553768038749695, "learning_rate": 2.8692084575935135e-05, "loss": 1.24, "step": 23010 }, { "epoch": 13.222286042504308, "grad_norm": 0.8903969526290894, "learning_rate": 2.864880077817486e-05, "loss": 1.2232, "step": 23020 }, { "epoch": 13.228029867892015, "grad_norm": 0.9967452883720398, "learning_rate": 2.8605536546127658e-05, "loss": 1.2307, "step": 23030 }, { "epoch": 13.233773693279725, "grad_norm": 1.0698235034942627, "learning_rate": 2.8562291919435146e-05, "loss": 1.2195, "step": 23040 }, { "epoch": 13.239517518667432, "grad_norm": 0.9739837050437927, "learning_rate": 2.8519066937720973e-05, "loss": 1.2226, "step": 23050 }, { "epoch": 13.245261344055141, "grad_norm": 1.014878511428833, "learning_rate": 2.847586164059085e-05, "loss": 1.2163, "step": 23060 }, { "epoch": 13.251005169442848, "grad_norm": 1.1119699478149414, "learning_rate": 2.8432676067632363e-05, "loss": 1.2345, "step": 23070 }, { "epoch": 13.256748994830557, "grad_norm": 1.0444631576538086, "learning_rate": 2.838951025841513e-05, "loss": 1.2405, "step": 23080 }, { "epoch": 13.262492820218265, "grad_norm": 1.0360527038574219, "learning_rate": 2.8346364252490566e-05, "loss": 1.2238, "step": 23090 }, { "epoch": 13.268236645605974, "grad_norm": 1.0620757341384888, "learning_rate": 2.8303238089391982e-05, "loss": 1.2506, "step": 23100 }, { "epoch": 13.273980470993681, "grad_norm": 0.9800290465354919, "learning_rate": 2.8260131808634527e-05, "loss": 1.2183, "step": 23110 }, { "epoch": 13.27972429638139, "grad_norm": 1.0119231939315796, "learning_rate": 2.8217045449715092e-05, "loss": 1.2316, "step": 23120 }, { "epoch": 13.285468121769098, "grad_norm": 1.0038931369781494, "learning_rate": 2.817397905211234e-05, "loss": 1.222, "step": 23130 }, { "epoch": 13.291211947156807, "grad_norm": 1.0422852039337158, "learning_rate": 2.8130932655286646e-05, "loss": 1.23, "step": 23140 }, { "epoch": 13.296955772544514, "grad_norm": 1.2609690427780151, "learning_rate": 2.8087906298680018e-05, "loss": 1.2369, "step": 23150 }, { "epoch": 13.302699597932223, "grad_norm": 0.9615899920463562, "learning_rate": 2.804490002171617e-05, "loss": 1.2403, "step": 23160 }, { "epoch": 13.30844342331993, "grad_norm": 1.066990852355957, "learning_rate": 2.800191386380034e-05, "loss": 1.2475, "step": 23170 }, { "epoch": 13.31418724870764, "grad_norm": 1.0014039278030396, "learning_rate": 2.7958947864319412e-05, "loss": 1.2261, "step": 23180 }, { "epoch": 13.319931074095347, "grad_norm": 1.010463833808899, "learning_rate": 2.7916002062641733e-05, "loss": 1.2355, "step": 23190 }, { "epoch": 13.325674899483056, "grad_norm": 1.3250781297683716, "learning_rate": 2.787307649811718e-05, "loss": 1.2278, "step": 23200 }, { "epoch": 13.331418724870764, "grad_norm": 1.0303561687469482, "learning_rate": 2.7830171210077094e-05, "loss": 1.2316, "step": 23210 }, { "epoch": 13.337162550258473, "grad_norm": 1.0535682439804077, "learning_rate": 2.7787286237834193e-05, "loss": 1.2257, "step": 23220 }, { "epoch": 13.34290637564618, "grad_norm": 1.1015154123306274, "learning_rate": 2.7744421620682636e-05, "loss": 1.2408, "step": 23230 }, { "epoch": 13.34865020103389, "grad_norm": 1.029428482055664, "learning_rate": 2.7701577397897894e-05, "loss": 1.2442, "step": 23240 }, { "epoch": 13.354394026421597, "grad_norm": 1.0226709842681885, "learning_rate": 2.7658753608736726e-05, "loss": 1.2237, "step": 23250 }, { "epoch": 13.360137851809306, "grad_norm": 1.0162632465362549, "learning_rate": 2.761595029243726e-05, "loss": 1.2644, "step": 23260 }, { "epoch": 13.365881677197013, "grad_norm": 0.9599072933197021, "learning_rate": 2.7573167488218764e-05, "loss": 1.2385, "step": 23270 }, { "epoch": 13.371625502584722, "grad_norm": 0.9452846050262451, "learning_rate": 2.753040523528177e-05, "loss": 1.2263, "step": 23280 }, { "epoch": 13.37736932797243, "grad_norm": 0.9728118181228638, "learning_rate": 2.7487663572807992e-05, "loss": 1.2265, "step": 23290 }, { "epoch": 13.383113153360139, "grad_norm": 1.0152369737625122, "learning_rate": 2.7444942539960204e-05, "loss": 1.2334, "step": 23300 }, { "epoch": 13.388856978747846, "grad_norm": 0.9801512360572815, "learning_rate": 2.7402242175882375e-05, "loss": 1.223, "step": 23310 }, { "epoch": 13.394600804135555, "grad_norm": 1.009696364402771, "learning_rate": 2.7359562519699434e-05, "loss": 1.2292, "step": 23320 }, { "epoch": 13.400344629523262, "grad_norm": 1.152689814567566, "learning_rate": 2.7316903610517436e-05, "loss": 1.2197, "step": 23330 }, { "epoch": 13.406088454910972, "grad_norm": 1.09321129322052, "learning_rate": 2.7274265487423356e-05, "loss": 1.2333, "step": 23340 }, { "epoch": 13.411832280298679, "grad_norm": 1.1807641983032227, "learning_rate": 2.723164818948512e-05, "loss": 1.2236, "step": 23350 }, { "epoch": 13.417576105686386, "grad_norm": 1.0430477857589722, "learning_rate": 2.718905175575165e-05, "loss": 1.2364, "step": 23360 }, { "epoch": 13.423319931074095, "grad_norm": 1.0594213008880615, "learning_rate": 2.7146476225252647e-05, "loss": 1.2427, "step": 23370 }, { "epoch": 13.429063756461803, "grad_norm": 0.9445695877075195, "learning_rate": 2.7103921636998735e-05, "loss": 1.2251, "step": 23380 }, { "epoch": 13.434807581849512, "grad_norm": 1.036017894744873, "learning_rate": 2.7061388029981333e-05, "loss": 1.221, "step": 23390 }, { "epoch": 13.44055140723722, "grad_norm": 1.6682243347167969, "learning_rate": 2.701887544317263e-05, "loss": 1.2158, "step": 23400 }, { "epoch": 13.446295232624928, "grad_norm": 0.8891414999961853, "learning_rate": 2.6976383915525554e-05, "loss": 1.2261, "step": 23410 }, { "epoch": 13.452039058012636, "grad_norm": 0.9824521541595459, "learning_rate": 2.6933913485973693e-05, "loss": 1.2463, "step": 23420 }, { "epoch": 13.457782883400345, "grad_norm": 0.9425431489944458, "learning_rate": 2.6891464193431405e-05, "loss": 1.2352, "step": 23430 }, { "epoch": 13.463526708788052, "grad_norm": 0.9886574745178223, "learning_rate": 2.6849036076793564e-05, "loss": 1.2335, "step": 23440 }, { "epoch": 13.469270534175761, "grad_norm": 1.0540603399276733, "learning_rate": 2.6806629174935754e-05, "loss": 1.2191, "step": 23450 }, { "epoch": 13.475014359563469, "grad_norm": 1.169461727142334, "learning_rate": 2.676424352671403e-05, "loss": 1.2313, "step": 23460 }, { "epoch": 13.480758184951178, "grad_norm": 0.9283115267753601, "learning_rate": 2.6721879170965003e-05, "loss": 1.2429, "step": 23470 }, { "epoch": 13.486502010338885, "grad_norm": 0.9985254406929016, "learning_rate": 2.667953614650583e-05, "loss": 1.2523, "step": 23480 }, { "epoch": 13.492245835726594, "grad_norm": 1.0303760766983032, "learning_rate": 2.663721449213401e-05, "loss": 1.2378, "step": 23490 }, { "epoch": 13.497989661114302, "grad_norm": 1.1579179763793945, "learning_rate": 2.6594914246627578e-05, "loss": 1.24, "step": 23500 }, { "epoch": 13.50373348650201, "grad_norm": 1.0409373044967651, "learning_rate": 2.6552635448744872e-05, "loss": 1.2366, "step": 23510 }, { "epoch": 13.509477311889718, "grad_norm": 1.0083309412002563, "learning_rate": 2.6510378137224585e-05, "loss": 1.2455, "step": 23520 }, { "epoch": 13.515221137277427, "grad_norm": 1.0123381614685059, "learning_rate": 2.6468142350785786e-05, "loss": 1.2384, "step": 23530 }, { "epoch": 13.520964962665134, "grad_norm": 1.0339261293411255, "learning_rate": 2.642592812812774e-05, "loss": 1.2192, "step": 23540 }, { "epoch": 13.526708788052844, "grad_norm": 0.8907485604286194, "learning_rate": 2.638373550793003e-05, "loss": 1.2116, "step": 23550 }, { "epoch": 13.532452613440551, "grad_norm": 0.9731130003929138, "learning_rate": 2.634156452885236e-05, "loss": 1.2301, "step": 23560 }, { "epoch": 13.53819643882826, "grad_norm": 0.959918737411499, "learning_rate": 2.629941522953468e-05, "loss": 1.218, "step": 23570 }, { "epoch": 13.543940264215967, "grad_norm": 1.192635178565979, "learning_rate": 2.6257287648597073e-05, "loss": 1.2277, "step": 23580 }, { "epoch": 13.549684089603677, "grad_norm": 1.036597728729248, "learning_rate": 2.6215181824639647e-05, "loss": 1.2133, "step": 23590 }, { "epoch": 13.555427914991384, "grad_norm": 1.0206176042556763, "learning_rate": 2.6173097796242657e-05, "loss": 1.2291, "step": 23600 }, { "epoch": 13.561171740379093, "grad_norm": 1.0097376108169556, "learning_rate": 2.613103560196636e-05, "loss": 1.2176, "step": 23610 }, { "epoch": 13.5669155657668, "grad_norm": 0.9872629046440125, "learning_rate": 2.6088995280350958e-05, "loss": 1.2231, "step": 23620 }, { "epoch": 13.57265939115451, "grad_norm": 1.01505446434021, "learning_rate": 2.6046976869916712e-05, "loss": 1.2332, "step": 23630 }, { "epoch": 13.578403216542217, "grad_norm": 1.026518702507019, "learning_rate": 2.6004980409163705e-05, "loss": 1.25, "step": 23640 }, { "epoch": 13.584147041929926, "grad_norm": 1.0941832065582275, "learning_rate": 2.596300593657196e-05, "loss": 1.2366, "step": 23650 }, { "epoch": 13.589890867317633, "grad_norm": 0.9591879844665527, "learning_rate": 2.5921053490601388e-05, "loss": 1.237, "step": 23660 }, { "epoch": 13.595634692705342, "grad_norm": 1.0955452919006348, "learning_rate": 2.5879123109691635e-05, "loss": 1.2458, "step": 23670 }, { "epoch": 13.60137851809305, "grad_norm": 1.0414639711380005, "learning_rate": 2.5837214832262192e-05, "loss": 1.2363, "step": 23680 }, { "epoch": 13.607122343480759, "grad_norm": 0.9564809203147888, "learning_rate": 2.5795328696712246e-05, "loss": 1.2187, "step": 23690 }, { "epoch": 13.612866168868466, "grad_norm": 0.9911343455314636, "learning_rate": 2.5753464741420775e-05, "loss": 1.2278, "step": 23700 }, { "epoch": 13.618609994256175, "grad_norm": 0.9907875657081604, "learning_rate": 2.5711623004746348e-05, "loss": 1.2483, "step": 23710 }, { "epoch": 13.624353819643883, "grad_norm": 0.9678093194961548, "learning_rate": 2.5669803525027207e-05, "loss": 1.2296, "step": 23720 }, { "epoch": 13.630097645031592, "grad_norm": 1.058763027191162, "learning_rate": 2.5628006340581244e-05, "loss": 1.2111, "step": 23730 }, { "epoch": 13.6358414704193, "grad_norm": 0.9545002579689026, "learning_rate": 2.558623148970584e-05, "loss": 1.2418, "step": 23740 }, { "epoch": 13.641585295807008, "grad_norm": 0.9811776876449585, "learning_rate": 2.5544479010677984e-05, "loss": 1.2363, "step": 23750 }, { "epoch": 13.647329121194716, "grad_norm": 1.0909405946731567, "learning_rate": 2.5502748941754155e-05, "loss": 1.2347, "step": 23760 }, { "epoch": 13.653072946582423, "grad_norm": 0.9519487023353577, "learning_rate": 2.5461041321170243e-05, "loss": 1.2231, "step": 23770 }, { "epoch": 13.658816771970132, "grad_norm": 0.944462776184082, "learning_rate": 2.5419356187141652e-05, "loss": 1.2307, "step": 23780 }, { "epoch": 13.664560597357841, "grad_norm": 0.9848902821540833, "learning_rate": 2.5377693577863092e-05, "loss": 1.2123, "step": 23790 }, { "epoch": 13.670304422745549, "grad_norm": 1.0787533521652222, "learning_rate": 2.5336053531508737e-05, "loss": 1.2451, "step": 23800 }, { "epoch": 13.676048248133256, "grad_norm": 0.9055966138839722, "learning_rate": 2.529443608623198e-05, "loss": 1.2285, "step": 23810 }, { "epoch": 13.681792073520965, "grad_norm": 1.0509308576583862, "learning_rate": 2.5252841280165606e-05, "loss": 1.2488, "step": 23820 }, { "epoch": 13.687535898908672, "grad_norm": 0.9895369410514832, "learning_rate": 2.521126915142156e-05, "loss": 1.2443, "step": 23830 }, { "epoch": 13.693279724296382, "grad_norm": 1.0254125595092773, "learning_rate": 2.5169719738091092e-05, "loss": 1.2213, "step": 23840 }, { "epoch": 13.699023549684089, "grad_norm": 0.9926055073738098, "learning_rate": 2.5128193078244606e-05, "loss": 1.2458, "step": 23850 }, { "epoch": 13.704767375071798, "grad_norm": 0.9813425540924072, "learning_rate": 2.508668920993162e-05, "loss": 1.2353, "step": 23860 }, { "epoch": 13.710511200459505, "grad_norm": 0.9867540597915649, "learning_rate": 2.504520817118084e-05, "loss": 1.2543, "step": 23870 }, { "epoch": 13.716255025847214, "grad_norm": 1.0961766242980957, "learning_rate": 2.5003750000000016e-05, "loss": 1.2295, "step": 23880 }, { "epoch": 13.721998851234922, "grad_norm": 1.0170906782150269, "learning_rate": 2.4962314734375903e-05, "loss": 1.2372, "step": 23890 }, { "epoch": 13.727742676622631, "grad_norm": 1.052619457244873, "learning_rate": 2.4920902412274367e-05, "loss": 1.2597, "step": 23900 }, { "epoch": 13.733486502010338, "grad_norm": 0.94364333152771, "learning_rate": 2.4879513071640153e-05, "loss": 1.2159, "step": 23910 }, { "epoch": 13.739230327398047, "grad_norm": 0.9344938397407532, "learning_rate": 2.4838146750397033e-05, "loss": 1.2217, "step": 23920 }, { "epoch": 13.744974152785755, "grad_norm": 0.9291685819625854, "learning_rate": 2.479680348644761e-05, "loss": 1.2478, "step": 23930 }, { "epoch": 13.750717978173464, "grad_norm": 0.9887988567352295, "learning_rate": 2.4755483317673416e-05, "loss": 1.218, "step": 23940 }, { "epoch": 13.756461803561171, "grad_norm": 1.0006673336029053, "learning_rate": 2.4714186281934818e-05, "loss": 1.2275, "step": 23950 }, { "epoch": 13.76220562894888, "grad_norm": 1.0098106861114502, "learning_rate": 2.467291241707094e-05, "loss": 1.2175, "step": 23960 }, { "epoch": 13.767949454336588, "grad_norm": 1.0578207969665527, "learning_rate": 2.4631661760899736e-05, "loss": 1.2232, "step": 23970 }, { "epoch": 13.773693279724297, "grad_norm": 0.9974209666252136, "learning_rate": 2.459043435121785e-05, "loss": 1.2104, "step": 23980 }, { "epoch": 13.779437105112004, "grad_norm": 1.015912413597107, "learning_rate": 2.454923022580063e-05, "loss": 1.2082, "step": 23990 }, { "epoch": 13.785180930499713, "grad_norm": 1.0176284313201904, "learning_rate": 2.450804942240213e-05, "loss": 1.2125, "step": 24000 }, { "epoch": 13.785180930499713, "eval_loss": 1.0619091987609863, "eval_runtime": 121.1691, "eval_samples_per_second": 13.13, "eval_steps_per_second": 0.14, "eval_wer": 0.087778280031642, "step": 24000 }, { "epoch": 13.79092475588742, "grad_norm": 1.0536777973175049, "learning_rate": 2.446689197875498e-05, "loss": 1.2323, "step": 24010 }, { "epoch": 13.79666858127513, "grad_norm": 1.0505125522613525, "learning_rate": 2.4425757932570432e-05, "loss": 1.2295, "step": 24020 }, { "epoch": 13.802412406662837, "grad_norm": 0.9551796317100525, "learning_rate": 2.438464732153833e-05, "loss": 1.2154, "step": 24030 }, { "epoch": 13.808156232050546, "grad_norm": 0.989189624786377, "learning_rate": 2.434356018332698e-05, "loss": 1.2203, "step": 24040 }, { "epoch": 13.813900057438254, "grad_norm": 1.0429370403289795, "learning_rate": 2.4302496555583244e-05, "loss": 1.2311, "step": 24050 }, { "epoch": 13.819643882825963, "grad_norm": 1.084020972251892, "learning_rate": 2.426145647593239e-05, "loss": 1.2467, "step": 24060 }, { "epoch": 13.82538770821367, "grad_norm": 0.9452738761901855, "learning_rate": 2.422043998197815e-05, "loss": 1.2244, "step": 24070 }, { "epoch": 13.83113153360138, "grad_norm": 1.0351113080978394, "learning_rate": 2.417944711130263e-05, "loss": 1.2372, "step": 24080 }, { "epoch": 13.836875358989086, "grad_norm": 1.043131947517395, "learning_rate": 2.4138477901466256e-05, "loss": 1.2173, "step": 24090 }, { "epoch": 13.842619184376796, "grad_norm": 1.017194151878357, "learning_rate": 2.4097532390007852e-05, "loss": 1.2379, "step": 24100 }, { "epoch": 13.848363009764503, "grad_norm": 0.9793208837509155, "learning_rate": 2.4056610614444442e-05, "loss": 1.2276, "step": 24110 }, { "epoch": 13.854106835152212, "grad_norm": 1.0079565048217773, "learning_rate": 2.4015712612271366e-05, "loss": 1.2177, "step": 24120 }, { "epoch": 13.85985066053992, "grad_norm": 1.075614094734192, "learning_rate": 2.397483842096217e-05, "loss": 1.2359, "step": 24130 }, { "epoch": 13.865594485927629, "grad_norm": 0.9938237071037292, "learning_rate": 2.393398807796854e-05, "loss": 1.2285, "step": 24140 }, { "epoch": 13.871338311315336, "grad_norm": 1.0977957248687744, "learning_rate": 2.3893161620720377e-05, "loss": 1.2554, "step": 24150 }, { "epoch": 13.877082136703045, "grad_norm": 1.1225249767303467, "learning_rate": 2.3852359086625622e-05, "loss": 1.2522, "step": 24160 }, { "epoch": 13.882825962090752, "grad_norm": 1.0598392486572266, "learning_rate": 2.381158051307038e-05, "loss": 1.2193, "step": 24170 }, { "epoch": 13.88856978747846, "grad_norm": 0.9928282499313354, "learning_rate": 2.3770825937418726e-05, "loss": 1.2353, "step": 24180 }, { "epoch": 13.894313612866169, "grad_norm": 1.024247646331787, "learning_rate": 2.373009539701276e-05, "loss": 1.2427, "step": 24190 }, { "epoch": 13.900057438253878, "grad_norm": 0.9893248677253723, "learning_rate": 2.36893889291726e-05, "loss": 1.2163, "step": 24200 }, { "epoch": 13.905801263641585, "grad_norm": 0.9153217077255249, "learning_rate": 2.36487065711963e-05, "loss": 1.218, "step": 24210 }, { "epoch": 13.911545089029293, "grad_norm": 0.9943744540214539, "learning_rate": 2.3608048360359765e-05, "loss": 1.2391, "step": 24220 }, { "epoch": 13.917288914417002, "grad_norm": 1.0962735414505005, "learning_rate": 2.3567414333916867e-05, "loss": 1.2227, "step": 24230 }, { "epoch": 13.92303273980471, "grad_norm": 1.0162806510925293, "learning_rate": 2.352680452909921e-05, "loss": 1.232, "step": 24240 }, { "epoch": 13.928776565192418, "grad_norm": 1.024606466293335, "learning_rate": 2.348621898311631e-05, "loss": 1.2315, "step": 24250 }, { "epoch": 13.934520390580126, "grad_norm": 0.9210469126701355, "learning_rate": 2.3445657733155372e-05, "loss": 1.2247, "step": 24260 }, { "epoch": 13.940264215967835, "grad_norm": 1.0622237920761108, "learning_rate": 2.3405120816381412e-05, "loss": 1.2463, "step": 24270 }, { "epoch": 13.946008041355542, "grad_norm": 1.0887614488601685, "learning_rate": 2.336460826993707e-05, "loss": 1.2414, "step": 24280 }, { "epoch": 13.951751866743251, "grad_norm": 0.9074932932853699, "learning_rate": 2.332412013094274e-05, "loss": 1.2, "step": 24290 }, { "epoch": 13.957495692130959, "grad_norm": 1.0141096115112305, "learning_rate": 2.3283656436496378e-05, "loss": 1.2436, "step": 24300 }, { "epoch": 13.963239517518668, "grad_norm": 0.9717239737510681, "learning_rate": 2.324321722367359e-05, "loss": 1.2228, "step": 24310 }, { "epoch": 13.968983342906375, "grad_norm": 1.0706043243408203, "learning_rate": 2.320280252952755e-05, "loss": 1.2488, "step": 24320 }, { "epoch": 13.974727168294084, "grad_norm": 0.9823508262634277, "learning_rate": 2.3162412391088918e-05, "loss": 1.2416, "step": 24330 }, { "epoch": 13.980470993681791, "grad_norm": 1.0465178489685059, "learning_rate": 2.312204684536593e-05, "loss": 1.2404, "step": 24340 }, { "epoch": 13.9862148190695, "grad_norm": 1.1321772336959839, "learning_rate": 2.3081705929344234e-05, "loss": 1.2414, "step": 24350 }, { "epoch": 13.991958644457208, "grad_norm": 1.0251168012619019, "learning_rate": 2.3041389679986896e-05, "loss": 1.2296, "step": 24360 }, { "epoch": 13.997702469844917, "grad_norm": 1.0465214252471924, "learning_rate": 2.300109813423444e-05, "loss": 1.2338, "step": 24370 }, { "epoch": 14.003446295232624, "grad_norm": 1.0605627298355103, "learning_rate": 2.29608313290047e-05, "loss": 1.2389, "step": 24380 }, { "epoch": 14.009190120620334, "grad_norm": 1.0412893295288086, "learning_rate": 2.29205893011929e-05, "loss": 1.2097, "step": 24390 }, { "epoch": 14.01493394600804, "grad_norm": 1.0431143045425415, "learning_rate": 2.2880372087671476e-05, "loss": 1.2198, "step": 24400 }, { "epoch": 14.02067777139575, "grad_norm": 0.9980940222740173, "learning_rate": 2.2840179725290204e-05, "loss": 1.2208, "step": 24410 }, { "epoch": 14.026421596783457, "grad_norm": 1.017864465713501, "learning_rate": 2.2800012250876087e-05, "loss": 1.2142, "step": 24420 }, { "epoch": 14.032165422171166, "grad_norm": 1.0230566263198853, "learning_rate": 2.2759869701233248e-05, "loss": 1.1941, "step": 24430 }, { "epoch": 14.037909247558874, "grad_norm": 0.939879834651947, "learning_rate": 2.2719752113143074e-05, "loss": 1.2045, "step": 24440 }, { "epoch": 14.043653072946583, "grad_norm": 0.9331865906715393, "learning_rate": 2.267965952336401e-05, "loss": 1.204, "step": 24450 }, { "epoch": 14.04939689833429, "grad_norm": 1.0055807828903198, "learning_rate": 2.2639591968631596e-05, "loss": 1.225, "step": 24460 }, { "epoch": 14.055140723722, "grad_norm": 0.9420186281204224, "learning_rate": 2.2599549485658487e-05, "loss": 1.2118, "step": 24470 }, { "epoch": 14.060884549109707, "grad_norm": 0.9405049681663513, "learning_rate": 2.2559532111134298e-05, "loss": 1.2139, "step": 24480 }, { "epoch": 14.066628374497416, "grad_norm": 1.0664889812469482, "learning_rate": 2.2519539881725692e-05, "loss": 1.215, "step": 24490 }, { "epoch": 14.072372199885123, "grad_norm": 0.9530662894248962, "learning_rate": 2.247957283407629e-05, "loss": 1.238, "step": 24500 }, { "epoch": 14.078116025272832, "grad_norm": 1.040010929107666, "learning_rate": 2.2439631004806593e-05, "loss": 1.2432, "step": 24510 }, { "epoch": 14.08385985066054, "grad_norm": 0.9727911949157715, "learning_rate": 2.2399714430514043e-05, "loss": 1.1997, "step": 24520 }, { "epoch": 14.089603676048249, "grad_norm": 0.9158945679664612, "learning_rate": 2.2359823147772902e-05, "loss": 1.2534, "step": 24530 }, { "epoch": 14.095347501435956, "grad_norm": 0.9205055236816406, "learning_rate": 2.2319957193134302e-05, "loss": 1.2164, "step": 24540 }, { "epoch": 14.101091326823665, "grad_norm": 1.0510560274124146, "learning_rate": 2.2280116603126145e-05, "loss": 1.23, "step": 24550 }, { "epoch": 14.106835152211373, "grad_norm": 0.9652541875839233, "learning_rate": 2.2240301414253058e-05, "loss": 1.205, "step": 24560 }, { "epoch": 14.112578977599082, "grad_norm": 1.0684396028518677, "learning_rate": 2.220051166299647e-05, "loss": 1.2223, "step": 24570 }, { "epoch": 14.118322802986789, "grad_norm": 0.9396750926971436, "learning_rate": 2.2160747385814422e-05, "loss": 1.2192, "step": 24580 }, { "epoch": 14.124066628374498, "grad_norm": 1.0323659181594849, "learning_rate": 2.2121008619141676e-05, "loss": 1.2215, "step": 24590 }, { "epoch": 14.129810453762206, "grad_norm": 1.0589594841003418, "learning_rate": 2.208129539938961e-05, "loss": 1.2476, "step": 24600 }, { "epoch": 14.135554279149915, "grad_norm": 0.9992800951004028, "learning_rate": 2.204160776294614e-05, "loss": 1.2275, "step": 24610 }, { "epoch": 14.141298104537622, "grad_norm": 0.9543492197990417, "learning_rate": 2.200194574617582e-05, "loss": 1.2274, "step": 24620 }, { "epoch": 14.14704192992533, "grad_norm": 0.9667035937309265, "learning_rate": 2.1962309385419655e-05, "loss": 1.2171, "step": 24630 }, { "epoch": 14.152785755313039, "grad_norm": 1.0217777490615845, "learning_rate": 2.192269871699521e-05, "loss": 1.2041, "step": 24640 }, { "epoch": 14.158529580700746, "grad_norm": 0.9827529191970825, "learning_rate": 2.188311377719646e-05, "loss": 1.2276, "step": 24650 }, { "epoch": 14.164273406088455, "grad_norm": 0.9652236104011536, "learning_rate": 2.184355460229381e-05, "loss": 1.2354, "step": 24660 }, { "epoch": 14.170017231476162, "grad_norm": 1.102372646331787, "learning_rate": 2.1804021228534077e-05, "loss": 1.2382, "step": 24670 }, { "epoch": 14.175761056863871, "grad_norm": 0.9392674565315247, "learning_rate": 2.176451369214043e-05, "loss": 1.2267, "step": 24680 }, { "epoch": 14.181504882251579, "grad_norm": 0.967389702796936, "learning_rate": 2.172503202931239e-05, "loss": 1.2246, "step": 24690 }, { "epoch": 14.187248707639288, "grad_norm": 1.061562418937683, "learning_rate": 2.1685576276225707e-05, "loss": 1.2129, "step": 24700 }, { "epoch": 14.192992533026995, "grad_norm": 1.068269968032837, "learning_rate": 2.164614646903246e-05, "loss": 1.2152, "step": 24710 }, { "epoch": 14.198736358414704, "grad_norm": 0.9657204151153564, "learning_rate": 2.1606742643860903e-05, "loss": 1.2033, "step": 24720 }, { "epoch": 14.204480183802412, "grad_norm": 1.0366562604904175, "learning_rate": 2.156736483681549e-05, "loss": 1.2323, "step": 24730 }, { "epoch": 14.21022400919012, "grad_norm": 1.034131407737732, "learning_rate": 2.152801308397689e-05, "loss": 1.2072, "step": 24740 }, { "epoch": 14.215967834577828, "grad_norm": 0.977993905544281, "learning_rate": 2.1488687421401806e-05, "loss": 1.2291, "step": 24750 }, { "epoch": 14.221711659965537, "grad_norm": 1.0195544958114624, "learning_rate": 2.144938788512314e-05, "loss": 1.238, "step": 24760 }, { "epoch": 14.227455485353245, "grad_norm": 0.9511464238166809, "learning_rate": 2.1410114511149752e-05, "loss": 1.2294, "step": 24770 }, { "epoch": 14.233199310740954, "grad_norm": 1.0745797157287598, "learning_rate": 2.1370867335466615e-05, "loss": 1.2182, "step": 24780 }, { "epoch": 14.238943136128661, "grad_norm": 0.9654967784881592, "learning_rate": 2.1331646394034675e-05, "loss": 1.1998, "step": 24790 }, { "epoch": 14.24468696151637, "grad_norm": 1.0536357164382935, "learning_rate": 2.1292451722790784e-05, "loss": 1.2385, "step": 24800 }, { "epoch": 14.250430786904078, "grad_norm": 1.0275930166244507, "learning_rate": 2.1253283357647812e-05, "loss": 1.2212, "step": 24810 }, { "epoch": 14.256174612291787, "grad_norm": 0.9694525599479675, "learning_rate": 2.1214141334494466e-05, "loss": 1.2286, "step": 24820 }, { "epoch": 14.261918437679494, "grad_norm": 0.9446169137954712, "learning_rate": 2.117502568919531e-05, "loss": 1.2172, "step": 24830 }, { "epoch": 14.267662263067203, "grad_norm": 1.020424485206604, "learning_rate": 2.11359364575908e-05, "loss": 1.2329, "step": 24840 }, { "epoch": 14.27340608845491, "grad_norm": 1.0075353384017944, "learning_rate": 2.1096873675497118e-05, "loss": 1.2319, "step": 24850 }, { "epoch": 14.27914991384262, "grad_norm": 1.0493297576904297, "learning_rate": 2.1057837378706257e-05, "loss": 1.1959, "step": 24860 }, { "epoch": 14.284893739230327, "grad_norm": 1.0635554790496826, "learning_rate": 2.101882760298595e-05, "loss": 1.2303, "step": 24870 }, { "epoch": 14.290637564618036, "grad_norm": 0.9816174507141113, "learning_rate": 2.097984438407957e-05, "loss": 1.2118, "step": 24880 }, { "epoch": 14.296381390005743, "grad_norm": 0.9723330140113831, "learning_rate": 2.0940887757706244e-05, "loss": 1.2054, "step": 24890 }, { "epoch": 14.302125215393453, "grad_norm": 1.0326104164123535, "learning_rate": 2.090195775956063e-05, "loss": 1.1977, "step": 24900 }, { "epoch": 14.30786904078116, "grad_norm": 0.9113220572471619, "learning_rate": 2.0863054425313096e-05, "loss": 1.2239, "step": 24910 }, { "epoch": 14.313612866168869, "grad_norm": 1.0533758401870728, "learning_rate": 2.08241777906095e-05, "loss": 1.2089, "step": 24920 }, { "epoch": 14.319356691556576, "grad_norm": 1.024215579032898, "learning_rate": 2.0785327891071247e-05, "loss": 1.2031, "step": 24930 }, { "epoch": 14.325100516944286, "grad_norm": 1.2030800580978394, "learning_rate": 2.074650476229529e-05, "loss": 1.211, "step": 24940 }, { "epoch": 14.330844342331993, "grad_norm": 1.03361177444458, "learning_rate": 2.070770843985399e-05, "loss": 1.2509, "step": 24950 }, { "epoch": 14.336588167719702, "grad_norm": 1.013210654258728, "learning_rate": 2.06689389592952e-05, "loss": 1.2166, "step": 24960 }, { "epoch": 14.34233199310741, "grad_norm": 1.0044347047805786, "learning_rate": 2.0630196356142172e-05, "loss": 1.1984, "step": 24970 }, { "epoch": 14.348075818495118, "grad_norm": 1.1455705165863037, "learning_rate": 2.059148066589348e-05, "loss": 1.213, "step": 24980 }, { "epoch": 14.353819643882826, "grad_norm": 0.9642274379730225, "learning_rate": 2.055279192402312e-05, "loss": 1.2113, "step": 24990 }, { "epoch": 14.359563469270535, "grad_norm": 1.0105242729187012, "learning_rate": 2.0514130165980297e-05, "loss": 1.2086, "step": 25000 }, { "epoch": 14.359563469270535, "eval_loss": 1.0593957901000977, "eval_runtime": 121.3666, "eval_samples_per_second": 13.109, "eval_steps_per_second": 0.14, "eval_wer": 0.08608317323991412, "step": 25000 }, { "epoch": 14.365307294658242, "grad_norm": 1.089063048362732, "learning_rate": 2.0475495427189602e-05, "loss": 1.2109, "step": 25010 }, { "epoch": 14.371051120045951, "grad_norm": 1.0358047485351562, "learning_rate": 2.0436887743050785e-05, "loss": 1.2312, "step": 25020 }, { "epoch": 14.376794945433659, "grad_norm": 1.117578387260437, "learning_rate": 2.0398307148938818e-05, "loss": 1.2302, "step": 25030 }, { "epoch": 14.382538770821368, "grad_norm": 1.0078582763671875, "learning_rate": 2.0359753680203885e-05, "loss": 1.1883, "step": 25040 }, { "epoch": 14.388282596209075, "grad_norm": 1.0129399299621582, "learning_rate": 2.0321227372171307e-05, "loss": 1.2141, "step": 25050 }, { "epoch": 14.394026421596784, "grad_norm": 1.097625970840454, "learning_rate": 2.028272826014151e-05, "loss": 1.2164, "step": 25060 }, { "epoch": 14.399770246984492, "grad_norm": 1.108125925064087, "learning_rate": 2.024425637939e-05, "loss": 1.2005, "step": 25070 }, { "epoch": 14.405514072372199, "grad_norm": 0.993674635887146, "learning_rate": 2.0205811765167314e-05, "loss": 1.2327, "step": 25080 }, { "epoch": 14.411257897759908, "grad_norm": 1.1155519485473633, "learning_rate": 2.0167394452699055e-05, "loss": 1.2324, "step": 25090 }, { "epoch": 14.417001723147616, "grad_norm": 1.074629545211792, "learning_rate": 2.0129004477185746e-05, "loss": 1.2309, "step": 25100 }, { "epoch": 14.422745548535325, "grad_norm": 1.0510847568511963, "learning_rate": 2.0090641873802928e-05, "loss": 1.225, "step": 25110 }, { "epoch": 14.428489373923032, "grad_norm": 1.0449714660644531, "learning_rate": 2.005230667770101e-05, "loss": 1.2274, "step": 25120 }, { "epoch": 14.434233199310741, "grad_norm": 1.1334681510925293, "learning_rate": 2.0013998924005328e-05, "loss": 1.1983, "step": 25130 }, { "epoch": 14.439977024698448, "grad_norm": 0.948148250579834, "learning_rate": 1.997571864781602e-05, "loss": 1.2274, "step": 25140 }, { "epoch": 14.445720850086158, "grad_norm": 0.9665245413780212, "learning_rate": 1.9937465884208113e-05, "loss": 1.2123, "step": 25150 }, { "epoch": 14.451464675473865, "grad_norm": 1.0260932445526123, "learning_rate": 1.9899240668231394e-05, "loss": 1.2072, "step": 25160 }, { "epoch": 14.457208500861574, "grad_norm": 1.0720040798187256, "learning_rate": 1.986104303491038e-05, "loss": 1.2143, "step": 25170 }, { "epoch": 14.462952326249281, "grad_norm": 1.108367681503296, "learning_rate": 1.9822873019244378e-05, "loss": 1.2226, "step": 25180 }, { "epoch": 14.46869615163699, "grad_norm": 0.973435640335083, "learning_rate": 1.9784730656207343e-05, "loss": 1.2209, "step": 25190 }, { "epoch": 14.474439977024698, "grad_norm": 1.114501714706421, "learning_rate": 1.974661598074788e-05, "loss": 1.2345, "step": 25200 }, { "epoch": 14.480183802412407, "grad_norm": 0.8884481191635132, "learning_rate": 1.9708529027789286e-05, "loss": 1.2116, "step": 25210 }, { "epoch": 14.485927627800114, "grad_norm": 0.9580786824226379, "learning_rate": 1.967046983222939e-05, "loss": 1.2274, "step": 25220 }, { "epoch": 14.491671453187823, "grad_norm": 1.047084927558899, "learning_rate": 1.963243842894063e-05, "loss": 1.2077, "step": 25230 }, { "epoch": 14.49741527857553, "grad_norm": 1.1091161966323853, "learning_rate": 1.9594434852769982e-05, "loss": 1.2192, "step": 25240 }, { "epoch": 14.50315910396324, "grad_norm": 1.1419296264648438, "learning_rate": 1.955645913853889e-05, "loss": 1.2207, "step": 25250 }, { "epoch": 14.508902929350947, "grad_norm": 0.9813277721405029, "learning_rate": 1.9518511321043305e-05, "loss": 1.2002, "step": 25260 }, { "epoch": 14.514646754738656, "grad_norm": 0.9757702946662903, "learning_rate": 1.9480591435053577e-05, "loss": 1.2263, "step": 25270 }, { "epoch": 14.520390580126364, "grad_norm": 1.0747148990631104, "learning_rate": 1.944269951531452e-05, "loss": 1.2213, "step": 25280 }, { "epoch": 14.526134405514073, "grad_norm": 1.0319701433181763, "learning_rate": 1.940483559654527e-05, "loss": 1.2205, "step": 25290 }, { "epoch": 14.53187823090178, "grad_norm": 1.0581765174865723, "learning_rate": 1.9366999713439317e-05, "loss": 1.2032, "step": 25300 }, { "epoch": 14.53762205628949, "grad_norm": 1.0285146236419678, "learning_rate": 1.9329191900664502e-05, "loss": 1.2185, "step": 25310 }, { "epoch": 14.543365881677197, "grad_norm": 1.0385221242904663, "learning_rate": 1.9291412192862882e-05, "loss": 1.2142, "step": 25320 }, { "epoch": 14.549109707064906, "grad_norm": 1.051267385482788, "learning_rate": 1.925366062465082e-05, "loss": 1.2249, "step": 25330 }, { "epoch": 14.554853532452613, "grad_norm": 0.9933992624282837, "learning_rate": 1.9215937230618887e-05, "loss": 1.2199, "step": 25340 }, { "epoch": 14.560597357840322, "grad_norm": 1.0396558046340942, "learning_rate": 1.917824204533179e-05, "loss": 1.2047, "step": 25350 }, { "epoch": 14.56634118322803, "grad_norm": 0.9974486827850342, "learning_rate": 1.9140575103328458e-05, "loss": 1.2137, "step": 25360 }, { "epoch": 14.572085008615739, "grad_norm": 1.0098021030426025, "learning_rate": 1.9102936439121875e-05, "loss": 1.2289, "step": 25370 }, { "epoch": 14.577828834003446, "grad_norm": 0.8930213451385498, "learning_rate": 1.906532608719918e-05, "loss": 1.2322, "step": 25380 }, { "epoch": 14.583572659391155, "grad_norm": 0.9245844483375549, "learning_rate": 1.9027744082021522e-05, "loss": 1.2089, "step": 25390 }, { "epoch": 14.589316484778863, "grad_norm": 1.0075827836990356, "learning_rate": 1.8990190458024077e-05, "loss": 1.2083, "step": 25400 }, { "epoch": 14.595060310166572, "grad_norm": 1.068303108215332, "learning_rate": 1.8952665249616052e-05, "loss": 1.2129, "step": 25410 }, { "epoch": 14.600804135554279, "grad_norm": 0.9758381247520447, "learning_rate": 1.8915168491180593e-05, "loss": 1.2246, "step": 25420 }, { "epoch": 14.606547960941988, "grad_norm": 0.9314061999320984, "learning_rate": 1.88777002170748e-05, "loss": 1.2073, "step": 25430 }, { "epoch": 14.612291786329695, "grad_norm": 0.9090464115142822, "learning_rate": 1.884026046162964e-05, "loss": 1.2079, "step": 25440 }, { "epoch": 14.618035611717405, "grad_norm": 1.012971043586731, "learning_rate": 1.880284925914995e-05, "loss": 1.24, "step": 25450 }, { "epoch": 14.623779437105112, "grad_norm": 0.982789158821106, "learning_rate": 1.8765466643914452e-05, "loss": 1.1976, "step": 25460 }, { "epoch": 14.629523262492821, "grad_norm": 0.9931904077529907, "learning_rate": 1.8728112650175616e-05, "loss": 1.1945, "step": 25470 }, { "epoch": 14.635267087880528, "grad_norm": 1.1287841796875, "learning_rate": 1.8690787312159744e-05, "loss": 1.2369, "step": 25480 }, { "epoch": 14.641010913268236, "grad_norm": 0.9044769406318665, "learning_rate": 1.865349066406683e-05, "loss": 1.2278, "step": 25490 }, { "epoch": 14.646754738655945, "grad_norm": 1.0848132371902466, "learning_rate": 1.8616222740070592e-05, "loss": 1.2289, "step": 25500 }, { "epoch": 14.652498564043652, "grad_norm": 0.9106241464614868, "learning_rate": 1.857898357431846e-05, "loss": 1.1974, "step": 25510 }, { "epoch": 14.658242389431361, "grad_norm": 1.0156275033950806, "learning_rate": 1.8541773200931487e-05, "loss": 1.2334, "step": 25520 }, { "epoch": 14.663986214819069, "grad_norm": 1.0207141637802124, "learning_rate": 1.850459165400436e-05, "loss": 1.2123, "step": 25530 }, { "epoch": 14.669730040206778, "grad_norm": 0.9943966865539551, "learning_rate": 1.8467438967605322e-05, "loss": 1.246, "step": 25540 }, { "epoch": 14.675473865594485, "grad_norm": 0.9694631099700928, "learning_rate": 1.8430315175776226e-05, "loss": 1.2132, "step": 25550 }, { "epoch": 14.681217690982194, "grad_norm": 1.0691896677017212, "learning_rate": 1.8393220312532396e-05, "loss": 1.212, "step": 25560 }, { "epoch": 14.686961516369902, "grad_norm": 0.967818558216095, "learning_rate": 1.8356154411862655e-05, "loss": 1.2189, "step": 25570 }, { "epoch": 14.69270534175761, "grad_norm": 0.9793399572372437, "learning_rate": 1.831911750772934e-05, "loss": 1.2235, "step": 25580 }, { "epoch": 14.698449167145318, "grad_norm": 0.9611982703208923, "learning_rate": 1.828210963406815e-05, "loss": 1.2139, "step": 25590 }, { "epoch": 14.704192992533027, "grad_norm": 1.0509424209594727, "learning_rate": 1.8245130824788237e-05, "loss": 1.2317, "step": 25600 }, { "epoch": 14.709936817920735, "grad_norm": 0.9915058016777039, "learning_rate": 1.820818111377212e-05, "loss": 1.2313, "step": 25610 }, { "epoch": 14.715680643308444, "grad_norm": 1.0132150650024414, "learning_rate": 1.8171260534875604e-05, "loss": 1.2234, "step": 25620 }, { "epoch": 14.721424468696151, "grad_norm": 1.0299506187438965, "learning_rate": 1.8134369121927874e-05, "loss": 1.2068, "step": 25630 }, { "epoch": 14.72716829408386, "grad_norm": 0.9782707691192627, "learning_rate": 1.8097506908731316e-05, "loss": 1.2268, "step": 25640 }, { "epoch": 14.732912119471568, "grad_norm": 0.9715372323989868, "learning_rate": 1.8060673929061638e-05, "loss": 1.216, "step": 25650 }, { "epoch": 14.738655944859277, "grad_norm": 0.9990441799163818, "learning_rate": 1.80238702166677e-05, "loss": 1.2265, "step": 25660 }, { "epoch": 14.744399770246984, "grad_norm": 0.9666119813919067, "learning_rate": 1.798709580527156e-05, "loss": 1.2174, "step": 25670 }, { "epoch": 14.750143595634693, "grad_norm": 1.1548281908035278, "learning_rate": 1.795035072856847e-05, "loss": 1.247, "step": 25680 }, { "epoch": 14.7558874210224, "grad_norm": 1.0683759450912476, "learning_rate": 1.7913635020226733e-05, "loss": 1.2118, "step": 25690 }, { "epoch": 14.76163124641011, "grad_norm": 1.0158852338790894, "learning_rate": 1.7876948713887797e-05, "loss": 1.2155, "step": 25700 }, { "epoch": 14.767375071797817, "grad_norm": 0.9987695813179016, "learning_rate": 1.784029184316618e-05, "loss": 1.2346, "step": 25710 }, { "epoch": 14.773118897185526, "grad_norm": 1.07984459400177, "learning_rate": 1.7803664441649354e-05, "loss": 1.2072, "step": 25720 }, { "epoch": 14.778862722573233, "grad_norm": 1.0457539558410645, "learning_rate": 1.7767066542897885e-05, "loss": 1.2144, "step": 25730 }, { "epoch": 14.784606547960943, "grad_norm": 1.0007987022399902, "learning_rate": 1.7730498180445218e-05, "loss": 1.232, "step": 25740 }, { "epoch": 14.79035037334865, "grad_norm": 0.9794312119483948, "learning_rate": 1.7693959387797817e-05, "loss": 1.207, "step": 25750 }, { "epoch": 14.796094198736359, "grad_norm": 1.0783519744873047, "learning_rate": 1.765745019843499e-05, "loss": 1.1933, "step": 25760 }, { "epoch": 14.801838024124066, "grad_norm": 1.0177414417266846, "learning_rate": 1.762097064580892e-05, "loss": 1.2129, "step": 25770 }, { "epoch": 14.807581849511775, "grad_norm": 0.9529037475585938, "learning_rate": 1.7584520763344678e-05, "loss": 1.2239, "step": 25780 }, { "epoch": 14.813325674899483, "grad_norm": 0.9531726241111755, "learning_rate": 1.7548100584440135e-05, "loss": 1.1922, "step": 25790 }, { "epoch": 14.819069500287192, "grad_norm": 0.9784784317016602, "learning_rate": 1.7511710142465952e-05, "loss": 1.1997, "step": 25800 }, { "epoch": 14.8248133256749, "grad_norm": 0.9810519218444824, "learning_rate": 1.74753494707655e-05, "loss": 1.2282, "step": 25810 }, { "epoch": 14.830557151062608, "grad_norm": 0.982638955116272, "learning_rate": 1.7439018602654902e-05, "loss": 1.2145, "step": 25820 }, { "epoch": 14.836300976450316, "grad_norm": 0.9705809354782104, "learning_rate": 1.7402717571422997e-05, "loss": 1.2024, "step": 25830 }, { "epoch": 14.842044801838025, "grad_norm": 1.0295052528381348, "learning_rate": 1.736644641033123e-05, "loss": 1.2163, "step": 25840 }, { "epoch": 14.847788627225732, "grad_norm": 0.99381422996521, "learning_rate": 1.7330205152613747e-05, "loss": 1.2159, "step": 25850 }, { "epoch": 14.853532452613441, "grad_norm": 1.071428656578064, "learning_rate": 1.729399383147723e-05, "loss": 1.2219, "step": 25860 }, { "epoch": 14.859276278001149, "grad_norm": 1.031275987625122, "learning_rate": 1.725781248010094e-05, "loss": 1.22, "step": 25870 }, { "epoch": 14.865020103388858, "grad_norm": 1.0345505475997925, "learning_rate": 1.722166113163672e-05, "loss": 1.2225, "step": 25880 }, { "epoch": 14.870763928776565, "grad_norm": 1.0205128192901611, "learning_rate": 1.7185539819208894e-05, "loss": 1.1921, "step": 25890 }, { "epoch": 14.876507754164273, "grad_norm": 1.00784432888031, "learning_rate": 1.7149448575914286e-05, "loss": 1.229, "step": 25900 }, { "epoch": 14.882251579551982, "grad_norm": 0.9938313364982605, "learning_rate": 1.7113387434822123e-05, "loss": 1.2114, "step": 25910 }, { "epoch": 14.88799540493969, "grad_norm": 1.09950590133667, "learning_rate": 1.7077356428974066e-05, "loss": 1.2292, "step": 25920 }, { "epoch": 14.893739230327398, "grad_norm": 1.1125129461288452, "learning_rate": 1.7041355591384214e-05, "loss": 1.2272, "step": 25930 }, { "epoch": 14.899483055715105, "grad_norm": 0.9555776715278625, "learning_rate": 1.700538495503895e-05, "loss": 1.2249, "step": 25940 }, { "epoch": 14.905226881102815, "grad_norm": 1.0296878814697266, "learning_rate": 1.6969444552897054e-05, "loss": 1.2001, "step": 25950 }, { "epoch": 14.910970706490522, "grad_norm": 1.0420628786087036, "learning_rate": 1.6933534417889535e-05, "loss": 1.2054, "step": 25960 }, { "epoch": 14.916714531878231, "grad_norm": 0.9140343070030212, "learning_rate": 1.6897654582919716e-05, "loss": 1.2044, "step": 25970 }, { "epoch": 14.922458357265938, "grad_norm": 0.995277464389801, "learning_rate": 1.686180508086317e-05, "loss": 1.2347, "step": 25980 }, { "epoch": 14.928202182653648, "grad_norm": 0.9565374851226807, "learning_rate": 1.682598594456761e-05, "loss": 1.225, "step": 25990 }, { "epoch": 14.933946008041355, "grad_norm": 1.0220321416854858, "learning_rate": 1.6790197206853004e-05, "loss": 1.2379, "step": 26000 }, { "epoch": 14.933946008041355, "eval_loss": 1.057450294494629, "eval_runtime": 122.2652, "eval_samples_per_second": 13.013, "eval_steps_per_second": 0.139, "eval_wer": 0.08630918747881117, "step": 26000 }, { "epoch": 14.939689833429064, "grad_norm": 1.063464641571045, "learning_rate": 1.67544389005114e-05, "loss": 1.2218, "step": 26010 }, { "epoch": 14.945433658816771, "grad_norm": 0.9798442125320435, "learning_rate": 1.6718711058307017e-05, "loss": 1.247, "step": 26020 }, { "epoch": 14.95117748420448, "grad_norm": 0.999824047088623, "learning_rate": 1.6683013712976128e-05, "loss": 1.2189, "step": 26030 }, { "epoch": 14.956921309592188, "grad_norm": 1.012591004371643, "learning_rate": 1.664734689722706e-05, "loss": 1.2255, "step": 26040 }, { "epoch": 14.962665134979897, "grad_norm": 0.9571306705474854, "learning_rate": 1.6611710643740194e-05, "loss": 1.2255, "step": 26050 }, { "epoch": 14.968408960367604, "grad_norm": 1.0604947805404663, "learning_rate": 1.6576104985167873e-05, "loss": 1.2273, "step": 26060 }, { "epoch": 14.974152785755313, "grad_norm": 1.0356248617172241, "learning_rate": 1.6540529954134434e-05, "loss": 1.2183, "step": 26070 }, { "epoch": 14.97989661114302, "grad_norm": 1.0434683561325073, "learning_rate": 1.650498558323616e-05, "loss": 1.2371, "step": 26080 }, { "epoch": 14.98564043653073, "grad_norm": 1.0114096403121948, "learning_rate": 1.64694719050412e-05, "loss": 1.2066, "step": 26090 }, { "epoch": 14.991384261918437, "grad_norm": 1.0262424945831299, "learning_rate": 1.6433988952089623e-05, "loss": 1.2161, "step": 26100 }, { "epoch": 14.997128087306146, "grad_norm": 0.999446451663971, "learning_rate": 1.63985367568933e-05, "loss": 1.2244, "step": 26110 }, { "epoch": 15.002871912693854, "grad_norm": 0.973579466342926, "learning_rate": 1.636311535193598e-05, "loss": 1.2119, "step": 26120 }, { "epoch": 15.008615738081563, "grad_norm": 1.0167733430862427, "learning_rate": 1.632772476967315e-05, "loss": 1.2061, "step": 26130 }, { "epoch": 15.01435956346927, "grad_norm": 0.9677587747573853, "learning_rate": 1.6292365042532053e-05, "loss": 1.1875, "step": 26140 }, { "epoch": 15.02010338885698, "grad_norm": 0.9801494479179382, "learning_rate": 1.6257036202911688e-05, "loss": 1.2005, "step": 26150 }, { "epoch": 15.025847214244687, "grad_norm": 0.9402710795402527, "learning_rate": 1.6221738283182757e-05, "loss": 1.1977, "step": 26160 }, { "epoch": 15.031591039632396, "grad_norm": 0.977249801158905, "learning_rate": 1.618647131568762e-05, "loss": 1.1928, "step": 26170 }, { "epoch": 15.037334865020103, "grad_norm": 1.024449348449707, "learning_rate": 1.6151235332740262e-05, "loss": 1.2096, "step": 26180 }, { "epoch": 15.043078690407812, "grad_norm": 0.8933520913124084, "learning_rate": 1.6116030366626283e-05, "loss": 1.208, "step": 26190 }, { "epoch": 15.04882251579552, "grad_norm": 0.9599255919456482, "learning_rate": 1.608085644960289e-05, "loss": 1.2106, "step": 26200 }, { "epoch": 15.054566341183229, "grad_norm": 1.0584702491760254, "learning_rate": 1.6045713613898794e-05, "loss": 1.2126, "step": 26210 }, { "epoch": 15.060310166570936, "grad_norm": 1.0307273864746094, "learning_rate": 1.601060189171428e-05, "loss": 1.1861, "step": 26220 }, { "epoch": 15.066053991958645, "grad_norm": 1.0065584182739258, "learning_rate": 1.597552131522109e-05, "loss": 1.2033, "step": 26230 }, { "epoch": 15.071797817346352, "grad_norm": 0.9939149022102356, "learning_rate": 1.5940471916562417e-05, "loss": 1.2197, "step": 26240 }, { "epoch": 15.077541642734062, "grad_norm": 0.9140249490737915, "learning_rate": 1.5905453727852918e-05, "loss": 1.1874, "step": 26250 }, { "epoch": 15.083285468121769, "grad_norm": 0.9340547323226929, "learning_rate": 1.587046678117865e-05, "loss": 1.2016, "step": 26260 }, { "epoch": 15.089029293509478, "grad_norm": 0.9199875593185425, "learning_rate": 1.583551110859704e-05, "loss": 1.2326, "step": 26270 }, { "epoch": 15.094773118897185, "grad_norm": 1.003050446510315, "learning_rate": 1.5800586742136862e-05, "loss": 1.2127, "step": 26280 }, { "epoch": 15.100516944284895, "grad_norm": 1.009954571723938, "learning_rate": 1.5765693713798156e-05, "loss": 1.2017, "step": 26290 }, { "epoch": 15.106260769672602, "grad_norm": 1.0128146409988403, "learning_rate": 1.5730832055552337e-05, "loss": 1.2086, "step": 26300 }, { "epoch": 15.112004595060311, "grad_norm": 0.9843529462814331, "learning_rate": 1.569600179934199e-05, "loss": 1.207, "step": 26310 }, { "epoch": 15.117748420448018, "grad_norm": 1.0136809349060059, "learning_rate": 1.5661202977081003e-05, "loss": 1.2059, "step": 26320 }, { "epoch": 15.123492245835727, "grad_norm": 0.9684053659439087, "learning_rate": 1.56264356206544e-05, "loss": 1.1945, "step": 26330 }, { "epoch": 15.129236071223435, "grad_norm": 1.0100078582763672, "learning_rate": 1.5591699761918404e-05, "loss": 1.2, "step": 26340 }, { "epoch": 15.134979896611142, "grad_norm": 0.9501697421073914, "learning_rate": 1.5556995432700398e-05, "loss": 1.206, "step": 26350 }, { "epoch": 15.140723721998851, "grad_norm": 0.969898521900177, "learning_rate": 1.5522322664798815e-05, "loss": 1.2397, "step": 26360 }, { "epoch": 15.146467547386559, "grad_norm": 1.0443209409713745, "learning_rate": 1.5487681489983243e-05, "loss": 1.204, "step": 26370 }, { "epoch": 15.152211372774268, "grad_norm": 1.0347651243209839, "learning_rate": 1.5453071939994268e-05, "loss": 1.2084, "step": 26380 }, { "epoch": 15.157955198161975, "grad_norm": 0.9488282203674316, "learning_rate": 1.5418494046543493e-05, "loss": 1.213, "step": 26390 }, { "epoch": 15.163699023549684, "grad_norm": 1.0040168762207031, "learning_rate": 1.5383947841313576e-05, "loss": 1.2372, "step": 26400 }, { "epoch": 15.169442848937392, "grad_norm": 1.0397101640701294, "learning_rate": 1.534943335595807e-05, "loss": 1.2067, "step": 26410 }, { "epoch": 15.1751866743251, "grad_norm": 0.9035594463348389, "learning_rate": 1.5314950622101527e-05, "loss": 1.2078, "step": 26420 }, { "epoch": 15.180930499712808, "grad_norm": 0.9689311385154724, "learning_rate": 1.5280499671339345e-05, "loss": 1.205, "step": 26430 }, { "epoch": 15.186674325100517, "grad_norm": 1.0225797891616821, "learning_rate": 1.5246080535237839e-05, "loss": 1.1817, "step": 26440 }, { "epoch": 15.192418150488225, "grad_norm": 1.2207682132720947, "learning_rate": 1.5211693245334194e-05, "loss": 1.2068, "step": 26450 }, { "epoch": 15.198161975875934, "grad_norm": 1.0378779172897339, "learning_rate": 1.5177337833136343e-05, "loss": 1.1952, "step": 26460 }, { "epoch": 15.203905801263641, "grad_norm": 0.9449943900108337, "learning_rate": 1.514301433012309e-05, "loss": 1.1989, "step": 26470 }, { "epoch": 15.20964962665135, "grad_norm": 1.0502628087997437, "learning_rate": 1.5108722767743935e-05, "loss": 1.2315, "step": 26480 }, { "epoch": 15.215393452039057, "grad_norm": 1.0287044048309326, "learning_rate": 1.5074463177419179e-05, "loss": 1.1943, "step": 26490 }, { "epoch": 15.221137277426767, "grad_norm": 0.9346133470535278, "learning_rate": 1.5040235590539761e-05, "loss": 1.2185, "step": 26500 }, { "epoch": 15.226881102814474, "grad_norm": 0.9057783484458923, "learning_rate": 1.500604003846732e-05, "loss": 1.196, "step": 26510 }, { "epoch": 15.232624928202183, "grad_norm": 0.9676570892333984, "learning_rate": 1.4971876552534158e-05, "loss": 1.1938, "step": 26520 }, { "epoch": 15.23836875358989, "grad_norm": 1.1092582941055298, "learning_rate": 1.4937745164043218e-05, "loss": 1.2041, "step": 26530 }, { "epoch": 15.2441125789776, "grad_norm": 1.1173124313354492, "learning_rate": 1.4903645904267952e-05, "loss": 1.1981, "step": 26540 }, { "epoch": 15.249856404365307, "grad_norm": 1.0028637647628784, "learning_rate": 1.4869578804452464e-05, "loss": 1.2139, "step": 26550 }, { "epoch": 15.255600229753016, "grad_norm": 1.071292757987976, "learning_rate": 1.4835543895811321e-05, "loss": 1.2187, "step": 26560 }, { "epoch": 15.261344055140723, "grad_norm": 1.0676053762435913, "learning_rate": 1.4801541209529652e-05, "loss": 1.2194, "step": 26570 }, { "epoch": 15.267087880528432, "grad_norm": 0.9352045655250549, "learning_rate": 1.4767570776762996e-05, "loss": 1.2018, "step": 26580 }, { "epoch": 15.27283170591614, "grad_norm": 0.922773003578186, "learning_rate": 1.4733632628637418e-05, "loss": 1.2017, "step": 26590 }, { "epoch": 15.278575531303849, "grad_norm": 1.0585378408432007, "learning_rate": 1.4699726796249333e-05, "loss": 1.209, "step": 26600 }, { "epoch": 15.284319356691556, "grad_norm": 0.938613772392273, "learning_rate": 1.4665853310665572e-05, "loss": 1.2247, "step": 26610 }, { "epoch": 15.290063182079265, "grad_norm": 1.286502480506897, "learning_rate": 1.4632012202923332e-05, "loss": 1.1999, "step": 26620 }, { "epoch": 15.295807007466973, "grad_norm": 0.9761466383934021, "learning_rate": 1.4598203504030145e-05, "loss": 1.2082, "step": 26630 }, { "epoch": 15.301550832854682, "grad_norm": 1.0073760747909546, "learning_rate": 1.4564427244963854e-05, "loss": 1.1948, "step": 26640 }, { "epoch": 15.30729465824239, "grad_norm": 1.2302110195159912, "learning_rate": 1.4530683456672557e-05, "loss": 1.2152, "step": 26650 }, { "epoch": 15.313038483630098, "grad_norm": 1.0625994205474854, "learning_rate": 1.4496972170074594e-05, "loss": 1.22, "step": 26660 }, { "epoch": 15.318782309017806, "grad_norm": 0.9122873544692993, "learning_rate": 1.4463293416058565e-05, "loss": 1.2128, "step": 26670 }, { "epoch": 15.324526134405515, "grad_norm": 0.9717715382575989, "learning_rate": 1.442964722548322e-05, "loss": 1.2064, "step": 26680 }, { "epoch": 15.330269959793222, "grad_norm": 0.9961033463478088, "learning_rate": 1.4396033629177507e-05, "loss": 1.2213, "step": 26690 }, { "epoch": 15.336013785180931, "grad_norm": 0.9851220846176147, "learning_rate": 1.436245265794047e-05, "loss": 1.1985, "step": 26700 }, { "epoch": 15.341757610568639, "grad_norm": 1.0120820999145508, "learning_rate": 1.4328904342541302e-05, "loss": 1.2056, "step": 26710 }, { "epoch": 15.347501435956348, "grad_norm": 0.8697179555892944, "learning_rate": 1.4295388713719232e-05, "loss": 1.2156, "step": 26720 }, { "epoch": 15.353245261344055, "grad_norm": 0.9076546430587769, "learning_rate": 1.4261905802183573e-05, "loss": 1.2147, "step": 26730 }, { "epoch": 15.358989086731764, "grad_norm": 1.0316888093948364, "learning_rate": 1.4228455638613663e-05, "loss": 1.2012, "step": 26740 }, { "epoch": 15.364732912119472, "grad_norm": 1.0450801849365234, "learning_rate": 1.4195038253658808e-05, "loss": 1.2034, "step": 26750 }, { "epoch": 15.370476737507179, "grad_norm": 0.9911081790924072, "learning_rate": 1.4161653677938266e-05, "loss": 1.2104, "step": 26760 }, { "epoch": 15.376220562894888, "grad_norm": 1.050289273262024, "learning_rate": 1.4128301942041303e-05, "loss": 1.1989, "step": 26770 }, { "epoch": 15.381964388282595, "grad_norm": 0.9373721480369568, "learning_rate": 1.4094983076527004e-05, "loss": 1.1955, "step": 26780 }, { "epoch": 15.387708213670305, "grad_norm": 0.9938370585441589, "learning_rate": 1.4061697111924426e-05, "loss": 1.2119, "step": 26790 }, { "epoch": 15.393452039058012, "grad_norm": 0.9579716920852661, "learning_rate": 1.4028444078732397e-05, "loss": 1.2222, "step": 26800 }, { "epoch": 15.399195864445721, "grad_norm": 0.9794312119483948, "learning_rate": 1.3995224007419633e-05, "loss": 1.2209, "step": 26810 }, { "epoch": 15.404939689833428, "grad_norm": 0.9548497200012207, "learning_rate": 1.3962036928424632e-05, "loss": 1.1973, "step": 26820 }, { "epoch": 15.410683515221137, "grad_norm": 1.0342283248901367, "learning_rate": 1.3928882872155625e-05, "loss": 1.2074, "step": 26830 }, { "epoch": 15.416427340608845, "grad_norm": 0.8523366451263428, "learning_rate": 1.3895761868990653e-05, "loss": 1.2119, "step": 26840 }, { "epoch": 15.422171165996554, "grad_norm": 1.1082189083099365, "learning_rate": 1.38626739492774e-05, "loss": 1.1826, "step": 26850 }, { "epoch": 15.427914991384261, "grad_norm": 1.024062156677246, "learning_rate": 1.38296191433333e-05, "loss": 1.2187, "step": 26860 }, { "epoch": 15.43365881677197, "grad_norm": 0.9709998369216919, "learning_rate": 1.3796597481445404e-05, "loss": 1.1904, "step": 26870 }, { "epoch": 15.439402642159678, "grad_norm": 1.0230603218078613, "learning_rate": 1.3763608993870383e-05, "loss": 1.2262, "step": 26880 }, { "epoch": 15.445146467547387, "grad_norm": 0.9761072397232056, "learning_rate": 1.3730653710834585e-05, "loss": 1.2181, "step": 26890 }, { "epoch": 15.450890292935094, "grad_norm": 0.8359770178794861, "learning_rate": 1.3697731662533832e-05, "loss": 1.2181, "step": 26900 }, { "epoch": 15.456634118322803, "grad_norm": 0.9193968772888184, "learning_rate": 1.3664842879133575e-05, "loss": 1.2029, "step": 26910 }, { "epoch": 15.46237794371051, "grad_norm": 0.9340499043464661, "learning_rate": 1.3631987390768764e-05, "loss": 1.2085, "step": 26920 }, { "epoch": 15.46812176909822, "grad_norm": 0.9572991132736206, "learning_rate": 1.3599165227543815e-05, "loss": 1.2169, "step": 26930 }, { "epoch": 15.473865594485927, "grad_norm": 1.0473037958145142, "learning_rate": 1.3566376419532643e-05, "loss": 1.2216, "step": 26940 }, { "epoch": 15.479609419873636, "grad_norm": 0.9519008994102478, "learning_rate": 1.353362099677857e-05, "loss": 1.1989, "step": 26950 }, { "epoch": 15.485353245261344, "grad_norm": 1.0680210590362549, "learning_rate": 1.3500898989294365e-05, "loss": 1.2199, "step": 26960 }, { "epoch": 15.491097070649053, "grad_norm": 1.0191963911056519, "learning_rate": 1.346821042706215e-05, "loss": 1.1942, "step": 26970 }, { "epoch": 15.49684089603676, "grad_norm": 1.0114282369613647, "learning_rate": 1.3435555340033393e-05, "loss": 1.1902, "step": 26980 }, { "epoch": 15.50258472142447, "grad_norm": 1.073899269104004, "learning_rate": 1.3402933758128927e-05, "loss": 1.2106, "step": 26990 }, { "epoch": 15.508328546812177, "grad_norm": 1.086017370223999, "learning_rate": 1.3370345711238862e-05, "loss": 1.1943, "step": 27000 }, { "epoch": 15.508328546812177, "eval_loss": 1.0578992366790771, "eval_runtime": 121.2853, "eval_samples_per_second": 13.118, "eval_steps_per_second": 0.14, "eval_wer": 0.08602666968018985, "step": 27000 }, { "epoch": 15.514072372199886, "grad_norm": 1.0317455530166626, "learning_rate": 1.3337791229222601e-05, "loss": 1.2073, "step": 27010 }, { "epoch": 15.519816197587593, "grad_norm": 0.9488269090652466, "learning_rate": 1.3305270341908765e-05, "loss": 1.2108, "step": 27020 }, { "epoch": 15.525560022975302, "grad_norm": 0.9255710244178772, "learning_rate": 1.3272783079095186e-05, "loss": 1.198, "step": 27030 }, { "epoch": 15.53130384836301, "grad_norm": 0.9926071166992188, "learning_rate": 1.3240329470548934e-05, "loss": 1.2126, "step": 27040 }, { "epoch": 15.537047673750719, "grad_norm": 0.9667512774467468, "learning_rate": 1.3207909546006188e-05, "loss": 1.2114, "step": 27050 }, { "epoch": 15.542791499138426, "grad_norm": 1.0259897708892822, "learning_rate": 1.3175523335172329e-05, "loss": 1.2192, "step": 27060 }, { "epoch": 15.548535324526135, "grad_norm": 1.0754188299179077, "learning_rate": 1.3143170867721779e-05, "loss": 1.201, "step": 27070 }, { "epoch": 15.554279149913842, "grad_norm": 0.9826086163520813, "learning_rate": 1.3110852173298063e-05, "loss": 1.21, "step": 27080 }, { "epoch": 15.560022975301552, "grad_norm": 0.9746337532997131, "learning_rate": 1.3078567281513784e-05, "loss": 1.1986, "step": 27090 }, { "epoch": 15.565766800689259, "grad_norm": 0.9189111590385437, "learning_rate": 1.3046316221950558e-05, "loss": 1.1878, "step": 27100 }, { "epoch": 15.571510626076968, "grad_norm": 0.9347783923149109, "learning_rate": 1.3014099024159018e-05, "loss": 1.1633, "step": 27110 }, { "epoch": 15.577254451464675, "grad_norm": 1.175068974494934, "learning_rate": 1.298191571765873e-05, "loss": 1.2166, "step": 27120 }, { "epoch": 15.582998276852384, "grad_norm": 0.9539526700973511, "learning_rate": 1.2949766331938229e-05, "loss": 1.1993, "step": 27130 }, { "epoch": 15.588742102240092, "grad_norm": 0.9957134127616882, "learning_rate": 1.2917650896454992e-05, "loss": 1.191, "step": 27140 }, { "epoch": 15.594485927627801, "grad_norm": 1.019853115081787, "learning_rate": 1.2885569440635337e-05, "loss": 1.2132, "step": 27150 }, { "epoch": 15.600229753015508, "grad_norm": 0.992588996887207, "learning_rate": 1.2853521993874512e-05, "loss": 1.1935, "step": 27160 }, { "epoch": 15.605973578403216, "grad_norm": 0.9598777294158936, "learning_rate": 1.282150858553654e-05, "loss": 1.2166, "step": 27170 }, { "epoch": 15.611717403790925, "grad_norm": 1.043892502784729, "learning_rate": 1.2789529244954304e-05, "loss": 1.2074, "step": 27180 }, { "epoch": 15.617461229178634, "grad_norm": 0.9710313677787781, "learning_rate": 1.2757584001429457e-05, "loss": 1.1988, "step": 27190 }, { "epoch": 15.623205054566341, "grad_norm": 0.9652072191238403, "learning_rate": 1.2725672884232382e-05, "loss": 1.2019, "step": 27200 }, { "epoch": 15.628948879954049, "grad_norm": 1.0439345836639404, "learning_rate": 1.2693795922602247e-05, "loss": 1.2206, "step": 27210 }, { "epoch": 15.634692705341758, "grad_norm": 1.0208942890167236, "learning_rate": 1.2661953145746882e-05, "loss": 1.2044, "step": 27220 }, { "epoch": 15.640436530729465, "grad_norm": 1.0519356727600098, "learning_rate": 1.2630144582842793e-05, "loss": 1.2024, "step": 27230 }, { "epoch": 15.646180356117174, "grad_norm": 0.9655662775039673, "learning_rate": 1.259837026303517e-05, "loss": 1.2136, "step": 27240 }, { "epoch": 15.651924181504882, "grad_norm": 1.1176307201385498, "learning_rate": 1.2566630215437792e-05, "loss": 1.1995, "step": 27250 }, { "epoch": 15.65766800689259, "grad_norm": 0.9592282772064209, "learning_rate": 1.2534924469133069e-05, "loss": 1.2094, "step": 27260 }, { "epoch": 15.663411832280298, "grad_norm": 1.111539363861084, "learning_rate": 1.2503253053171949e-05, "loss": 1.2095, "step": 27270 }, { "epoch": 15.669155657668007, "grad_norm": 1.0378633737564087, "learning_rate": 1.2471615996573943e-05, "loss": 1.222, "step": 27280 }, { "epoch": 15.674899483055714, "grad_norm": 0.9379689693450928, "learning_rate": 1.2440013328327096e-05, "loss": 1.1971, "step": 27290 }, { "epoch": 15.680643308443424, "grad_norm": 1.015716552734375, "learning_rate": 1.2408445077387889e-05, "loss": 1.2135, "step": 27300 }, { "epoch": 15.686387133831131, "grad_norm": 1.0179091691970825, "learning_rate": 1.2376911272681341e-05, "loss": 1.2213, "step": 27310 }, { "epoch": 15.69213095921884, "grad_norm": 1.0006569623947144, "learning_rate": 1.234541194310083e-05, "loss": 1.2129, "step": 27320 }, { "epoch": 15.697874784606547, "grad_norm": 1.0056092739105225, "learning_rate": 1.2313947117508231e-05, "loss": 1.214, "step": 27330 }, { "epoch": 15.703618609994257, "grad_norm": 1.4138686656951904, "learning_rate": 1.228251682473373e-05, "loss": 1.1937, "step": 27340 }, { "epoch": 15.709362435381964, "grad_norm": 1.0444179773330688, "learning_rate": 1.2251121093575897e-05, "loss": 1.199, "step": 27350 }, { "epoch": 15.715106260769673, "grad_norm": 0.9492107033729553, "learning_rate": 1.2219759952801644e-05, "loss": 1.2043, "step": 27360 }, { "epoch": 15.72085008615738, "grad_norm": 1.0371536016464233, "learning_rate": 1.218843343114619e-05, "loss": 1.2019, "step": 27370 }, { "epoch": 15.72659391154509, "grad_norm": 1.1378275156021118, "learning_rate": 1.2157141557313044e-05, "loss": 1.2015, "step": 27380 }, { "epoch": 15.732337736932797, "grad_norm": 1.0947866439819336, "learning_rate": 1.2125884359973927e-05, "loss": 1.2177, "step": 27390 }, { "epoch": 15.738081562320506, "grad_norm": 0.9873200058937073, "learning_rate": 1.2094661867768801e-05, "loss": 1.2088, "step": 27400 }, { "epoch": 15.743825387708213, "grad_norm": 1.0540186166763306, "learning_rate": 1.2063474109305876e-05, "loss": 1.1984, "step": 27410 }, { "epoch": 15.749569213095922, "grad_norm": 1.0627162456512451, "learning_rate": 1.2032321113161456e-05, "loss": 1.2346, "step": 27420 }, { "epoch": 15.75531303848363, "grad_norm": 0.9104661345481873, "learning_rate": 1.200120290788008e-05, "loss": 1.2072, "step": 27430 }, { "epoch": 15.761056863871339, "grad_norm": 1.0108287334442139, "learning_rate": 1.1970119521974346e-05, "loss": 1.2329, "step": 27440 }, { "epoch": 15.766800689259046, "grad_norm": 0.9816228747367859, "learning_rate": 1.1939070983924949e-05, "loss": 1.2033, "step": 27450 }, { "epoch": 15.772544514646755, "grad_norm": 1.0340903997421265, "learning_rate": 1.1908057322180694e-05, "loss": 1.1988, "step": 27460 }, { "epoch": 15.778288340034463, "grad_norm": 0.9723082780838013, "learning_rate": 1.1877078565158409e-05, "loss": 1.1879, "step": 27470 }, { "epoch": 15.784032165422172, "grad_norm": 1.0375847816467285, "learning_rate": 1.1846134741242952e-05, "loss": 1.212, "step": 27480 }, { "epoch": 15.78977599080988, "grad_norm": 0.9549993872642517, "learning_rate": 1.1815225878787154e-05, "loss": 1.2155, "step": 27490 }, { "epoch": 15.795519816197588, "grad_norm": 1.015122413635254, "learning_rate": 1.1784352006111796e-05, "loss": 1.2144, "step": 27500 }, { "epoch": 15.801263641585296, "grad_norm": 0.8969539999961853, "learning_rate": 1.1753513151505652e-05, "loss": 1.1924, "step": 27510 }, { "epoch": 15.807007466973005, "grad_norm": 0.9606672525405884, "learning_rate": 1.1722709343225355e-05, "loss": 1.1867, "step": 27520 }, { "epoch": 15.812751292360712, "grad_norm": 0.9681193828582764, "learning_rate": 1.1691940609495476e-05, "loss": 1.2042, "step": 27530 }, { "epoch": 15.818495117748421, "grad_norm": 1.0136154890060425, "learning_rate": 1.1661206978508403e-05, "loss": 1.189, "step": 27540 }, { "epoch": 15.824238943136129, "grad_norm": 0.9765663146972656, "learning_rate": 1.1630508478424388e-05, "loss": 1.2053, "step": 27550 }, { "epoch": 15.829982768523838, "grad_norm": 1.080919623374939, "learning_rate": 1.15998451373715e-05, "loss": 1.2252, "step": 27560 }, { "epoch": 15.835726593911545, "grad_norm": 0.9988478422164917, "learning_rate": 1.1569216983445558e-05, "loss": 1.2141, "step": 27570 }, { "epoch": 15.841470419299252, "grad_norm": 1.012364387512207, "learning_rate": 1.1538624044710187e-05, "loss": 1.199, "step": 27580 }, { "epoch": 15.847214244686961, "grad_norm": 0.9939747452735901, "learning_rate": 1.1508066349196705e-05, "loss": 1.1986, "step": 27590 }, { "epoch": 15.85295807007467, "grad_norm": 1.1809818744659424, "learning_rate": 1.1477543924904143e-05, "loss": 1.1975, "step": 27600 }, { "epoch": 15.858701895462378, "grad_norm": 1.0233080387115479, "learning_rate": 1.1447056799799245e-05, "loss": 1.2076, "step": 27610 }, { "epoch": 15.864445720850085, "grad_norm": 0.9648825526237488, "learning_rate": 1.1416605001816368e-05, "loss": 1.2136, "step": 27620 }, { "epoch": 15.870189546237794, "grad_norm": 0.9563939571380615, "learning_rate": 1.1386188558857551e-05, "loss": 1.1985, "step": 27630 }, { "epoch": 15.875933371625502, "grad_norm": 0.9492806196212769, "learning_rate": 1.1355807498792378e-05, "loss": 1.1903, "step": 27640 }, { "epoch": 15.881677197013211, "grad_norm": 0.997193455696106, "learning_rate": 1.132546184945806e-05, "loss": 1.1999, "step": 27650 }, { "epoch": 15.887421022400918, "grad_norm": 1.0065877437591553, "learning_rate": 1.1295151638659367e-05, "loss": 1.2149, "step": 27660 }, { "epoch": 15.893164847788627, "grad_norm": 0.9710814952850342, "learning_rate": 1.126487689416854e-05, "loss": 1.2254, "step": 27670 }, { "epoch": 15.898908673176335, "grad_norm": 1.055302619934082, "learning_rate": 1.1234637643725394e-05, "loss": 1.2135, "step": 27680 }, { "epoch": 15.904652498564044, "grad_norm": 0.9559252262115479, "learning_rate": 1.1204433915037178e-05, "loss": 1.2132, "step": 27690 }, { "epoch": 15.910396323951751, "grad_norm": 0.950984537601471, "learning_rate": 1.1174265735778583e-05, "loss": 1.2118, "step": 27700 }, { "epoch": 15.91614014933946, "grad_norm": 1.0113270282745361, "learning_rate": 1.1144133133591784e-05, "loss": 1.2188, "step": 27710 }, { "epoch": 15.921883974727168, "grad_norm": 0.8991410136222839, "learning_rate": 1.1114036136086298e-05, "loss": 1.1844, "step": 27720 }, { "epoch": 15.927627800114877, "grad_norm": 1.0259344577789307, "learning_rate": 1.1083974770839044e-05, "loss": 1.2061, "step": 27730 }, { "epoch": 15.933371625502584, "grad_norm": 0.9925030469894409, "learning_rate": 1.1053949065394301e-05, "loss": 1.2012, "step": 27740 }, { "epoch": 15.939115450890293, "grad_norm": 0.9454563856124878, "learning_rate": 1.1023959047263672e-05, "loss": 1.1916, "step": 27750 }, { "epoch": 15.944859276278, "grad_norm": 0.9770966172218323, "learning_rate": 1.0994004743926045e-05, "loss": 1.197, "step": 27760 }, { "epoch": 15.95060310166571, "grad_norm": 1.0804015398025513, "learning_rate": 1.0964086182827582e-05, "loss": 1.2327, "step": 27770 }, { "epoch": 15.956346927053417, "grad_norm": 1.069285273551941, "learning_rate": 1.0934203391381723e-05, "loss": 1.2285, "step": 27780 }, { "epoch": 15.962090752441126, "grad_norm": 1.0548807382583618, "learning_rate": 1.0904356396969095e-05, "loss": 1.2229, "step": 27790 }, { "epoch": 15.967834577828834, "grad_norm": 0.9684779047966003, "learning_rate": 1.087454522693757e-05, "loss": 1.2163, "step": 27800 }, { "epoch": 15.973578403216543, "grad_norm": 1.0450820922851562, "learning_rate": 1.0844769908602166e-05, "loss": 1.2312, "step": 27810 }, { "epoch": 15.97932222860425, "grad_norm": 0.9897649884223938, "learning_rate": 1.081503046924503e-05, "loss": 1.1967, "step": 27820 }, { "epoch": 15.98506605399196, "grad_norm": 0.969822347164154, "learning_rate": 1.078532693611549e-05, "loss": 1.1866, "step": 27830 }, { "epoch": 15.990809879379666, "grad_norm": 1.0288376808166504, "learning_rate": 1.075565933642993e-05, "loss": 1.2109, "step": 27840 }, { "epoch": 15.996553704767376, "grad_norm": 0.9943313598632812, "learning_rate": 1.0726027697371854e-05, "loss": 1.1967, "step": 27850 }, { "epoch": 16.002297530155083, "grad_norm": 0.9977245926856995, "learning_rate": 1.0696432046091763e-05, "loss": 1.1938, "step": 27860 }, { "epoch": 16.00804135554279, "grad_norm": 0.9767646193504333, "learning_rate": 1.0666872409707193e-05, "loss": 1.1955, "step": 27870 }, { "epoch": 16.0137851809305, "grad_norm": 0.9896988272666931, "learning_rate": 1.0637348815302727e-05, "loss": 1.2149, "step": 27880 }, { "epoch": 16.01952900631821, "grad_norm": 0.9613653421401978, "learning_rate": 1.0607861289929868e-05, "loss": 1.2041, "step": 27890 }, { "epoch": 16.025272831705916, "grad_norm": 0.9446055889129639, "learning_rate": 1.0578409860607114e-05, "loss": 1.2045, "step": 27900 }, { "epoch": 16.031016657093623, "grad_norm": 0.9424024820327759, "learning_rate": 1.0548994554319847e-05, "loss": 1.1803, "step": 27910 }, { "epoch": 16.036760482481334, "grad_norm": 0.9718156456947327, "learning_rate": 1.0519615398020385e-05, "loss": 1.1856, "step": 27920 }, { "epoch": 16.04250430786904, "grad_norm": 0.9116566777229309, "learning_rate": 1.049027241862793e-05, "loss": 1.1876, "step": 27930 }, { "epoch": 16.04824813325675, "grad_norm": 0.9529868960380554, "learning_rate": 1.0460965643028485e-05, "loss": 1.1925, "step": 27940 }, { "epoch": 16.053991958644456, "grad_norm": 1.213744878768921, "learning_rate": 1.0431695098074936e-05, "loss": 1.1906, "step": 27950 }, { "epoch": 16.059735784032167, "grad_norm": 0.9598230123519897, "learning_rate": 1.0402460810586947e-05, "loss": 1.2019, "step": 27960 }, { "epoch": 16.065479609419874, "grad_norm": 1.0964374542236328, "learning_rate": 1.037326280735094e-05, "loss": 1.1894, "step": 27970 }, { "epoch": 16.07122343480758, "grad_norm": 1.0539747476577759, "learning_rate": 1.0344101115120144e-05, "loss": 1.1687, "step": 27980 }, { "epoch": 16.07696726019529, "grad_norm": 1.2400903701782227, "learning_rate": 1.031497576061446e-05, "loss": 1.2158, "step": 27990 }, { "epoch": 16.082711085583, "grad_norm": 1.0825668573379517, "learning_rate": 1.0285886770520548e-05, "loss": 1.2002, "step": 28000 }, { "epoch": 16.082711085583, "eval_loss": 1.0573391914367676, "eval_runtime": 121.4206, "eval_samples_per_second": 13.103, "eval_steps_per_second": 0.14, "eval_wer": 0.08639394281839756, "step": 28000 }, { "epoch": 16.088454910970707, "grad_norm": 1.1023013591766357, "learning_rate": 1.0256834171491693e-05, "loss": 1.1982, "step": 28010 }, { "epoch": 16.094198736358415, "grad_norm": 1.0633989572525024, "learning_rate": 1.0227817990147873e-05, "loss": 1.205, "step": 28020 }, { "epoch": 16.099942561746122, "grad_norm": 1.0366227626800537, "learning_rate": 1.0198838253075715e-05, "loss": 1.1883, "step": 28030 }, { "epoch": 16.10568638713383, "grad_norm": 0.9466197490692139, "learning_rate": 1.016989498682839e-05, "loss": 1.2044, "step": 28040 }, { "epoch": 16.11143021252154, "grad_norm": 0.9514585137367249, "learning_rate": 1.0140988217925718e-05, "loss": 1.2066, "step": 28050 }, { "epoch": 16.117174037909248, "grad_norm": 1.1074148416519165, "learning_rate": 1.0112117972854033e-05, "loss": 1.1944, "step": 28060 }, { "epoch": 16.122917863296955, "grad_norm": 0.9759691953659058, "learning_rate": 1.0083284278066212e-05, "loss": 1.1962, "step": 28070 }, { "epoch": 16.128661688684662, "grad_norm": 1.005771279335022, "learning_rate": 1.005448715998167e-05, "loss": 1.2013, "step": 28080 }, { "epoch": 16.134405514072373, "grad_norm": 0.8988441824913025, "learning_rate": 1.0025726644986264e-05, "loss": 1.1918, "step": 28090 }, { "epoch": 16.14014933946008, "grad_norm": 1.018329381942749, "learning_rate": 9.99700275943235e-06, "loss": 1.2157, "step": 28100 }, { "epoch": 16.145893164847788, "grad_norm": 0.9809838533401489, "learning_rate": 9.968315529638716e-06, "loss": 1.1999, "step": 28110 }, { "epoch": 16.151636990235495, "grad_norm": 1.1276025772094727, "learning_rate": 9.939664981890534e-06, "loss": 1.1856, "step": 28120 }, { "epoch": 16.157380815623206, "grad_norm": 0.9696183800697327, "learning_rate": 9.911051142439412e-06, "loss": 1.2121, "step": 28130 }, { "epoch": 16.163124641010914, "grad_norm": 0.9299204349517822, "learning_rate": 9.882474037503268e-06, "loss": 1.2033, "step": 28140 }, { "epoch": 16.16886846639862, "grad_norm": 1.0278395414352417, "learning_rate": 9.853933693266419e-06, "loss": 1.1915, "step": 28150 }, { "epoch": 16.174612291786328, "grad_norm": 0.9883560538291931, "learning_rate": 9.82543013587945e-06, "loss": 1.2089, "step": 28160 }, { "epoch": 16.18035611717404, "grad_norm": 0.9810996055603027, "learning_rate": 9.796963391459275e-06, "loss": 1.2007, "step": 28170 }, { "epoch": 16.186099942561746, "grad_norm": 1.1436527967453003, "learning_rate": 9.768533486089066e-06, "loss": 1.1956, "step": 28180 }, { "epoch": 16.191843767949454, "grad_norm": 0.97896409034729, "learning_rate": 9.740140445818214e-06, "loss": 1.1968, "step": 28190 }, { "epoch": 16.19758759333716, "grad_norm": 0.9986919164657593, "learning_rate": 9.711784296662372e-06, "loss": 1.1971, "step": 28200 }, { "epoch": 16.203331418724872, "grad_norm": 1.0654900074005127, "learning_rate": 9.68346506460337e-06, "loss": 1.1995, "step": 28210 }, { "epoch": 16.20907524411258, "grad_norm": 0.9688809514045715, "learning_rate": 9.655182775589234e-06, "loss": 1.2014, "step": 28220 }, { "epoch": 16.214819069500287, "grad_norm": 1.0210652351379395, "learning_rate": 9.626937455534115e-06, "loss": 1.212, "step": 28230 }, { "epoch": 16.220562894887994, "grad_norm": 0.9696595668792725, "learning_rate": 9.598729130318278e-06, "loss": 1.1932, "step": 28240 }, { "epoch": 16.226306720275705, "grad_norm": 0.9565127491950989, "learning_rate": 9.570557825788133e-06, "loss": 1.214, "step": 28250 }, { "epoch": 16.232050545663412, "grad_norm": 1.064549207687378, "learning_rate": 9.54242356775613e-06, "loss": 1.2043, "step": 28260 }, { "epoch": 16.23779437105112, "grad_norm": 0.9245190024375916, "learning_rate": 9.514326382000815e-06, "loss": 1.1961, "step": 28270 }, { "epoch": 16.243538196438827, "grad_norm": 1.0006709098815918, "learning_rate": 9.486266294266716e-06, "loss": 1.2021, "step": 28280 }, { "epoch": 16.249282021826538, "grad_norm": 0.9861505627632141, "learning_rate": 9.458243330264414e-06, "loss": 1.2084, "step": 28290 }, { "epoch": 16.255025847214245, "grad_norm": 1.1787704229354858, "learning_rate": 9.430257515670456e-06, "loss": 1.2062, "step": 28300 }, { "epoch": 16.260769672601953, "grad_norm": 0.9108131527900696, "learning_rate": 9.402308876127336e-06, "loss": 1.1899, "step": 28310 }, { "epoch": 16.26651349798966, "grad_norm": 1.0063194036483765, "learning_rate": 9.374397437243523e-06, "loss": 1.1811, "step": 28320 }, { "epoch": 16.27225732337737, "grad_norm": 0.9425542950630188, "learning_rate": 9.346523224593368e-06, "loss": 1.1835, "step": 28330 }, { "epoch": 16.278001148765078, "grad_norm": 1.0358150005340576, "learning_rate": 9.318686263717099e-06, "loss": 1.1977, "step": 28340 }, { "epoch": 16.283744974152786, "grad_norm": 0.9147601127624512, "learning_rate": 9.290886580120874e-06, "loss": 1.2002, "step": 28350 }, { "epoch": 16.289488799540493, "grad_norm": 1.041288137435913, "learning_rate": 9.263124199276624e-06, "loss": 1.208, "step": 28360 }, { "epoch": 16.295232624928204, "grad_norm": 0.9085084199905396, "learning_rate": 9.235399146622156e-06, "loss": 1.1713, "step": 28370 }, { "epoch": 16.30097645031591, "grad_norm": 0.9786022901535034, "learning_rate": 9.207711447561029e-06, "loss": 1.2103, "step": 28380 }, { "epoch": 16.30672027570362, "grad_norm": 1.0332207679748535, "learning_rate": 9.180061127462613e-06, "loss": 1.1893, "step": 28390 }, { "epoch": 16.312464101091326, "grad_norm": 0.9504291415214539, "learning_rate": 9.152448211662016e-06, "loss": 1.191, "step": 28400 }, { "epoch": 16.318207926479037, "grad_norm": 1.037750244140625, "learning_rate": 9.124872725460055e-06, "loss": 1.189, "step": 28410 }, { "epoch": 16.323951751866744, "grad_norm": 0.9503852128982544, "learning_rate": 9.097334694123288e-06, "loss": 1.1838, "step": 28420 }, { "epoch": 16.32969557725445, "grad_norm": 1.1074426174163818, "learning_rate": 9.069834142883928e-06, "loss": 1.2221, "step": 28430 }, { "epoch": 16.33543940264216, "grad_norm": 1.002485990524292, "learning_rate": 9.04237109693984e-06, "loss": 1.1997, "step": 28440 }, { "epoch": 16.34118322802987, "grad_norm": 1.118814468383789, "learning_rate": 9.014945581454553e-06, "loss": 1.1996, "step": 28450 }, { "epoch": 16.346927053417577, "grad_norm": 1.0240421295166016, "learning_rate": 8.987557621557167e-06, "loss": 1.1958, "step": 28460 }, { "epoch": 16.352670878805284, "grad_norm": 1.0071808099746704, "learning_rate": 8.960207242342423e-06, "loss": 1.1989, "step": 28470 }, { "epoch": 16.35841470419299, "grad_norm": 0.986801266670227, "learning_rate": 8.932894468870596e-06, "loss": 1.2046, "step": 28480 }, { "epoch": 16.3641585295807, "grad_norm": 1.0389275550842285, "learning_rate": 8.905619326167489e-06, "loss": 1.2099, "step": 28490 }, { "epoch": 16.36990235496841, "grad_norm": 1.093624472618103, "learning_rate": 8.878381839224475e-06, "loss": 1.1872, "step": 28500 }, { "epoch": 16.375646180356117, "grad_norm": 1.0568568706512451, "learning_rate": 8.85118203299836e-06, "loss": 1.199, "step": 28510 }, { "epoch": 16.381390005743825, "grad_norm": 1.1465950012207031, "learning_rate": 8.824019932411489e-06, "loss": 1.2018, "step": 28520 }, { "epoch": 16.387133831131532, "grad_norm": 1.0193283557891846, "learning_rate": 8.796895562351616e-06, "loss": 1.1978, "step": 28530 }, { "epoch": 16.392877656519243, "grad_norm": 0.9369019269943237, "learning_rate": 8.769808947671922e-06, "loss": 1.209, "step": 28540 }, { "epoch": 16.39862148190695, "grad_norm": 1.00782310962677, "learning_rate": 8.74276011319103e-06, "loss": 1.1999, "step": 28550 }, { "epoch": 16.404365307294658, "grad_norm": 1.0319541692733765, "learning_rate": 8.715749083692899e-06, "loss": 1.189, "step": 28560 }, { "epoch": 16.410109132682365, "grad_norm": 1.0399322509765625, "learning_rate": 8.688775883926889e-06, "loss": 1.2012, "step": 28570 }, { "epoch": 16.415852958070076, "grad_norm": 1.033645510673523, "learning_rate": 8.661840538607685e-06, "loss": 1.1888, "step": 28580 }, { "epoch": 16.421596783457783, "grad_norm": 0.9258694648742676, "learning_rate": 8.634943072415283e-06, "loss": 1.1773, "step": 28590 }, { "epoch": 16.42734060884549, "grad_norm": 0.9932130575180054, "learning_rate": 8.608083509994975e-06, "loss": 1.1912, "step": 28600 }, { "epoch": 16.433084434233198, "grad_norm": 1.056031584739685, "learning_rate": 8.581261875957303e-06, "loss": 1.2126, "step": 28610 }, { "epoch": 16.43882825962091, "grad_norm": 1.055188536643982, "learning_rate": 8.554478194878099e-06, "loss": 1.2104, "step": 28620 }, { "epoch": 16.444572085008616, "grad_norm": 0.9226313829421997, "learning_rate": 8.527732491298365e-06, "loss": 1.1946, "step": 28630 }, { "epoch": 16.450315910396323, "grad_norm": 1.0118293762207031, "learning_rate": 8.501024789724371e-06, "loss": 1.1849, "step": 28640 }, { "epoch": 16.45605973578403, "grad_norm": 0.9435645937919617, "learning_rate": 8.474355114627498e-06, "loss": 1.1929, "step": 28650 }, { "epoch": 16.46180356117174, "grad_norm": 1.0594557523727417, "learning_rate": 8.447723490444338e-06, "loss": 1.196, "step": 28660 }, { "epoch": 16.46754738655945, "grad_norm": 0.9332827925682068, "learning_rate": 8.4211299415766e-06, "loss": 1.1903, "step": 28670 }, { "epoch": 16.473291211947156, "grad_norm": 0.9605672955513, "learning_rate": 8.39457449239109e-06, "loss": 1.189, "step": 28680 }, { "epoch": 16.479035037334864, "grad_norm": 1.1050618886947632, "learning_rate": 8.368057167219738e-06, "loss": 1.1995, "step": 28690 }, { "epoch": 16.484778862722575, "grad_norm": 0.9725523591041565, "learning_rate": 8.34157799035951e-06, "loss": 1.2191, "step": 28700 }, { "epoch": 16.490522688110282, "grad_norm": 1.015174388885498, "learning_rate": 8.31513698607242e-06, "loss": 1.1942, "step": 28710 }, { "epoch": 16.49626651349799, "grad_norm": 0.9837433695793152, "learning_rate": 8.288734178585535e-06, "loss": 1.1958, "step": 28720 }, { "epoch": 16.502010338885697, "grad_norm": 0.9460749626159668, "learning_rate": 8.262369592090893e-06, "loss": 1.169, "step": 28730 }, { "epoch": 16.507754164273408, "grad_norm": 1.051423192024231, "learning_rate": 8.236043250745537e-06, "loss": 1.1832, "step": 28740 }, { "epoch": 16.513497989661115, "grad_norm": 0.9495314359664917, "learning_rate": 8.209755178671432e-06, "loss": 1.1956, "step": 28750 }, { "epoch": 16.519241815048822, "grad_norm": 0.9878236651420593, "learning_rate": 8.183505399955516e-06, "loss": 1.1765, "step": 28760 }, { "epoch": 16.52498564043653, "grad_norm": 0.9338296055793762, "learning_rate": 8.15729393864963e-06, "loss": 1.2115, "step": 28770 }, { "epoch": 16.53072946582424, "grad_norm": 0.9370130300521851, "learning_rate": 8.13112081877047e-06, "loss": 1.188, "step": 28780 }, { "epoch": 16.536473291211948, "grad_norm": 0.9579607844352722, "learning_rate": 8.104986064299666e-06, "loss": 1.1928, "step": 28790 }, { "epoch": 16.542217116599655, "grad_norm": 1.0250658988952637, "learning_rate": 8.07888969918364e-06, "loss": 1.1843, "step": 28800 }, { "epoch": 16.547960941987363, "grad_norm": 0.9766988158226013, "learning_rate": 8.052831747333654e-06, "loss": 1.1911, "step": 28810 }, { "epoch": 16.553704767375073, "grad_norm": 0.9715381264686584, "learning_rate": 8.026812232625792e-06, "loss": 1.1961, "step": 28820 }, { "epoch": 16.55944859276278, "grad_norm": 0.9229410886764526, "learning_rate": 8.000831178900886e-06, "loss": 1.1933, "step": 28830 }, { "epoch": 16.565192418150488, "grad_norm": 1.0843863487243652, "learning_rate": 7.974888609964557e-06, "loss": 1.2201, "step": 28840 }, { "epoch": 16.570936243538195, "grad_norm": 1.121579885482788, "learning_rate": 7.948984549587168e-06, "loss": 1.2018, "step": 28850 }, { "epoch": 16.576680068925903, "grad_norm": 1.0867716073989868, "learning_rate": 7.923119021503753e-06, "loss": 1.2295, "step": 28860 }, { "epoch": 16.582423894313614, "grad_norm": 0.9368448257446289, "learning_rate": 7.897292049414097e-06, "loss": 1.1946, "step": 28870 }, { "epoch": 16.58816771970132, "grad_norm": 0.8986218571662903, "learning_rate": 7.871503656982604e-06, "loss": 1.2094, "step": 28880 }, { "epoch": 16.59391154508903, "grad_norm": 0.9822723865509033, "learning_rate": 7.845753867838389e-06, "loss": 1.1977, "step": 28890 }, { "epoch": 16.59965537047674, "grad_norm": 0.9966803789138794, "learning_rate": 7.820042705575133e-06, "loss": 1.1897, "step": 28900 }, { "epoch": 16.605399195864447, "grad_norm": 0.9297454357147217, "learning_rate": 7.794370193751156e-06, "loss": 1.1786, "step": 28910 }, { "epoch": 16.611143021252154, "grad_norm": 1.010553002357483, "learning_rate": 7.768736355889381e-06, "loss": 1.208, "step": 28920 }, { "epoch": 16.61688684663986, "grad_norm": 0.9486767649650574, "learning_rate": 7.743141215477244e-06, "loss": 1.1977, "step": 28930 }, { "epoch": 16.62263067202757, "grad_norm": 1.0453968048095703, "learning_rate": 7.71758479596678e-06, "loss": 1.2115, "step": 28940 }, { "epoch": 16.62837449741528, "grad_norm": 1.0398608446121216, "learning_rate": 7.692067120774517e-06, "loss": 1.2053, "step": 28950 }, { "epoch": 16.634118322802987, "grad_norm": 0.9172380566596985, "learning_rate": 7.666588213281477e-06, "loss": 1.2, "step": 28960 }, { "epoch": 16.639862148190694, "grad_norm": 1.0411999225616455, "learning_rate": 7.641148096833188e-06, "loss": 1.1999, "step": 28970 }, { "epoch": 16.6456059735784, "grad_norm": 1.0267289876937866, "learning_rate": 7.615746794739595e-06, "loss": 1.1999, "step": 28980 }, { "epoch": 16.651349798966113, "grad_norm": 1.0688894987106323, "learning_rate": 7.5903843302751204e-06, "loss": 1.2097, "step": 28990 }, { "epoch": 16.65709362435382, "grad_norm": 0.9703477025032043, "learning_rate": 7.565060726678552e-06, "loss": 1.1963, "step": 29000 }, { "epoch": 16.65709362435382, "eval_loss": 1.0564184188842773, "eval_runtime": 122.5765, "eval_samples_per_second": 12.98, "eval_steps_per_second": 0.139, "eval_wer": 0.08554638942253362, "step": 29000 }, { "epoch": 16.662837449741527, "grad_norm": 0.9734466075897217, "learning_rate": 7.539776007153135e-06, "loss": 1.2067, "step": 29010 }, { "epoch": 16.668581275129235, "grad_norm": 0.9291802048683167, "learning_rate": 7.514530194866423e-06, "loss": 1.1975, "step": 29020 }, { "epoch": 16.674325100516945, "grad_norm": 1.0431774854660034, "learning_rate": 7.4893233129503704e-06, "loss": 1.2, "step": 29030 }, { "epoch": 16.680068925904653, "grad_norm": 1.0795116424560547, "learning_rate": 7.4641553845012135e-06, "loss": 1.2062, "step": 29040 }, { "epoch": 16.68581275129236, "grad_norm": 0.9555503129959106, "learning_rate": 7.43902643257954e-06, "loss": 1.191, "step": 29050 }, { "epoch": 16.691556576680068, "grad_norm": 1.025253176689148, "learning_rate": 7.413936480210208e-06, "loss": 1.2118, "step": 29060 }, { "epoch": 16.69730040206778, "grad_norm": 0.845024585723877, "learning_rate": 7.38888555038234e-06, "loss": 1.1814, "step": 29070 }, { "epoch": 16.703044227455486, "grad_norm": 1.0023993253707886, "learning_rate": 7.36387366604928e-06, "loss": 1.1924, "step": 29080 }, { "epoch": 16.708788052843193, "grad_norm": 0.9628487229347229, "learning_rate": 7.3389008501286495e-06, "loss": 1.2031, "step": 29090 }, { "epoch": 16.7145318782309, "grad_norm": 0.9806420803070068, "learning_rate": 7.313967125502222e-06, "loss": 1.1901, "step": 29100 }, { "epoch": 16.72027570361861, "grad_norm": 1.0675193071365356, "learning_rate": 7.289072515015991e-06, "loss": 1.166, "step": 29110 }, { "epoch": 16.72601952900632, "grad_norm": 1.0211138725280762, "learning_rate": 7.264217041480069e-06, "loss": 1.1693, "step": 29120 }, { "epoch": 16.731763354394026, "grad_norm": 0.9645518660545349, "learning_rate": 7.239400727668755e-06, "loss": 1.1841, "step": 29130 }, { "epoch": 16.737507179781733, "grad_norm": 1.025267243385315, "learning_rate": 7.214623596320447e-06, "loss": 1.2125, "step": 29140 }, { "epoch": 16.743251005169444, "grad_norm": 0.9313498139381409, "learning_rate": 7.1898856701376194e-06, "loss": 1.1923, "step": 29150 }, { "epoch": 16.74899483055715, "grad_norm": 0.9631925821304321, "learning_rate": 7.165186971786865e-06, "loss": 1.19, "step": 29160 }, { "epoch": 16.75473865594486, "grad_norm": 0.91645747423172, "learning_rate": 7.140527523898805e-06, "loss": 1.1989, "step": 29170 }, { "epoch": 16.760482481332566, "grad_norm": 1.0127781629562378, "learning_rate": 7.115907349068095e-06, "loss": 1.1866, "step": 29180 }, { "epoch": 16.766226306720277, "grad_norm": 1.0304737091064453, "learning_rate": 7.091326469853429e-06, "loss": 1.2095, "step": 29190 }, { "epoch": 16.771970132107985, "grad_norm": 0.9764739274978638, "learning_rate": 7.06678490877747e-06, "loss": 1.193, "step": 29200 }, { "epoch": 16.777713957495692, "grad_norm": 1.0257683992385864, "learning_rate": 7.042282688326887e-06, "loss": 1.198, "step": 29210 }, { "epoch": 16.7834577828834, "grad_norm": 1.0606697797775269, "learning_rate": 7.017819830952253e-06, "loss": 1.2153, "step": 29220 }, { "epoch": 16.78920160827111, "grad_norm": 0.9175252914428711, "learning_rate": 6.993396359068124e-06, "loss": 1.1785, "step": 29230 }, { "epoch": 16.794945433658818, "grad_norm": 1.086734414100647, "learning_rate": 6.9690122950529556e-06, "loss": 1.1964, "step": 29240 }, { "epoch": 16.800689259046525, "grad_norm": 0.9455732107162476, "learning_rate": 6.944667661249064e-06, "loss": 1.1956, "step": 29250 }, { "epoch": 16.806433084434232, "grad_norm": 1.0959241390228271, "learning_rate": 6.920362479962677e-06, "loss": 1.2058, "step": 29260 }, { "epoch": 16.812176909821943, "grad_norm": 1.000938892364502, "learning_rate": 6.896096773463859e-06, "loss": 1.1844, "step": 29270 }, { "epoch": 16.81792073520965, "grad_norm": 1.0796737670898438, "learning_rate": 6.871870563986479e-06, "loss": 1.1967, "step": 29280 }, { "epoch": 16.823664560597358, "grad_norm": 0.9358550310134888, "learning_rate": 6.847683873728268e-06, "loss": 1.1806, "step": 29290 }, { "epoch": 16.829408385985065, "grad_norm": 1.0473881959915161, "learning_rate": 6.823536724850693e-06, "loss": 1.2444, "step": 29300 }, { "epoch": 16.835152211372773, "grad_norm": 0.9135806560516357, "learning_rate": 6.799429139479029e-06, "loss": 1.2198, "step": 29310 }, { "epoch": 16.840896036760483, "grad_norm": 0.8903408646583557, "learning_rate": 6.775361139702296e-06, "loss": 1.1831, "step": 29320 }, { "epoch": 16.84663986214819, "grad_norm": 0.9620775580406189, "learning_rate": 6.751332747573212e-06, "loss": 1.1738, "step": 29330 }, { "epoch": 16.852383687535898, "grad_norm": 1.0818181037902832, "learning_rate": 6.727343985108255e-06, "loss": 1.2052, "step": 29340 }, { "epoch": 16.858127512923605, "grad_norm": 1.0362913608551025, "learning_rate": 6.703394874287526e-06, "loss": 1.2024, "step": 29350 }, { "epoch": 16.863871338311316, "grad_norm": 1.071187973022461, "learning_rate": 6.679485437054868e-06, "loss": 1.1935, "step": 29360 }, { "epoch": 16.869615163699024, "grad_norm": 1.0566585063934326, "learning_rate": 6.655615695317711e-06, "loss": 1.1988, "step": 29370 }, { "epoch": 16.87535898908673, "grad_norm": 1.0511890649795532, "learning_rate": 6.631785670947139e-06, "loss": 1.1856, "step": 29380 }, { "epoch": 16.88110281447444, "grad_norm": 1.0357812643051147, "learning_rate": 6.607995385777858e-06, "loss": 1.1944, "step": 29390 }, { "epoch": 16.88684663986215, "grad_norm": 1.0455526113510132, "learning_rate": 6.584244861608126e-06, "loss": 1.1992, "step": 29400 }, { "epoch": 16.892590465249857, "grad_norm": 0.9211399555206299, "learning_rate": 6.560534120199799e-06, "loss": 1.1789, "step": 29410 }, { "epoch": 16.898334290637564, "grad_norm": 1.0147125720977783, "learning_rate": 6.536863183278284e-06, "loss": 1.1817, "step": 29420 }, { "epoch": 16.90407811602527, "grad_norm": 1.0553264617919922, "learning_rate": 6.513232072532488e-06, "loss": 1.1879, "step": 29430 }, { "epoch": 16.909821941412982, "grad_norm": 1.0018610954284668, "learning_rate": 6.489640809614859e-06, "loss": 1.198, "step": 29440 }, { "epoch": 16.91556576680069, "grad_norm": 0.9988446831703186, "learning_rate": 6.466089416141301e-06, "loss": 1.1865, "step": 29450 }, { "epoch": 16.921309592188397, "grad_norm": 0.9795340299606323, "learning_rate": 6.4425779136912235e-06, "loss": 1.1996, "step": 29460 }, { "epoch": 16.927053417576104, "grad_norm": 1.0192444324493408, "learning_rate": 6.419106323807446e-06, "loss": 1.1958, "step": 29470 }, { "epoch": 16.932797242963815, "grad_norm": 0.9266735911369324, "learning_rate": 6.395674667996256e-06, "loss": 1.1998, "step": 29480 }, { "epoch": 16.938541068351523, "grad_norm": 0.9761055111885071, "learning_rate": 6.3722829677273155e-06, "loss": 1.1942, "step": 29490 }, { "epoch": 16.94428489373923, "grad_norm": 1.0179728269577026, "learning_rate": 6.348931244433695e-06, "loss": 1.2028, "step": 29500 }, { "epoch": 16.950028719126937, "grad_norm": 1.0008351802825928, "learning_rate": 6.3256195195118555e-06, "loss": 1.2141, "step": 29510 }, { "epoch": 16.955772544514648, "grad_norm": 0.9183552861213684, "learning_rate": 6.302347814321556e-06, "loss": 1.2051, "step": 29520 }, { "epoch": 16.961516369902355, "grad_norm": 0.9476117491722107, "learning_rate": 6.279116150185937e-06, "loss": 1.21, "step": 29530 }, { "epoch": 16.967260195290063, "grad_norm": 0.9979916214942932, "learning_rate": 6.255924548391431e-06, "loss": 1.1879, "step": 29540 }, { "epoch": 16.97300402067777, "grad_norm": 0.9785417914390564, "learning_rate": 6.232773030187744e-06, "loss": 1.192, "step": 29550 }, { "epoch": 16.97874784606548, "grad_norm": 1.0037745237350464, "learning_rate": 6.209661616787891e-06, "loss": 1.1923, "step": 29560 }, { "epoch": 16.98449167145319, "grad_norm": 1.12059485912323, "learning_rate": 6.1865903293681105e-06, "loss": 1.2012, "step": 29570 }, { "epoch": 16.990235496840896, "grad_norm": 0.9373881220817566, "learning_rate": 6.163559189067901e-06, "loss": 1.1918, "step": 29580 }, { "epoch": 16.995979322228603, "grad_norm": 1.027611255645752, "learning_rate": 6.140568216989946e-06, "loss": 1.1945, "step": 29590 }, { "epoch": 17.001723147616314, "grad_norm": 1.0132412910461426, "learning_rate": 6.117617434200149e-06, "loss": 1.1651, "step": 29600 }, { "epoch": 17.00746697300402, "grad_norm": 1.050065279006958, "learning_rate": 6.09470686172758e-06, "loss": 1.1986, "step": 29610 }, { "epoch": 17.01321079839173, "grad_norm": 1.0160305500030518, "learning_rate": 6.071836520564459e-06, "loss": 1.1885, "step": 29620 }, { "epoch": 17.018954623779436, "grad_norm": 1.2584477663040161, "learning_rate": 6.049006431666157e-06, "loss": 1.1943, "step": 29630 }, { "epoch": 17.024698449167147, "grad_norm": 0.9321852326393127, "learning_rate": 6.026216615951157e-06, "loss": 1.1745, "step": 29640 }, { "epoch": 17.030442274554854, "grad_norm": 0.9220076203346252, "learning_rate": 6.003467094301026e-06, "loss": 1.1919, "step": 29650 }, { "epoch": 17.03618609994256, "grad_norm": 1.0550299882888794, "learning_rate": 5.980757887560441e-06, "loss": 1.2172, "step": 29660 }, { "epoch": 17.04192992533027, "grad_norm": 0.9694183468818665, "learning_rate": 5.958089016537105e-06, "loss": 1.1968, "step": 29670 }, { "epoch": 17.04767375071798, "grad_norm": 1.0885531902313232, "learning_rate": 5.935460502001793e-06, "loss": 1.1871, "step": 29680 }, { "epoch": 17.053417576105687, "grad_norm": 0.993267834186554, "learning_rate": 5.912872364688297e-06, "loss": 1.1742, "step": 29690 }, { "epoch": 17.059161401493395, "grad_norm": 1.0666749477386475, "learning_rate": 5.890324625293393e-06, "loss": 1.1916, "step": 29700 }, { "epoch": 17.064905226881102, "grad_norm": 0.983528196811676, "learning_rate": 5.867817304476871e-06, "loss": 1.1915, "step": 29710 }, { "epoch": 17.070649052268813, "grad_norm": 1.036799669265747, "learning_rate": 5.845350422861448e-06, "loss": 1.1836, "step": 29720 }, { "epoch": 17.07639287765652, "grad_norm": 1.0437095165252686, "learning_rate": 5.822924001032831e-06, "loss": 1.1815, "step": 29730 }, { "epoch": 17.082136703044227, "grad_norm": 0.9241591095924377, "learning_rate": 5.800538059539632e-06, "loss": 1.1897, "step": 29740 }, { "epoch": 17.087880528431935, "grad_norm": 0.9938023686408997, "learning_rate": 5.778192618893352e-06, "loss": 1.1859, "step": 29750 }, { "epoch": 17.093624353819642, "grad_norm": 0.988040030002594, "learning_rate": 5.755887699568438e-06, "loss": 1.198, "step": 29760 }, { "epoch": 17.099368179207353, "grad_norm": 1.0079675912857056, "learning_rate": 5.733623322002151e-06, "loss": 1.1805, "step": 29770 }, { "epoch": 17.10511200459506, "grad_norm": 1.0408531427383423, "learning_rate": 5.711399506594632e-06, "loss": 1.1906, "step": 29780 }, { "epoch": 17.110855829982768, "grad_norm": 1.0767842531204224, "learning_rate": 5.689216273708877e-06, "loss": 1.174, "step": 29790 }, { "epoch": 17.116599655370475, "grad_norm": 1.0826375484466553, "learning_rate": 5.667073643670644e-06, "loss": 1.1907, "step": 29800 }, { "epoch": 17.122343480758186, "grad_norm": 1.1785517930984497, "learning_rate": 5.644971636768544e-06, "loss": 1.1829, "step": 29810 }, { "epoch": 17.128087306145893, "grad_norm": 1.0013254880905151, "learning_rate": 5.622910273253913e-06, "loss": 1.1938, "step": 29820 }, { "epoch": 17.1338311315336, "grad_norm": 0.9207583069801331, "learning_rate": 5.6008895733409056e-06, "loss": 1.1888, "step": 29830 }, { "epoch": 17.139574956921308, "grad_norm": 0.9881877899169922, "learning_rate": 5.578909557206364e-06, "loss": 1.2028, "step": 29840 }, { "epoch": 17.14531878230902, "grad_norm": 0.9341586828231812, "learning_rate": 5.556970244989879e-06, "loss": 1.1878, "step": 29850 }, { "epoch": 17.151062607696726, "grad_norm": 0.9626673460006714, "learning_rate": 5.535071656793739e-06, "loss": 1.2037, "step": 29860 }, { "epoch": 17.156806433084434, "grad_norm": 1.0223544836044312, "learning_rate": 5.51321381268293e-06, "loss": 1.1734, "step": 29870 }, { "epoch": 17.16255025847214, "grad_norm": 0.9245195388793945, "learning_rate": 5.4913967326851015e-06, "loss": 1.1859, "step": 29880 }, { "epoch": 17.168294083859852, "grad_norm": 1.0195177793502808, "learning_rate": 5.469620436790535e-06, "loss": 1.194, "step": 29890 }, { "epoch": 17.17403790924756, "grad_norm": 0.9807387590408325, "learning_rate": 5.447884944952165e-06, "loss": 1.1908, "step": 29900 }, { "epoch": 17.179781734635267, "grad_norm": 0.9793677926063538, "learning_rate": 5.426190277085527e-06, "loss": 1.1836, "step": 29910 }, { "epoch": 17.185525560022974, "grad_norm": 1.0263057947158813, "learning_rate": 5.40453645306874e-06, "loss": 1.1843, "step": 29920 }, { "epoch": 17.191269385410685, "grad_norm": 0.9091349840164185, "learning_rate": 5.382923492742535e-06, "loss": 1.2058, "step": 29930 }, { "epoch": 17.197013210798392, "grad_norm": 0.9294777512550354, "learning_rate": 5.3613514159101476e-06, "loss": 1.1817, "step": 29940 }, { "epoch": 17.2027570361861, "grad_norm": 1.07024347782135, "learning_rate": 5.339820242337416e-06, "loss": 1.1831, "step": 29950 }, { "epoch": 17.208500861573807, "grad_norm": 0.9532782435417175, "learning_rate": 5.3183299917526434e-06, "loss": 1.1948, "step": 29960 }, { "epoch": 17.214244686961518, "grad_norm": 0.9681318998336792, "learning_rate": 5.2968806838466666e-06, "loss": 1.1763, "step": 29970 }, { "epoch": 17.219988512349225, "grad_norm": 1.1072713136672974, "learning_rate": 5.275472338272809e-06, "loss": 1.2035, "step": 29980 }, { "epoch": 17.225732337736932, "grad_norm": 1.002629041671753, "learning_rate": 5.2541049746468476e-06, "loss": 1.1763, "step": 29990 }, { "epoch": 17.23147616312464, "grad_norm": 0.9318569898605347, "learning_rate": 5.232778612547026e-06, "loss": 1.1754, "step": 30000 }, { "epoch": 17.23147616312464, "eval_loss": 1.0579822063446045, "eval_runtime": 121.3648, "eval_samples_per_second": 13.109, "eval_steps_per_second": 0.14, "eval_wer": 0.08602666968018985, "step": 30000 }, { "epoch": 17.23721998851235, "grad_norm": 0.9345382452011108, "learning_rate": 5.211493271514e-06, "loss": 1.1944, "step": 30010 }, { "epoch": 17.242963813900058, "grad_norm": 0.9140852093696594, "learning_rate": 5.190248971050838e-06, "loss": 1.179, "step": 30020 }, { "epoch": 17.248707639287765, "grad_norm": 0.9444893002510071, "learning_rate": 5.169045730623035e-06, "loss": 1.1886, "step": 30030 }, { "epoch": 17.254451464675473, "grad_norm": 1.0549089908599854, "learning_rate": 5.147883569658422e-06, "loss": 1.1847, "step": 30040 }, { "epoch": 17.260195290063184, "grad_norm": 1.0795629024505615, "learning_rate": 5.126762507547228e-06, "loss": 1.1913, "step": 30050 }, { "epoch": 17.26593911545089, "grad_norm": 0.9579092860221863, "learning_rate": 5.105682563642012e-06, "loss": 1.2006, "step": 30060 }, { "epoch": 17.2716829408386, "grad_norm": 1.0866056680679321, "learning_rate": 5.084643757257633e-06, "loss": 1.2115, "step": 30070 }, { "epoch": 17.277426766226306, "grad_norm": 0.927370548248291, "learning_rate": 5.063646107671302e-06, "loss": 1.1883, "step": 30080 }, { "epoch": 17.283170591614017, "grad_norm": 1.0040650367736816, "learning_rate": 5.042689634122476e-06, "loss": 1.1982, "step": 30090 }, { "epoch": 17.288914417001724, "grad_norm": 1.0011693239212036, "learning_rate": 5.02177435581292e-06, "loss": 1.1834, "step": 30100 }, { "epoch": 17.29465824238943, "grad_norm": 0.968258798122406, "learning_rate": 5.000900291906624e-06, "loss": 1.204, "step": 30110 }, { "epoch": 17.30040206777714, "grad_norm": 0.874392569065094, "learning_rate": 4.98006746152982e-06, "loss": 1.1904, "step": 30120 }, { "epoch": 17.30614589316485, "grad_norm": 1.0383871793746948, "learning_rate": 4.95927588377098e-06, "loss": 1.202, "step": 30130 }, { "epoch": 17.311889718552557, "grad_norm": 1.0016822814941406, "learning_rate": 4.938525577680753e-06, "loss": 1.1858, "step": 30140 }, { "epoch": 17.317633543940264, "grad_norm": 0.9589056968688965, "learning_rate": 4.9178165622719834e-06, "loss": 1.2012, "step": 30150 }, { "epoch": 17.32337736932797, "grad_norm": 1.0535483360290527, "learning_rate": 4.897148856519698e-06, "loss": 1.1949, "step": 30160 }, { "epoch": 17.32912119471568, "grad_norm": 0.9668654799461365, "learning_rate": 4.87652247936103e-06, "loss": 1.2121, "step": 30170 }, { "epoch": 17.33486502010339, "grad_norm": 0.9578180909156799, "learning_rate": 4.855937449695287e-06, "loss": 1.2017, "step": 30180 }, { "epoch": 17.340608845491097, "grad_norm": 0.9804530739784241, "learning_rate": 4.8353937863838665e-06, "loss": 1.2035, "step": 30190 }, { "epoch": 17.346352670878805, "grad_norm": 0.9453611969947815, "learning_rate": 4.814891508250284e-06, "loss": 1.1992, "step": 30200 }, { "epoch": 17.352096496266512, "grad_norm": 1.0003318786621094, "learning_rate": 4.794430634080113e-06, "loss": 1.1916, "step": 30210 }, { "epoch": 17.357840321654223, "grad_norm": 1.0354970693588257, "learning_rate": 4.774011182620992e-06, "loss": 1.1944, "step": 30220 }, { "epoch": 17.36358414704193, "grad_norm": 0.9997474551200867, "learning_rate": 4.753633172582621e-06, "loss": 1.1905, "step": 30230 }, { "epoch": 17.369327972429637, "grad_norm": 0.9148712754249573, "learning_rate": 4.733296622636721e-06, "loss": 1.1849, "step": 30240 }, { "epoch": 17.375071797817345, "grad_norm": 0.9375751614570618, "learning_rate": 4.713001551417031e-06, "loss": 1.1642, "step": 30250 }, { "epoch": 17.380815623205056, "grad_norm": 1.0607540607452393, "learning_rate": 4.692747977519268e-06, "loss": 1.1868, "step": 30260 }, { "epoch": 17.386559448592763, "grad_norm": 1.0224499702453613, "learning_rate": 4.672535919501126e-06, "loss": 1.2019, "step": 30270 }, { "epoch": 17.39230327398047, "grad_norm": 0.992601752281189, "learning_rate": 4.65236539588229e-06, "loss": 1.167, "step": 30280 }, { "epoch": 17.398047099368178, "grad_norm": 1.169980764389038, "learning_rate": 4.632236425144348e-06, "loss": 1.201, "step": 30290 }, { "epoch": 17.40379092475589, "grad_norm": 0.9831385016441345, "learning_rate": 4.612149025730849e-06, "loss": 1.1968, "step": 30300 }, { "epoch": 17.409534750143596, "grad_norm": 0.8991706371307373, "learning_rate": 4.592103216047218e-06, "loss": 1.1713, "step": 30310 }, { "epoch": 17.415278575531303, "grad_norm": 0.9753699898719788, "learning_rate": 4.572099014460809e-06, "loss": 1.2003, "step": 30320 }, { "epoch": 17.42102240091901, "grad_norm": 1.0464894771575928, "learning_rate": 4.552136439300821e-06, "loss": 1.2101, "step": 30330 }, { "epoch": 17.42676622630672, "grad_norm": 1.0638798475265503, "learning_rate": 4.532215508858323e-06, "loss": 1.1851, "step": 30340 }, { "epoch": 17.43251005169443, "grad_norm": 1.0615841150283813, "learning_rate": 4.512336241386242e-06, "loss": 1.2097, "step": 30350 }, { "epoch": 17.438253877082136, "grad_norm": 0.949299156665802, "learning_rate": 4.492498655099306e-06, "loss": 1.1833, "step": 30360 }, { "epoch": 17.443997702469844, "grad_norm": 0.9039355516433716, "learning_rate": 4.472702768174065e-06, "loss": 1.1972, "step": 30370 }, { "epoch": 17.449741527857555, "grad_norm": 0.9006574749946594, "learning_rate": 4.452948598748864e-06, "loss": 1.1778, "step": 30380 }, { "epoch": 17.455485353245262, "grad_norm": 1.045052170753479, "learning_rate": 4.433236164923797e-06, "loss": 1.1795, "step": 30390 }, { "epoch": 17.46122917863297, "grad_norm": 1.006403923034668, "learning_rate": 4.413565484760765e-06, "loss": 1.1835, "step": 30400 }, { "epoch": 17.466973004020677, "grad_norm": 0.9585305452346802, "learning_rate": 4.393936576283358e-06, "loss": 1.198, "step": 30410 }, { "epoch": 17.472716829408387, "grad_norm": 0.9196950197219849, "learning_rate": 4.374349457476937e-06, "loss": 1.2057, "step": 30420 }, { "epoch": 17.478460654796095, "grad_norm": 0.9103960990905762, "learning_rate": 4.354804146288554e-06, "loss": 1.1746, "step": 30430 }, { "epoch": 17.484204480183802, "grad_norm": 0.9729591012001038, "learning_rate": 4.335300660626942e-06, "loss": 1.2144, "step": 30440 }, { "epoch": 17.48994830557151, "grad_norm": 0.9710026979446411, "learning_rate": 4.3158390183625395e-06, "loss": 1.1872, "step": 30450 }, { "epoch": 17.49569213095922, "grad_norm": 1.1012686491012573, "learning_rate": 4.296419237327403e-06, "loss": 1.2105, "step": 30460 }, { "epoch": 17.501435956346928, "grad_norm": 0.9311768412590027, "learning_rate": 4.27704133531529e-06, "loss": 1.181, "step": 30470 }, { "epoch": 17.507179781734635, "grad_norm": 0.9613198041915894, "learning_rate": 4.257705330081526e-06, "loss": 1.1866, "step": 30480 }, { "epoch": 17.512923607122342, "grad_norm": 0.9295112490653992, "learning_rate": 4.238411239343087e-06, "loss": 1.1948, "step": 30490 }, { "epoch": 17.518667432510053, "grad_norm": 1.0412112474441528, "learning_rate": 4.219159080778534e-06, "loss": 1.196, "step": 30500 }, { "epoch": 17.52441125789776, "grad_norm": 1.0489840507507324, "learning_rate": 4.1999488720279975e-06, "loss": 1.1879, "step": 30510 }, { "epoch": 17.530155083285468, "grad_norm": 0.9769622087478638, "learning_rate": 4.180780630693182e-06, "loss": 1.1903, "step": 30520 }, { "epoch": 17.535898908673175, "grad_norm": 1.1007071733474731, "learning_rate": 4.161654374337343e-06, "loss": 1.217, "step": 30530 }, { "epoch": 17.541642734060886, "grad_norm": 1.0456956624984741, "learning_rate": 4.142570120485247e-06, "loss": 1.2118, "step": 30540 }, { "epoch": 17.547386559448594, "grad_norm": 0.9357936382293701, "learning_rate": 4.123527886623198e-06, "loss": 1.1848, "step": 30550 }, { "epoch": 17.5531303848363, "grad_norm": 0.9416925311088562, "learning_rate": 4.104527690198977e-06, "loss": 1.1712, "step": 30560 }, { "epoch": 17.55887421022401, "grad_norm": 0.9844315052032471, "learning_rate": 4.08556954862187e-06, "loss": 1.2017, "step": 30570 }, { "epoch": 17.564618035611716, "grad_norm": 0.9683049321174622, "learning_rate": 4.0666534792626114e-06, "loss": 1.1916, "step": 30580 }, { "epoch": 17.570361860999427, "grad_norm": 1.000704050064087, "learning_rate": 4.047779499453378e-06, "loss": 1.1768, "step": 30590 }, { "epoch": 17.576105686387134, "grad_norm": 0.9367661476135254, "learning_rate": 4.028947626487807e-06, "loss": 1.2011, "step": 30600 }, { "epoch": 17.58184951177484, "grad_norm": 0.9693049192428589, "learning_rate": 4.010157877620944e-06, "loss": 1.1763, "step": 30610 }, { "epoch": 17.58759333716255, "grad_norm": 1.0307310819625854, "learning_rate": 3.9914102700692405e-06, "loss": 1.1906, "step": 30620 }, { "epoch": 17.59333716255026, "grad_norm": 0.9715161323547363, "learning_rate": 3.972704821010528e-06, "loss": 1.1889, "step": 30630 }, { "epoch": 17.599080987937967, "grad_norm": 1.0649501085281372, "learning_rate": 3.954041547583995e-06, "loss": 1.1796, "step": 30640 }, { "epoch": 17.604824813325674, "grad_norm": 1.0577764511108398, "learning_rate": 3.935420466890235e-06, "loss": 1.2047, "step": 30650 }, { "epoch": 17.61056863871338, "grad_norm": 1.1082143783569336, "learning_rate": 3.916841595991117e-06, "loss": 1.1737, "step": 30660 }, { "epoch": 17.616312464101092, "grad_norm": 1.0087838172912598, "learning_rate": 3.898304951909895e-06, "loss": 1.2019, "step": 30670 }, { "epoch": 17.6220562894888, "grad_norm": 0.9971638321876526, "learning_rate": 3.879810551631093e-06, "loss": 1.1782, "step": 30680 }, { "epoch": 17.627800114876507, "grad_norm": 0.9899203777313232, "learning_rate": 3.861358412100526e-06, "loss": 1.2046, "step": 30690 }, { "epoch": 17.633543940264214, "grad_norm": 0.9599500298500061, "learning_rate": 3.842948550225317e-06, "loss": 1.21, "step": 30700 }, { "epoch": 17.639287765651925, "grad_norm": 0.945087194442749, "learning_rate": 3.824580982873834e-06, "loss": 1.1916, "step": 30710 }, { "epoch": 17.645031591039633, "grad_norm": 0.9519335031509399, "learning_rate": 3.806255726875696e-06, "loss": 1.1846, "step": 30720 }, { "epoch": 17.65077541642734, "grad_norm": 0.9074278473854065, "learning_rate": 3.787972799021735e-06, "loss": 1.2071, "step": 30730 }, { "epoch": 17.656519241815047, "grad_norm": 1.0127816200256348, "learning_rate": 3.7697322160640307e-06, "loss": 1.183, "step": 30740 }, { "epoch": 17.66226306720276, "grad_norm": 0.9552931785583496, "learning_rate": 3.751533994715843e-06, "loss": 1.1885, "step": 30750 }, { "epoch": 17.668006892590466, "grad_norm": 1.101788878440857, "learning_rate": 3.7333781516516065e-06, "loss": 1.1782, "step": 30760 }, { "epoch": 17.673750717978173, "grad_norm": 0.9679074287414551, "learning_rate": 3.7152647035069634e-06, "loss": 1.1844, "step": 30770 }, { "epoch": 17.67949454336588, "grad_norm": 1.050503134727478, "learning_rate": 3.6971936668786684e-06, "loss": 1.1903, "step": 30780 }, { "epoch": 17.68523836875359, "grad_norm": 0.9870368838310242, "learning_rate": 3.6791650583246427e-06, "loss": 1.195, "step": 30790 }, { "epoch": 17.6909821941413, "grad_norm": 1.0412532091140747, "learning_rate": 3.6611788943639354e-06, "loss": 1.1929, "step": 30800 }, { "epoch": 17.696726019529006, "grad_norm": 1.0213820934295654, "learning_rate": 3.643235191476682e-06, "loss": 1.1702, "step": 30810 }, { "epoch": 17.702469844916713, "grad_norm": 0.9927578568458557, "learning_rate": 3.6253339661041317e-06, "loss": 1.2005, "step": 30820 }, { "epoch": 17.708213670304424, "grad_norm": 1.0043290853500366, "learning_rate": 3.6074752346485976e-06, "loss": 1.1871, "step": 30830 }, { "epoch": 17.71395749569213, "grad_norm": 0.9973768591880798, "learning_rate": 3.5896590134734723e-06, "loss": 1.215, "step": 30840 }, { "epoch": 17.71970132107984, "grad_norm": 1.0821785926818848, "learning_rate": 3.5718853189031967e-06, "loss": 1.1812, "step": 30850 }, { "epoch": 17.725445146467546, "grad_norm": 0.9004652500152588, "learning_rate": 3.5541541672232182e-06, "loss": 1.1897, "step": 30860 }, { "epoch": 17.731188971855257, "grad_norm": 0.9411226511001587, "learning_rate": 3.5364655746800508e-06, "loss": 1.1778, "step": 30870 }, { "epoch": 17.736932797242964, "grad_norm": 1.085516095161438, "learning_rate": 3.5188195574811615e-06, "loss": 1.1797, "step": 30880 }, { "epoch": 17.742676622630672, "grad_norm": 0.8895266056060791, "learning_rate": 3.5012161317950537e-06, "loss": 1.1883, "step": 30890 }, { "epoch": 17.74842044801838, "grad_norm": 1.0511223077774048, "learning_rate": 3.4836553137511787e-06, "loss": 1.1825, "step": 30900 }, { "epoch": 17.75416427340609, "grad_norm": 0.9958426356315613, "learning_rate": 3.4661371194399487e-06, "loss": 1.2005, "step": 30910 }, { "epoch": 17.759908098793797, "grad_norm": 0.9609330892562866, "learning_rate": 3.4486615649127377e-06, "loss": 1.1884, "step": 30920 }, { "epoch": 17.765651924181505, "grad_norm": 0.9471411108970642, "learning_rate": 3.431228666181819e-06, "loss": 1.1998, "step": 30930 }, { "epoch": 17.771395749569212, "grad_norm": 0.9301806688308716, "learning_rate": 3.413838439220422e-06, "loss": 1.1837, "step": 30940 }, { "epoch": 17.777139574956923, "grad_norm": 0.9926958084106445, "learning_rate": 3.3964908999626476e-06, "loss": 1.1943, "step": 30950 }, { "epoch": 17.78288340034463, "grad_norm": 0.8857885599136353, "learning_rate": 3.3791860643034864e-06, "loss": 1.188, "step": 30960 }, { "epoch": 17.788627225732338, "grad_norm": 1.0259875059127808, "learning_rate": 3.36192394809881e-06, "loss": 1.1851, "step": 30970 }, { "epoch": 17.794371051120045, "grad_norm": 0.9533064961433411, "learning_rate": 3.344704567165342e-06, "loss": 1.1813, "step": 30980 }, { "epoch": 17.800114876507756, "grad_norm": 0.9835970401763916, "learning_rate": 3.3275279372806736e-06, "loss": 1.2012, "step": 30990 }, { "epoch": 17.805858701895463, "grad_norm": 1.0173864364624023, "learning_rate": 3.310394074183181e-06, "loss": 1.2093, "step": 31000 }, { "epoch": 17.805858701895463, "eval_loss": 1.0560516119003296, "eval_runtime": 122.3847, "eval_samples_per_second": 13.0, "eval_steps_per_second": 0.139, "eval_wer": 0.0862526839190869, "step": 31000 }, { "epoch": 17.81160252728317, "grad_norm": 1.0129344463348389, "learning_rate": 3.2933029935720725e-06, "loss": 1.2035, "step": 31010 }, { "epoch": 17.817346352670878, "grad_norm": 1.0153981447219849, "learning_rate": 3.276254711107376e-06, "loss": 1.1951, "step": 31020 }, { "epoch": 17.823090178058585, "grad_norm": 0.9164778590202332, "learning_rate": 3.2592492424098743e-06, "loss": 1.1769, "step": 31030 }, { "epoch": 17.828834003446296, "grad_norm": 0.971153974533081, "learning_rate": 3.2422866030611482e-06, "loss": 1.1893, "step": 31040 }, { "epoch": 17.834577828834004, "grad_norm": 0.9555110931396484, "learning_rate": 3.2253668086035185e-06, "loss": 1.1841, "step": 31050 }, { "epoch": 17.84032165422171, "grad_norm": 0.9475175142288208, "learning_rate": 3.208489874540043e-06, "loss": 1.1834, "step": 31060 }, { "epoch": 17.84606547960942, "grad_norm": 1.0406694412231445, "learning_rate": 3.191655816334522e-06, "loss": 1.2058, "step": 31070 }, { "epoch": 17.85180930499713, "grad_norm": 0.9656884670257568, "learning_rate": 3.174864649411473e-06, "loss": 1.1737, "step": 31080 }, { "epoch": 17.857553130384836, "grad_norm": 1.022202968597412, "learning_rate": 3.1581163891561085e-06, "loss": 1.2135, "step": 31090 }, { "epoch": 17.863296955772544, "grad_norm": 1.0151762962341309, "learning_rate": 3.1414110509143176e-06, "loss": 1.1941, "step": 31100 }, { "epoch": 17.86904078116025, "grad_norm": 1.0387190580368042, "learning_rate": 3.124748649992664e-06, "loss": 1.1856, "step": 31110 }, { "epoch": 17.874784606547962, "grad_norm": 0.9732431769371033, "learning_rate": 3.108129201658386e-06, "loss": 1.1797, "step": 31120 }, { "epoch": 17.88052843193567, "grad_norm": 0.9476851224899292, "learning_rate": 3.091552721139342e-06, "loss": 1.1878, "step": 31130 }, { "epoch": 17.886272257323377, "grad_norm": 0.8947543501853943, "learning_rate": 3.0750192236240436e-06, "loss": 1.1827, "step": 31140 }, { "epoch": 17.892016082711084, "grad_norm": 0.9530948400497437, "learning_rate": 3.0585287242615935e-06, "loss": 1.1906, "step": 31150 }, { "epoch": 17.897759908098795, "grad_norm": 0.9605783820152283, "learning_rate": 3.0420812381617147e-06, "loss": 1.2048, "step": 31160 }, { "epoch": 17.903503733486502, "grad_norm": 1.0425201654434204, "learning_rate": 3.0256767803947264e-06, "loss": 1.1886, "step": 31170 }, { "epoch": 17.90924755887421, "grad_norm": 1.1017308235168457, "learning_rate": 3.0093153659914917e-06, "loss": 1.1935, "step": 31180 }, { "epoch": 17.914991384261917, "grad_norm": 1.0700063705444336, "learning_rate": 2.9929970099434685e-06, "loss": 1.1742, "step": 31190 }, { "epoch": 17.920735209649628, "grad_norm": 0.9036211967468262, "learning_rate": 2.976721727202626e-06, "loss": 1.1897, "step": 31200 }, { "epoch": 17.926479035037335, "grad_norm": 0.9064768552780151, "learning_rate": 2.960489532681511e-06, "loss": 1.1783, "step": 31210 }, { "epoch": 17.932222860425043, "grad_norm": 0.963930606842041, "learning_rate": 2.944300441253144e-06, "loss": 1.1909, "step": 31220 }, { "epoch": 17.93796668581275, "grad_norm": 0.9728797078132629, "learning_rate": 2.928154467751077e-06, "loss": 1.1734, "step": 31230 }, { "epoch": 17.94371051120046, "grad_norm": 0.9973131418228149, "learning_rate": 2.9120516269693645e-06, "loss": 1.2156, "step": 31240 }, { "epoch": 17.94945433658817, "grad_norm": 1.0095248222351074, "learning_rate": 2.8959919336625044e-06, "loss": 1.1853, "step": 31250 }, { "epoch": 17.955198161975876, "grad_norm": 1.0700557231903076, "learning_rate": 2.8799754025454895e-06, "loss": 1.1901, "step": 31260 }, { "epoch": 17.960941987363583, "grad_norm": 0.8967266082763672, "learning_rate": 2.864002048293768e-06, "loss": 1.1801, "step": 31270 }, { "epoch": 17.966685812751294, "grad_norm": 0.9925025701522827, "learning_rate": 2.848071885543195e-06, "loss": 1.2058, "step": 31280 }, { "epoch": 17.972429638139, "grad_norm": 0.9694082140922546, "learning_rate": 2.832184928890092e-06, "loss": 1.1764, "step": 31290 }, { "epoch": 17.97817346352671, "grad_norm": 1.002292513847351, "learning_rate": 2.816341192891147e-06, "loss": 1.2049, "step": 31300 }, { "epoch": 17.983917288914416, "grad_norm": 1.0052485466003418, "learning_rate": 2.8005406920634884e-06, "loss": 1.1999, "step": 31310 }, { "epoch": 17.989661114302127, "grad_norm": 1.112763524055481, "learning_rate": 2.784783440884605e-06, "loss": 1.1952, "step": 31320 }, { "epoch": 17.995404939689834, "grad_norm": 1.0258405208587646, "learning_rate": 2.7690694537923527e-06, "loss": 1.1905, "step": 31330 }, { "epoch": 18.00114876507754, "grad_norm": 1.068730115890503, "learning_rate": 2.753398745184966e-06, "loss": 1.1983, "step": 31340 }, { "epoch": 18.00689259046525, "grad_norm": 1.0559611320495605, "learning_rate": 2.7377713294210185e-06, "loss": 1.1908, "step": 31350 }, { "epoch": 18.01263641585296, "grad_norm": 0.9972121119499207, "learning_rate": 2.7221872208194012e-06, "loss": 1.2084, "step": 31360 }, { "epoch": 18.018380241240667, "grad_norm": 1.0247503519058228, "learning_rate": 2.7066464336593493e-06, "loss": 1.1824, "step": 31370 }, { "epoch": 18.024124066628374, "grad_norm": 0.9227665662765503, "learning_rate": 2.6911489821803816e-06, "loss": 1.1857, "step": 31380 }, { "epoch": 18.02986789201608, "grad_norm": 0.8686926960945129, "learning_rate": 2.6756948805823188e-06, "loss": 1.1642, "step": 31390 }, { "epoch": 18.035611717403793, "grad_norm": 0.8699676990509033, "learning_rate": 2.6602841430252627e-06, "loss": 1.1863, "step": 31400 }, { "epoch": 18.0413555427915, "grad_norm": 1.0342296361923218, "learning_rate": 2.6449167836295796e-06, "loss": 1.1892, "step": 31410 }, { "epoch": 18.047099368179207, "grad_norm": 1.0260791778564453, "learning_rate": 2.629592816475895e-06, "loss": 1.1944, "step": 31420 }, { "epoch": 18.052843193566915, "grad_norm": 1.0384284257888794, "learning_rate": 2.614312255605053e-06, "loss": 1.1782, "step": 31430 }, { "epoch": 18.058587018954622, "grad_norm": 0.954505980014801, "learning_rate": 2.599075115018159e-06, "loss": 1.1956, "step": 31440 }, { "epoch": 18.064330844342333, "grad_norm": 0.8944472670555115, "learning_rate": 2.5838814086765183e-06, "loss": 1.1798, "step": 31450 }, { "epoch": 18.07007466973004, "grad_norm": 1.0629847049713135, "learning_rate": 2.5687311505016487e-06, "loss": 1.1888, "step": 31460 }, { "epoch": 18.075818495117748, "grad_norm": 1.10912024974823, "learning_rate": 2.553624354375228e-06, "loss": 1.2027, "step": 31470 }, { "epoch": 18.081562320505455, "grad_norm": 1.0266085863113403, "learning_rate": 2.5385610341391366e-06, "loss": 1.1833, "step": 31480 }, { "epoch": 18.087306145893166, "grad_norm": 0.9426797032356262, "learning_rate": 2.5235412035954266e-06, "loss": 1.1739, "step": 31490 }, { "epoch": 18.093049971280873, "grad_norm": 0.9277395009994507, "learning_rate": 2.5085648765062725e-06, "loss": 1.2078, "step": 31500 }, { "epoch": 18.09879379666858, "grad_norm": 0.9199485182762146, "learning_rate": 2.493632066594017e-06, "loss": 1.191, "step": 31510 }, { "epoch": 18.104537622056288, "grad_norm": 0.9879516363143921, "learning_rate": 2.478742787541107e-06, "loss": 1.1759, "step": 31520 }, { "epoch": 18.110281447444, "grad_norm": 0.9990441799163818, "learning_rate": 2.4638970529901317e-06, "loss": 1.1975, "step": 31530 }, { "epoch": 18.116025272831706, "grad_norm": 0.9551469087600708, "learning_rate": 2.4490948765437397e-06, "loss": 1.1884, "step": 31540 }, { "epoch": 18.121769098219414, "grad_norm": 0.9738582968711853, "learning_rate": 2.4343362717647036e-06, "loss": 1.1935, "step": 31550 }, { "epoch": 18.12751292360712, "grad_norm": 0.9513758420944214, "learning_rate": 2.419621252175874e-06, "loss": 1.1781, "step": 31560 }, { "epoch": 18.13325674899483, "grad_norm": 0.9835777282714844, "learning_rate": 2.404949831260141e-06, "loss": 1.1948, "step": 31570 }, { "epoch": 18.13900057438254, "grad_norm": 0.9798340797424316, "learning_rate": 2.39032202246045e-06, "loss": 1.2007, "step": 31580 }, { "epoch": 18.144744399770246, "grad_norm": 1.0984320640563965, "learning_rate": 2.3757378391798206e-06, "loss": 1.1733, "step": 31590 }, { "epoch": 18.150488225157954, "grad_norm": 0.9663336873054504, "learning_rate": 2.3611972947812452e-06, "loss": 1.199, "step": 31600 }, { "epoch": 18.156232050545665, "grad_norm": 1.0654637813568115, "learning_rate": 2.3467004025877882e-06, "loss": 1.1802, "step": 31610 }, { "epoch": 18.161975875933372, "grad_norm": 0.9697952270507812, "learning_rate": 2.3322471758824715e-06, "loss": 1.1795, "step": 31620 }, { "epoch": 18.16771970132108, "grad_norm": 0.9033987522125244, "learning_rate": 2.3178376279083267e-06, "loss": 1.1708, "step": 31630 }, { "epoch": 18.173463526708787, "grad_norm": 1.0864723920822144, "learning_rate": 2.3034717718683767e-06, "loss": 1.1822, "step": 31640 }, { "epoch": 18.179207352096498, "grad_norm": 0.9678418040275574, "learning_rate": 2.289149620925578e-06, "loss": 1.2006, "step": 31650 }, { "epoch": 18.184951177484205, "grad_norm": 0.9530956149101257, "learning_rate": 2.274871188202877e-06, "loss": 1.1933, "step": 31660 }, { "epoch": 18.190695002871912, "grad_norm": 1.038476586341858, "learning_rate": 2.2606364867831256e-06, "loss": 1.216, "step": 31670 }, { "epoch": 18.19643882825962, "grad_norm": 1.0612119436264038, "learning_rate": 2.2464455297091543e-06, "loss": 1.1968, "step": 31680 }, { "epoch": 18.20218265364733, "grad_norm": 0.9730681777000427, "learning_rate": 2.2322983299836623e-06, "loss": 1.1929, "step": 31690 }, { "epoch": 18.207926479035038, "grad_norm": 0.9385014176368713, "learning_rate": 2.218194900569281e-06, "loss": 1.1701, "step": 31700 }, { "epoch": 18.213670304422745, "grad_norm": 1.0291447639465332, "learning_rate": 2.2041352543885382e-06, "loss": 1.1798, "step": 31710 }, { "epoch": 18.219414129810453, "grad_norm": 1.09242582321167, "learning_rate": 2.190119404323829e-06, "loss": 1.1933, "step": 31720 }, { "epoch": 18.225157955198164, "grad_norm": 1.0085396766662598, "learning_rate": 2.176147363217443e-06, "loss": 1.1997, "step": 31730 }, { "epoch": 18.23090178058587, "grad_norm": 0.8830893635749817, "learning_rate": 2.1622191438715104e-06, "loss": 1.1944, "step": 31740 }, { "epoch": 18.236645605973578, "grad_norm": 0.9416563510894775, "learning_rate": 2.148334759048006e-06, "loss": 1.2072, "step": 31750 }, { "epoch": 18.242389431361286, "grad_norm": 0.9617279171943665, "learning_rate": 2.1344942214687613e-06, "loss": 1.1861, "step": 31760 }, { "epoch": 18.248133256748996, "grad_norm": 1.0335792303085327, "learning_rate": 2.1206975438154094e-06, "loss": 1.1968, "step": 31770 }, { "epoch": 18.253877082136704, "grad_norm": 0.9486597776412964, "learning_rate": 2.1069447387294097e-06, "loss": 1.2038, "step": 31780 }, { "epoch": 18.25962090752441, "grad_norm": 0.8976007699966431, "learning_rate": 2.093235818812025e-06, "loss": 1.1784, "step": 31790 }, { "epoch": 18.26536473291212, "grad_norm": 0.9666115045547485, "learning_rate": 2.0795707966242835e-06, "loss": 1.1958, "step": 31800 }, { "epoch": 18.27110855829983, "grad_norm": 0.9059499502182007, "learning_rate": 2.065949684687016e-06, "loss": 1.1718, "step": 31810 }, { "epoch": 18.276852383687537, "grad_norm": 0.962051272392273, "learning_rate": 2.052372495480825e-06, "loss": 1.1764, "step": 31820 }, { "epoch": 18.282596209075244, "grad_norm": 0.9237242937088013, "learning_rate": 2.0388392414460486e-06, "loss": 1.1902, "step": 31830 }, { "epoch": 18.28834003446295, "grad_norm": 1.0864202976226807, "learning_rate": 2.0253499349827687e-06, "loss": 1.1695, "step": 31840 }, { "epoch": 18.29408385985066, "grad_norm": 1.0745251178741455, "learning_rate": 2.0119045884508137e-06, "loss": 1.1962, "step": 31850 }, { "epoch": 18.29982768523837, "grad_norm": 0.9637471437454224, "learning_rate": 1.9985032141697234e-06, "loss": 1.1918, "step": 31860 }, { "epoch": 18.305571510626077, "grad_norm": 1.0692942142486572, "learning_rate": 1.9851458244187443e-06, "loss": 1.1962, "step": 31870 }, { "epoch": 18.311315336013784, "grad_norm": 0.9761235117912292, "learning_rate": 1.9718324314368356e-06, "loss": 1.1932, "step": 31880 }, { "epoch": 18.31705916140149, "grad_norm": 1.0368660688400269, "learning_rate": 1.958563047422633e-06, "loss": 1.2043, "step": 31890 }, { "epoch": 18.322802986789203, "grad_norm": 0.9934616684913635, "learning_rate": 1.945337684534437e-06, "loss": 1.1948, "step": 31900 }, { "epoch": 18.32854681217691, "grad_norm": 0.9699981808662415, "learning_rate": 1.9321563548902415e-06, "loss": 1.207, "step": 31910 }, { "epoch": 18.334290637564617, "grad_norm": 0.9280322790145874, "learning_rate": 1.919019070567665e-06, "loss": 1.1881, "step": 31920 }, { "epoch": 18.340034462952325, "grad_norm": 0.9402063488960266, "learning_rate": 1.905925843603993e-06, "loss": 1.1888, "step": 31930 }, { "epoch": 18.345778288340036, "grad_norm": 0.9985532760620117, "learning_rate": 1.8928766859961331e-06, "loss": 1.183, "step": 31940 }, { "epoch": 18.351522113727743, "grad_norm": 0.8984982967376709, "learning_rate": 1.8798716097005962e-06, "loss": 1.1876, "step": 31950 }, { "epoch": 18.35726593911545, "grad_norm": 0.9172433614730835, "learning_rate": 1.866910626633531e-06, "loss": 1.181, "step": 31960 }, { "epoch": 18.363009764503158, "grad_norm": 1.0557652711868286, "learning_rate": 1.8539937486706664e-06, "loss": 1.1921, "step": 31970 }, { "epoch": 18.36875358989087, "grad_norm": 0.9710733294487, "learning_rate": 1.8411209876473316e-06, "loss": 1.189, "step": 31980 }, { "epoch": 18.374497415278576, "grad_norm": 0.9865237474441528, "learning_rate": 1.828292355358423e-06, "loss": 1.1945, "step": 31990 }, { "epoch": 18.380241240666283, "grad_norm": 1.0615195035934448, "learning_rate": 1.8155078635584063e-06, "loss": 1.1795, "step": 32000 }, { "epoch": 18.380241240666283, "eval_loss": 1.0559861660003662, "eval_runtime": 122.2621, "eval_samples_per_second": 13.013, "eval_steps_per_second": 0.139, "eval_wer": 0.08588541078087919, "step": 32000 }, { "epoch": 18.38598506605399, "grad_norm": 0.9324732422828674, "learning_rate": 1.802767523961308e-06, "loss": 1.1753, "step": 32010 }, { "epoch": 18.3917288914417, "grad_norm": 1.055492877960205, "learning_rate": 1.7900713482406836e-06, "loss": 1.1895, "step": 32020 }, { "epoch": 18.39747271682941, "grad_norm": 1.0246851444244385, "learning_rate": 1.7774193480296508e-06, "loss": 1.1777, "step": 32030 }, { "epoch": 18.403216542217116, "grad_norm": 0.9835911393165588, "learning_rate": 1.7648115349208183e-06, "loss": 1.183, "step": 32040 }, { "epoch": 18.408960367604823, "grad_norm": 1.0278693437576294, "learning_rate": 1.7522479204663333e-06, "loss": 1.1819, "step": 32050 }, { "epoch": 18.414704192992534, "grad_norm": 1.0746480226516724, "learning_rate": 1.7397285161778282e-06, "loss": 1.1835, "step": 32060 }, { "epoch": 18.42044801838024, "grad_norm": 0.9996069073677063, "learning_rate": 1.7272533335264362e-06, "loss": 1.1732, "step": 32070 }, { "epoch": 18.42619184376795, "grad_norm": 0.9989613890647888, "learning_rate": 1.7148223839427695e-06, "loss": 1.1926, "step": 32080 }, { "epoch": 18.431935669155656, "grad_norm": 1.0255465507507324, "learning_rate": 1.7024356788169027e-06, "loss": 1.1912, "step": 32090 }, { "epoch": 18.437679494543367, "grad_norm": 0.9210498929023743, "learning_rate": 1.6900932294983836e-06, "loss": 1.1802, "step": 32100 }, { "epoch": 18.443423319931075, "grad_norm": 1.0370471477508545, "learning_rate": 1.6777950472962167e-06, "loss": 1.2272, "step": 32110 }, { "epoch": 18.449167145318782, "grad_norm": 0.8572918772697449, "learning_rate": 1.6655411434788132e-06, "loss": 1.157, "step": 32120 }, { "epoch": 18.45491097070649, "grad_norm": 1.078244686126709, "learning_rate": 1.6533315292740461e-06, "loss": 1.1988, "step": 32130 }, { "epoch": 18.4606547960942, "grad_norm": 1.0443696975708008, "learning_rate": 1.641166215869196e-06, "loss": 1.1818, "step": 32140 }, { "epoch": 18.466398621481908, "grad_norm": 0.9752913117408752, "learning_rate": 1.629045214410944e-06, "loss": 1.1871, "step": 32150 }, { "epoch": 18.472142446869615, "grad_norm": 0.9803406000137329, "learning_rate": 1.6169685360053896e-06, "loss": 1.1836, "step": 32160 }, { "epoch": 18.477886272257322, "grad_norm": 0.9486103057861328, "learning_rate": 1.6049361917179883e-06, "loss": 1.171, "step": 32170 }, { "epoch": 18.483630097645033, "grad_norm": 1.0817418098449707, "learning_rate": 1.5929481925736087e-06, "loss": 1.217, "step": 32180 }, { "epoch": 18.48937392303274, "grad_norm": 1.0366880893707275, "learning_rate": 1.5810045495564643e-06, "loss": 1.1813, "step": 32190 }, { "epoch": 18.495117748420448, "grad_norm": 0.9360283613204956, "learning_rate": 1.5691052736101425e-06, "loss": 1.1788, "step": 32200 }, { "epoch": 18.500861573808155, "grad_norm": 1.003732442855835, "learning_rate": 1.557250375637565e-06, "loss": 1.18, "step": 32210 }, { "epoch": 18.506605399195866, "grad_norm": 1.027153730392456, "learning_rate": 1.5454398665009885e-06, "loss": 1.1881, "step": 32220 }, { "epoch": 18.512349224583573, "grad_norm": 0.9948772192001343, "learning_rate": 1.5336737570220205e-06, "loss": 1.1785, "step": 32230 }, { "epoch": 18.51809304997128, "grad_norm": 0.9291689991950989, "learning_rate": 1.521952057981559e-06, "loss": 1.208, "step": 32240 }, { "epoch": 18.523836875358988, "grad_norm": 1.1291691064834595, "learning_rate": 1.5102747801198303e-06, "loss": 1.1772, "step": 32250 }, { "epoch": 18.5295807007467, "grad_norm": 0.9160462617874146, "learning_rate": 1.498641934136352e-06, "loss": 1.1735, "step": 32260 }, { "epoch": 18.535324526134406, "grad_norm": 0.9377362132072449, "learning_rate": 1.4870535306899193e-06, "loss": 1.2046, "step": 32270 }, { "epoch": 18.541068351522114, "grad_norm": 0.9862871170043945, "learning_rate": 1.4755095803986246e-06, "loss": 1.1899, "step": 32280 }, { "epoch": 18.54681217690982, "grad_norm": 0.9550046324729919, "learning_rate": 1.4640100938398162e-06, "loss": 1.192, "step": 32290 }, { "epoch": 18.55255600229753, "grad_norm": 1.0236836671829224, "learning_rate": 1.4525550815501215e-06, "loss": 1.1963, "step": 32300 }, { "epoch": 18.55829982768524, "grad_norm": 1.028032660484314, "learning_rate": 1.4411445540253867e-06, "loss": 1.2119, "step": 32310 }, { "epoch": 18.564043653072947, "grad_norm": 0.9778861403465271, "learning_rate": 1.4297785217207136e-06, "loss": 1.1742, "step": 32320 }, { "epoch": 18.569787478460654, "grad_norm": 0.9274613261222839, "learning_rate": 1.4184569950504512e-06, "loss": 1.1922, "step": 32330 }, { "epoch": 18.57553130384836, "grad_norm": 0.9358683228492737, "learning_rate": 1.4071799843881318e-06, "loss": 1.1774, "step": 32340 }, { "epoch": 18.581275129236072, "grad_norm": 0.9302921891212463, "learning_rate": 1.3959475000665397e-06, "loss": 1.1746, "step": 32350 }, { "epoch": 18.58701895462378, "grad_norm": 0.9335759878158569, "learning_rate": 1.3847595523776216e-06, "loss": 1.17, "step": 32360 }, { "epoch": 18.592762780011487, "grad_norm": 1.0395796298980713, "learning_rate": 1.3736161515725535e-06, "loss": 1.1884, "step": 32370 }, { "epoch": 18.598506605399194, "grad_norm": 0.9471568465232849, "learning_rate": 1.3625173078616738e-06, "loss": 1.1852, "step": 32380 }, { "epoch": 18.604250430786905, "grad_norm": 0.919293224811554, "learning_rate": 1.351463031414494e-06, "loss": 1.193, "step": 32390 }, { "epoch": 18.609994256174613, "grad_norm": 0.9507539868354797, "learning_rate": 1.3404533323596998e-06, "loss": 1.1889, "step": 32400 }, { "epoch": 18.61573808156232, "grad_norm": 0.9733325839042664, "learning_rate": 1.329488220785128e-06, "loss": 1.1826, "step": 32410 }, { "epoch": 18.621481906950027, "grad_norm": 0.9237587451934814, "learning_rate": 1.3185677067377502e-06, "loss": 1.1923, "step": 32420 }, { "epoch": 18.627225732337738, "grad_norm": 1.0180895328521729, "learning_rate": 1.3076918002237004e-06, "loss": 1.2273, "step": 32430 }, { "epoch": 18.632969557725445, "grad_norm": 0.9843405485153198, "learning_rate": 1.2968605112082086e-06, "loss": 1.1925, "step": 32440 }, { "epoch": 18.638713383113153, "grad_norm": 1.0176849365234375, "learning_rate": 1.2860738496156563e-06, "loss": 1.1794, "step": 32450 }, { "epoch": 18.64445720850086, "grad_norm": 0.9655718207359314, "learning_rate": 1.2753318253294982e-06, "loss": 1.1787, "step": 32460 }, { "epoch": 18.65020103388857, "grad_norm": 0.9129777550697327, "learning_rate": 1.264634448192319e-06, "loss": 1.1795, "step": 32470 }, { "epoch": 18.65594485927628, "grad_norm": 1.025109887123108, "learning_rate": 1.2539817280057926e-06, "loss": 1.2009, "step": 32480 }, { "epoch": 18.661688684663986, "grad_norm": 0.9512131214141846, "learning_rate": 1.2433736745306454e-06, "loss": 1.2032, "step": 32490 }, { "epoch": 18.667432510051693, "grad_norm": 1.0156642198562622, "learning_rate": 1.2328102974867215e-06, "loss": 1.1762, "step": 32500 }, { "epoch": 18.673176335439404, "grad_norm": 0.9579278826713562, "learning_rate": 1.222291606552883e-06, "loss": 1.1854, "step": 32510 }, { "epoch": 18.67892016082711, "grad_norm": 1.0485162734985352, "learning_rate": 1.2118176113670935e-06, "loss": 1.1762, "step": 32520 }, { "epoch": 18.68466398621482, "grad_norm": 1.087209701538086, "learning_rate": 1.201388321526324e-06, "loss": 1.1946, "step": 32530 }, { "epoch": 18.690407811602526, "grad_norm": 0.9611390233039856, "learning_rate": 1.191003746586602e-06, "loss": 1.1928, "step": 32540 }, { "epoch": 18.696151636990237, "grad_norm": 1.0697816610336304, "learning_rate": 1.1806638960629846e-06, "loss": 1.1799, "step": 32550 }, { "epoch": 18.701895462377944, "grad_norm": 0.9938270449638367, "learning_rate": 1.1703687794295473e-06, "loss": 1.1836, "step": 32560 }, { "epoch": 18.70763928776565, "grad_norm": 0.9449156522750854, "learning_rate": 1.160118406119383e-06, "loss": 1.1621, "step": 32570 }, { "epoch": 18.71338311315336, "grad_norm": 1.0239744186401367, "learning_rate": 1.1499127855245757e-06, "loss": 1.1878, "step": 32580 }, { "epoch": 18.71912693854107, "grad_norm": 1.0607497692108154, "learning_rate": 1.1397519269962052e-06, "loss": 1.1806, "step": 32590 }, { "epoch": 18.724870763928777, "grad_norm": 0.9377030730247498, "learning_rate": 1.1296358398443468e-06, "loss": 1.1927, "step": 32600 }, { "epoch": 18.730614589316485, "grad_norm": 0.9128755331039429, "learning_rate": 1.1195645333380452e-06, "loss": 1.1766, "step": 32610 }, { "epoch": 18.736358414704192, "grad_norm": 0.9334362149238586, "learning_rate": 1.1095380167053283e-06, "loss": 1.1831, "step": 32620 }, { "epoch": 18.742102240091903, "grad_norm": 0.9193927645683289, "learning_rate": 1.0995562991331604e-06, "loss": 1.1927, "step": 32630 }, { "epoch": 18.74784606547961, "grad_norm": 0.9001800417900085, "learning_rate": 1.089619389767473e-06, "loss": 1.1877, "step": 32640 }, { "epoch": 18.753589890867318, "grad_norm": 0.9730002880096436, "learning_rate": 1.0797272977131387e-06, "loss": 1.1864, "step": 32650 }, { "epoch": 18.759333716255025, "grad_norm": 0.9310572147369385, "learning_rate": 1.06988003203397e-06, "loss": 1.1819, "step": 32660 }, { "epoch": 18.765077541642736, "grad_norm": 0.9590122699737549, "learning_rate": 1.060077601752704e-06, "loss": 1.1957, "step": 32670 }, { "epoch": 18.770821367030443, "grad_norm": 1.3960466384887695, "learning_rate": 1.0503200158509892e-06, "loss": 1.1865, "step": 32680 }, { "epoch": 18.77656519241815, "grad_norm": 0.9677202105522156, "learning_rate": 1.0406072832693883e-06, "loss": 1.1931, "step": 32690 }, { "epoch": 18.782309017805858, "grad_norm": 0.9956695437431335, "learning_rate": 1.0309394129073758e-06, "loss": 1.2012, "step": 32700 }, { "epoch": 18.78805284319357, "grad_norm": 1.0677580833435059, "learning_rate": 1.0213164136233057e-06, "loss": 1.1648, "step": 32710 }, { "epoch": 18.793796668581276, "grad_norm": 1.025475025177002, "learning_rate": 1.011738294234428e-06, "loss": 1.1991, "step": 32720 }, { "epoch": 18.799540493968983, "grad_norm": 1.043155312538147, "learning_rate": 1.002205063516867e-06, "loss": 1.1715, "step": 32730 }, { "epoch": 18.80528431935669, "grad_norm": 0.9822032451629639, "learning_rate": 9.927167302056206e-07, "loss": 1.1792, "step": 32740 }, { "epoch": 18.811028144744398, "grad_norm": 0.9842929244041443, "learning_rate": 9.832733029945434e-07, "loss": 1.1938, "step": 32750 }, { "epoch": 18.81677197013211, "grad_norm": 1.0182658433914185, "learning_rate": 9.738747905363475e-07, "loss": 1.2057, "step": 32760 }, { "epoch": 18.822515795519816, "grad_norm": 0.9370742440223694, "learning_rate": 9.645212014425863e-07, "loss": 1.1899, "step": 32770 }, { "epoch": 18.828259620907524, "grad_norm": 1.0303369760513306, "learning_rate": 9.552125442836639e-07, "loss": 1.1832, "step": 32780 }, { "epoch": 18.83400344629523, "grad_norm": 0.9194797873497009, "learning_rate": 9.459488275887919e-07, "loss": 1.1831, "step": 32790 }, { "epoch": 18.839747271682942, "grad_norm": 0.9884278178215027, "learning_rate": 9.367300598460334e-07, "loss": 1.175, "step": 32800 }, { "epoch": 18.84549109707065, "grad_norm": 0.9763996601104736, "learning_rate": 9.275562495022369e-07, "loss": 1.1639, "step": 32810 }, { "epoch": 18.851234922458357, "grad_norm": 0.9939215779304504, "learning_rate": 9.184274049630856e-07, "loss": 1.1939, "step": 32820 }, { "epoch": 18.856978747846064, "grad_norm": 1.0694992542266846, "learning_rate": 9.093435345930311e-07, "loss": 1.1871, "step": 32830 }, { "epoch": 18.862722573233775, "grad_norm": 0.9541735053062439, "learning_rate": 9.003046467153492e-07, "loss": 1.1876, "step": 32840 }, { "epoch": 18.868466398621482, "grad_norm": 0.9616324305534363, "learning_rate": 8.913107496120836e-07, "loss": 1.1885, "step": 32850 }, { "epoch": 18.87421022400919, "grad_norm": 1.0270761251449585, "learning_rate": 8.823618515240467e-07, "loss": 1.2023, "step": 32860 }, { "epoch": 18.879954049396897, "grad_norm": 0.9661944508552551, "learning_rate": 8.734579606508359e-07, "loss": 1.1777, "step": 32870 }, { "epoch": 18.885697874784608, "grad_norm": 0.9591684341430664, "learning_rate": 8.645990851507945e-07, "loss": 1.1776, "step": 32880 }, { "epoch": 18.891441700172315, "grad_norm": 1.0537124872207642, "learning_rate": 8.557852331410345e-07, "loss": 1.1814, "step": 32890 }, { "epoch": 18.897185525560023, "grad_norm": 0.8942594528198242, "learning_rate": 8.470164126974029e-07, "loss": 1.1817, "step": 32900 }, { "epoch": 18.90292935094773, "grad_norm": 0.9840949773788452, "learning_rate": 8.382926318544929e-07, "loss": 1.1912, "step": 32910 }, { "epoch": 18.90867317633544, "grad_norm": 0.9735470414161682, "learning_rate": 8.296138986056215e-07, "loss": 1.1892, "step": 32920 }, { "epoch": 18.914417001723148, "grad_norm": 1.0625840425491333, "learning_rate": 8.209802209028356e-07, "loss": 1.1832, "step": 32930 }, { "epoch": 18.920160827110855, "grad_norm": 0.9278374910354614, "learning_rate": 8.123916066569109e-07, "loss": 1.185, "step": 32940 }, { "epoch": 18.925904652498563, "grad_norm": 0.9827959537506104, "learning_rate": 8.038480637373089e-07, "loss": 1.1968, "step": 32950 }, { "epoch": 18.931648477886274, "grad_norm": 1.1904926300048828, "learning_rate": 7.953495999722039e-07, "loss": 1.1976, "step": 32960 }, { "epoch": 18.93739230327398, "grad_norm": 0.9505891799926758, "learning_rate": 7.868962231484717e-07, "loss": 1.2011, "step": 32970 }, { "epoch": 18.94313612866169, "grad_norm": 0.9961323738098145, "learning_rate": 7.784879410116677e-07, "loss": 1.1795, "step": 32980 }, { "epoch": 18.948879954049396, "grad_norm": 0.9580272436141968, "learning_rate": 7.701247612660436e-07, "loss": 1.1804, "step": 32990 }, { "epoch": 18.954623779437107, "grad_norm": 0.8720689415931702, "learning_rate": 7.61806691574503e-07, "loss": 1.1807, "step": 33000 }, { "epoch": 18.954623779437107, "eval_loss": 1.0560623407363892, "eval_runtime": 121.4757, "eval_samples_per_second": 13.097, "eval_steps_per_second": 0.14, "eval_wer": 0.08574415188156854, "step": 33000 }, { "epoch": 18.960367604824814, "grad_norm": 1.044963002204895, "learning_rate": 7.535337395586235e-07, "loss": 1.1946, "step": 33010 }, { "epoch": 18.96611143021252, "grad_norm": 0.8906787037849426, "learning_rate": 7.453059127986563e-07, "loss": 1.1666, "step": 33020 }, { "epoch": 18.97185525560023, "grad_norm": 1.0831100940704346, "learning_rate": 7.37123218833494e-07, "loss": 1.1744, "step": 33030 }, { "epoch": 18.97759908098794, "grad_norm": 0.9545280337333679, "learning_rate": 7.289856651606806e-07, "loss": 1.1814, "step": 33040 }, { "epoch": 18.983342906375647, "grad_norm": 1.0840380191802979, "learning_rate": 7.208932592363951e-07, "loss": 1.1851, "step": 33050 }, { "epoch": 18.989086731763354, "grad_norm": 0.9227370619773865, "learning_rate": 7.128460084754465e-07, "loss": 1.1579, "step": 33060 }, { "epoch": 18.99483055715106, "grad_norm": 0.9979352355003357, "learning_rate": 7.048439202512788e-07, "loss": 1.1892, "step": 33070 }, { "epoch": 19.000574382538773, "grad_norm": 0.9541153311729431, "learning_rate": 6.968870018959487e-07, "loss": 1.1995, "step": 33080 }, { "epoch": 19.00631820792648, "grad_norm": 0.9431100487709045, "learning_rate": 6.889752607001263e-07, "loss": 1.1864, "step": 33090 }, { "epoch": 19.012062033314187, "grad_norm": 0.9281498789787292, "learning_rate": 6.811087039130835e-07, "loss": 1.1761, "step": 33100 }, { "epoch": 19.017805858701895, "grad_norm": 0.8648608922958374, "learning_rate": 6.732873387426991e-07, "loss": 1.1613, "step": 33110 }, { "epoch": 19.023549684089605, "grad_norm": 1.0289045572280884, "learning_rate": 6.655111723554488e-07, "loss": 1.1939, "step": 33120 }, { "epoch": 19.029293509477313, "grad_norm": 0.964759111404419, "learning_rate": 6.57780211876371e-07, "loss": 1.1727, "step": 33130 }, { "epoch": 19.03503733486502, "grad_norm": 1.0692437887191772, "learning_rate": 6.500944643891058e-07, "loss": 1.1965, "step": 33140 }, { "epoch": 19.040781160252727, "grad_norm": 0.9712770581245422, "learning_rate": 6.424539369358568e-07, "loss": 1.186, "step": 33150 }, { "epoch": 19.046524985640435, "grad_norm": 1.029645323753357, "learning_rate": 6.348586365173956e-07, "loss": 1.1949, "step": 33160 }, { "epoch": 19.052268811028146, "grad_norm": 0.9500347971916199, "learning_rate": 6.273085700930517e-07, "loss": 1.2041, "step": 33170 }, { "epoch": 19.058012636415853, "grad_norm": 0.9927458167076111, "learning_rate": 6.198037445807118e-07, "loss": 1.1813, "step": 33180 }, { "epoch": 19.06375646180356, "grad_norm": 1.0557481050491333, "learning_rate": 6.123441668568088e-07, "loss": 1.1713, "step": 33190 }, { "epoch": 19.069500287191268, "grad_norm": 1.0194036960601807, "learning_rate": 6.049298437563168e-07, "loss": 1.2059, "step": 33200 }, { "epoch": 19.07524411257898, "grad_norm": 1.1165223121643066, "learning_rate": 5.975607820727337e-07, "loss": 1.1889, "step": 33210 }, { "epoch": 19.080987937966686, "grad_norm": 1.0386104583740234, "learning_rate": 5.902369885581151e-07, "loss": 1.1689, "step": 33220 }, { "epoch": 19.086731763354393, "grad_norm": 1.050809383392334, "learning_rate": 5.829584699229959e-07, "loss": 1.2036, "step": 33230 }, { "epoch": 19.0924755887421, "grad_norm": 0.9038121104240417, "learning_rate": 5.757252328364692e-07, "loss": 1.1688, "step": 33240 }, { "epoch": 19.09821941412981, "grad_norm": 0.9542014002799988, "learning_rate": 5.685372839261126e-07, "loss": 1.192, "step": 33250 }, { "epoch": 19.10396323951752, "grad_norm": 0.9695626497268677, "learning_rate": 5.613946297780116e-07, "loss": 1.1922, "step": 33260 }, { "epoch": 19.109707064905226, "grad_norm": 0.9080055356025696, "learning_rate": 5.542972769367536e-07, "loss": 1.1604, "step": 33270 }, { "epoch": 19.115450890292934, "grad_norm": 0.9846060276031494, "learning_rate": 5.472452319054169e-07, "loss": 1.1811, "step": 33280 }, { "epoch": 19.121194715680645, "grad_norm": 0.944907009601593, "learning_rate": 5.402385011455648e-07, "loss": 1.1793, "step": 33290 }, { "epoch": 19.126938541068352, "grad_norm": 0.9618197083473206, "learning_rate": 5.332770910772406e-07, "loss": 1.2035, "step": 33300 }, { "epoch": 19.13268236645606, "grad_norm": 0.9726974368095398, "learning_rate": 5.263610080789673e-07, "loss": 1.2093, "step": 33310 }, { "epoch": 19.138426191843767, "grad_norm": 0.9449639916419983, "learning_rate": 5.194902584877253e-07, "loss": 1.1847, "step": 33320 }, { "epoch": 19.144170017231477, "grad_norm": 1.0320802927017212, "learning_rate": 5.126648485989637e-07, "loss": 1.187, "step": 33330 }, { "epoch": 19.149913842619185, "grad_norm": 0.9300134778022766, "learning_rate": 5.058847846665949e-07, "loss": 1.162, "step": 33340 }, { "epoch": 19.155657668006892, "grad_norm": 1.0697548389434814, "learning_rate": 4.991500729029606e-07, "loss": 1.1888, "step": 33350 }, { "epoch": 19.1614014933946, "grad_norm": 0.9973644614219666, "learning_rate": 4.924607194788773e-07, "loss": 1.1956, "step": 33360 }, { "epoch": 19.16714531878231, "grad_norm": 0.9632745981216431, "learning_rate": 4.858167305235796e-07, "loss": 1.1702, "step": 33370 }, { "epoch": 19.172889144170018, "grad_norm": 1.0530446767807007, "learning_rate": 4.792181121247377e-07, "loss": 1.2096, "step": 33380 }, { "epoch": 19.178632969557725, "grad_norm": 0.9524180889129639, "learning_rate": 4.726648703284571e-07, "loss": 1.1903, "step": 33390 }, { "epoch": 19.184376794945432, "grad_norm": 0.9242413640022278, "learning_rate": 4.6615701113927323e-07, "loss": 1.1768, "step": 33400 }, { "epoch": 19.190120620333143, "grad_norm": 0.953406810760498, "learning_rate": 4.596945405201232e-07, "loss": 1.1645, "step": 33410 }, { "epoch": 19.19586444572085, "grad_norm": 0.8971442580223083, "learning_rate": 4.532774643923575e-07, "loss": 1.1734, "step": 33420 }, { "epoch": 19.201608271108558, "grad_norm": 1.0064551830291748, "learning_rate": 4.4690578863574533e-07, "loss": 1.1926, "step": 33430 }, { "epoch": 19.207352096496265, "grad_norm": 0.8886227607727051, "learning_rate": 4.405795190884521e-07, "loss": 1.1982, "step": 33440 }, { "epoch": 19.213095921883976, "grad_norm": 1.0023388862609863, "learning_rate": 4.342986615470288e-07, "loss": 1.1886, "step": 33450 }, { "epoch": 19.218839747271684, "grad_norm": 0.9755746126174927, "learning_rate": 4.280632217664339e-07, "loss": 1.1883, "step": 33460 }, { "epoch": 19.22458357265939, "grad_norm": 1.0072139501571655, "learning_rate": 4.2187320545998927e-07, "loss": 1.1807, "step": 33470 }, { "epoch": 19.2303273980471, "grad_norm": 0.8797051310539246, "learning_rate": 4.157286182994184e-07, "loss": 1.1776, "step": 33480 }, { "epoch": 19.23607122343481, "grad_norm": 0.9377007484436035, "learning_rate": 4.096294659148083e-07, "loss": 1.1787, "step": 33490 }, { "epoch": 19.241815048822517, "grad_norm": 0.9638490080833435, "learning_rate": 4.0357575389461456e-07, "loss": 1.1689, "step": 33500 }, { "epoch": 19.247558874210224, "grad_norm": 1.0174905061721802, "learning_rate": 3.9756748778566697e-07, "loss": 1.203, "step": 33510 }, { "epoch": 19.25330269959793, "grad_norm": 0.9596717953681946, "learning_rate": 3.916046730931476e-07, "loss": 1.1617, "step": 33520 }, { "epoch": 19.259046524985642, "grad_norm": 1.1768429279327393, "learning_rate": 3.8568731528058465e-07, "loss": 1.1684, "step": 33530 }, { "epoch": 19.26479035037335, "grad_norm": 0.9234669804573059, "learning_rate": 3.798154197698699e-07, "loss": 1.1819, "step": 33540 }, { "epoch": 19.270534175761057, "grad_norm": 1.0348083972930908, "learning_rate": 3.7398899194123595e-07, "loss": 1.2102, "step": 33550 }, { "epoch": 19.276278001148764, "grad_norm": 0.897323727607727, "learning_rate": 3.682080371332507e-07, "loss": 1.1797, "step": 33560 }, { "epoch": 19.28202182653647, "grad_norm": 0.9276246428489685, "learning_rate": 3.624725606428176e-07, "loss": 1.1832, "step": 33570 }, { "epoch": 19.287765651924182, "grad_norm": 0.9449304342269897, "learning_rate": 3.567825677251644e-07, "loss": 1.1661, "step": 33580 }, { "epoch": 19.29350947731189, "grad_norm": 0.9621635675430298, "learning_rate": 3.5113806359386514e-07, "loss": 1.1824, "step": 33590 }, { "epoch": 19.299253302699597, "grad_norm": 1.0164074897766113, "learning_rate": 3.455390534207853e-07, "loss": 1.1732, "step": 33600 }, { "epoch": 19.304997128087305, "grad_norm": 0.8767728805541992, "learning_rate": 3.3998554233613093e-07, "loss": 1.1966, "step": 33610 }, { "epoch": 19.310740953475015, "grad_norm": 1.0204222202301025, "learning_rate": 3.344775354283937e-07, "loss": 1.2068, "step": 33620 }, { "epoch": 19.316484778862723, "grad_norm": 0.8729372620582581, "learning_rate": 3.2901503774439517e-07, "loss": 1.188, "step": 33630 }, { "epoch": 19.32222860425043, "grad_norm": 0.9593812823295593, "learning_rate": 3.2359805428924226e-07, "loss": 1.1775, "step": 33640 }, { "epoch": 19.327972429638137, "grad_norm": 0.9696235656738281, "learning_rate": 3.182265900263442e-07, "loss": 1.1824, "step": 33650 }, { "epoch": 19.33371625502585, "grad_norm": 0.932036817073822, "learning_rate": 3.1290064987740636e-07, "loss": 1.2054, "step": 33660 }, { "epoch": 19.339460080413556, "grad_norm": 0.8533786535263062, "learning_rate": 3.0762023872240895e-07, "loss": 1.1778, "step": 33670 }, { "epoch": 19.345203905801263, "grad_norm": 0.948656439781189, "learning_rate": 3.02385361399634e-07, "loss": 1.1983, "step": 33680 }, { "epoch": 19.35094773118897, "grad_norm": 0.9859423637390137, "learning_rate": 2.971960227056324e-07, "loss": 1.1757, "step": 33690 }, { "epoch": 19.35669155657668, "grad_norm": 1.0002000331878662, "learning_rate": 2.920522273952183e-07, "loss": 1.1787, "step": 33700 }, { "epoch": 19.36243538196439, "grad_norm": 0.9671477675437927, "learning_rate": 2.869539801815025e-07, "loss": 1.2028, "step": 33710 }, { "epoch": 19.368179207352096, "grad_norm": 0.9760408401489258, "learning_rate": 2.8190128573583103e-07, "loss": 1.1997, "step": 33720 }, { "epoch": 19.373923032739803, "grad_norm": 0.9765370488166809, "learning_rate": 2.7689414868783575e-07, "loss": 1.1696, "step": 33730 }, { "epoch": 19.379666858127514, "grad_norm": 0.9645829796791077, "learning_rate": 2.719325736254004e-07, "loss": 1.1799, "step": 33740 }, { "epoch": 19.38541068351522, "grad_norm": 0.9811045527458191, "learning_rate": 2.6701656509464423e-07, "loss": 1.1907, "step": 33750 }, { "epoch": 19.39115450890293, "grad_norm": 0.9414727091789246, "learning_rate": 2.6214612759995543e-07, "loss": 1.197, "step": 33760 }, { "epoch": 19.396898334290636, "grad_norm": 0.9492089748382568, "learning_rate": 2.5732126560396876e-07, "loss": 1.1892, "step": 33770 }, { "epoch": 19.402642159678347, "grad_norm": 0.9696224927902222, "learning_rate": 2.5254198352754324e-07, "loss": 1.1663, "step": 33780 }, { "epoch": 19.408385985066055, "grad_norm": 0.9296945333480835, "learning_rate": 2.478082857497791e-07, "loss": 1.18, "step": 33790 }, { "epoch": 19.414129810453762, "grad_norm": 0.9570572376251221, "learning_rate": 2.4312017660802304e-07, "loss": 1.1854, "step": 33800 }, { "epoch": 19.41987363584147, "grad_norm": 0.9428401589393616, "learning_rate": 2.384776603978296e-07, "loss": 1.1866, "step": 33810 }, { "epoch": 19.42561746122918, "grad_norm": 1.0214594602584839, "learning_rate": 2.3388074137298883e-07, "loss": 1.2008, "step": 33820 }, { "epoch": 19.431361286616887, "grad_norm": 1.0670970678329468, "learning_rate": 2.2932942374552058e-07, "loss": 1.1983, "step": 33830 }, { "epoch": 19.437105112004595, "grad_norm": 1.0117735862731934, "learning_rate": 2.2482371168564155e-07, "loss": 1.1874, "step": 33840 }, { "epoch": 19.442848937392302, "grad_norm": 0.9729011058807373, "learning_rate": 2.2036360932180382e-07, "loss": 1.1794, "step": 33850 }, { "epoch": 19.448592762780013, "grad_norm": 1.0010849237442017, "learning_rate": 2.1594912074063937e-07, "loss": 1.1875, "step": 33860 }, { "epoch": 19.45433658816772, "grad_norm": 1.0569932460784912, "learning_rate": 2.115802499870159e-07, "loss": 1.1711, "step": 33870 }, { "epoch": 19.460080413555428, "grad_norm": 0.9954378008842468, "learning_rate": 2.0725700106399206e-07, "loss": 1.1913, "step": 33880 }, { "epoch": 19.465824238943135, "grad_norm": 1.1241528987884521, "learning_rate": 2.0297937793281756e-07, "loss": 1.1809, "step": 33890 }, { "epoch": 19.471568064330846, "grad_norm": 0.9078443050384521, "learning_rate": 1.9874738451293884e-07, "loss": 1.18, "step": 33900 }, { "epoch": 19.477311889718553, "grad_norm": 1.027892827987671, "learning_rate": 1.9456102468199895e-07, "loss": 1.1911, "step": 33910 }, { "epoch": 19.48305571510626, "grad_norm": 0.9565598368644714, "learning_rate": 1.9042030227582648e-07, "loss": 1.1857, "step": 33920 }, { "epoch": 19.488799540493968, "grad_norm": 0.9142249822616577, "learning_rate": 1.863252210884411e-07, "loss": 1.1811, "step": 33930 }, { "epoch": 19.49454336588168, "grad_norm": 1.0252262353897095, "learning_rate": 1.8227578487202028e-07, "loss": 1.208, "step": 33940 }, { "epoch": 19.500287191269386, "grad_norm": 0.9889923930168152, "learning_rate": 1.7827199733693812e-07, "loss": 1.2077, "step": 33950 }, { "epoch": 19.506031016657094, "grad_norm": 1.0916591882705688, "learning_rate": 1.7431386215174877e-07, "loss": 1.1948, "step": 33960 }, { "epoch": 19.5117748420448, "grad_norm": 0.9584410190582275, "learning_rate": 1.7040138294314742e-07, "loss": 1.1562, "step": 33970 }, { "epoch": 19.517518667432512, "grad_norm": 0.962062656879425, "learning_rate": 1.6653456329603148e-07, "loss": 1.1755, "step": 33980 }, { "epoch": 19.52326249282022, "grad_norm": 0.9326885938644409, "learning_rate": 1.6271340675342845e-07, "loss": 1.1809, "step": 33990 }, { "epoch": 19.529006318207927, "grad_norm": 0.942996621131897, "learning_rate": 1.589379168165513e-07, "loss": 1.1923, "step": 34000 }, { "epoch": 19.529006318207927, "eval_loss": 1.0561457872390747, "eval_runtime": 122.4086, "eval_samples_per_second": 12.997, "eval_steps_per_second": 0.139, "eval_wer": 0.08608317323991412, "step": 34000 }, { "epoch": 19.534750143595634, "grad_norm": 1.0817135572433472, "learning_rate": 1.5520809694475972e-07, "loss": 1.1696, "step": 34010 }, { "epoch": 19.54049396898334, "grad_norm": 0.9195762276649475, "learning_rate": 1.5152395055556563e-07, "loss": 1.1732, "step": 34020 }, { "epoch": 19.546237794371052, "grad_norm": 0.9074607491493225, "learning_rate": 1.4788548102463318e-07, "loss": 1.1864, "step": 34030 }, { "epoch": 19.55198161975876, "grad_norm": 0.9895302057266235, "learning_rate": 1.4429269168578434e-07, "loss": 1.1848, "step": 34040 }, { "epoch": 19.557725445146467, "grad_norm": 0.9548456072807312, "learning_rate": 1.4074558583097104e-07, "loss": 1.1897, "step": 34050 }, { "epoch": 19.563469270534174, "grad_norm": 1.0416704416275024, "learning_rate": 1.3724416671029753e-07, "loss": 1.2011, "step": 34060 }, { "epoch": 19.569213095921885, "grad_norm": 1.0131986141204834, "learning_rate": 1.3378843753199802e-07, "loss": 1.1879, "step": 34070 }, { "epoch": 19.574956921309592, "grad_norm": 1.0371094942092896, "learning_rate": 1.3037840146244788e-07, "loss": 1.19, "step": 34080 }, { "epoch": 19.5807007466973, "grad_norm": 0.8578032851219177, "learning_rate": 1.2701406162615257e-07, "loss": 1.1766, "step": 34090 }, { "epoch": 19.586444572085007, "grad_norm": 0.9512685537338257, "learning_rate": 1.2369542110575303e-07, "loss": 1.185, "step": 34100 }, { "epoch": 19.592188397472718, "grad_norm": 0.8890244364738464, "learning_rate": 1.2042248294201471e-07, "loss": 1.19, "step": 34110 }, { "epoch": 19.597932222860425, "grad_norm": 1.0625840425491333, "learning_rate": 1.1719525013381657e-07, "loss": 1.2045, "step": 34120 }, { "epoch": 19.603676048248133, "grad_norm": 0.8612390756607056, "learning_rate": 1.1401372563818403e-07, "loss": 1.1725, "step": 34130 }, { "epoch": 19.60941987363584, "grad_norm": 0.9465601444244385, "learning_rate": 1.1087791237023385e-07, "loss": 1.1841, "step": 34140 }, { "epoch": 19.61516369902355, "grad_norm": 0.9632443785667419, "learning_rate": 1.0778781320321831e-07, "loss": 1.1846, "step": 34150 }, { "epoch": 19.62090752441126, "grad_norm": 0.9088215231895447, "learning_rate": 1.0474343096849204e-07, "loss": 1.18, "step": 34160 }, { "epoch": 19.626651349798966, "grad_norm": 0.9967238903045654, "learning_rate": 1.0174476845552848e-07, "loss": 1.1958, "step": 34170 }, { "epoch": 19.632395175186673, "grad_norm": 0.9305984973907471, "learning_rate": 9.879182841190899e-08, "loss": 1.1861, "step": 34180 }, { "epoch": 19.638139000574384, "grad_norm": 1.0029916763305664, "learning_rate": 9.588461354331716e-08, "loss": 1.1692, "step": 34190 }, { "epoch": 19.64388282596209, "grad_norm": 1.0017539262771606, "learning_rate": 9.302312651353336e-08, "loss": 1.183, "step": 34200 }, { "epoch": 19.6496266513498, "grad_norm": 0.939613401889801, "learning_rate": 9.020736994445683e-08, "loss": 1.1933, "step": 34210 }, { "epoch": 19.655370476737506, "grad_norm": 0.9600231051445007, "learning_rate": 8.743734641606694e-08, "loss": 1.1762, "step": 34220 }, { "epoch": 19.661114302125217, "grad_norm": 1.005743145942688, "learning_rate": 8.47130584664564e-08, "loss": 1.1956, "step": 34230 }, { "epoch": 19.666858127512924, "grad_norm": 0.9833147525787354, "learning_rate": 8.2034508591798e-08, "loss": 1.1922, "step": 34240 }, { "epoch": 19.67260195290063, "grad_norm": 0.9680048227310181, "learning_rate": 7.940169924636128e-08, "loss": 1.1684, "step": 34250 }, { "epoch": 19.67834577828834, "grad_norm": 1.1390366554260254, "learning_rate": 7.681463284250695e-08, "loss": 1.2086, "step": 34260 }, { "epoch": 19.68408960367605, "grad_norm": 1.0122030973434448, "learning_rate": 7.42733117506813e-08, "loss": 1.1902, "step": 34270 }, { "epoch": 19.689833429063757, "grad_norm": 0.9906060099601746, "learning_rate": 7.177773829941631e-08, "loss": 1.2034, "step": 34280 }, { "epoch": 19.695577254451464, "grad_norm": 1.048588514328003, "learning_rate": 6.932791477532957e-08, "loss": 1.1887, "step": 34290 }, { "epoch": 19.701321079839172, "grad_norm": 0.9483558535575867, "learning_rate": 6.69238434231076e-08, "loss": 1.2065, "step": 34300 }, { "epoch": 19.707064905226883, "grad_norm": 0.9528472423553467, "learning_rate": 6.456552644552817e-08, "loss": 1.1887, "step": 34310 }, { "epoch": 19.71280873061459, "grad_norm": 1.1356106996536255, "learning_rate": 6.225296600344348e-08, "loss": 1.1918, "step": 34320 }, { "epoch": 19.718552556002297, "grad_norm": 1.0088497400283813, "learning_rate": 5.998616421578035e-08, "loss": 1.181, "step": 34330 }, { "epoch": 19.724296381390005, "grad_norm": 1.0687867403030396, "learning_rate": 5.776512315952894e-08, "loss": 1.1957, "step": 34340 }, { "epoch": 19.730040206777716, "grad_norm": 0.9260311722755432, "learning_rate": 5.55898448697651e-08, "loss": 1.1761, "step": 34350 }, { "epoch": 19.735784032165423, "grad_norm": 0.9769617319107056, "learning_rate": 5.3460331339628064e-08, "loss": 1.1913, "step": 34360 }, { "epoch": 19.74152785755313, "grad_norm": 1.0382274389266968, "learning_rate": 5.137658452032051e-08, "loss": 1.2104, "step": 34370 }, { "epoch": 19.747271682940838, "grad_norm": 0.9430953860282898, "learning_rate": 4.9338606321114064e-08, "loss": 1.181, "step": 34380 }, { "epoch": 19.753015508328545, "grad_norm": 0.9655850529670715, "learning_rate": 4.7346398609343796e-08, "loss": 1.1771, "step": 34390 }, { "epoch": 19.758759333716256, "grad_norm": 0.9451389908790588, "learning_rate": 4.539996321040264e-08, "loss": 1.2001, "step": 34400 }, { "epoch": 19.764503159103963, "grad_norm": 1.1364023685455322, "learning_rate": 4.349930190774696e-08, "loss": 1.1909, "step": 34410 }, { "epoch": 19.77024698449167, "grad_norm": 0.9212002754211426, "learning_rate": 4.164441644289652e-08, "loss": 1.178, "step": 34420 }, { "epoch": 19.775990809879378, "grad_norm": 1.1874828338623047, "learning_rate": 3.983530851541788e-08, "loss": 1.1896, "step": 34430 }, { "epoch": 19.78173463526709, "grad_norm": 0.9512391090393066, "learning_rate": 3.807197978294654e-08, "loss": 1.171, "step": 34440 }, { "epoch": 19.787478460654796, "grad_norm": 0.8978659510612488, "learning_rate": 3.635443186115928e-08, "loss": 1.1792, "step": 34450 }, { "epoch": 19.793222286042504, "grad_norm": 1.0512269735336304, "learning_rate": 3.468266632379067e-08, "loss": 1.1928, "step": 34460 }, { "epoch": 19.79896611143021, "grad_norm": 0.9473230838775635, "learning_rate": 3.305668470262766e-08, "loss": 1.1789, "step": 34470 }, { "epoch": 19.804709936817922, "grad_norm": 0.9017809629440308, "learning_rate": 3.147648848750395e-08, "loss": 1.1904, "step": 34480 }, { "epoch": 19.81045376220563, "grad_norm": 0.9176917672157288, "learning_rate": 2.994207912630556e-08, "loss": 1.1992, "step": 34490 }, { "epoch": 19.816197587593336, "grad_norm": 0.9881791472434998, "learning_rate": 2.8453458024954193e-08, "loss": 1.186, "step": 34500 }, { "epoch": 19.821941412981044, "grad_norm": 1.028637409210205, "learning_rate": 2.701062654744049e-08, "loss": 1.1818, "step": 34510 }, { "epoch": 19.827685238368755, "grad_norm": 1.0274701118469238, "learning_rate": 2.5613586015774136e-08, "loss": 1.2012, "step": 34520 }, { "epoch": 19.833429063756462, "grad_norm": 1.1395429372787476, "learning_rate": 2.4262337710017143e-08, "loss": 1.1988, "step": 34530 }, { "epoch": 19.83917288914417, "grad_norm": 0.9607253670692444, "learning_rate": 2.295688286828382e-08, "loss": 1.1914, "step": 34540 }, { "epoch": 19.844916714531877, "grad_norm": 0.8855134844779968, "learning_rate": 2.1697222686713053e-08, "loss": 1.1973, "step": 34550 }, { "epoch": 19.850660539919588, "grad_norm": 0.9070685505867004, "learning_rate": 2.0483358319496047e-08, "loss": 1.182, "step": 34560 }, { "epoch": 19.856404365307295, "grad_norm": 0.9213180541992188, "learning_rate": 1.931529087885968e-08, "loss": 1.1589, "step": 34570 }, { "epoch": 19.862148190695002, "grad_norm": 1.0456494092941284, "learning_rate": 1.819302143506094e-08, "loss": 1.1938, "step": 34580 }, { "epoch": 19.86789201608271, "grad_norm": 0.9651570320129395, "learning_rate": 1.7116551016403593e-08, "loss": 1.1847, "step": 34590 }, { "epoch": 19.87363584147042, "grad_norm": 0.9324113130569458, "learning_rate": 1.6085880609221513e-08, "loss": 1.1865, "step": 34600 }, { "epoch": 19.879379666858128, "grad_norm": 1.0895005464553833, "learning_rate": 1.5101011157884246e-08, "loss": 1.1948, "step": 34610 }, { "epoch": 19.885123492245835, "grad_norm": 1.0173923969268799, "learning_rate": 1.4161943564797008e-08, "loss": 1.1904, "step": 34620 }, { "epoch": 19.890867317633543, "grad_norm": 0.9053332209587097, "learning_rate": 1.3268678690395126e-08, "loss": 1.1749, "step": 34630 }, { "epoch": 19.896611143021254, "grad_norm": 0.8996206521987915, "learning_rate": 1.2421217353155158e-08, "loss": 1.1793, "step": 34640 }, { "epoch": 19.90235496840896, "grad_norm": 1.0398614406585693, "learning_rate": 1.1619560329578216e-08, "loss": 1.1836, "step": 34650 }, { "epoch": 19.90809879379667, "grad_norm": 1.0628360509872437, "learning_rate": 1.0863708354189982e-08, "loss": 1.1832, "step": 34660 }, { "epoch": 19.913842619184376, "grad_norm": 1.0432826280593872, "learning_rate": 1.0153662119557358e-08, "loss": 1.2013, "step": 34670 }, { "epoch": 19.919586444572086, "grad_norm": 0.9931183457374573, "learning_rate": 9.489422276271813e-09, "loss": 1.1984, "step": 34680 }, { "epoch": 19.925330269959794, "grad_norm": 0.8921299576759338, "learning_rate": 8.870989432960484e-09, "loss": 1.1847, "step": 34690 }, { "epoch": 19.9310740953475, "grad_norm": 1.3161612749099731, "learning_rate": 8.29836415626397e-09, "loss": 1.1698, "step": 34700 }, { "epoch": 19.93681792073521, "grad_norm": 1.0505892038345337, "learning_rate": 7.77154697086964e-09, "loss": 1.1853, "step": 34710 }, { "epoch": 19.94256174612292, "grad_norm": 0.9417886137962341, "learning_rate": 7.2905383594838795e-09, "loss": 1.1831, "step": 34720 }, { "epoch": 19.948305571510627, "grad_norm": 0.9646815657615662, "learning_rate": 6.855338762832093e-09, "loss": 1.1987, "step": 34730 }, { "epoch": 19.954049396898334, "grad_norm": 0.9895023703575134, "learning_rate": 6.465948579675348e-09, "loss": 1.1817, "step": 34740 }, { "epoch": 19.95979322228604, "grad_norm": 0.9899342060089111, "learning_rate": 6.122368166799279e-09, "loss": 1.1805, "step": 34750 }, { "epoch": 19.965537047673752, "grad_norm": 0.916469156742096, "learning_rate": 5.824597839025189e-09, "loss": 1.1922, "step": 34760 }, { "epoch": 19.97128087306146, "grad_norm": 0.8887254595756531, "learning_rate": 5.572637869176747e-09, "loss": 1.1868, "step": 34770 }, { "epoch": 19.977024698449167, "grad_norm": 0.9541832208633423, "learning_rate": 5.366488488124388e-09, "loss": 1.1707, "step": 34780 }, { "epoch": 19.982768523836874, "grad_norm": 0.9640499949455261, "learning_rate": 5.2061498847520126e-09, "loss": 1.1839, "step": 34790 }, { "epoch": 19.988512349224585, "grad_norm": 0.973200798034668, "learning_rate": 5.091622205979189e-09, "loss": 1.1961, "step": 34800 }, { "epoch": 19.994256174612293, "grad_norm": 0.9521649479866028, "learning_rate": 5.022905556744502e-09, "loss": 1.1848, "step": 34810 }, { "epoch": 20.0, "grad_norm": 3.1310455799102783, "learning_rate": 5e-09, "loss": 1.1786, "step": 34820 }, { "epoch": 20.0, "step": 34820, "total_flos": 0.0, "train_loss": 1.6298207611684346, "train_runtime": 35628.5116, "train_samples_per_second": 93.771, "train_steps_per_second": 0.977 } ], "logging_steps": 10, "max_steps": 34820, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 96, "trial_name": null, "trial_params": null }