{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 2000,
  "global_step": 60928,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 4.967174369747899e-05,
      "loss": 3.2651,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.934348739495799e-05,
      "loss": 1.7248,
      "step": 1000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.901523109243697e-05,
      "loss": 1.3764,
      "step": 1500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.868697478991597e-05,
      "loss": 1.1757,
      "step": 2000
    },
    {
      "epoch": 0.13,
      "eval_e": 0.581678956324447,
      "eval_f1": 0.5311118764265024,
      "eval_loss": 1.6264814138412476,
      "eval_runtime": 40.5361,
      "eval_samples_per_second": 86.984,
      "eval_steps_per_second": 0.691,
      "step": 2000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.835871848739496e-05,
      "loss": 1.0579,
      "step": 2500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8030462184873956e-05,
      "loss": 0.9707,
      "step": 3000
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.7702205882352946e-05,
      "loss": 0.9118,
      "step": 3500
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.7373949579831936e-05,
      "loss": 0.8875,
      "step": 4000
    },
    {
      "epoch": 0.26,
      "eval_e": 0.6534316505955757,
      "eval_f1": 0.596454998267645,
      "eval_loss": 1.359203577041626,
      "eval_runtime": 41.4011,
      "eval_samples_per_second": 85.167,
      "eval_steps_per_second": 0.676,
      "step": 4000
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.7045693277310926e-05,
      "loss": 0.8308,
      "step": 4500
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.6717436974789916e-05,
      "loss": 0.8191,
      "step": 5000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.638918067226891e-05,
      "loss": 0.8039,
      "step": 5500
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.6060924369747897e-05,
      "loss": 0.7682,
      "step": 6000
    },
    {
      "epoch": 0.39,
      "eval_e": 0.6781055019852524,
      "eval_f1": 0.6213789300225637,
      "eval_loss": 1.1949362754821777,
      "eval_runtime": 40.0845,
      "eval_samples_per_second": 87.964,
      "eval_steps_per_second": 0.699,
      "step": 6000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.5732668067226893e-05,
      "loss": 0.7669,
      "step": 6500
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.5404411764705883e-05,
      "loss": 0.7246,
      "step": 7000
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.507615546218488e-05,
      "loss": 0.723,
      "step": 7500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.474789915966387e-05,
      "loss": 0.7023,
      "step": 8000
    },
    {
      "epoch": 0.53,
      "eval_e": 0.7073170731707317,
      "eval_f1": 0.6500612956253415,
      "eval_loss": 1.1214724779129028,
      "eval_runtime": 36.2407,
      "eval_samples_per_second": 97.294,
      "eval_steps_per_second": 0.773,
      "step": 8000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.4419642857142854e-05,
      "loss": 0.6801,
      "step": 8500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.409138655462185e-05,
      "loss": 0.6614,
      "step": 9000
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.376313025210084e-05,
      "loss": 0.6895,
      "step": 9500
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.343487394957984e-05,
      "loss": 0.6633,
      "step": 10000
    },
    {
      "epoch": 0.66,
      "eval_e": 0.707884288145207,
      "eval_f1": 0.6365888205833479,
      "eval_loss": 1.1668164730072021,
      "eval_runtime": 36.2099,
      "eval_samples_per_second": 97.377,
      "eval_steps_per_second": 0.773,
      "step": 10000
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.310661764705883e-05,
      "loss": 0.6346,
      "step": 10500
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.277836134453782e-05,
      "loss": 0.6376,
      "step": 11000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.245010504201681e-05,
      "loss": 0.654,
      "step": 11500
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.21218487394958e-05,
      "loss": 0.6169,
      "step": 12000
    },
    {
      "epoch": 0.79,
      "eval_e": 0.7271695972773681,
      "eval_f1": 0.6654857811733846,
      "eval_loss": 1.0702860355377197,
      "eval_runtime": 36.2662,
      "eval_samples_per_second": 97.226,
      "eval_steps_per_second": 0.772,
      "step": 12000
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.1793592436974794e-05,
      "loss": 0.6161,
      "step": 12500
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.1465336134453784e-05,
      "loss": 0.6157,
      "step": 13000
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.1137079831932774e-05,
      "loss": 0.618,
      "step": 13500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.0808823529411765e-05,
      "loss": 0.604,
      "step": 14000
    },
    {
      "epoch": 0.92,
      "eval_e": 0.7399319342030629,
      "eval_f1": 0.6704723459995169,
      "eval_loss": 0.9919618964195251,
      "eval_runtime": 36.2385,
      "eval_samples_per_second": 97.3,
      "eval_steps_per_second": 0.773,
      "step": 14000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.048056722689076e-05,
      "loss": 0.6043,
      "step": 14500
    },
    {
      "epoch": 0.98,
      "learning_rate": 4.015231092436975e-05,
      "loss": 0.579,
      "step": 15000
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.982405462184874e-05,
      "loss": 0.5503,
      "step": 15500
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.949579831932773e-05,
      "loss": 0.5093,
      "step": 16000
    },
    {
      "epoch": 1.05,
      "eval_e": 0.7359614293817357,
      "eval_f1": 0.6659783311586052,
      "eval_loss": 0.9624159336090088,
      "eval_runtime": 36.2264,
      "eval_samples_per_second": 97.332,
      "eval_steps_per_second": 0.773,
      "step": 16000
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.916754201680672e-05,
      "loss": 0.5139,
      "step": 16500
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.883928571428572e-05,
      "loss": 0.5018,
      "step": 17000
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.851102941176471e-05,
      "loss": 0.5009,
      "step": 17500
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.81827731092437e-05,
      "loss": 0.5068,
      "step": 18000
    },
    {
      "epoch": 1.18,
      "eval_e": 0.7464549064095292,
      "eval_f1": 0.6771850837420853,
      "eval_loss": 1.0379488468170166,
      "eval_runtime": 36.2397,
      "eval_samples_per_second": 97.297,
      "eval_steps_per_second": 0.773,
      "step": 18000
    },
    {
      "epoch": 1.21,
      "learning_rate": 3.785451680672269e-05,
      "loss": 0.4928,
      "step": 18500
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.7526260504201685e-05,
      "loss": 0.5026,
      "step": 19000
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.7198004201680675e-05,
      "loss": 0.4938,
      "step": 19500
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.6869747899159665e-05,
      "loss": 0.4812,
      "step": 20000
    },
    {
      "epoch": 1.31,
      "eval_e": 0.742484401588202,
      "eval_f1": 0.6734074837554972,
      "eval_loss": 1.0004695653915405,
      "eval_runtime": 36.2259,
      "eval_samples_per_second": 97.334,
      "eval_steps_per_second": 0.773,
      "step": 20000
    },
    {
      "epoch": 1.35,
      "learning_rate": 3.6541491596638656e-05,
      "loss": 0.4877,
      "step": 20500
    },
    {
      "epoch": 1.38,
      "learning_rate": 3.6213235294117646e-05,
      "loss": 0.4823,
      "step": 21000
    },
    {
      "epoch": 1.41,
      "learning_rate": 3.588497899159664e-05,
      "loss": 0.4708,
      "step": 21500
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.555672268907563e-05,
      "loss": 0.4831,
      "step": 22000
    },
    {
      "epoch": 1.44,
      "eval_e": 0.7433352240499149,
      "eval_f1": 0.6745692090035141,
      "eval_loss": 1.0472208261489868,
      "eval_runtime": 36.2204,
      "eval_samples_per_second": 97.348,
      "eval_steps_per_second": 0.773,
      "step": 22000
    },
    {
      "epoch": 1.48,
      "learning_rate": 3.522846638655463e-05,
      "loss": 0.4716,
      "step": 22500
    },
    {
      "epoch": 1.51,
      "learning_rate": 3.490021008403361e-05,
      "loss": 0.4716,
      "step": 23000
    },
    {
      "epoch": 1.54,
      "learning_rate": 3.45719537815126e-05,
      "loss": 0.474,
      "step": 23500
    },
    {
      "epoch": 1.58,
      "learning_rate": 3.42436974789916e-05,
      "loss": 0.4748,
      "step": 24000
    },
    {
      "epoch": 1.58,
      "eval_e": 0.7552467385138968,
      "eval_f1": 0.6830863599993048,
      "eval_loss": 0.9445247650146484,
      "eval_runtime": 41.2911,
      "eval_samples_per_second": 85.394,
      "eval_steps_per_second": 0.678,
      "step": 24000
    },
    {
      "epoch": 1.61,
      "learning_rate": 3.391544117647059e-05,
      "loss": 0.4668,
      "step": 24500
    },
    {
      "epoch": 1.64,
      "learning_rate": 3.358718487394958e-05,
      "loss": 0.467,
      "step": 25000
    },
    {
      "epoch": 1.67,
      "learning_rate": 3.325892857142857e-05,
      "loss": 0.4517,
      "step": 25500
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.2930672268907566e-05,
      "loss": 0.4581,
      "step": 26000
    },
    {
      "epoch": 1.71,
      "eval_e": 0.7552467385138968,
      "eval_f1": 0.6822537839548614,
      "eval_loss": 0.9966788291931152,
      "eval_runtime": 36.282,
      "eval_samples_per_second": 97.183,
      "eval_steps_per_second": 0.772,
      "step": 26000
    },
    {
      "epoch": 1.74,
      "learning_rate": 3.2602415966386556e-05,
      "loss": 0.458,
      "step": 26500
    },
    {
      "epoch": 1.77,
      "learning_rate": 3.2274159663865547e-05,
      "loss": 0.441,
      "step": 27000
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.1945903361344537e-05,
      "loss": 0.4703,
      "step": 27500
    },
    {
      "epoch": 1.84,
      "learning_rate": 3.161764705882353e-05,
      "loss": 0.4438,
      "step": 28000
    },
    {
      "epoch": 1.84,
      "eval_e": 0.7487237663074305,
      "eval_f1": 0.6789318734426844,
      "eval_loss": 0.9515223503112793,
      "eval_runtime": 39.9941,
      "eval_samples_per_second": 88.163,
      "eval_steps_per_second": 0.7,
      "step": 28000
    },
    {
      "epoch": 1.87,
      "learning_rate": 3.1289390756302523e-05,
      "loss": 0.4556,
      "step": 28500
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.0961134453781514e-05,
      "loss": 0.4453,
      "step": 29000
    },
    {
      "epoch": 1.94,
      "learning_rate": 3.063287815126051e-05,
      "loss": 0.4415,
      "step": 29500
    },
    {
      "epoch": 1.97,
      "learning_rate": 3.0304621848739494e-05,
      "loss": 0.4417,
      "step": 30000
    },
    {
      "epoch": 1.97,
      "eval_e": 0.7577992058990357,
      "eval_f1": 0.686996111301651,
      "eval_loss": 0.9782966375350952,
      "eval_runtime": 36.255,
      "eval_samples_per_second": 97.256,
      "eval_steps_per_second": 0.772,
      "step": 30000
    },
    {
      "epoch": 2.0,
      "learning_rate": 2.9976365546218487e-05,
      "loss": 0.4254,
      "step": 30500
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.964810924369748e-05,
      "loss": 0.3574,
      "step": 31000
    },
    {
      "epoch": 2.07,
      "learning_rate": 2.9319852941176474e-05,
      "loss": 0.3596,
      "step": 31500
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.8991596638655467e-05,
      "loss": 0.3692,
      "step": 32000
    },
    {
      "epoch": 2.1,
      "eval_e": 0.7524106636415201,
      "eval_f1": 0.6861686413926235,
      "eval_loss": 1.0621048212051392,
      "eval_runtime": 36.2047,
      "eval_samples_per_second": 97.391,
      "eval_steps_per_second": 0.773,
      "step": 32000
    },
    {
      "epoch": 2.13,
      "learning_rate": 2.8663340336134454e-05,
      "loss": 0.3673,
      "step": 32500
    },
    {
      "epoch": 2.17,
      "learning_rate": 2.8335084033613447e-05,
      "loss": 0.3683,
      "step": 33000
    },
    {
      "epoch": 2.2,
      "learning_rate": 2.8006827731092438e-05,
      "loss": 0.3538,
      "step": 33500
    },
    {
      "epoch": 2.23,
      "learning_rate": 2.767857142857143e-05,
      "loss": 0.3555,
      "step": 34000
    },
    {
      "epoch": 2.23,
      "eval_e": 0.7620533182076007,
      "eval_f1": 0.6877485506353153,
      "eval_loss": 1.040844202041626,
      "eval_runtime": 36.2597,
      "eval_samples_per_second": 97.243,
      "eval_steps_per_second": 0.772,
      "step": 34000
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.7350315126050424e-05,
      "loss": 0.3718,
      "step": 34500
    },
    {
      "epoch": 2.3,
      "learning_rate": 2.702205882352941e-05,
      "loss": 0.3557,
      "step": 35000
    },
    {
      "epoch": 2.33,
      "learning_rate": 2.6693802521008405e-05,
      "loss": 0.3459,
      "step": 35500
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.6365546218487398e-05,
      "loss": 0.3657,
      "step": 36000
    },
    {
      "epoch": 2.36,
      "eval_e": 0.7597844583096994,
      "eval_f1": 0.6853469432427786,
      "eval_loss": 1.1275439262390137,
      "eval_runtime": 40.6163,
      "eval_samples_per_second": 86.813,
      "eval_steps_per_second": 0.689,
      "step": 36000
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.6037289915966388e-05,
      "loss": 0.3593,
      "step": 36500
    },
    {
      "epoch": 2.43,
      "learning_rate": 2.5709033613445378e-05,
      "loss": 0.3468,
      "step": 37000
    },
    {
      "epoch": 2.46,
      "learning_rate": 2.5380777310924368e-05,
      "loss": 0.3475,
      "step": 37500
    },
    {
      "epoch": 2.49,
      "learning_rate": 2.505252100840336e-05,
      "loss": 0.3504,
      "step": 38000
    },
    {
      "epoch": 2.49,
      "eval_e": 0.7603516732841747,
      "eval_f1": 0.6846985297135735,
      "eval_loss": 1.0078964233398438,
      "eval_runtime": 36.2227,
      "eval_samples_per_second": 97.342,
      "eval_steps_per_second": 0.773,
      "step": 38000
    },
    {
      "epoch": 2.53,
      "learning_rate": 2.4724264705882355e-05,
      "loss": 0.3574,
      "step": 38500
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.4396008403361345e-05,
      "loss": 0.3543,
      "step": 39000
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.406775210084034e-05,
      "loss": 0.3339,
      "step": 39500
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.373949579831933e-05,
      "loss": 0.3573,
      "step": 40000
    },
    {
      "epoch": 2.63,
      "eval_e": 0.7589336358479863,
      "eval_f1": 0.683545228612745,
      "eval_loss": 1.0078063011169434,
      "eval_runtime": 36.2573,
      "eval_samples_per_second": 97.25,
      "eval_steps_per_second": 0.772,
      "step": 40000
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.3411239495798322e-05,
      "loss": 0.3583,
      "step": 40500
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.3082983193277312e-05,
      "loss": 0.346,
      "step": 41000
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.2754726890756302e-05,
      "loss": 0.3468,
      "step": 41500
    },
    {
      "epoch": 2.76,
      "learning_rate": 2.2426470588235296e-05,
      "loss": 0.3409,
      "step": 42000
    },
    {
      "epoch": 2.76,
      "eval_e": 0.7552467385138968,
      "eval_f1": 0.6810380542275204,
      "eval_loss": 1.08004629611969,
      "eval_runtime": 36.2112,
      "eval_samples_per_second": 97.373,
      "eval_steps_per_second": 0.773,
      "step": 42000
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.2098214285714286e-05,
      "loss": 0.3501,
      "step": 42500
    },
    {
      "epoch": 2.82,
      "learning_rate": 2.176995798319328e-05,
      "loss": 0.3361,
      "step": 43000
    },
    {
      "epoch": 2.86,
      "learning_rate": 2.144170168067227e-05,
      "loss": 0.3452,
      "step": 43500
    },
    {
      "epoch": 2.89,
      "learning_rate": 2.1113445378151263e-05,
      "loss": 0.3602,
      "step": 44000
    },
    {
      "epoch": 2.89,
      "eval_e": 0.7609188882586501,
      "eval_f1": 0.6832601552139036,
      "eval_loss": 1.031318187713623,
      "eval_runtime": 36.2238,
      "eval_samples_per_second": 97.339,
      "eval_steps_per_second": 0.773,
      "step": 44000
    },
    {
      "epoch": 2.92,
      "learning_rate": 2.0785189075630253e-05,
      "loss": 0.3473,
      "step": 44500
    },
    {
      "epoch": 2.95,
      "learning_rate": 2.0456932773109243e-05,
      "loss": 0.3309,
      "step": 45000
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.0128676470588236e-05,
      "loss": 0.3342,
      "step": 45500
    },
    {
      "epoch": 3.02,
      "learning_rate": 1.9800420168067226e-05,
      "loss": 0.3,
      "step": 46000
    },
    {
      "epoch": 3.02,
      "eval_e": 0.7577992058990357,
      "eval_f1": 0.6814344985190464,
      "eval_loss": 1.0859274864196777,
      "eval_runtime": 36.2358,
      "eval_samples_per_second": 97.307,
      "eval_steps_per_second": 0.773,
      "step": 46000
    },
    {
      "epoch": 3.05,
      "learning_rate": 1.947216386554622e-05,
      "loss": 0.2727,
      "step": 46500
    },
    {
      "epoch": 3.09,
      "learning_rate": 1.9143907563025213e-05,
      "loss": 0.271,
      "step": 47000
    },
    {
      "epoch": 3.12,
      "learning_rate": 1.8815651260504203e-05,
      "loss": 0.2777,
      "step": 47500
    },
    {
      "epoch": 3.15,
      "learning_rate": 1.8487394957983196e-05,
      "loss": 0.2616,
      "step": 48000
    },
    {
      "epoch": 3.15,
      "eval_e": 0.7623369256948384,
      "eval_f1": 0.6829232660093741,
      "eval_loss": 1.1106504201889038,
      "eval_runtime": 36.1973,
      "eval_samples_per_second": 97.41,
      "eval_steps_per_second": 0.774,
      "step": 48000
    },
    {
      "epoch": 3.18,
      "learning_rate": 1.8159138655462187e-05,
      "loss": 0.2702,
      "step": 48500
    },
    {
      "epoch": 3.22,
      "learning_rate": 1.7830882352941177e-05,
      "loss": 0.2665,
      "step": 49000
    },
    {
      "epoch": 3.25,
      "learning_rate": 1.7502626050420167e-05,
      "loss": 0.2661,
      "step": 49500
    },
    {
      "epoch": 3.28,
      "learning_rate": 1.717436974789916e-05,
      "loss": 0.2728,
      "step": 50000
    },
    {
      "epoch": 3.28,
      "eval_e": 0.7555303460011344,
      "eval_f1": 0.680738586407374,
      "eval_loss": 1.158908724784851,
      "eval_runtime": 36.2449,
      "eval_samples_per_second": 97.283,
      "eval_steps_per_second": 0.773,
      "step": 50000
    },
    {
      "epoch": 3.32,
      "learning_rate": 1.6846113445378154e-05,
      "loss": 0.2753,
      "step": 50500
    },
    {
      "epoch": 3.35,
      "learning_rate": 1.6517857142857144e-05,
      "loss": 0.2717,
      "step": 51000
    },
    {
      "epoch": 3.38,
      "learning_rate": 1.6189600840336137e-05,
      "loss": 0.2723,
      "step": 51500
    },
    {
      "epoch": 3.41,
      "learning_rate": 1.5861344537815127e-05,
      "loss": 0.2738,
      "step": 52000
    },
    {
      "epoch": 3.41,
      "eval_e": 0.7648893930799773,
      "eval_f1": 0.6879751943104644,
      "eval_loss": 1.1301259994506836,
      "eval_runtime": 41.2984,
      "eval_samples_per_second": 85.379,
      "eval_steps_per_second": 0.678,
      "step": 52000
    },
    {
      "epoch": 3.45,
      "learning_rate": 1.5533088235294117e-05,
      "loss": 0.2692,
      "step": 52500
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.5204831932773109e-05,
      "loss": 0.2714,
      "step": 53000
    },
    {
      "epoch": 3.51,
      "learning_rate": 1.48765756302521e-05,
      "loss": 0.2646,
      "step": 53500
    },
    {
      "epoch": 3.55,
      "learning_rate": 1.4548319327731094e-05,
      "loss": 0.2664,
      "step": 54000
    },
    {
      "epoch": 3.55,
      "eval_e": 0.7688598979013046,
      "eval_f1": 0.6891421496451101,
      "eval_loss": 1.0957111120224,
      "eval_runtime": 36.2366,
      "eval_samples_per_second": 97.305,
      "eval_steps_per_second": 0.773,
      "step": 54000
    },
    {
      "epoch": 3.58,
      "learning_rate": 1.4220063025210084e-05,
      "loss": 0.2683,
      "step": 54500
    },
    {
      "epoch": 3.61,
      "learning_rate": 1.3891806722689078e-05,
      "loss": 0.2698,
      "step": 55000
    },
    {
      "epoch": 3.64,
      "learning_rate": 1.3563550420168068e-05,
      "loss": 0.2709,
      "step": 55500
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.2737,
      "step": 56000
    },
    {
      "epoch": 3.68,
      "eval_e": 0.7631877481565513,
      "eval_f1": 0.6905602249087808,
      "eval_loss": 1.0759004354476929,
      "eval_runtime": 36.2232,
      "eval_samples_per_second": 97.341,
      "eval_steps_per_second": 0.773,
      "step": 56000
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.2907037815126053e-05,
      "loss": 0.2703,
      "step": 56500
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.2578781512605043e-05,
      "loss": 0.2609,
      "step": 57000
    },
    {
      "epoch": 3.77,
      "learning_rate": 1.2250525210084033e-05,
      "loss": 0.2616,
      "step": 57500
    },
    {
      "epoch": 3.81,
      "learning_rate": 1.1922268907563026e-05,
      "loss": 0.2784,
      "step": 58000
    },
    {
      "epoch": 3.81,
      "eval_e": 0.7614861032331254,
      "eval_f1": 0.6854242844445689,
      "eval_loss": 1.0705878734588623,
      "eval_runtime": 36.2079,
      "eval_samples_per_second": 97.382,
      "eval_steps_per_second": 0.773,
      "step": 58000
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.1594012605042018e-05,
      "loss": 0.2701,
      "step": 58500
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.126575630252101e-05,
      "loss": 0.2647,
      "step": 59000
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.09375e-05,
      "loss": 0.2608,
      "step": 59500
    },
    {
      "epoch": 3.94,
      "learning_rate": 1.0609243697478992e-05,
      "loss": 0.2622,
      "step": 60000
    },
    {
      "epoch": 3.94,
      "eval_e": 0.7612024957458877,
      "eval_f1": 0.6860082711191074,
      "eval_loss": 1.1619102954864502,
      "eval_runtime": 36.2375,
      "eval_samples_per_second": 97.303,
      "eval_steps_per_second": 0.773,
      "step": 60000
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.0280987394957983e-05,
      "loss": 0.2547,
      "step": 60500
    }
  ],
  "logging_steps": 500,
  "max_steps": 76160,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 1.9495781319278917e+18,
  "trial_name": null,
  "trial_params": null
}