{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 5331,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018758206715438003,
      "grad_norm": 0.17594827711582184,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.428,
      "step": 100
    },
    {
      "epoch": 0.03751641343087601,
      "grad_norm": 0.12495549023151398,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.4287,
      "step": 200
    },
    {
      "epoch": 0.056274620146314014,
      "grad_norm": 0.196218803524971,
      "learning_rate": 6e-06,
      "loss": 2.4281,
      "step": 300
    },
    {
      "epoch": 0.07503282686175201,
      "grad_norm": 0.27286475896835327,
      "learning_rate": 8.000000000000001e-06,
      "loss": 2.4217,
      "step": 400
    },
    {
      "epoch": 0.09379103357719001,
      "grad_norm": 0.36982518434524536,
      "learning_rate": 1e-05,
      "loss": 2.3086,
      "step": 500
    },
    {
      "epoch": 0.11254924029262803,
      "grad_norm": 0.44393104314804077,
      "learning_rate": 1.2e-05,
      "loss": 2.3104,
      "step": 600
    },
    {
      "epoch": 0.13130744700806604,
      "grad_norm": 0.5053464770317078,
      "learning_rate": 1.4e-05,
      "loss": 2.265,
      "step": 700
    },
    {
      "epoch": 0.15006565372350403,
      "grad_norm": 0.5742277503013611,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.2493,
      "step": 800
    },
    {
      "epoch": 0.16882386043894204,
      "grad_norm": 0.5711067318916321,
      "learning_rate": 1.8e-05,
      "loss": 2.1924,
      "step": 900
    },
    {
      "epoch": 0.18758206715438003,
      "grad_norm": 0.7165639996528625,
      "learning_rate": 2e-05,
      "loss": 2.1846,
      "step": 1000
    },
    {
      "epoch": 0.20634027386981804,
      "grad_norm": 0.6914772391319275,
      "learning_rate": 1.9973703197307516e-05,
      "loss": 2.1952,
      "step": 1100
    },
    {
      "epoch": 0.22509848058525606,
      "grad_norm": 0.7354649305343628,
      "learning_rate": 1.989495109359643e-05,
      "loss": 2.1214,
      "step": 1200
    },
    {
      "epoch": 0.24385668730069404,
      "grad_norm": 0.7735409736633301,
      "learning_rate": 1.976415787457332e-05,
      "loss": 2.1591,
      "step": 1300
    },
    {
      "epoch": 0.2626148940161321,
      "grad_norm": 0.7348875999450684,
      "learning_rate": 1.958201142893303e-05,
      "loss": 2.1296,
      "step": 1400
    },
    {
      "epoch": 0.28137310073157007,
      "grad_norm": 0.743415355682373,
      "learning_rate": 1.934946973050398e-05,
      "loss": 2.1263,
      "step": 1500
    },
    {
      "epoch": 0.30013130744700806,
      "grad_norm": 0.7139678597450256,
      "learning_rate": 1.906775579991844e-05,
      "loss": 2.093,
      "step": 1600
    },
    {
      "epoch": 0.3188895141624461,
      "grad_norm": 0.8803767561912537,
      "learning_rate": 1.8738351272306086e-05,
      "loss": 2.1002,
      "step": 1700
    },
    {
      "epoch": 0.3376477208778841,
      "grad_norm": 0.8513467907905579,
      "learning_rate": 1.8362988604840638e-05,
      "loss": 2.0908,
      "step": 1800
    },
    {
      "epoch": 0.35640592759332207,
      "grad_norm": 1.1361035108566284,
      "learning_rate": 1.7943641965122992e-05,
      "loss": 2.0669,
      "step": 1900
    },
    {
      "epoch": 0.37516413430876006,
      "grad_norm": 1.1987696886062622,
      "learning_rate": 1.7482516848322028e-05,
      "loss": 2.0395,
      "step": 2000
    },
    {
      "epoch": 0.3939223410241981,
      "grad_norm": 1.084433674812317,
      "learning_rate": 1.698203847768036e-05,
      "loss": 2.0576,
      "step": 2100
    },
    {
      "epoch": 0.4126805477396361,
      "grad_norm": 0.8210257291793823,
      "learning_rate": 1.6444839049390907e-05,
      "loss": 2.0277,
      "step": 2200
    },
    {
      "epoch": 0.43143875445507407,
      "grad_norm": 0.9078176617622375,
      "learning_rate": 1.587374388892813e-05,
      "loss": 2.0227,
      "step": 2300
    },
    {
      "epoch": 0.4501969611705121,
      "grad_norm": 0.8175433278083801,
      "learning_rate": 1.527175659164268e-05,
      "loss": 2.0827,
      "step": 2400
    },
    {
      "epoch": 0.4689551678859501,
      "grad_norm": 0.824755072593689,
      "learning_rate": 1.4642043225770583e-05,
      "loss": 2.0061,
      "step": 2500
    },
    {
      "epoch": 0.4877133746013881,
      "grad_norm": 0.8904985785484314,
      "learning_rate": 1.3987915680938873e-05,
      "loss": 2.0519,
      "step": 2600
    },
    {
      "epoch": 0.5064715813168261,
      "grad_norm": 0.6717514395713806,
      "learning_rate": 1.3312814249743977e-05,
      "loss": 2.0615,
      "step": 2700
    },
    {
      "epoch": 0.5252297880322642,
      "grad_norm": 1.2688275575637817,
      "learning_rate": 1.2620289534012606e-05,
      "loss": 2.0281,
      "step": 2800
    },
    {
      "epoch": 0.5439879947477021,
      "grad_norm": 1.184589147567749,
      "learning_rate": 1.1913983770906618e-05,
      "loss": 2.038,
      "step": 2900
    },
    {
      "epoch": 0.5627462014631401,
      "grad_norm": 0.9304941892623901,
      "learning_rate": 1.1197611677084595e-05,
      "loss": 2.0309,
      "step": 3000
    },
    {
      "epoch": 0.5815044081785782,
      "grad_norm": 0.9069289565086365,
      "learning_rate": 1.047494091166767e-05,
      "loss": 2.0165,
      "step": 3100
    },
    {
      "epoch": 0.6002626148940161,
      "grad_norm": 0.7898929119110107,
      "learning_rate": 9.749772260761804e-06,
      "loss": 2.0473,
      "step": 3200
    },
    {
      "epoch": 0.6190208216094542,
      "grad_norm": 1.2299916744232178,
      "learning_rate": 9.025919647753327e-06,
      "loss": 2.0166,
      "step": 3300
    },
    {
      "epoch": 0.6377790283248922,
      "grad_norm": 0.877371609210968,
      "learning_rate": 8.307190074510776e-06,
      "loss": 2.0431,
      "step": 3400
    },
    {
      "epoch": 0.6565372350403301,
      "grad_norm": 1.2550036907196045,
      "learning_rate": 7.597363598989522e-06,
      "loss": 2.0108,
      "step": 3500
    },
    {
      "epoch": 0.6752954417557682,
      "grad_norm": 0.8311442732810974,
      "learning_rate": 6.900173454544101e-06,
      "loss": 2.0512,
      "step": 3600
    },
    {
      "epoch": 0.6940536484712062,
      "grad_norm": 0.9112470149993896,
      "learning_rate": 6.2192864155080345e-06,
      "loss": 1.9973,
      "step": 3700
    },
    {
      "epoch": 0.7128118551866441,
      "grad_norm": 1.4032444953918457,
      "learning_rate": 5.558283512305602e-06,
      "loss": 2.0361,
      "step": 3800
    },
    {
      "epoch": 0.7315700619020822,
      "grad_norm": 0.8047692775726318,
      "learning_rate": 4.920641197521746e-06,
      "loss": 2.0191,
      "step": 3900
    },
    {
      "epoch": 0.7503282686175201,
      "grad_norm": 0.8837004899978638,
      "learning_rate": 4.309713061984511e-06,
      "loss": 2.0163,
      "step": 4000
    },
    {
      "epoch": 0.7690864753329582,
      "grad_norm": 0.9701162576675415,
      "learning_rate": 3.7287121970217963e-06,
      "loss": 2.0364,
      "step": 4100
    },
    {
      "epoch": 0.7878446820483962,
      "grad_norm": 1.0035524368286133,
      "learning_rate": 3.1806942956556277e-06,
      "loss": 1.9763,
      "step": 4200
    },
    {
      "epoch": 0.8066028887638341,
      "grad_norm": 0.8405268788337708,
      "learning_rate": 2.668541581610836e-06,
      "loss": 2.0572,
      "step": 4300
    },
    {
      "epoch": 0.8253610954792722,
      "grad_norm": 0.8475466966629028,
      "learning_rate": 2.1949476506613487e-06,
      "loss": 1.9963,
      "step": 4400
    },
    {
      "epoch": 0.8441193021947102,
      "grad_norm": 1.072921633720398,
      "learning_rate": 1.7624033040388798e-06,
      "loss": 1.9533,
      "step": 4500
    },
    {
      "epoch": 0.8628775089101481,
      "grad_norm": 0.9307979941368103,
      "learning_rate": 1.3731834484112005e-06,
      "loss": 2.0392,
      "step": 4600
    },
    {
      "epoch": 0.8816357156255862,
      "grad_norm": 0.9195451736450195,
      "learning_rate": 1.0293351313278033e-06,
      "loss": 2.038,
      "step": 4700
    },
    {
      "epoch": 0.9003939223410242,
      "grad_norm": 0.9155580401420593,
      "learning_rate": 7.326667750587823e-07,
      "loss": 2.0031,
      "step": 4800
    },
    {
      "epoch": 0.9191521290564622,
      "grad_norm": 1.0459394454956055,
      "learning_rate": 4.847386654501185e-07,
      "loss": 2.0346,
      "step": 4900
    },
    {
      "epoch": 0.9379103357719002,
      "grad_norm": 1.1910587549209595,
      "learning_rate": 2.8685474581787496e-07,
      "loss": 2.0398,
      "step": 5000
    },
    {
      "epoch": 0.9566685424873382,
      "grad_norm": 1.1656410694122314,
      "learning_rate": 1.4005575904016632e-07,
      "loss": 2.0618,
      "step": 5100
    },
    {
      "epoch": 0.9754267492027762,
      "grad_norm": 1.093923807144165,
      "learning_rate": 4.5113773915143045e-08,
      "loss": 1.9963,
      "step": 5200
    },
    {
      "epoch": 0.9941849559182142,
      "grad_norm": 0.9703367948532104,
      "learning_rate": 2.528124572819257e-09,
      "loss": 2.0322,
      "step": 5300
    },
    {
      "epoch": 1.0,
      "step": 5331,
      "total_flos": 9.709371506688e+16,
      "train_loss": 2.098851282809917,
      "train_runtime": 861.0045,
      "train_samples_per_second": 12.382,
      "train_steps_per_second": 6.192
    }
  ],
  "logging_steps": 100,
  "max_steps": 5331,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.709371506688e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}