{ "best_global_step": 200, "best_metric": 0.008479318581521511, "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/medgemma_finetuned/checkpoint-200", "epoch": 0.7782101167315175, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019455252918287938, "grad_norm": 3.5695066452026367, "learning_rate": 4.137931034482758e-06, "loss": 0.8424, "step": 5 }, { "epoch": 0.038910505836575876, "grad_norm": 1.942628026008606, "learning_rate": 9.310344827586207e-06, "loss": 0.7525, "step": 10 }, { "epoch": 0.058365758754863814, "grad_norm": 1.2901023626327515, "learning_rate": 1.4482758620689657e-05, "loss": 0.5573, "step": 15 }, { "epoch": 0.07782101167315175, "grad_norm": 1.1915671825408936, "learning_rate": 1.9655172413793102e-05, "loss": 0.3637, "step": 20 }, { "epoch": 0.09727626459143969, "grad_norm": 1.0968961715698242, "learning_rate": 2.4827586206896553e-05, "loss": 0.1735, "step": 25 }, { "epoch": 0.11673151750972763, "grad_norm": 0.4196433126926422, "learning_rate": 3e-05, "loss": 0.0496, "step": 30 }, { "epoch": 0.13618677042801555, "grad_norm": 0.1870017945766449, "learning_rate": 3.517241379310345e-05, "loss": 0.0232, "step": 35 }, { "epoch": 0.1556420233463035, "grad_norm": 0.23363570868968964, "learning_rate": 4.03448275862069e-05, "loss": 0.0168, "step": 40 }, { "epoch": 0.17509727626459143, "grad_norm": 0.14438340067863464, "learning_rate": 4.551724137931034e-05, "loss": 0.0151, "step": 45 }, { "epoch": 0.19455252918287938, "grad_norm": 0.10051655769348145, "learning_rate": 5.0689655172413794e-05, "loss": 0.0107, "step": 50 }, { "epoch": 0.2140077821011673, "grad_norm": 0.09743820875883102, "learning_rate": 5.586206896551724e-05, "loss": 0.008, "step": 55 }, { "epoch": 0.23346303501945526, "grad_norm": 0.06392025947570801, "learning_rate": 6.103448275862069e-05, "loss": 0.0085, "step": 60 }, { "epoch": 0.2529182879377432, "grad_norm": 0.04592013731598854, "learning_rate": 6.620689655172413e-05, "loss": 0.0075, "step": 65 }, { "epoch": 0.2723735408560311, "grad_norm": 0.055331017822027206, "learning_rate": 7.137931034482759e-05, "loss": 0.0077, "step": 70 }, { "epoch": 0.2918287937743191, "grad_norm": 0.048375148326158524, "learning_rate": 7.655172413793105e-05, "loss": 0.0072, "step": 75 }, { "epoch": 0.311284046692607, "grad_norm": 0.060988396406173706, "learning_rate": 8.172413793103448e-05, "loss": 0.0087, "step": 80 }, { "epoch": 0.33073929961089493, "grad_norm": 0.04453667998313904, "learning_rate": 8.689655172413794e-05, "loss": 0.008, "step": 85 }, { "epoch": 0.35019455252918286, "grad_norm": 0.06883223354816437, "learning_rate": 9.206896551724138e-05, "loss": 0.0083, "step": 90 }, { "epoch": 0.36964980544747084, "grad_norm": 0.08616536855697632, "learning_rate": 9.724137931034482e-05, "loss": 0.0084, "step": 95 }, { "epoch": 0.38910505836575876, "grad_norm": 0.03529098257422447, "learning_rate": 0.00010241379310344828, "loss": 0.0076, "step": 100 }, { "epoch": 0.38910505836575876, "eval_loss": 0.009547106921672821, "eval_runtime": 157.0062, "eval_samples_per_second": 3.248, "eval_steps_per_second": 0.815, "step": 100 }, { "epoch": 0.4085603112840467, "grad_norm": 0.03567972779273987, "learning_rate": 0.00010758620689655173, "loss": 0.0084, "step": 105 }, { "epoch": 0.4280155642023346, "grad_norm": 0.06252816319465637, "learning_rate": 0.00011275862068965518, "loss": 0.008, "step": 110 }, { "epoch": 0.4474708171206226, "grad_norm": 0.044703833758831024, "learning_rate": 0.00011793103448275861, "loss": 0.0083, "step": 115 }, { "epoch": 0.4669260700389105, "grad_norm": 0.03152047470211983, "learning_rate": 0.00011999378882999482, "loss": 0.0074, "step": 120 }, { "epoch": 0.48638132295719844, "grad_norm": 0.03210924193263054, "learning_rate": 0.00011995583633681744, "loss": 0.0078, "step": 125 }, { "epoch": 0.5058365758754864, "grad_norm": 0.04019011929631233, "learning_rate": 0.00011988340380013058, "loss": 0.0074, "step": 130 }, { "epoch": 0.5252918287937743, "grad_norm": 0.04210692271590233, "learning_rate": 0.00011977653287521201, "loss": 0.0071, "step": 135 }, { "epoch": 0.5447470817120622, "grad_norm": 0.03734419122338295, "learning_rate": 0.00011963528502253607, "loss": 0.0075, "step": 140 }, { "epoch": 0.5642023346303502, "grad_norm": 0.02421458251774311, "learning_rate": 0.00011945974147242832, "loss": 0.0075, "step": 145 }, { "epoch": 0.5836575875486382, "grad_norm": 0.035657692700624466, "learning_rate": 0.0001192500031783508, "loss": 0.008, "step": 150 }, { "epoch": 0.603112840466926, "grad_norm": 0.0272879209369421, "learning_rate": 0.00011900619075884453, "loss": 0.0077, "step": 155 }, { "epoch": 0.622568093385214, "grad_norm": 0.03400944918394089, "learning_rate": 0.00011872844442816295, "loss": 0.0073, "step": 160 }, { "epoch": 0.642023346303502, "grad_norm": 0.03923163563013077, "learning_rate": 0.00011841692391563607, "loss": 0.0076, "step": 165 }, { "epoch": 0.6614785992217899, "grad_norm": 0.03444487228989601, "learning_rate": 0.00011807180837381154, "loss": 0.0077, "step": 170 }, { "epoch": 0.6809338521400778, "grad_norm": 0.03119409829378128, "learning_rate": 0.00011769329627542567, "loss": 0.008, "step": 175 }, { "epoch": 0.7003891050583657, "grad_norm": 0.0293661467730999, "learning_rate": 0.00011728160529926373, "loss": 0.008, "step": 180 }, { "epoch": 0.7198443579766537, "grad_norm": 0.03529886156320572, "learning_rate": 0.00011683697220497477, "loss": 0.0075, "step": 185 }, { "epoch": 0.7392996108949417, "grad_norm": 0.0280179213732481, "learning_rate": 0.00011635965269691342, "loss": 0.0071, "step": 190 }, { "epoch": 0.7587548638132295, "grad_norm": 0.047665953636169434, "learning_rate": 0.00011584992127708669, "loss": 0.0075, "step": 195 }, { "epoch": 0.7782101167315175, "grad_norm": 0.019554605707526207, "learning_rate": 0.00011530807108729038, "loss": 0.0073, "step": 200 }, { "epoch": 0.7782101167315175, "eval_loss": 0.008479318581521511, "eval_runtime": 133.9329, "eval_samples_per_second": 3.808, "eval_steps_per_second": 0.956, "step": 200 } ], "logging_steps": 5, "max_steps": 771, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 30, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1817397032906586e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }