{
  "best_global_step": 200,
  "best_metric": 0.008479318581521511,
  "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/medgemma_finetuned/checkpoint-200",
  "epoch": 0.7782101167315175,
  "eval_steps": 100,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019455252918287938,
      "grad_norm": 3.5695066452026367,
      "learning_rate": 4.137931034482758e-06,
      "loss": 0.8424,
      "step": 5
    },
    {
      "epoch": 0.038910505836575876,
      "grad_norm": 1.942628026008606,
      "learning_rate": 9.310344827586207e-06,
      "loss": 0.7525,
      "step": 10
    },
    {
      "epoch": 0.058365758754863814,
      "grad_norm": 1.2901023626327515,
      "learning_rate": 1.4482758620689657e-05,
      "loss": 0.5573,
      "step": 15
    },
    {
      "epoch": 0.07782101167315175,
      "grad_norm": 1.1915671825408936,
      "learning_rate": 1.9655172413793102e-05,
      "loss": 0.3637,
      "step": 20
    },
    {
      "epoch": 0.09727626459143969,
      "grad_norm": 1.0968961715698242,
      "learning_rate": 2.4827586206896553e-05,
      "loss": 0.1735,
      "step": 25
    },
    {
      "epoch": 0.11673151750972763,
      "grad_norm": 0.4196433126926422,
      "learning_rate": 3e-05,
      "loss": 0.0496,
      "step": 30
    },
    {
      "epoch": 0.13618677042801555,
      "grad_norm": 0.1870017945766449,
      "learning_rate": 3.517241379310345e-05,
      "loss": 0.0232,
      "step": 35
    },
    {
      "epoch": 0.1556420233463035,
      "grad_norm": 0.23363570868968964,
      "learning_rate": 4.03448275862069e-05,
      "loss": 0.0168,
      "step": 40
    },
    {
      "epoch": 0.17509727626459143,
      "grad_norm": 0.14438340067863464,
      "learning_rate": 4.551724137931034e-05,
      "loss": 0.0151,
      "step": 45
    },
    {
      "epoch": 0.19455252918287938,
      "grad_norm": 0.10051655769348145,
      "learning_rate": 5.0689655172413794e-05,
      "loss": 0.0107,
      "step": 50
    },
    {
      "epoch": 0.2140077821011673,
      "grad_norm": 0.09743820875883102,
      "learning_rate": 5.586206896551724e-05,
      "loss": 0.008,
      "step": 55
    },
    {
      "epoch": 0.23346303501945526,
      "grad_norm": 0.06392025947570801,
      "learning_rate": 6.103448275862069e-05,
      "loss": 0.0085,
      "step": 60
    },
    {
      "epoch": 0.2529182879377432,
      "grad_norm": 0.04592013731598854,
      "learning_rate": 6.620689655172413e-05,
      "loss": 0.0075,
      "step": 65
    },
    {
      "epoch": 0.2723735408560311,
      "grad_norm": 0.055331017822027206,
      "learning_rate": 7.137931034482759e-05,
      "loss": 0.0077,
      "step": 70
    },
    {
      "epoch": 0.2918287937743191,
      "grad_norm": 0.048375148326158524,
      "learning_rate": 7.655172413793105e-05,
      "loss": 0.0072,
      "step": 75
    },
    {
      "epoch": 0.311284046692607,
      "grad_norm": 0.060988396406173706,
      "learning_rate": 8.172413793103448e-05,
      "loss": 0.0087,
      "step": 80
    },
    {
      "epoch": 0.33073929961089493,
      "grad_norm": 0.04453667998313904,
      "learning_rate": 8.689655172413794e-05,
      "loss": 0.008,
      "step": 85
    },
    {
      "epoch": 0.35019455252918286,
      "grad_norm": 0.06883223354816437,
      "learning_rate": 9.206896551724138e-05,
      "loss": 0.0083,
      "step": 90
    },
    {
      "epoch": 0.36964980544747084,
      "grad_norm": 0.08616536855697632,
      "learning_rate": 9.724137931034482e-05,
      "loss": 0.0084,
      "step": 95
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 0.03529098257422447,
      "learning_rate": 0.00010241379310344828,
      "loss": 0.0076,
      "step": 100
    },
    {
      "epoch": 0.38910505836575876,
      "eval_loss": 0.009547106921672821,
      "eval_runtime": 157.0062,
      "eval_samples_per_second": 3.248,
      "eval_steps_per_second": 0.815,
      "step": 100
    },
    {
      "epoch": 0.4085603112840467,
      "grad_norm": 0.03567972779273987,
      "learning_rate": 0.00010758620689655173,
      "loss": 0.0084,
      "step": 105
    },
    {
      "epoch": 0.4280155642023346,
      "grad_norm": 0.06252816319465637,
      "learning_rate": 0.00011275862068965518,
      "loss": 0.008,
      "step": 110
    },
    {
      "epoch": 0.4474708171206226,
      "grad_norm": 0.044703833758831024,
      "learning_rate": 0.00011793103448275861,
      "loss": 0.0083,
      "step": 115
    },
    {
      "epoch": 0.4669260700389105,
      "grad_norm": 0.03152047470211983,
      "learning_rate": 0.00011999378882999482,
      "loss": 0.0074,
      "step": 120
    },
    {
      "epoch": 0.48638132295719844,
      "grad_norm": 0.03210924193263054,
      "learning_rate": 0.00011995583633681744,
      "loss": 0.0078,
      "step": 125
    },
    {
      "epoch": 0.5058365758754864,
      "grad_norm": 0.04019011929631233,
      "learning_rate": 0.00011988340380013058,
      "loss": 0.0074,
      "step": 130
    },
    {
      "epoch": 0.5252918287937743,
      "grad_norm": 0.04210692271590233,
      "learning_rate": 0.00011977653287521201,
      "loss": 0.0071,
      "step": 135
    },
    {
      "epoch": 0.5447470817120622,
      "grad_norm": 0.03734419122338295,
      "learning_rate": 0.00011963528502253607,
      "loss": 0.0075,
      "step": 140
    },
    {
      "epoch": 0.5642023346303502,
      "grad_norm": 0.02421458251774311,
      "learning_rate": 0.00011945974147242832,
      "loss": 0.0075,
      "step": 145
    },
    {
      "epoch": 0.5836575875486382,
      "grad_norm": 0.035657692700624466,
      "learning_rate": 0.0001192500031783508,
      "loss": 0.008,
      "step": 150
    },
    {
      "epoch": 0.603112840466926,
      "grad_norm": 0.0272879209369421,
      "learning_rate": 0.00011900619075884453,
      "loss": 0.0077,
      "step": 155
    },
    {
      "epoch": 0.622568093385214,
      "grad_norm": 0.03400944918394089,
      "learning_rate": 0.00011872844442816295,
      "loss": 0.0073,
      "step": 160
    },
    {
      "epoch": 0.642023346303502,
      "grad_norm": 0.03923163563013077,
      "learning_rate": 0.00011841692391563607,
      "loss": 0.0076,
      "step": 165
    },
    {
      "epoch": 0.6614785992217899,
      "grad_norm": 0.03444487228989601,
      "learning_rate": 0.00011807180837381154,
      "loss": 0.0077,
      "step": 170
    },
    {
      "epoch": 0.6809338521400778,
      "grad_norm": 0.03119409829378128,
      "learning_rate": 0.00011769329627542567,
      "loss": 0.008,
      "step": 175
    },
    {
      "epoch": 0.7003891050583657,
      "grad_norm": 0.0293661467730999,
      "learning_rate": 0.00011728160529926373,
      "loss": 0.008,
      "step": 180
    },
    {
      "epoch": 0.7198443579766537,
      "grad_norm": 0.03529886156320572,
      "learning_rate": 0.00011683697220497477,
      "loss": 0.0075,
      "step": 185
    },
    {
      "epoch": 0.7392996108949417,
      "grad_norm": 0.0280179213732481,
      "learning_rate": 0.00011635965269691342,
      "loss": 0.0071,
      "step": 190
    },
    {
      "epoch": 0.7587548638132295,
      "grad_norm": 0.047665953636169434,
      "learning_rate": 0.00011584992127708669,
      "loss": 0.0075,
      "step": 195
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 0.019554605707526207,
      "learning_rate": 0.00011530807108729038,
      "loss": 0.0073,
      "step": 200
    },
    {
      "epoch": 0.7782101167315175,
      "eval_loss": 0.008479318581521511,
      "eval_runtime": 133.9329,
      "eval_samples_per_second": 3.808,
      "eval_steps_per_second": 0.956,
      "step": 200
    }
  ],
  "logging_steps": 5,
  "max_steps": 771,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 30,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1817397032906586e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}