{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999973032011003,
  "eval_steps": 500,
  "global_step": 18540,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.026967988997060488,
      "grad_norm": 1.21619713306427,
      "learning_rate": 4.9911040721359096e-05,
      "loss": 0.6704,
      "step": 500
    },
    {
      "epoch": 0.053935977994120976,
      "grad_norm": 1.506378412246704,
      "learning_rate": 4.96433714360016e-05,
      "loss": 0.5236,
      "step": 1000
    },
    {
      "epoch": 0.08090396699118146,
      "grad_norm": 1.5647770166397095,
      "learning_rate": 4.919891096381639e-05,
      "loss": 0.5145,
      "step": 1500
    },
    {
      "epoch": 0.10787195598824195,
      "grad_norm": 0.6760592460632324,
      "learning_rate": 4.858084785774071e-05,
      "loss": 0.5134,
      "step": 2000
    },
    {
      "epoch": 0.13483994498530244,
      "grad_norm": 0.8161666989326477,
      "learning_rate": 4.779361609347228e-05,
      "loss": 0.5069,
      "step": 2500
    },
    {
      "epoch": 0.16180793398236293,
      "grad_norm": 0.7477109432220459,
      "learning_rate": 4.684492361710262e-05,
      "loss": 0.4985,
      "step": 3000
    },
    {
      "epoch": 0.18877592297942342,
      "grad_norm": 0.9579030275344849,
      "learning_rate": 4.5737776239310215e-05,
      "loss": 0.5048,
      "step": 3500
    },
    {
      "epoch": 0.2157439119764839,
      "grad_norm": 1.604243278503418,
      "learning_rate": 4.4481856357599805e-05,
      "loss": 0.4911,
      "step": 4000
    },
    {
      "epoch": 0.2427119009735444,
      "grad_norm": 0.525109052658081,
      "learning_rate": 4.3086173922863254e-05,
      "loss": 0.4954,
      "step": 4500
    },
    {
      "epoch": 0.2696798899706049,
      "grad_norm": 0.776738703250885,
      "learning_rate": 4.1560741540506945e-05,
      "loss": 0.491,
      "step": 5000
    },
    {
      "epoch": 0.29664787896766537,
      "grad_norm": 0.5928918123245239,
      "learning_rate": 3.9916502640166816e-05,
      "loss": 0.4927,
      "step": 5500
    },
    {
      "epoch": 0.32361586796472586,
      "grad_norm": 0.7824705243110657,
      "learning_rate": 3.816525296770396e-05,
      "loss": 0.4915,
      "step": 6000
    },
    {
      "epoch": 0.35058385696178634,
      "grad_norm": 0.6703963875770569,
      "learning_rate": 3.631955596269604e-05,
      "loss": 0.493,
      "step": 6500
    },
    {
      "epoch": 0.37755184595884683,
      "grad_norm": 0.8029218912124634,
      "learning_rate": 3.439265262850525e-05,
      "loss": 0.4891,
      "step": 7000
    },
    {
      "epoch": 0.4045198349559073,
      "grad_norm": 0.7873429656028748,
      "learning_rate": 3.2402412922624755e-05,
      "loss": 0.4893,
      "step": 7500
    },
    {
      "epoch": 0.4314878239529678,
      "grad_norm": 0.8047146201133728,
      "learning_rate": 3.035514266481141e-05,
      "loss": 0.4891,
      "step": 8000
    },
    {
      "epoch": 0.4584558129500283,
      "grad_norm": 0.7394176125526428,
      "learning_rate": 2.8269454691719026e-05,
      "loss": 0.4864,
      "step": 8500
    },
    {
      "epoch": 0.4854238019470888,
      "grad_norm": 0.6094326972961426,
      "learning_rate": 2.6160311698410382e-05,
      "loss": 0.4865,
      "step": 9000
    },
    {
      "epoch": 0.5123917909441493,
      "grad_norm": 1.3134502172470093,
      "learning_rate": 2.4042844645920752e-05,
      "loss": 0.4861,
      "step": 9500
    },
    {
      "epoch": 0.5393597799412098,
      "grad_norm": 1.6002802848815918,
      "learning_rate": 2.1932244211964456e-05,
      "loss": 0.4822,
      "step": 10000
    },
    {
      "epoch": 0.5663277689382703,
      "grad_norm": 0.8714670538902283,
      "learning_rate": 1.984365181323471e-05,
      "loss": 0.4878,
      "step": 10500
    },
    {
      "epoch": 0.5932957579353307,
      "grad_norm": 0.9478123188018799,
      "learning_rate": 1.779610742769174e-05,
      "loss": 0.4827,
      "step": 11000
    },
    {
      "epoch": 0.6202637469323913,
      "grad_norm": 0.9522613286972046,
      "learning_rate": 1.5800037269566696e-05,
      "loss": 0.4822,
      "step": 11500
    },
    {
      "epoch": 0.6472317359294517,
      "grad_norm": 0.8568246364593506,
      "learning_rate": 1.386591085629102e-05,
      "loss": 0.4808,
      "step": 12000
    },
    {
      "epoch": 0.6741997249265123,
      "grad_norm": 0.8014527559280396,
      "learning_rate": 1.201166023594709e-05,
      "loss": 0.4841,
      "step": 12500
    },
    {
      "epoch": 0.7011677139235727,
      "grad_norm": 0.7167889475822449,
      "learning_rate": 1.0250587775408596e-05,
      "loss": 0.4817,
      "step": 13000
    },
    {
      "epoch": 0.7281357029206332,
      "grad_norm": 0.9786660671234131,
      "learning_rate": 8.595327382791429e-06,
      "loss": 0.4762,
      "step": 13500
    },
    {
      "epoch": 0.7551036919176937,
      "grad_norm": 0.7599090337753296,
      "learning_rate": 7.05775387198132e-06,
      "loss": 0.4837,
      "step": 14000
    },
    {
      "epoch": 0.7820716809147542,
      "grad_norm": 0.6934608221054077,
      "learning_rate": 5.648897772892467e-06,
      "loss": 0.4765,
      "step": 14500
    },
    {
      "epoch": 0.8090396699118146,
      "grad_norm": 1.7073050737380981,
      "learning_rate": 4.378866198606929e-06,
      "loss": 0.4769,
      "step": 15000
    },
    {
      "epoch": 0.8360076589088752,
      "grad_norm": 1.0103133916854858,
      "learning_rate": 3.256770337093046e-06,
      "loss": 0.4779,
      "step": 15500
    },
    {
      "epoch": 0.8629756479059356,
      "grad_norm": 0.8780825138092041,
      "learning_rate": 2.2906600876759358e-06,
      "loss": 0.4783,
      "step": 16000
    },
    {
      "epoch": 0.8899436369029962,
      "grad_norm": 0.7215288877487183,
      "learning_rate": 1.4874663111773158e-06,
      "loss": 0.4784,
      "step": 16500
    },
    {
      "epoch": 0.9169116259000566,
      "grad_norm": 0.8022609353065491,
      "learning_rate": 8.529511080211772e-07,
      "loss": 0.4804,
      "step": 17000
    },
    {
      "epoch": 0.9438796148971171,
      "grad_norm": 0.8722068071365356,
      "learning_rate": 3.9166648100946724e-07,
      "loss": 0.4794,
      "step": 17500
    },
    {
      "epoch": 0.9708476038941776,
      "grad_norm": 0.787874162197113,
      "learning_rate": 1.0692167932047637e-07,
      "loss": 0.4803,
      "step": 18000
    },
    {
      "epoch": 0.9978155928912381,
      "grad_norm": 0.9449836611747742,
      "learning_rate": 7.928366524107e-10,
      "loss": 0.4768,
      "step": 18500
    },
    {
      "epoch": 0.999973032011003,
      "step": 18540,
      "total_flos": 3.0107423632254566e+17,
      "train_loss": 0.49351318332626853,
      "train_runtime": 10689.6882,
      "train_samples_per_second": 6.938,
      "train_steps_per_second": 1.734
    }
  ],
  "logging_steps": 500,
  "max_steps": 18540,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.0107423632254566e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}