Initial private upload
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +6 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/DPO_configs.json +181 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/README.md +70 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/README.md +210 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/adapter_config.json +42 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/adapter_model.safetensors +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/added_tokens.json +24 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/chat_template.jinja +54 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/merges.txt +0 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/optimizer.pt +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/rng_state.pth +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/scheduler.pt +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/special_tokens_map.json +25 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/tokenizer.json +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/tokenizer_config.json +208 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/trainer_state.json +1024 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/training_args.bin +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/vocab.json +0 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/added_tokens.json +24 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/chat_template.jinja +54 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/config.json +66 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/generation_config.json +14 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/merges.txt +0 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00001-of-00003.safetensors +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00002-of-00003.safetensors +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00003-of-00003.safetensors +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model.safetensors.index.json +442 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/special_tokens_map.json +31 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/tokenizer.json +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/tokenizer_config.json +207 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/vocab.json +0 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/model_args.json +4 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/train_args.json +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/DPO_configs.json +181 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/README.md +70 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/README.md +210 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/adapter_config.json +42 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/adapter_model.safetensors +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/added_tokens.json +24 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/chat_template.jinja +54 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/merges.txt +0 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/optimizer.pt +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/rng_state.pth +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/scheduler.pt +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/special_tokens_map.json +25 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/tokenizer.json +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/tokenizer_config.json +208 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/trainer_state.json +1024 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/training_args.bin +3 -0
- Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/merged_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/research_question_agent/checkpoint-669/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/research_question_agent/merged_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/DPO_configs.json
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"output_dir": "Trained_Models/Jackson0018/Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent",
|
| 3 |
+
"overwrite_output_dir": null,
|
| 4 |
+
"do_train": false,
|
| 5 |
+
"do_eval": false,
|
| 6 |
+
"do_predict": false,
|
| 7 |
+
"eval_strategy": "no",
|
| 8 |
+
"prediction_loss_only": false,
|
| 9 |
+
"per_device_train_batch_size": 1,
|
| 10 |
+
"per_device_eval_batch_size": 4,
|
| 11 |
+
"per_gpu_train_batch_size": null,
|
| 12 |
+
"per_gpu_eval_batch_size": null,
|
| 13 |
+
"gradient_accumulation_steps": 32,
|
| 14 |
+
"eval_accumulation_steps": 2,
|
| 15 |
+
"eval_delay": 0,
|
| 16 |
+
"torch_empty_cache_steps": 250,
|
| 17 |
+
"learning_rate": 1e-05,
|
| 18 |
+
"weight_decay": 0.01,
|
| 19 |
+
"adam_beta1": 0.9,
|
| 20 |
+
"adam_beta2": 0.999,
|
| 21 |
+
"adam_epsilon": 1e-08,
|
| 22 |
+
"max_grad_norm": 0.6,
|
| 23 |
+
"num_train_epochs": 3.0,
|
| 24 |
+
"max_steps": -1,
|
| 25 |
+
"lr_scheduler_type": "linear",
|
| 26 |
+
"lr_scheduler_kwargs": {},
|
| 27 |
+
"warmup_ratio": 0.1,
|
| 28 |
+
"warmup_steps": 0,
|
| 29 |
+
"log_level": "passive",
|
| 30 |
+
"log_level_replica": "warning",
|
| 31 |
+
"log_on_each_node": true,
|
| 32 |
+
"logging_dir": "Trained_Models/Jackson0018/Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/runs/Sep29_07-50-17_ai16",
|
| 33 |
+
"logging_strategy": "steps",
|
| 34 |
+
"logging_first_step": false,
|
| 35 |
+
"logging_steps": 10,
|
| 36 |
+
"logging_nan_inf_filter": false,
|
| 37 |
+
"save_strategy": "steps",
|
| 38 |
+
"save_steps": 10000,
|
| 39 |
+
"save_total_limit": null,
|
| 40 |
+
"save_safetensors": true,
|
| 41 |
+
"save_on_each_node": false,
|
| 42 |
+
"save_only_model": false,
|
| 43 |
+
"restore_callback_states_from_checkpoint": false,
|
| 44 |
+
"no_cuda": false,
|
| 45 |
+
"use_cpu": false,
|
| 46 |
+
"use_mps_device": false,
|
| 47 |
+
"seed": 3407,
|
| 48 |
+
"data_seed": 3407,
|
| 49 |
+
"jit_mode_eval": false,
|
| 50 |
+
"use_ipex": false,
|
| 51 |
+
"bf16": true,
|
| 52 |
+
"fp16": false,
|
| 53 |
+
"fp16_opt_level": "O1",
|
| 54 |
+
"half_precision_backend": "auto",
|
| 55 |
+
"bf16_full_eval": false,
|
| 56 |
+
"fp16_full_eval": false,
|
| 57 |
+
"tf32": null,
|
| 58 |
+
"local_rank": 0,
|
| 59 |
+
"ddp_backend": null,
|
| 60 |
+
"tpu_num_cores": null,
|
| 61 |
+
"tpu_metrics_debug": false,
|
| 62 |
+
"debug": [],
|
| 63 |
+
"dataloader_drop_last": false,
|
| 64 |
+
"eval_steps": null,
|
| 65 |
+
"dataloader_num_workers": 0,
|
| 66 |
+
"dataloader_prefetch_factor": null,
|
| 67 |
+
"past_index": -1,
|
| 68 |
+
"run_name": null,
|
| 69 |
+
"disable_tqdm": false,
|
| 70 |
+
"remove_unused_columns": true,
|
| 71 |
+
"label_names": null,
|
| 72 |
+
"load_best_model_at_end": false,
|
| 73 |
+
"metric_for_best_model": null,
|
| 74 |
+
"greater_is_better": null,
|
| 75 |
+
"ignore_data_skip": false,
|
| 76 |
+
"fsdp": [],
|
| 77 |
+
"fsdp_min_num_params": 0,
|
| 78 |
+
"fsdp_config": {
|
| 79 |
+
"min_num_params": 0,
|
| 80 |
+
"xla": false,
|
| 81 |
+
"xla_fsdp_v2": false,
|
| 82 |
+
"xla_fsdp_grad_ckpt": false
|
| 83 |
+
},
|
| 84 |
+
"fsdp_transformer_layer_cls_to_wrap": null,
|
| 85 |
+
"accelerator_config": {
|
| 86 |
+
"split_batches": false,
|
| 87 |
+
"dispatch_batches": null,
|
| 88 |
+
"even_batches": true,
|
| 89 |
+
"use_seedable_sampler": true,
|
| 90 |
+
"non_blocking": false,
|
| 91 |
+
"gradient_accumulation_kwargs": null
|
| 92 |
+
},
|
| 93 |
+
"deepspeed": null,
|
| 94 |
+
"label_smoothing_factor": 0.0,
|
| 95 |
+
"optim": "adamw_8bit",
|
| 96 |
+
"optim_args": null,
|
| 97 |
+
"adafactor": false,
|
| 98 |
+
"group_by_length": false,
|
| 99 |
+
"length_column_name": "length",
|
| 100 |
+
"report_to": [],
|
| 101 |
+
"ddp_find_unused_parameters": null,
|
| 102 |
+
"ddp_bucket_cap_mb": null,
|
| 103 |
+
"ddp_broadcast_buffers": null,
|
| 104 |
+
"dataloader_pin_memory": true,
|
| 105 |
+
"dataloader_persistent_workers": false,
|
| 106 |
+
"skip_memory_metrics": true,
|
| 107 |
+
"use_legacy_prediction_loop": false,
|
| 108 |
+
"push_to_hub": false,
|
| 109 |
+
"resume_from_checkpoint": null,
|
| 110 |
+
"hub_model_id": null,
|
| 111 |
+
"hub_strategy": "every_save",
|
| 112 |
+
"hub_token": "<HUB_TOKEN>",
|
| 113 |
+
"hub_private_repo": null,
|
| 114 |
+
"hub_always_push": false,
|
| 115 |
+
"hub_revision": null,
|
| 116 |
+
"gradient_checkpointing": false,
|
| 117 |
+
"gradient_checkpointing_kwargs": null,
|
| 118 |
+
"include_inputs_for_metrics": false,
|
| 119 |
+
"include_for_metrics": [],
|
| 120 |
+
"eval_do_concat_batches": true,
|
| 121 |
+
"fp16_backend": "auto",
|
| 122 |
+
"push_to_hub_model_id": null,
|
| 123 |
+
"push_to_hub_organization": null,
|
| 124 |
+
"push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
|
| 125 |
+
"mp_parameters": "",
|
| 126 |
+
"auto_find_batch_size": true,
|
| 127 |
+
"full_determinism": false,
|
| 128 |
+
"torchdynamo": null,
|
| 129 |
+
"ray_scope": "last",
|
| 130 |
+
"ddp_timeout": 1800,
|
| 131 |
+
"torch_compile": false,
|
| 132 |
+
"torch_compile_backend": null,
|
| 133 |
+
"torch_compile_mode": null,
|
| 134 |
+
"include_tokens_per_second": false,
|
| 135 |
+
"include_num_input_tokens_seen": false,
|
| 136 |
+
"neftune_noise_alpha": null,
|
| 137 |
+
"optim_target_modules": null,
|
| 138 |
+
"batch_eval_metrics": false,
|
| 139 |
+
"eval_on_start": false,
|
| 140 |
+
"use_liger_kernel": false,
|
| 141 |
+
"liger_kernel_config": null,
|
| 142 |
+
"eval_use_gather_object": false,
|
| 143 |
+
"average_tokens_across_devices": false,
|
| 144 |
+
"model_init_kwargs": null,
|
| 145 |
+
"ref_model_init_kwargs": null,
|
| 146 |
+
"model_adapter_name": null,
|
| 147 |
+
"ref_adapter_name": null,
|
| 148 |
+
"force_use_ref_model": false,
|
| 149 |
+
"disable_dropout": true,
|
| 150 |
+
"use_logits_to_keep": false,
|
| 151 |
+
"dataset_num_proc": 2,
|
| 152 |
+
"padding_value": null,
|
| 153 |
+
"label_pad_token_id": -100,
|
| 154 |
+
"max_prompt_length": 2000,
|
| 155 |
+
"max_completion_length": 2000,
|
| 156 |
+
"max_length": 4000,
|
| 157 |
+
"truncation_mode": "keep_end",
|
| 158 |
+
"padding_free": false,
|
| 159 |
+
"precompute_ref_log_probs": false,
|
| 160 |
+
"precompute_ref_batch_size": null,
|
| 161 |
+
"tools": null,
|
| 162 |
+
"loss_type": "sigmoid",
|
| 163 |
+
"use_liger_loss": false,
|
| 164 |
+
"base_model_attribute_name": "model",
|
| 165 |
+
"beta": 0.1,
|
| 166 |
+
"f_divergence_type": "reverse_kl",
|
| 167 |
+
"f_alpha_divergence_coef": 1.0,
|
| 168 |
+
"reference_free": false,
|
| 169 |
+
"label_smoothing": 0.0,
|
| 170 |
+
"use_weighting": false,
|
| 171 |
+
"rpo_alpha": null,
|
| 172 |
+
"ld_alpha": null,
|
| 173 |
+
"discopop_tau": 0.05,
|
| 174 |
+
"loss_weights": null,
|
| 175 |
+
"sync_ref_model": false,
|
| 176 |
+
"ref_model_mixup_alpha": 0.6,
|
| 177 |
+
"ref_model_sync_steps": 512,
|
| 178 |
+
"generate_during_eval": false,
|
| 179 |
+
"vllm_sampling_params": null,
|
| 180 |
+
"unsloth_num_chunks": -1
|
| 181 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 3 |
+
library_name: transformers
|
| 4 |
+
model_name: experiment_agent
|
| 5 |
+
tags:
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- trl
|
| 8 |
+
- unsloth
|
| 9 |
+
- dpo
|
| 10 |
+
licence: license
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Model Card for experiment_agent
|
| 14 |
+
|
| 15 |
+
This model is a fine-tuned version of [unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit](https://huggingface.co/unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit).
|
| 16 |
+
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 17 |
+
|
| 18 |
+
## Quick start
|
| 19 |
+
|
| 20 |
+
```python
|
| 21 |
+
from transformers import pipeline
|
| 22 |
+
|
| 23 |
+
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
| 24 |
+
generator = pipeline("text-generation", model="None", device="cuda")
|
| 25 |
+
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
| 26 |
+
print(output["generated_text"])
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Training procedure
|
| 30 |
+
|
| 31 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jackson0530/ScientificPaperRetrieval_Train-Train_DPO_unsloth/runs/ye26t4kv)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
|
| 35 |
+
|
| 36 |
+
### Framework versions
|
| 37 |
+
|
| 38 |
+
- TRL: 0.21.0
|
| 39 |
+
- Transformers: 4.55.0
|
| 40 |
+
- Pytorch: 2.7.1
|
| 41 |
+
- Datasets: 3.6.0
|
| 42 |
+
- Tokenizers: 0.21.4
|
| 43 |
+
|
| 44 |
+
## Citations
|
| 45 |
+
|
| 46 |
+
Cite DPO as:
|
| 47 |
+
|
| 48 |
+
```bibtex
|
| 49 |
+
@inproceedings{rafailov2023direct,
|
| 50 |
+
title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
|
| 51 |
+
author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
|
| 52 |
+
year = 2023,
|
| 53 |
+
booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
|
| 54 |
+
url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
|
| 55 |
+
editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
|
| 56 |
+
}
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
Cite TRL as:
|
| 60 |
+
|
| 61 |
+
```bibtex
|
| 62 |
+
@misc{vonwerra2022trl,
|
| 63 |
+
title = {{TRL: Transformer Reinforcement Learning}},
|
| 64 |
+
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
| 65 |
+
year = 2020,
|
| 66 |
+
journal = {GitHub repository},
|
| 67 |
+
publisher = {GitHub},
|
| 68 |
+
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 69 |
+
}
|
| 70 |
+
```
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/README.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 3 |
+
library_name: peft
|
| 4 |
+
pipeline_tag: text-generation
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 7 |
+
- dpo
|
| 8 |
+
- lora
|
| 9 |
+
- transformers
|
| 10 |
+
- trl
|
| 11 |
+
- unsloth
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Model Card for experiment_agent (checkpoint-669 LoRA adapter)
|
| 15 |
+
|
| 16 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
## Model Details
|
| 21 |
+
|
| 22 |
+
### Model Description
|
| 23 |
+
|
| 24 |
+
<!-- Provide a longer summary of what this model is. -->
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
- **Developed by:** [More Information Needed]
|
| 29 |
+
- **Funded by [optional]:** [More Information Needed]
|
| 30 |
+
- **Shared by [optional]:** [More Information Needed]
|
| 31 |
+
- **Model type:** [More Information Needed]
|
| 32 |
+
- **Language(s) (NLP):** [More Information Needed]
|
| 33 |
+
- **License:** [More Information Needed]
|
| 34 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
| 35 |
+
|
| 36 |
+
### Model Sources [optional]
|
| 37 |
+
|
| 38 |
+
<!-- Provide the basic links for the model. -->
|
| 39 |
+
|
| 40 |
+
- **Repository:** [More Information Needed]
|
| 41 |
+
- **Paper [optional]:** [More Information Needed]
|
| 42 |
+
- **Demo [optional]:** [More Information Needed]
|
| 43 |
+
|
| 44 |
+
## Uses
|
| 45 |
+
|
| 46 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
| 47 |
+
|
| 48 |
+
### Direct Use
|
| 49 |
+
|
| 50 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
| 51 |
+
|
| 52 |
+
[More Information Needed]
|
| 53 |
+
|
| 54 |
+
### Downstream Use [optional]
|
| 55 |
+
|
| 56 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
| 57 |
+
|
| 58 |
+
[More Information Needed]
|
| 59 |
+
|
| 60 |
+
### Out-of-Scope Use
|
| 61 |
+
|
| 62 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
| 63 |
+
|
| 64 |
+
[More Information Needed]
|
| 65 |
+
|
| 66 |
+
## Bias, Risks, and Limitations
|
| 67 |
+
|
| 68 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
| 69 |
+
|
| 70 |
+
[More Information Needed]
|
| 71 |
+
|
| 72 |
+
### Recommendations
|
| 73 |
+
|
| 74 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
| 75 |
+
|
| 76 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
| 77 |
+
|
| 78 |
+
## How to Get Started with the Model
|
| 79 |
+
|
| 80 |
+
Use the code below to get started with the model.
|
| 81 |
+
|
| 82 |
+
[More Information Needed]
|
| 83 |
+
|
| 84 |
+
## Training Details
|
| 85 |
+
|
| 86 |
+
### Training Data
|
| 87 |
+
|
| 88 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
| 89 |
+
|
| 90 |
+
[More Information Needed]
|
| 91 |
+
|
| 92 |
+
### Training Procedure
|
| 93 |
+
|
| 94 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
| 95 |
+
|
| 96 |
+
#### Preprocessing [optional]
|
| 97 |
+
|
| 98 |
+
[More Information Needed]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
#### Training Hyperparameters
|
| 102 |
+
|
| 103 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
| 104 |
+
|
| 105 |
+
#### Speeds, Sizes, Times [optional]
|
| 106 |
+
|
| 107 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
| 108 |
+
|
| 109 |
+
[More Information Needed]
|
| 110 |
+
|
| 111 |
+
## Evaluation
|
| 112 |
+
|
| 113 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
| 114 |
+
|
| 115 |
+
### Testing Data, Factors & Metrics
|
| 116 |
+
|
| 117 |
+
#### Testing Data
|
| 118 |
+
|
| 119 |
+
<!-- This should link to a Dataset Card if possible. -->
|
| 120 |
+
|
| 121 |
+
[More Information Needed]
|
| 122 |
+
|
| 123 |
+
#### Factors
|
| 124 |
+
|
| 125 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
| 126 |
+
|
| 127 |
+
[More Information Needed]
|
| 128 |
+
|
| 129 |
+
#### Metrics
|
| 130 |
+
|
| 131 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
| 132 |
+
|
| 133 |
+
[More Information Needed]
|
| 134 |
+
|
| 135 |
+
### Results
|
| 136 |
+
|
| 137 |
+
[More Information Needed]
|
| 138 |
+
|
| 139 |
+
#### Summary
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
## Model Examination [optional]
|
| 144 |
+
|
| 145 |
+
<!-- Relevant interpretability work for the model goes here -->
|
| 146 |
+
|
| 147 |
+
[More Information Needed]
|
| 148 |
+
|
| 149 |
+
## Environmental Impact
|
| 150 |
+
|
| 151 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
| 152 |
+
|
| 153 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
| 154 |
+
|
| 155 |
+
- **Hardware Type:** [More Information Needed]
|
| 156 |
+
- **Hours used:** [More Information Needed]
|
| 157 |
+
- **Cloud Provider:** [More Information Needed]
|
| 158 |
+
- **Compute Region:** [More Information Needed]
|
| 159 |
+
- **Carbon Emitted:** [More Information Needed]
|
| 160 |
+
|
| 161 |
+
## Technical Specifications [optional]
|
| 162 |
+
|
| 163 |
+
### Model Architecture and Objective
|
| 164 |
+
|
| 165 |
+
[More Information Needed]
|
| 166 |
+
|
| 167 |
+
### Compute Infrastructure
|
| 168 |
+
|
| 169 |
+
[More Information Needed]
|
| 170 |
+
|
| 171 |
+
#### Hardware
|
| 172 |
+
|
| 173 |
+
[More Information Needed]
|
| 174 |
+
|
| 175 |
+
#### Software
|
| 176 |
+
|
| 177 |
+
[More Information Needed]
|
| 178 |
+
|
| 179 |
+
## Citation [optional]
|
| 180 |
+
|
| 181 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
| 182 |
+
|
| 183 |
+
**BibTeX:**
|
| 184 |
+
|
| 185 |
+
[More Information Needed]
|
| 186 |
+
|
| 187 |
+
**APA:**
|
| 188 |
+
|
| 189 |
+
[More Information Needed]
|
| 190 |
+
|
| 191 |
+
## Glossary [optional]
|
| 192 |
+
|
| 193 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
| 194 |
+
|
| 195 |
+
[More Information Needed]
|
| 196 |
+
|
| 197 |
+
## More Information [optional]
|
| 198 |
+
|
| 199 |
+
[More Information Needed]
|
| 200 |
+
|
| 201 |
+
## Model Card Authors [optional]
|
| 202 |
+
|
| 203 |
+
[More Information Needed]
|
| 204 |
+
|
| 205 |
+
## Model Card Contact
|
| 206 |
+
|
| 207 |
+
[More Information Needed]
|
| 208 |
+
### Framework versions
|
| 209 |
+
|
| 210 |
+
- PEFT 0.17.0
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 64,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 64,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"v_proj",
|
| 29 |
+
"up_proj",
|
| 30 |
+
"gate_proj",
|
| 31 |
+
"q_proj",
|
| 32 |
+
"o_proj",
|
| 33 |
+
"k_proj",
|
| 34 |
+
"down_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "CAUSAL_LM",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d66bf1b56d3472a8100f4986a2eb02d1c146d1d4404d0bb16466e935f5b1e91
|
| 3 |
+
size 479005064
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/chat_template.jinja
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 4 |
+
{{- messages[0]['content'] }}
|
| 5 |
+
{%- else %}
|
| 6 |
+
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
| 7 |
+
{%- endif %}
|
| 8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 9 |
+
{%- for tool in tools %}
|
| 10 |
+
{{- "\n" }}
|
| 11 |
+
{{- tool | tojson }}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 14 |
+
{%- else %}
|
| 15 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
| 17 |
+
{%- else %}
|
| 18 |
+
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
| 19 |
+
{%- endif %}
|
| 20 |
+
{%- endif %}
|
| 21 |
+
{%- for message in messages %}
|
| 22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
| 23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 24 |
+
{%- elif message.role == "assistant" %}
|
| 25 |
+
{{- '<|im_start|>' + message.role }}
|
| 26 |
+
{%- if message.content %}
|
| 27 |
+
{{- '\n' + message.content }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{%- for tool_call in message.tool_calls %}
|
| 30 |
+
{%- if tool_call.function is defined %}
|
| 31 |
+
{%- set tool_call = tool_call.function %}
|
| 32 |
+
{%- endif %}
|
| 33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
| 34 |
+
{{- tool_call.name }}
|
| 35 |
+
{{- '", "arguments": ' }}
|
| 36 |
+
{{- tool_call.arguments | tojson }}
|
| 37 |
+
{{- '}\n</tool_call>' }}
|
| 38 |
+
{%- endfor %}
|
| 39 |
+
{{- '<|im_end|>\n' }}
|
| 40 |
+
{%- elif message.role == "tool" %}
|
| 41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
| 42 |
+
{{- '<|im_start|>user' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{{- '\n<tool_response>\n' }}
|
| 45 |
+
{{- message.content }}
|
| 46 |
+
{{- '\n</tool_response>' }}
|
| 47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 48 |
+
{{- '<|im_end|>\n' }}
|
| 49 |
+
{%- endif %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{%- if add_generation_prompt %}
|
| 53 |
+
{{- '<|im_start|>assistant\n' }}
|
| 54 |
+
{%- endif %}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9566052a00f6f9197425ce3ab8234dc85bf7f8c5da8b84b6ba217b67ed425dc0
|
| 3 |
+
size 243803397
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
|
| 3 |
+
size 14645
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d46728529080e683318d0592a9290e2f3e1dd31fa31190ac892162c71aa04cef
|
| 3 |
+
size 1465
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/special_tokens_map.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": "<|im_end|>"
|
| 25 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
| 3 |
+
size 11421896
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/tokenizer_config.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 32768,
|
| 203 |
+
"pad_token": "<|im_end|>",
|
| 204 |
+
"padding_side": "right",
|
| 205 |
+
"split_special_tokens": false,
|
| 206 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 207 |
+
"unk_token": null
|
| 208 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/trainer_state.json
ADDED
|
@@ -0,0 +1,1024 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 669,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.0449438202247191,
|
| 14 |
+
"grad_norm": 8.41971206665039,
|
| 15 |
+
"learning_rate": 1.3432835820895524e-06,
|
| 16 |
+
"logits/chosen": -0.5647975206375122,
|
| 17 |
+
"logits/rejected": -0.5565350651741028,
|
| 18 |
+
"logps/chosen": -1636.8466796875,
|
| 19 |
+
"logps/rejected": -1614.4744873046875,
|
| 20 |
+
"loss": 0.7095,
|
| 21 |
+
"rewards/accuracies": 0.3343749940395355,
|
| 22 |
+
"rewards/chosen": -0.010566463693976402,
|
| 23 |
+
"rewards/margins": -0.023785116150975227,
|
| 24 |
+
"rewards/rejected": 0.013218650594353676,
|
| 25 |
+
"step": 10
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.0898876404494382,
|
| 29 |
+
"grad_norm": 8.172579765319824,
|
| 30 |
+
"learning_rate": 2.835820895522388e-06,
|
| 31 |
+
"logits/chosen": -0.543634295463562,
|
| 32 |
+
"logits/rejected": -0.5405128002166748,
|
| 33 |
+
"logps/chosen": -1649.8961181640625,
|
| 34 |
+
"logps/rejected": -1622.677978515625,
|
| 35 |
+
"loss": 0.7067,
|
| 36 |
+
"rewards/accuracies": 0.5,
|
| 37 |
+
"rewards/chosen": 0.004455108195543289,
|
| 38 |
+
"rewards/margins": -0.015273856930434704,
|
| 39 |
+
"rewards/rejected": 0.019728967919945717,
|
| 40 |
+
"step": 20
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.1348314606741573,
|
| 44 |
+
"grad_norm": 8.977705955505371,
|
| 45 |
+
"learning_rate": 4.3283582089552236e-06,
|
| 46 |
+
"logits/chosen": -0.5407521724700928,
|
| 47 |
+
"logits/rejected": -0.5417042374610901,
|
| 48 |
+
"logps/chosen": -1637.083251953125,
|
| 49 |
+
"logps/rejected": -1631.73193359375,
|
| 50 |
+
"loss": 0.6967,
|
| 51 |
+
"rewards/accuracies": 0.48750001192092896,
|
| 52 |
+
"rewards/chosen": 0.05982738733291626,
|
| 53 |
+
"rewards/margins": 0.003512363415211439,
|
| 54 |
+
"rewards/rejected": 0.05631502345204353,
|
| 55 |
+
"step": 30
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.1797752808988764,
|
| 59 |
+
"grad_norm": 12.203088760375977,
|
| 60 |
+
"learning_rate": 5.820895522388061e-06,
|
| 61 |
+
"logits/chosen": -0.5544255971908569,
|
| 62 |
+
"logits/rejected": -0.538857102394104,
|
| 63 |
+
"logps/chosen": -1644.1826171875,
|
| 64 |
+
"logps/rejected": -1592.37451171875,
|
| 65 |
+
"loss": 0.6957,
|
| 66 |
+
"rewards/accuracies": 0.49687498807907104,
|
| 67 |
+
"rewards/chosen": 0.15313825011253357,
|
| 68 |
+
"rewards/margins": 0.007030182983726263,
|
| 69 |
+
"rewards/rejected": 0.14610807597637177,
|
| 70 |
+
"step": 40
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.2247191011235955,
|
| 74 |
+
"grad_norm": 8.75937557220459,
|
| 75 |
+
"learning_rate": 7.313432835820896e-06,
|
| 76 |
+
"logits/chosen": -0.5435775518417358,
|
| 77 |
+
"logits/rejected": -0.550085186958313,
|
| 78 |
+
"logps/chosen": -1686.4521484375,
|
| 79 |
+
"logps/rejected": -1653.057861328125,
|
| 80 |
+
"loss": 0.6878,
|
| 81 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 82 |
+
"rewards/chosen": 0.3402329981327057,
|
| 83 |
+
"rewards/margins": 0.025076771154999733,
|
| 84 |
+
"rewards/rejected": 0.3151562511920929,
|
| 85 |
+
"step": 50
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.2696629213483146,
|
| 89 |
+
"grad_norm": 9.613944053649902,
|
| 90 |
+
"learning_rate": 8.805970149253732e-06,
|
| 91 |
+
"logits/chosen": -0.5430251359939575,
|
| 92 |
+
"logits/rejected": -0.5381388068199158,
|
| 93 |
+
"logps/chosen": -1612.47900390625,
|
| 94 |
+
"logps/rejected": -1611.5440673828125,
|
| 95 |
+
"loss": 0.7,
|
| 96 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 97 |
+
"rewards/chosen": 0.4334062933921814,
|
| 98 |
+
"rewards/margins": 0.001930123195052147,
|
| 99 |
+
"rewards/rejected": 0.4314761757850647,
|
| 100 |
+
"step": 60
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.3146067415730337,
|
| 104 |
+
"grad_norm": 9.039813995361328,
|
| 105 |
+
"learning_rate": 9.966777408637874e-06,
|
| 106 |
+
"logits/chosen": -0.5394436717033386,
|
| 107 |
+
"logits/rejected": -0.5394075512886047,
|
| 108 |
+
"logps/chosen": -1656.6136474609375,
|
| 109 |
+
"logps/rejected": -1667.557861328125,
|
| 110 |
+
"loss": 0.7012,
|
| 111 |
+
"rewards/accuracies": 0.4906249940395355,
|
| 112 |
+
"rewards/chosen": 0.371154248714447,
|
| 113 |
+
"rewards/margins": 0.0005123887094669044,
|
| 114 |
+
"rewards/rejected": 0.37064188718795776,
|
| 115 |
+
"step": 70
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.3595505617977528,
|
| 119 |
+
"grad_norm": 10.225497245788574,
|
| 120 |
+
"learning_rate": 9.800664451827243e-06,
|
| 121 |
+
"logits/chosen": -0.5414221882820129,
|
| 122 |
+
"logits/rejected": -0.5409548282623291,
|
| 123 |
+
"logps/chosen": -1660.9986572265625,
|
| 124 |
+
"logps/rejected": -1641.126708984375,
|
| 125 |
+
"loss": 0.6989,
|
| 126 |
+
"rewards/accuracies": 0.5,
|
| 127 |
+
"rewards/chosen": -0.008593291975557804,
|
| 128 |
+
"rewards/margins": 0.009244749322533607,
|
| 129 |
+
"rewards/rejected": -0.017838040366768837,
|
| 130 |
+
"step": 80
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.4044943820224719,
|
| 134 |
+
"grad_norm": 7.43681001663208,
|
| 135 |
+
"learning_rate": 9.634551495016612e-06,
|
| 136 |
+
"logits/chosen": -0.5525733232498169,
|
| 137 |
+
"logits/rejected": -0.5447468757629395,
|
| 138 |
+
"logps/chosen": -1664.12890625,
|
| 139 |
+
"logps/rejected": -1647.4931640625,
|
| 140 |
+
"loss": 0.6937,
|
| 141 |
+
"rewards/accuracies": 0.528124988079071,
|
| 142 |
+
"rewards/chosen": -0.1387433558702469,
|
| 143 |
+
"rewards/margins": 0.023330822587013245,
|
| 144 |
+
"rewards/rejected": -0.16207417845726013,
|
| 145 |
+
"step": 90
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.449438202247191,
|
| 149 |
+
"grad_norm": 7.95203971862793,
|
| 150 |
+
"learning_rate": 9.468438538205981e-06,
|
| 151 |
+
"logits/chosen": -0.5224291086196899,
|
| 152 |
+
"logits/rejected": -0.5349761843681335,
|
| 153 |
+
"logps/chosen": -1696.936767578125,
|
| 154 |
+
"logps/rejected": -1658.69140625,
|
| 155 |
+
"loss": 0.7033,
|
| 156 |
+
"rewards/accuracies": 0.512499988079071,
|
| 157 |
+
"rewards/chosen": 0.006925581488758326,
|
| 158 |
+
"rewards/margins": 0.005276698153465986,
|
| 159 |
+
"rewards/rejected": 0.001648884266614914,
|
| 160 |
+
"step": 100
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"epoch": 0.4943820224719101,
|
| 164 |
+
"grad_norm": 8.228531837463379,
|
| 165 |
+
"learning_rate": 9.30232558139535e-06,
|
| 166 |
+
"logits/chosen": -0.5292581915855408,
|
| 167 |
+
"logits/rejected": -0.5189449191093445,
|
| 168 |
+
"logps/chosen": -1666.102294921875,
|
| 169 |
+
"logps/rejected": -1667.996826171875,
|
| 170 |
+
"loss": 0.7191,
|
| 171 |
+
"rewards/accuracies": 0.4937500059604645,
|
| 172 |
+
"rewards/chosen": 0.2481352537870407,
|
| 173 |
+
"rewards/margins": -0.02416202798485756,
|
| 174 |
+
"rewards/rejected": 0.27229729294776917,
|
| 175 |
+
"step": 110
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"epoch": 0.5393258426966292,
|
| 179 |
+
"grad_norm": 8.233011245727539,
|
| 180 |
+
"learning_rate": 9.136212624584718e-06,
|
| 181 |
+
"logits/chosen": -0.5090035796165466,
|
| 182 |
+
"logits/rejected": -0.5002211928367615,
|
| 183 |
+
"logps/chosen": -1661.6923828125,
|
| 184 |
+
"logps/rejected": -1677.4013671875,
|
| 185 |
+
"loss": 0.7038,
|
| 186 |
+
"rewards/accuracies": 0.53125,
|
| 187 |
+
"rewards/chosen": 0.2764546573162079,
|
| 188 |
+
"rewards/margins": 0.007198885083198547,
|
| 189 |
+
"rewards/rejected": 0.26925572752952576,
|
| 190 |
+
"step": 120
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"epoch": 0.5842696629213483,
|
| 194 |
+
"grad_norm": 8.092947006225586,
|
| 195 |
+
"learning_rate": 8.970099667774087e-06,
|
| 196 |
+
"logits/chosen": -0.4953341484069824,
|
| 197 |
+
"logits/rejected": -0.49804240465164185,
|
| 198 |
+
"logps/chosen": -1656.296875,
|
| 199 |
+
"logps/rejected": -1640.451171875,
|
| 200 |
+
"loss": 0.7029,
|
| 201 |
+
"rewards/accuracies": 0.5062500238418579,
|
| 202 |
+
"rewards/chosen": 0.20165541768074036,
|
| 203 |
+
"rewards/margins": 0.015356823801994324,
|
| 204 |
+
"rewards/rejected": 0.18629857897758484,
|
| 205 |
+
"step": 130
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6292134831460674,
|
| 209 |
+
"grad_norm": 7.318077087402344,
|
| 210 |
+
"learning_rate": 8.803986710963456e-06,
|
| 211 |
+
"logits/chosen": -0.48578256368637085,
|
| 212 |
+
"logits/rejected": -0.48590534925460815,
|
| 213 |
+
"logps/chosen": -1637.2935791015625,
|
| 214 |
+
"logps/rejected": -1652.760498046875,
|
| 215 |
+
"loss": 0.7022,
|
| 216 |
+
"rewards/accuracies": 0.5,
|
| 217 |
+
"rewards/chosen": 0.20414631068706512,
|
| 218 |
+
"rewards/margins": 0.011423548683524132,
|
| 219 |
+
"rewards/rejected": 0.19272276759147644,
|
| 220 |
+
"step": 140
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.6741573033707865,
|
| 224 |
+
"grad_norm": 7.625386714935303,
|
| 225 |
+
"learning_rate": 8.637873754152825e-06,
|
| 226 |
+
"logits/chosen": -0.5298885703086853,
|
| 227 |
+
"logits/rejected": -0.5132607221603394,
|
| 228 |
+
"logps/chosen": -1658.0570068359375,
|
| 229 |
+
"logps/rejected": -1633.949951171875,
|
| 230 |
+
"loss": 0.6884,
|
| 231 |
+
"rewards/accuracies": 0.5562499761581421,
|
| 232 |
+
"rewards/chosen": 0.0400543212890625,
|
| 233 |
+
"rewards/margins": 0.04242260009050369,
|
| 234 |
+
"rewards/rejected": -0.0023682781029492617,
|
| 235 |
+
"step": 150
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"epoch": 0.7191011235955056,
|
| 239 |
+
"grad_norm": 9.02912712097168,
|
| 240 |
+
"learning_rate": 8.471760797342193e-06,
|
| 241 |
+
"logits/chosen": -0.5048017501831055,
|
| 242 |
+
"logits/rejected": -0.5085188746452332,
|
| 243 |
+
"logps/chosen": -1651.9283447265625,
|
| 244 |
+
"logps/rejected": -1614.8092041015625,
|
| 245 |
+
"loss": 0.7017,
|
| 246 |
+
"rewards/accuracies": 0.5,
|
| 247 |
+
"rewards/chosen": 0.15381185710430145,
|
| 248 |
+
"rewards/margins": 0.014438611455261707,
|
| 249 |
+
"rewards/rejected": 0.13937325775623322,
|
| 250 |
+
"step": 160
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"epoch": 0.7640449438202247,
|
| 254 |
+
"grad_norm": 7.760510444641113,
|
| 255 |
+
"learning_rate": 8.305647840531562e-06,
|
| 256 |
+
"logits/chosen": -0.5189553499221802,
|
| 257 |
+
"logits/rejected": -0.5221869945526123,
|
| 258 |
+
"logps/chosen": -1663.1962890625,
|
| 259 |
+
"logps/rejected": -1626.8187255859375,
|
| 260 |
+
"loss": 0.696,
|
| 261 |
+
"rewards/accuracies": 0.528124988079071,
|
| 262 |
+
"rewards/chosen": 0.4035443663597107,
|
| 263 |
+
"rewards/margins": 0.027243101969361305,
|
| 264 |
+
"rewards/rejected": 0.37630128860473633,
|
| 265 |
+
"step": 170
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"epoch": 0.8089887640449438,
|
| 269 |
+
"grad_norm": 7.571713924407959,
|
| 270 |
+
"learning_rate": 8.139534883720931e-06,
|
| 271 |
+
"logits/chosen": -0.5345529317855835,
|
| 272 |
+
"logits/rejected": -0.5297631025314331,
|
| 273 |
+
"logps/chosen": -1653.6646728515625,
|
| 274 |
+
"logps/rejected": -1661.974853515625,
|
| 275 |
+
"loss": 0.7057,
|
| 276 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 277 |
+
"rewards/chosen": 0.721655547618866,
|
| 278 |
+
"rewards/margins": 0.01410127617418766,
|
| 279 |
+
"rewards/rejected": 0.7075542211532593,
|
| 280 |
+
"step": 180
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"epoch": 0.8539325842696629,
|
| 284 |
+
"grad_norm": 7.918243408203125,
|
| 285 |
+
"learning_rate": 7.9734219269103e-06,
|
| 286 |
+
"logits/chosen": -0.5173817276954651,
|
| 287 |
+
"logits/rejected": -0.5203038454055786,
|
| 288 |
+
"logps/chosen": -1616.741455078125,
|
| 289 |
+
"logps/rejected": -1628.0203857421875,
|
| 290 |
+
"loss": 0.6924,
|
| 291 |
+
"rewards/accuracies": 0.5406249761581421,
|
| 292 |
+
"rewards/chosen": 0.4363967776298523,
|
| 293 |
+
"rewards/margins": 0.03799329325556755,
|
| 294 |
+
"rewards/rejected": 0.39840349555015564,
|
| 295 |
+
"step": 190
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"epoch": 0.898876404494382,
|
| 299 |
+
"grad_norm": 7.25588321685791,
|
| 300 |
+
"learning_rate": 7.807308970099668e-06,
|
| 301 |
+
"logits/chosen": -0.5121560096740723,
|
| 302 |
+
"logits/rejected": -0.5224347710609436,
|
| 303 |
+
"logps/chosen": -1623.5826416015625,
|
| 304 |
+
"logps/rejected": -1649.8167724609375,
|
| 305 |
+
"loss": 0.6794,
|
| 306 |
+
"rewards/accuracies": 0.581250011920929,
|
| 307 |
+
"rewards/chosen": 0.05758974701166153,
|
| 308 |
+
"rewards/margins": 0.0646430253982544,
|
| 309 |
+
"rewards/rejected": -0.007053279783576727,
|
| 310 |
+
"step": 200
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9438202247191011,
|
| 314 |
+
"grad_norm": 7.8804097175598145,
|
| 315 |
+
"learning_rate": 7.641196013289037e-06,
|
| 316 |
+
"logits/chosen": -0.5074772238731384,
|
| 317 |
+
"logits/rejected": -0.5116940140724182,
|
| 318 |
+
"logps/chosen": -1675.5833740234375,
|
| 319 |
+
"logps/rejected": -1657.4105224609375,
|
| 320 |
+
"loss": 0.7086,
|
| 321 |
+
"rewards/accuracies": 0.53125,
|
| 322 |
+
"rewards/chosen": -0.3717197775840759,
|
| 323 |
+
"rewards/margins": 0.013349572196602821,
|
| 324 |
+
"rewards/rejected": -0.3850693702697754,
|
| 325 |
+
"step": 210
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.9887640449438202,
|
| 329 |
+
"grad_norm": 7.504465579986572,
|
| 330 |
+
"learning_rate": 7.475083056478406e-06,
|
| 331 |
+
"logits/chosen": -0.5111061334609985,
|
| 332 |
+
"logits/rejected": -0.5182801485061646,
|
| 333 |
+
"logps/chosen": -1713.6666259765625,
|
| 334 |
+
"logps/rejected": -1688.6156005859375,
|
| 335 |
+
"loss": 0.706,
|
| 336 |
+
"rewards/accuracies": 0.5093749761581421,
|
| 337 |
+
"rewards/chosen": -0.3375091552734375,
|
| 338 |
+
"rewards/margins": 0.021358530968427658,
|
| 339 |
+
"rewards/rejected": -0.3588676452636719,
|
| 340 |
+
"step": 220
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"epoch": 1.0314606741573034,
|
| 344 |
+
"grad_norm": 6.4764933586120605,
|
| 345 |
+
"learning_rate": 7.308970099667775e-06,
|
| 346 |
+
"logits/chosen": -0.5052555203437805,
|
| 347 |
+
"logits/rejected": -0.49862349033355713,
|
| 348 |
+
"logps/chosen": -1678.5240478515625,
|
| 349 |
+
"logps/rejected": -1671.0499267578125,
|
| 350 |
+
"loss": 0.5908,
|
| 351 |
+
"rewards/accuracies": 0.7434210777282715,
|
| 352 |
+
"rewards/chosen": 0.31448131799697876,
|
| 353 |
+
"rewards/margins": 0.27145013213157654,
|
| 354 |
+
"rewards/rejected": 0.04303119331598282,
|
| 355 |
+
"step": 230
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"epoch": 1.0764044943820226,
|
| 359 |
+
"grad_norm": 6.172929763793945,
|
| 360 |
+
"learning_rate": 7.1428571428571436e-06,
|
| 361 |
+
"logits/chosen": -0.4970678389072418,
|
| 362 |
+
"logits/rejected": -0.4996468424797058,
|
| 363 |
+
"logps/chosen": -1620.894775390625,
|
| 364 |
+
"logps/rejected": -1650.440673828125,
|
| 365 |
+
"loss": 0.5408,
|
| 366 |
+
"rewards/accuracies": 0.840624988079071,
|
| 367 |
+
"rewards/chosen": 0.7087951302528381,
|
| 368 |
+
"rewards/margins": 0.37826400995254517,
|
| 369 |
+
"rewards/rejected": 0.3305310606956482,
|
| 370 |
+
"step": 240
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"epoch": 1.1213483146067416,
|
| 374 |
+
"grad_norm": 6.442656517028809,
|
| 375 |
+
"learning_rate": 6.976744186046513e-06,
|
| 376 |
+
"logits/chosen": -0.5012631416320801,
|
| 377 |
+
"logits/rejected": -0.4978242516517639,
|
| 378 |
+
"logps/chosen": -1661.751708984375,
|
| 379 |
+
"logps/rejected": -1665.3834228515625,
|
| 380 |
+
"loss": 0.5127,
|
| 381 |
+
"rewards/accuracies": 0.8656250238418579,
|
| 382 |
+
"rewards/chosen": 0.6133396029472351,
|
| 383 |
+
"rewards/margins": 0.45191797614097595,
|
| 384 |
+
"rewards/rejected": 0.16142162680625916,
|
| 385 |
+
"step": 250
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"epoch": 1.1662921348314608,
|
| 389 |
+
"grad_norm": 7.005346775054932,
|
| 390 |
+
"learning_rate": 6.810631229235881e-06,
|
| 391 |
+
"logits/chosen": -0.5041731595993042,
|
| 392 |
+
"logits/rejected": -0.520297646522522,
|
| 393 |
+
"logps/chosen": -1645.6292724609375,
|
| 394 |
+
"logps/rejected": -1593.59375,
|
| 395 |
+
"loss": 0.5327,
|
| 396 |
+
"rewards/accuracies": 0.828125,
|
| 397 |
+
"rewards/chosen": 0.367062509059906,
|
| 398 |
+
"rewards/margins": 0.40600281953811646,
|
| 399 |
+
"rewards/rejected": -0.038940299302339554,
|
| 400 |
+
"step": 260
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"epoch": 1.2112359550561798,
|
| 404 |
+
"grad_norm": 6.600498199462891,
|
| 405 |
+
"learning_rate": 6.64451827242525e-06,
|
| 406 |
+
"logits/chosen": -0.504959225654602,
|
| 407 |
+
"logits/rejected": -0.50092613697052,
|
| 408 |
+
"logps/chosen": -1652.6773681640625,
|
| 409 |
+
"logps/rejected": -1632.398681640625,
|
| 410 |
+
"loss": 0.5225,
|
| 411 |
+
"rewards/accuracies": 0.8531249761581421,
|
| 412 |
+
"rewards/chosen": 0.5157921314239502,
|
| 413 |
+
"rewards/margins": 0.43294817209243774,
|
| 414 |
+
"rewards/rejected": 0.08284398913383484,
|
| 415 |
+
"step": 270
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 1.256179775280899,
|
| 419 |
+
"grad_norm": 6.736950874328613,
|
| 420 |
+
"learning_rate": 6.4784053156146185e-06,
|
| 421 |
+
"logits/chosen": -0.5179052352905273,
|
| 422 |
+
"logits/rejected": -0.526940643787384,
|
| 423 |
+
"logps/chosen": -1627.15478515625,
|
| 424 |
+
"logps/rejected": -1616.917236328125,
|
| 425 |
+
"loss": 0.527,
|
| 426 |
+
"rewards/accuracies": 0.824999988079071,
|
| 427 |
+
"rewards/chosen": 0.6045408844947815,
|
| 428 |
+
"rewards/margins": 0.4338344931602478,
|
| 429 |
+
"rewards/rejected": 0.1707063764333725,
|
| 430 |
+
"step": 280
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 1.301123595505618,
|
| 434 |
+
"grad_norm": 7.117891788482666,
|
| 435 |
+
"learning_rate": 6.312292358803988e-06,
|
| 436 |
+
"logits/chosen": -0.517137885093689,
|
| 437 |
+
"logits/rejected": -0.5145695209503174,
|
| 438 |
+
"logps/chosen": -1660.9166259765625,
|
| 439 |
+
"logps/rejected": -1675.412841796875,
|
| 440 |
+
"loss": 0.5186,
|
| 441 |
+
"rewards/accuracies": 0.831250011920929,
|
| 442 |
+
"rewards/chosen": 0.6492522358894348,
|
| 443 |
+
"rewards/margins": 0.45881325006484985,
|
| 444 |
+
"rewards/rejected": 0.19043894112110138,
|
| 445 |
+
"step": 290
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"epoch": 1.346067415730337,
|
| 449 |
+
"grad_norm": 6.42925500869751,
|
| 450 |
+
"learning_rate": 6.146179401993356e-06,
|
| 451 |
+
"logits/chosen": -0.5063742399215698,
|
| 452 |
+
"logits/rejected": -0.5090619921684265,
|
| 453 |
+
"logps/chosen": -1695.0318603515625,
|
| 454 |
+
"logps/rejected": -1676.716552734375,
|
| 455 |
+
"loss": 0.484,
|
| 456 |
+
"rewards/accuracies": 0.859375,
|
| 457 |
+
"rewards/chosen": 0.5437876582145691,
|
| 458 |
+
"rewards/margins": 0.5404478311538696,
|
| 459 |
+
"rewards/rejected": 0.0033398643136024475,
|
| 460 |
+
"step": 300
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"epoch": 1.3910112359550562,
|
| 464 |
+
"grad_norm": 6.849030017852783,
|
| 465 |
+
"learning_rate": 5.980066445182725e-06,
|
| 466 |
+
"logits/chosen": -0.500776469707489,
|
| 467 |
+
"logits/rejected": -0.4920951724052429,
|
| 468 |
+
"logps/chosen": -1684.324951171875,
|
| 469 |
+
"logps/rejected": -1695.865966796875,
|
| 470 |
+
"loss": 0.4967,
|
| 471 |
+
"rewards/accuracies": 0.84375,
|
| 472 |
+
"rewards/chosen": 0.34679412841796875,
|
| 473 |
+
"rewards/margins": 0.5230782628059387,
|
| 474 |
+
"rewards/rejected": -0.1762840747833252,
|
| 475 |
+
"step": 310
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"epoch": 1.4359550561797754,
|
| 479 |
+
"grad_norm": 6.4250969886779785,
|
| 480 |
+
"learning_rate": 5.8139534883720935e-06,
|
| 481 |
+
"logits/chosen": -0.4945620596408844,
|
| 482 |
+
"logits/rejected": -0.4994569718837738,
|
| 483 |
+
"logps/chosen": -1643.4566650390625,
|
| 484 |
+
"logps/rejected": -1635.079833984375,
|
| 485 |
+
"loss": 0.508,
|
| 486 |
+
"rewards/accuracies": 0.815625011920929,
|
| 487 |
+
"rewards/chosen": 0.28158336877822876,
|
| 488 |
+
"rewards/margins": 0.49037957191467285,
|
| 489 |
+
"rewards/rejected": -0.2087961882352829,
|
| 490 |
+
"step": 320
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 1.4808988764044944,
|
| 494 |
+
"grad_norm": 7.239423751831055,
|
| 495 |
+
"learning_rate": 5.647840531561463e-06,
|
| 496 |
+
"logits/chosen": -0.4839072823524475,
|
| 497 |
+
"logits/rejected": -0.4932027757167816,
|
| 498 |
+
"logps/chosen": -1683.430419921875,
|
| 499 |
+
"logps/rejected": -1663.859619140625,
|
| 500 |
+
"loss": 0.4708,
|
| 501 |
+
"rewards/accuracies": 0.875,
|
| 502 |
+
"rewards/chosen": 0.5630651712417603,
|
| 503 |
+
"rewards/margins": 0.5979984402656555,
|
| 504 |
+
"rewards/rejected": -0.03493330255150795,
|
| 505 |
+
"step": 330
|
| 506 |
+
},
|
| 507 |
+
{
|
| 508 |
+
"epoch": 1.5258426966292133,
|
| 509 |
+
"grad_norm": 6.240829944610596,
|
| 510 |
+
"learning_rate": 5.481727574750831e-06,
|
| 511 |
+
"logits/chosen": -0.5244169235229492,
|
| 512 |
+
"logits/rejected": -0.5004735589027405,
|
| 513 |
+
"logps/chosen": -1670.196044921875,
|
| 514 |
+
"logps/rejected": -1666.2545166015625,
|
| 515 |
+
"loss": 0.502,
|
| 516 |
+
"rewards/accuracies": 0.824999988079071,
|
| 517 |
+
"rewards/chosen": 0.4043809771537781,
|
| 518 |
+
"rewards/margins": 0.5101131200790405,
|
| 519 |
+
"rewards/rejected": -0.10573209822177887,
|
| 520 |
+
"step": 340
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 1.5707865168539326,
|
| 524 |
+
"grad_norm": 8.406665802001953,
|
| 525 |
+
"learning_rate": 5.3156146179402e-06,
|
| 526 |
+
"logits/chosen": -0.4983617663383484,
|
| 527 |
+
"logits/rejected": -0.5115659236907959,
|
| 528 |
+
"logps/chosen": -1620.291259765625,
|
| 529 |
+
"logps/rejected": -1653.3515625,
|
| 530 |
+
"loss": 0.4953,
|
| 531 |
+
"rewards/accuracies": 0.840624988079071,
|
| 532 |
+
"rewards/chosen": 0.19796034693717957,
|
| 533 |
+
"rewards/margins": 0.5496314764022827,
|
| 534 |
+
"rewards/rejected": -0.35167109966278076,
|
| 535 |
+
"step": 350
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 1.6157303370786518,
|
| 539 |
+
"grad_norm": 8.227237701416016,
|
| 540 |
+
"learning_rate": 5.149501661129569e-06,
|
| 541 |
+
"logits/chosen": -0.5067149996757507,
|
| 542 |
+
"logits/rejected": -0.5047170519828796,
|
| 543 |
+
"logps/chosen": -1646.197998046875,
|
| 544 |
+
"logps/rejected": -1636.479736328125,
|
| 545 |
+
"loss": 0.4833,
|
| 546 |
+
"rewards/accuracies": 0.824999988079071,
|
| 547 |
+
"rewards/chosen": 0.1900782585144043,
|
| 548 |
+
"rewards/margins": 0.5684794187545776,
|
| 549 |
+
"rewards/rejected": -0.37840116024017334,
|
| 550 |
+
"step": 360
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"epoch": 1.6606741573033708,
|
| 554 |
+
"grad_norm": 6.621316432952881,
|
| 555 |
+
"learning_rate": 4.983388704318937e-06,
|
| 556 |
+
"logits/chosen": -0.5395005941390991,
|
| 557 |
+
"logits/rejected": -0.5313366055488586,
|
| 558 |
+
"logps/chosen": -1667.989501953125,
|
| 559 |
+
"logps/rejected": -1636.424560546875,
|
| 560 |
+
"loss": 0.4614,
|
| 561 |
+
"rewards/accuracies": 0.8687499761581421,
|
| 562 |
+
"rewards/chosen": 0.33250755071640015,
|
| 563 |
+
"rewards/margins": 0.6282721757888794,
|
| 564 |
+
"rewards/rejected": -0.29576462507247925,
|
| 565 |
+
"step": 370
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"epoch": 1.7056179775280897,
|
| 569 |
+
"grad_norm": 7.328308582305908,
|
| 570 |
+
"learning_rate": 4.817275747508306e-06,
|
| 571 |
+
"logits/chosen": -0.5294896960258484,
|
| 572 |
+
"logits/rejected": -0.5275659561157227,
|
| 573 |
+
"logps/chosen": -1608.811279296875,
|
| 574 |
+
"logps/rejected": -1615.828857421875,
|
| 575 |
+
"loss": 0.4733,
|
| 576 |
+
"rewards/accuracies": 0.862500011920929,
|
| 577 |
+
"rewards/chosen": 0.5715526342391968,
|
| 578 |
+
"rewards/margins": 0.6080284118652344,
|
| 579 |
+
"rewards/rejected": -0.036475833505392075,
|
| 580 |
+
"step": 380
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"epoch": 1.750561797752809,
|
| 584 |
+
"grad_norm": 6.845993518829346,
|
| 585 |
+
"learning_rate": 4.651162790697675e-06,
|
| 586 |
+
"logits/chosen": -0.5189222097396851,
|
| 587 |
+
"logits/rejected": -0.5236440300941467,
|
| 588 |
+
"logps/chosen": -1629.564208984375,
|
| 589 |
+
"logps/rejected": -1620.7515869140625,
|
| 590 |
+
"loss": 0.4831,
|
| 591 |
+
"rewards/accuracies": 0.8343750238418579,
|
| 592 |
+
"rewards/chosen": 0.5099019408226013,
|
| 593 |
+
"rewards/margins": 0.5789185166358948,
|
| 594 |
+
"rewards/rejected": -0.06901657581329346,
|
| 595 |
+
"step": 390
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"epoch": 1.7955056179775282,
|
| 599 |
+
"grad_norm": 6.671547889709473,
|
| 600 |
+
"learning_rate": 4.4850498338870435e-06,
|
| 601 |
+
"logits/chosen": -0.5234844088554382,
|
| 602 |
+
"logits/rejected": -0.5202063322067261,
|
| 603 |
+
"logps/chosen": -1641.2923583984375,
|
| 604 |
+
"logps/rejected": -1654.5943603515625,
|
| 605 |
+
"loss": 0.4701,
|
| 606 |
+
"rewards/accuracies": 0.8343750238418579,
|
| 607 |
+
"rewards/chosen": 0.37002792954444885,
|
| 608 |
+
"rewards/margins": 0.6203145980834961,
|
| 609 |
+
"rewards/rejected": -0.25028663873672485,
|
| 610 |
+
"step": 400
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"epoch": 1.8404494382022472,
|
| 614 |
+
"grad_norm": 7.342043399810791,
|
| 615 |
+
"learning_rate": 4.318936877076413e-06,
|
| 616 |
+
"logits/chosen": -0.5412445068359375,
|
| 617 |
+
"logits/rejected": -0.5334212779998779,
|
| 618 |
+
"logps/chosen": -1657.1158447265625,
|
| 619 |
+
"logps/rejected": -1649.1107177734375,
|
| 620 |
+
"loss": 0.4836,
|
| 621 |
+
"rewards/accuracies": 0.8500000238418579,
|
| 622 |
+
"rewards/chosen": 0.34860578179359436,
|
| 623 |
+
"rewards/margins": 0.5773425698280334,
|
| 624 |
+
"rewards/rejected": -0.22873680293560028,
|
| 625 |
+
"step": 410
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 1.8853932584269661,
|
| 629 |
+
"grad_norm": 7.252129077911377,
|
| 630 |
+
"learning_rate": 4.152823920265781e-06,
|
| 631 |
+
"logits/chosen": -0.5296737551689148,
|
| 632 |
+
"logits/rejected": -0.5220682621002197,
|
| 633 |
+
"logps/chosen": -1667.012451171875,
|
| 634 |
+
"logps/rejected": -1626.130615234375,
|
| 635 |
+
"loss": 0.478,
|
| 636 |
+
"rewards/accuracies": 0.862500011920929,
|
| 637 |
+
"rewards/chosen": 0.5405913591384888,
|
| 638 |
+
"rewards/margins": 0.5873361825942993,
|
| 639 |
+
"rewards/rejected": -0.04674474522471428,
|
| 640 |
+
"step": 420
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 1.9303370786516854,
|
| 644 |
+
"grad_norm": 7.426218509674072,
|
| 645 |
+
"learning_rate": 3.98671096345515e-06,
|
| 646 |
+
"logits/chosen": -0.5249942541122437,
|
| 647 |
+
"logits/rejected": -0.5246230959892273,
|
| 648 |
+
"logps/chosen": -1684.589599609375,
|
| 649 |
+
"logps/rejected": -1637.92041015625,
|
| 650 |
+
"loss": 0.4955,
|
| 651 |
+
"rewards/accuracies": 0.831250011920929,
|
| 652 |
+
"rewards/chosen": 0.6054187417030334,
|
| 653 |
+
"rewards/margins": 0.5561720132827759,
|
| 654 |
+
"rewards/rejected": 0.049246758222579956,
|
| 655 |
+
"step": 430
|
| 656 |
+
},
|
| 657 |
+
{
|
| 658 |
+
"epoch": 1.9752808988764046,
|
| 659 |
+
"grad_norm": 6.802650451660156,
|
| 660 |
+
"learning_rate": 3.8205980066445185e-06,
|
| 661 |
+
"logits/chosen": -0.5376031398773193,
|
| 662 |
+
"logits/rejected": -0.5206517577171326,
|
| 663 |
+
"logps/chosen": -1635.8851318359375,
|
| 664 |
+
"logps/rejected": -1639.7294921875,
|
| 665 |
+
"loss": 0.4824,
|
| 666 |
+
"rewards/accuracies": 0.856249988079071,
|
| 667 |
+
"rewards/chosen": 0.833863377571106,
|
| 668 |
+
"rewards/margins": 0.5611246228218079,
|
| 669 |
+
"rewards/rejected": 0.2727387845516205,
|
| 670 |
+
"step": 440
|
| 671 |
+
},
|
| 672 |
+
{
|
| 673 |
+
"epoch": 2.0179775280898875,
|
| 674 |
+
"grad_norm": 5.3229265213012695,
|
| 675 |
+
"learning_rate": 3.6544850498338876e-06,
|
| 676 |
+
"logits/chosen": -0.5227991938591003,
|
| 677 |
+
"logits/rejected": -0.526660144329071,
|
| 678 |
+
"logps/chosen": -1625.6988525390625,
|
| 679 |
+
"logps/rejected": -1614.040771484375,
|
| 680 |
+
"loss": 0.414,
|
| 681 |
+
"rewards/accuracies": 0.9111841917037964,
|
| 682 |
+
"rewards/chosen": 0.9086302518844604,
|
| 683 |
+
"rewards/margins": 0.7858190536499023,
|
| 684 |
+
"rewards/rejected": 0.12281119078397751,
|
| 685 |
+
"step": 450
|
| 686 |
+
},
|
| 687 |
+
{
|
| 688 |
+
"epoch": 2.0629213483146067,
|
| 689 |
+
"grad_norm": 5.2561516761779785,
|
| 690 |
+
"learning_rate": 3.4883720930232564e-06,
|
| 691 |
+
"logits/chosen": -0.538662314414978,
|
| 692 |
+
"logits/rejected": -0.5180272459983826,
|
| 693 |
+
"logps/chosen": -1612.210693359375,
|
| 694 |
+
"logps/rejected": -1614.930908203125,
|
| 695 |
+
"loss": 0.3374,
|
| 696 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 697 |
+
"rewards/chosen": 0.8260666131973267,
|
| 698 |
+
"rewards/margins": 1.0068495273590088,
|
| 699 |
+
"rewards/rejected": -0.1807830035686493,
|
| 700 |
+
"step": 460
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"epoch": 2.107865168539326,
|
| 704 |
+
"grad_norm": 5.642768859863281,
|
| 705 |
+
"learning_rate": 3.322259136212625e-06,
|
| 706 |
+
"logits/chosen": -0.5233631730079651,
|
| 707 |
+
"logits/rejected": -0.5245693922042847,
|
| 708 |
+
"logps/chosen": -1631.31689453125,
|
| 709 |
+
"logps/rejected": -1639.3062744140625,
|
| 710 |
+
"loss": 0.3276,
|
| 711 |
+
"rewards/accuracies": 0.984375,
|
| 712 |
+
"rewards/chosen": 0.6572339534759521,
|
| 713 |
+
"rewards/margins": 1.0290508270263672,
|
| 714 |
+
"rewards/rejected": -0.3718169629573822,
|
| 715 |
+
"step": 470
|
| 716 |
+
},
|
| 717 |
+
{
|
| 718 |
+
"epoch": 2.152808988764045,
|
| 719 |
+
"grad_norm": 5.401882171630859,
|
| 720 |
+
"learning_rate": 3.156146179401994e-06,
|
| 721 |
+
"logits/chosen": -0.538943350315094,
|
| 722 |
+
"logits/rejected": -0.5362802743911743,
|
| 723 |
+
"logps/chosen": -1624.647705078125,
|
| 724 |
+
"logps/rejected": -1646.9886474609375,
|
| 725 |
+
"loss": 0.3255,
|
| 726 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 727 |
+
"rewards/chosen": 0.38137590885162354,
|
| 728 |
+
"rewards/margins": 1.0469489097595215,
|
| 729 |
+
"rewards/rejected": -0.665573000907898,
|
| 730 |
+
"step": 480
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 2.197752808988764,
|
| 734 |
+
"grad_norm": 5.061157703399658,
|
| 735 |
+
"learning_rate": 2.9900332225913626e-06,
|
| 736 |
+
"logits/chosen": -0.534186840057373,
|
| 737 |
+
"logits/rejected": -0.5299129486083984,
|
| 738 |
+
"logps/chosen": -1611.435546875,
|
| 739 |
+
"logps/rejected": -1608.452880859375,
|
| 740 |
+
"loss": 0.335,
|
| 741 |
+
"rewards/accuracies": 0.971875011920929,
|
| 742 |
+
"rewards/chosen": 0.262370765209198,
|
| 743 |
+
"rewards/margins": 1.0311752557754517,
|
| 744 |
+
"rewards/rejected": -0.7688044309616089,
|
| 745 |
+
"step": 490
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 2.242696629213483,
|
| 749 |
+
"grad_norm": 5.667867660522461,
|
| 750 |
+
"learning_rate": 2.8239202657807313e-06,
|
| 751 |
+
"logits/chosen": -0.5238770842552185,
|
| 752 |
+
"logits/rejected": -0.5233170390129089,
|
| 753 |
+
"logps/chosen": -1681.3297119140625,
|
| 754 |
+
"logps/rejected": -1670.050048828125,
|
| 755 |
+
"loss": 0.3283,
|
| 756 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 757 |
+
"rewards/chosen": 0.6769800782203674,
|
| 758 |
+
"rewards/margins": 1.0591976642608643,
|
| 759 |
+
"rewards/rejected": -0.382217675447464,
|
| 760 |
+
"step": 500
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"epoch": 2.2876404494382023,
|
| 764 |
+
"grad_norm": 5.56412410736084,
|
| 765 |
+
"learning_rate": 2.6578073089701e-06,
|
| 766 |
+
"logits/chosen": -0.5160372853279114,
|
| 767 |
+
"logits/rejected": -0.5247339010238647,
|
| 768 |
+
"logps/chosen": -1672.7626953125,
|
| 769 |
+
"logps/rejected": -1685.137451171875,
|
| 770 |
+
"loss": 0.3162,
|
| 771 |
+
"rewards/accuracies": 0.96875,
|
| 772 |
+
"rewards/chosen": 0.8809404373168945,
|
| 773 |
+
"rewards/margins": 1.111767053604126,
|
| 774 |
+
"rewards/rejected": -0.23082669079303741,
|
| 775 |
+
"step": 510
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"epoch": 2.3325842696629215,
|
| 779 |
+
"grad_norm": 5.156087875366211,
|
| 780 |
+
"learning_rate": 2.4916943521594684e-06,
|
| 781 |
+
"logits/chosen": -0.5284621715545654,
|
| 782 |
+
"logits/rejected": -0.5034042596817017,
|
| 783 |
+
"logps/chosen": -1669.8564453125,
|
| 784 |
+
"logps/rejected": -1671.060302734375,
|
| 785 |
+
"loss": 0.314,
|
| 786 |
+
"rewards/accuracies": 0.965624988079071,
|
| 787 |
+
"rewards/chosen": 0.7527168989181519,
|
| 788 |
+
"rewards/margins": 1.123870611190796,
|
| 789 |
+
"rewards/rejected": -0.37115368247032166,
|
| 790 |
+
"step": 520
|
| 791 |
+
},
|
| 792 |
+
{
|
| 793 |
+
"epoch": 2.3775280898876403,
|
| 794 |
+
"grad_norm": 5.17440938949585,
|
| 795 |
+
"learning_rate": 2.3255813953488376e-06,
|
| 796 |
+
"logits/chosen": -0.5137378573417664,
|
| 797 |
+
"logits/rejected": -0.5173753499984741,
|
| 798 |
+
"logps/chosen": -1650.34375,
|
| 799 |
+
"logps/rejected": -1654.510986328125,
|
| 800 |
+
"loss": 0.3163,
|
| 801 |
+
"rewards/accuracies": 0.981249988079071,
|
| 802 |
+
"rewards/chosen": 0.476835161447525,
|
| 803 |
+
"rewards/margins": 1.0856043100357056,
|
| 804 |
+
"rewards/rejected": -0.6087690591812134,
|
| 805 |
+
"step": 530
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 2.4224719101123595,
|
| 809 |
+
"grad_norm": 6.027777194976807,
|
| 810 |
+
"learning_rate": 2.1594684385382063e-06,
|
| 811 |
+
"logits/chosen": -0.5290040969848633,
|
| 812 |
+
"logits/rejected": -0.5300403833389282,
|
| 813 |
+
"logps/chosen": -1685.980712890625,
|
| 814 |
+
"logps/rejected": -1669.431884765625,
|
| 815 |
+
"loss": 0.3284,
|
| 816 |
+
"rewards/accuracies": 0.965624988079071,
|
| 817 |
+
"rewards/chosen": 0.3279837369918823,
|
| 818 |
+
"rewards/margins": 1.065712332725525,
|
| 819 |
+
"rewards/rejected": -0.737728476524353,
|
| 820 |
+
"step": 540
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"epoch": 2.4674157303370787,
|
| 824 |
+
"grad_norm": 5.797597408294678,
|
| 825 |
+
"learning_rate": 1.993355481727575e-06,
|
| 826 |
+
"logits/chosen": -0.5459321737289429,
|
| 827 |
+
"logits/rejected": -0.5469938516616821,
|
| 828 |
+
"logps/chosen": -1586.365234375,
|
| 829 |
+
"logps/rejected": -1629.16943359375,
|
| 830 |
+
"loss": 0.3377,
|
| 831 |
+
"rewards/accuracies": 0.956250011920929,
|
| 832 |
+
"rewards/chosen": 0.41557711362838745,
|
| 833 |
+
"rewards/margins": 1.04073965549469,
|
| 834 |
+
"rewards/rejected": -0.6251626014709473,
|
| 835 |
+
"step": 550
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 2.512359550561798,
|
| 839 |
+
"grad_norm": 5.85587215423584,
|
| 840 |
+
"learning_rate": 1.8272425249169438e-06,
|
| 841 |
+
"logits/chosen": -0.5439457297325134,
|
| 842 |
+
"logits/rejected": -0.5378574728965759,
|
| 843 |
+
"logps/chosen": -1665.3310546875,
|
| 844 |
+
"logps/rejected": -1670.1988525390625,
|
| 845 |
+
"loss": 0.3114,
|
| 846 |
+
"rewards/accuracies": 0.984375,
|
| 847 |
+
"rewards/chosen": 0.520895779132843,
|
| 848 |
+
"rewards/margins": 1.1236566305160522,
|
| 849 |
+
"rewards/rejected": -0.6027609705924988,
|
| 850 |
+
"step": 560
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 2.5573033707865167,
|
| 854 |
+
"grad_norm": 5.2037458419799805,
|
| 855 |
+
"learning_rate": 1.6611295681063126e-06,
|
| 856 |
+
"logits/chosen": -0.5538147687911987,
|
| 857 |
+
"logits/rejected": -0.5644603371620178,
|
| 858 |
+
"logps/chosen": -1645.1334228515625,
|
| 859 |
+
"logps/rejected": -1638.0411376953125,
|
| 860 |
+
"loss": 0.3055,
|
| 861 |
+
"rewards/accuracies": 0.96875,
|
| 862 |
+
"rewards/chosen": 0.5205073356628418,
|
| 863 |
+
"rewards/margins": 1.148437738418579,
|
| 864 |
+
"rewards/rejected": -0.6279304027557373,
|
| 865 |
+
"step": 570
|
| 866 |
+
},
|
| 867 |
+
{
|
| 868 |
+
"epoch": 2.602247191011236,
|
| 869 |
+
"grad_norm": 5.944153785705566,
|
| 870 |
+
"learning_rate": 1.4950166112956813e-06,
|
| 871 |
+
"logits/chosen": -0.5424203872680664,
|
| 872 |
+
"logits/rejected": -0.5553634762763977,
|
| 873 |
+
"logps/chosen": -1630.100341796875,
|
| 874 |
+
"logps/rejected": -1639.7135009765625,
|
| 875 |
+
"loss": 0.3289,
|
| 876 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 877 |
+
"rewards/chosen": 0.46930861473083496,
|
| 878 |
+
"rewards/margins": 1.0725914239883423,
|
| 879 |
+
"rewards/rejected": -0.6032828092575073,
|
| 880 |
+
"step": 580
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 2.647191011235955,
|
| 884 |
+
"grad_norm": 5.082998752593994,
|
| 885 |
+
"learning_rate": 1.32890365448505e-06,
|
| 886 |
+
"logits/chosen": -0.5557939410209656,
|
| 887 |
+
"logits/rejected": -0.5634464025497437,
|
| 888 |
+
"logps/chosen": -1674.44921875,
|
| 889 |
+
"logps/rejected": -1665.8763427734375,
|
| 890 |
+
"loss": 0.311,
|
| 891 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 892 |
+
"rewards/chosen": 0.5268303751945496,
|
| 893 |
+
"rewards/margins": 1.155867099761963,
|
| 894 |
+
"rewards/rejected": -0.6290367841720581,
|
| 895 |
+
"step": 590
|
| 896 |
+
},
|
| 897 |
+
{
|
| 898 |
+
"epoch": 2.692134831460674,
|
| 899 |
+
"grad_norm": 6.137351989746094,
|
| 900 |
+
"learning_rate": 1.1627906976744188e-06,
|
| 901 |
+
"logits/chosen": -0.5621194243431091,
|
| 902 |
+
"logits/rejected": -0.5413273572921753,
|
| 903 |
+
"logps/chosen": -1648.4447021484375,
|
| 904 |
+
"logps/rejected": -1647.3736572265625,
|
| 905 |
+
"loss": 0.3104,
|
| 906 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 907 |
+
"rewards/chosen": 0.5918968319892883,
|
| 908 |
+
"rewards/margins": 1.1398565769195557,
|
| 909 |
+
"rewards/rejected": -0.5479596853256226,
|
| 910 |
+
"step": 600
|
| 911 |
+
},
|
| 912 |
+
{
|
| 913 |
+
"epoch": 2.737078651685393,
|
| 914 |
+
"grad_norm": 5.4996490478515625,
|
| 915 |
+
"learning_rate": 9.966777408637875e-07,
|
| 916 |
+
"logits/chosen": -0.5511677265167236,
|
| 917 |
+
"logits/rejected": -0.5449541211128235,
|
| 918 |
+
"logps/chosen": -1658.724609375,
|
| 919 |
+
"logps/rejected": -1671.2269287109375,
|
| 920 |
+
"loss": 0.3269,
|
| 921 |
+
"rewards/accuracies": 0.9593750238418579,
|
| 922 |
+
"rewards/chosen": 0.5969556570053101,
|
| 923 |
+
"rewards/margins": 1.071141242980957,
|
| 924 |
+
"rewards/rejected": -0.4741855561733246,
|
| 925 |
+
"step": 610
|
| 926 |
+
},
|
| 927 |
+
{
|
| 928 |
+
"epoch": 2.7820224719101123,
|
| 929 |
+
"grad_norm": 7.489591598510742,
|
| 930 |
+
"learning_rate": 8.305647840531563e-07,
|
| 931 |
+
"logits/chosen": -0.5342198610305786,
|
| 932 |
+
"logits/rejected": -0.5351474285125732,
|
| 933 |
+
"logps/chosen": -1673.1702880859375,
|
| 934 |
+
"logps/rejected": -1652.4296875,
|
| 935 |
+
"loss": 0.3087,
|
| 936 |
+
"rewards/accuracies": 0.9593750238418579,
|
| 937 |
+
"rewards/chosen": 0.5696877241134644,
|
| 938 |
+
"rewards/margins": 1.1663155555725098,
|
| 939 |
+
"rewards/rejected": -0.5966278314590454,
|
| 940 |
+
"step": 620
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 2.8269662921348315,
|
| 944 |
+
"grad_norm": 5.481851100921631,
|
| 945 |
+
"learning_rate": 6.64451827242525e-07,
|
| 946 |
+
"logits/chosen": -0.5519033670425415,
|
| 947 |
+
"logits/rejected": -0.5528594851493835,
|
| 948 |
+
"logps/chosen": -1630.025634765625,
|
| 949 |
+
"logps/rejected": -1657.2965087890625,
|
| 950 |
+
"loss": 0.3035,
|
| 951 |
+
"rewards/accuracies": 0.984375,
|
| 952 |
+
"rewards/chosen": 0.5027799606323242,
|
| 953 |
+
"rewards/margins": 1.1599414348602295,
|
| 954 |
+
"rewards/rejected": -0.6571615934371948,
|
| 955 |
+
"step": 630
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 2.8719101123595507,
|
| 959 |
+
"grad_norm": 6.2103376388549805,
|
| 960 |
+
"learning_rate": 4.983388704318938e-07,
|
| 961 |
+
"logits/chosen": -0.5550025701522827,
|
| 962 |
+
"logits/rejected": -0.5428484678268433,
|
| 963 |
+
"logps/chosen": -1660.6510009765625,
|
| 964 |
+
"logps/rejected": -1650.1458740234375,
|
| 965 |
+
"loss": 0.3054,
|
| 966 |
+
"rewards/accuracies": 0.981249988079071,
|
| 967 |
+
"rewards/chosen": 0.48065298795700073,
|
| 968 |
+
"rewards/margins": 1.1632822751998901,
|
| 969 |
+
"rewards/rejected": -0.6826292872428894,
|
| 970 |
+
"step": 640
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"epoch": 2.9168539325842695,
|
| 974 |
+
"grad_norm": 5.274155616760254,
|
| 975 |
+
"learning_rate": 3.322259136212625e-07,
|
| 976 |
+
"logits/chosen": -0.559792160987854,
|
| 977 |
+
"logits/rejected": -0.5550666451454163,
|
| 978 |
+
"logps/chosen": -1683.5833740234375,
|
| 979 |
+
"logps/rejected": -1644.792236328125,
|
| 980 |
+
"loss": 0.2943,
|
| 981 |
+
"rewards/accuracies": 0.971875011920929,
|
| 982 |
+
"rewards/chosen": 0.483023077249527,
|
| 983 |
+
"rewards/margins": 1.2071751356124878,
|
| 984 |
+
"rewards/rejected": -0.7241520285606384,
|
| 985 |
+
"step": 650
|
| 986 |
+
},
|
| 987 |
+
{
|
| 988 |
+
"epoch": 2.9617977528089887,
|
| 989 |
+
"grad_norm": 5.632884502410889,
|
| 990 |
+
"learning_rate": 1.6611295681063126e-07,
|
| 991 |
+
"logits/chosen": -0.5550089478492737,
|
| 992 |
+
"logits/rejected": -0.5735629796981812,
|
| 993 |
+
"logps/chosen": -1673.248779296875,
|
| 994 |
+
"logps/rejected": -1636.8924560546875,
|
| 995 |
+
"loss": 0.3071,
|
| 996 |
+
"rewards/accuracies": 0.971875011920929,
|
| 997 |
+
"rewards/chosen": 0.47631826996803284,
|
| 998 |
+
"rewards/margins": 1.1436049938201904,
|
| 999 |
+
"rewards/rejected": -0.6672865748405457,
|
| 1000 |
+
"step": 660
|
| 1001 |
+
}
|
| 1002 |
+
],
|
| 1003 |
+
"logging_steps": 10,
|
| 1004 |
+
"max_steps": 669,
|
| 1005 |
+
"num_input_tokens_seen": 0,
|
| 1006 |
+
"num_train_epochs": 3,
|
| 1007 |
+
"save_steps": 10000,
|
| 1008 |
+
"stateful_callbacks": {
|
| 1009 |
+
"TrainerControl": {
|
| 1010 |
+
"args": {
|
| 1011 |
+
"should_epoch_stop": false,
|
| 1012 |
+
"should_evaluate": false,
|
| 1013 |
+
"should_log": false,
|
| 1014 |
+
"should_save": true,
|
| 1015 |
+
"should_training_stop": true
|
| 1016 |
+
},
|
| 1017 |
+
"attributes": {}
|
| 1018 |
+
}
|
| 1019 |
+
},
|
| 1020 |
+
"total_flos": 0.0,
|
| 1021 |
+
"train_batch_size": 1,
|
| 1022 |
+
"trial_name": null,
|
| 1023 |
+
"trial_params": null
|
| 1024 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea74b09aba8751ccf31169f860970b324a96335b8e52ce8b9b0699d68927693d
|
| 3 |
+
size 7057
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/checkpoint-669/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/chat_template.jinja
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 4 |
+
{{- messages[0]['content'] }}
|
| 5 |
+
{%- else %}
|
| 6 |
+
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
| 7 |
+
{%- endif %}
|
| 8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 9 |
+
{%- for tool in tools %}
|
| 10 |
+
{{- "\n" }}
|
| 11 |
+
{{- tool | tojson }}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 14 |
+
{%- else %}
|
| 15 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
| 17 |
+
{%- else %}
|
| 18 |
+
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
| 19 |
+
{%- endif %}
|
| 20 |
+
{%- endif %}
|
| 21 |
+
{%- for message in messages %}
|
| 22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
| 23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 24 |
+
{%- elif message.role == "assistant" %}
|
| 25 |
+
{{- '<|im_start|>' + message.role }}
|
| 26 |
+
{%- if message.content %}
|
| 27 |
+
{{- '\n' + message.content }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{%- for tool_call in message.tool_calls %}
|
| 30 |
+
{%- if tool_call.function is defined %}
|
| 31 |
+
{%- set tool_call = tool_call.function %}
|
| 32 |
+
{%- endif %}
|
| 33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
| 34 |
+
{{- tool_call.name }}
|
| 35 |
+
{{- '", "arguments": ' }}
|
| 36 |
+
{{- tool_call.arguments | tojson }}
|
| 37 |
+
{{- '}\n</tool_call>' }}
|
| 38 |
+
{%- endfor %}
|
| 39 |
+
{{- '<|im_end|>\n' }}
|
| 40 |
+
{%- elif message.role == "tool" %}
|
| 41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
| 42 |
+
{{- '<|im_start|>user' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{{- '\n<tool_response>\n' }}
|
| 45 |
+
{{- message.content }}
|
| 46 |
+
{{- '\n</tool_response>' }}
|
| 47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 48 |
+
{{- '<|im_end|>\n' }}
|
| 49 |
+
{%- endif %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{%- if add_generation_prompt %}
|
| 53 |
+
{{- '<|im_start|>assistant\n' }}
|
| 54 |
+
{%- endif %}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 151643,
|
| 7 |
+
"eos_token_id": 151645,
|
| 8 |
+
"hidden_act": "silu",
|
| 9 |
+
"hidden_size": 2048,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 11008,
|
| 12 |
+
"layer_types": [
|
| 13 |
+
"full_attention",
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"full_attention",
|
| 39 |
+
"full_attention",
|
| 40 |
+
"full_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"full_attention",
|
| 43 |
+
"full_attention",
|
| 44 |
+
"full_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"full_attention",
|
| 47 |
+
"full_attention",
|
| 48 |
+
"full_attention"
|
| 49 |
+
],
|
| 50 |
+
"max_position_embeddings": 32768,
|
| 51 |
+
"max_window_layers": 70,
|
| 52 |
+
"model_type": "qwen2",
|
| 53 |
+
"num_attention_heads": 16,
|
| 54 |
+
"num_hidden_layers": 36,
|
| 55 |
+
"num_key_value_heads": 2,
|
| 56 |
+
"rms_norm_eps": 1e-06,
|
| 57 |
+
"rope_scaling": null,
|
| 58 |
+
"rope_theta": 1000000.0,
|
| 59 |
+
"sliding_window": null,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"torch_dtype": "float32",
|
| 62 |
+
"transformers_version": "4.55.0",
|
| 63 |
+
"use_cache": true,
|
| 64 |
+
"use_sliding_window": false,
|
| 65 |
+
"vocab_size": 151936
|
| 66 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/generation_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"repetition_penalty": 1.05,
|
| 10 |
+
"temperature": 0.7,
|
| 11 |
+
"top_k": 20,
|
| 12 |
+
"top_p": 0.8,
|
| 13 |
+
"transformers_version": "4.55.0"
|
| 14 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00001-of-00003.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57e3cbf78021d13f0b4ae980ae979987f39d5f3bdfe608e70c702f0c140af2ed
|
| 3 |
+
size 4982131536
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00002-of-00003.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be573dbdc2126e1c398f22a3eb2d1582f41c1190b4f39f83e0978cf9c7147c6b
|
| 3 |
+
size 4932949336
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model-00003-of-00003.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15cae31cc6b5ea2ed26deb3ddfc2991fe49ff3a7b9e390ace9c0e682daef5c2b
|
| 3 |
+
size 2428723160
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/model.safetensors.index.json
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_parameters": 3085938688,
|
| 4 |
+
"total_size": 12343754752
|
| 5 |
+
},
|
| 6 |
+
"weight_map": {
|
| 7 |
+
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
|
| 8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 13 |
+
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 14 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 15 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 16 |
+
"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 17 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 18 |
+
"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 19 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 20 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 21 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 22 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 23 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 24 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 25 |
+
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 26 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 27 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 28 |
+
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 29 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 30 |
+
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 31 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 32 |
+
"model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 33 |
+
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 34 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 35 |
+
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 36 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 37 |
+
"model.layers.10.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 38 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 39 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 40 |
+
"model.layers.10.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 41 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 42 |
+
"model.layers.10.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 43 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 44 |
+
"model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 45 |
+
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 46 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 47 |
+
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 48 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 49 |
+
"model.layers.11.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 50 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 51 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 52 |
+
"model.layers.11.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 53 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 54 |
+
"model.layers.11.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 55 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 56 |
+
"model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 57 |
+
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 58 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 59 |
+
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 60 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 61 |
+
"model.layers.12.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 62 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 63 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 64 |
+
"model.layers.12.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 65 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 66 |
+
"model.layers.12.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 67 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 68 |
+
"model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 69 |
+
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 70 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 71 |
+
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 72 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 73 |
+
"model.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 74 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 75 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 76 |
+
"model.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 77 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 78 |
+
"model.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 79 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 80 |
+
"model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 81 |
+
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 82 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 83 |
+
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 84 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 85 |
+
"model.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 86 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 87 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 88 |
+
"model.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 89 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 90 |
+
"model.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 91 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 92 |
+
"model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 93 |
+
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 94 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 95 |
+
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 96 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 97 |
+
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 98 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 99 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 100 |
+
"model.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 101 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 102 |
+
"model.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 103 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 104 |
+
"model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 105 |
+
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 106 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 107 |
+
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 108 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 109 |
+
"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 110 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 111 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 112 |
+
"model.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 113 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 114 |
+
"model.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 115 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 116 |
+
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 117 |
+
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 118 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 119 |
+
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 120 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 121 |
+
"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 122 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 123 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 124 |
+
"model.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 125 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 126 |
+
"model.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 127 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 128 |
+
"model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 129 |
+
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 130 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 131 |
+
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 132 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 133 |
+
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 134 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 135 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 136 |
+
"model.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 137 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 138 |
+
"model.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 139 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 140 |
+
"model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 141 |
+
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 142 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 143 |
+
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 144 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 145 |
+
"model.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 146 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 147 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 148 |
+
"model.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 149 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 150 |
+
"model.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 151 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 152 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 153 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 154 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 155 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 156 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 157 |
+
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 158 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 159 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 160 |
+
"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 161 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 162 |
+
"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 163 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 164 |
+
"model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 165 |
+
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 166 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 167 |
+
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 168 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 169 |
+
"model.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 170 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 171 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 172 |
+
"model.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 173 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 174 |
+
"model.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 175 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 176 |
+
"model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 177 |
+
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 178 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 179 |
+
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 180 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 181 |
+
"model.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 182 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 183 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 184 |
+
"model.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 185 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 186 |
+
"model.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 187 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 188 |
+
"model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 189 |
+
"model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 190 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 191 |
+
"model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 192 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 193 |
+
"model.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 194 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 195 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 196 |
+
"model.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 197 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 198 |
+
"model.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 199 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 200 |
+
"model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 201 |
+
"model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 202 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 203 |
+
"model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 204 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 205 |
+
"model.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 206 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 207 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 208 |
+
"model.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 209 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 210 |
+
"model.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 211 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 212 |
+
"model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 213 |
+
"model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 214 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 215 |
+
"model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 216 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 217 |
+
"model.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 218 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 219 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 220 |
+
"model.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 221 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 222 |
+
"model.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 223 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 224 |
+
"model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 225 |
+
"model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 226 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 227 |
+
"model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 228 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 229 |
+
"model.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 230 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 231 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 232 |
+
"model.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 233 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 234 |
+
"model.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 235 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 236 |
+
"model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 237 |
+
"model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 238 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 239 |
+
"model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 240 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 241 |
+
"model.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 242 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 243 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 244 |
+
"model.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 245 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 246 |
+
"model.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 247 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 248 |
+
"model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 249 |
+
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
| 250 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
| 251 |
+
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
| 252 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
| 253 |
+
"model.layers.27.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 254 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 255 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 256 |
+
"model.layers.27.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 257 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 258 |
+
"model.layers.27.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 259 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 260 |
+
"model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 261 |
+
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 262 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 263 |
+
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 264 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 265 |
+
"model.layers.28.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
|
| 266 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
| 267 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
| 268 |
+
"model.layers.28.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
|
| 269 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
| 270 |
+
"model.layers.28.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
|
| 271 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
| 272 |
+
"model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 273 |
+
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 274 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 275 |
+
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 276 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 277 |
+
"model.layers.29.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 278 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 279 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 280 |
+
"model.layers.29.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 281 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 282 |
+
"model.layers.29.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 283 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 284 |
+
"model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 285 |
+
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 286 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 287 |
+
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 288 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 289 |
+
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 290 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 291 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 292 |
+
"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 293 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 294 |
+
"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 295 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 296 |
+
"model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 297 |
+
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 298 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 299 |
+
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 300 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 301 |
+
"model.layers.30.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 302 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 303 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 304 |
+
"model.layers.30.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 305 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 306 |
+
"model.layers.30.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 307 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 308 |
+
"model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 309 |
+
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 310 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 311 |
+
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 312 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 313 |
+
"model.layers.31.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 314 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 315 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 316 |
+
"model.layers.31.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 317 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 318 |
+
"model.layers.31.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 319 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 320 |
+
"model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 321 |
+
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 322 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 323 |
+
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 324 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 325 |
+
"model.layers.32.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 326 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 327 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 328 |
+
"model.layers.32.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 329 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 330 |
+
"model.layers.32.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 331 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 332 |
+
"model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 333 |
+
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 334 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 335 |
+
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 336 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 337 |
+
"model.layers.33.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 338 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 339 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 340 |
+
"model.layers.33.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 341 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 342 |
+
"model.layers.33.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 343 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 344 |
+
"model.layers.34.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 345 |
+
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 346 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 347 |
+
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 348 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 349 |
+
"model.layers.34.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 350 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 351 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 352 |
+
"model.layers.34.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 353 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 354 |
+
"model.layers.34.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 355 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 356 |
+
"model.layers.35.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 357 |
+
"model.layers.35.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
| 358 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
| 359 |
+
"model.layers.35.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
| 360 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
| 361 |
+
"model.layers.35.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
|
| 362 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
| 363 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
| 364 |
+
"model.layers.35.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
|
| 365 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
| 366 |
+
"model.layers.35.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
|
| 367 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
| 368 |
+
"model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 369 |
+
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 370 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 371 |
+
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 372 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 373 |
+
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 374 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 375 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 376 |
+
"model.layers.4.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 377 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 378 |
+
"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 379 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 380 |
+
"model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 381 |
+
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 382 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 383 |
+
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 384 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 385 |
+
"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 386 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 387 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 388 |
+
"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 389 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 390 |
+
"model.layers.5.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 391 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 392 |
+
"model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 393 |
+
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 394 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 395 |
+
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 396 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 397 |
+
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 398 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 399 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 400 |
+
"model.layers.6.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 401 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 402 |
+
"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 403 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 404 |
+
"model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 405 |
+
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 406 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 407 |
+
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 408 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 409 |
+
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 410 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 411 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 412 |
+
"model.layers.7.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 413 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 414 |
+
"model.layers.7.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 415 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 416 |
+
"model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 417 |
+
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 418 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 419 |
+
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 420 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 421 |
+
"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 422 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 423 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 424 |
+
"model.layers.8.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 425 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 426 |
+
"model.layers.8.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 427 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 428 |
+
"model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 429 |
+
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
| 430 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
| 431 |
+
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
| 432 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
| 433 |
+
"model.layers.9.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
|
| 434 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
| 435 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
| 436 |
+
"model.layers.9.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
|
| 437 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
| 438 |
+
"model.layers.9.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
|
| 439 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
| 440 |
+
"model.norm.weight": "model-00003-of-00003.safetensors"
|
| 441 |
+
}
|
| 442 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
| 3 |
+
size 11421896
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 131072,
|
| 203 |
+
"pad_token": "<|endoftext|>",
|
| 204 |
+
"split_special_tokens": false,
|
| 205 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 206 |
+
"unk_token": null
|
| 207 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/merged_model/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/model_args.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
|
| 3 |
+
"agent_name": "experiment_agent"
|
| 4 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/experiment_agent/train_args.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"load_in_4bit": true
|
| 3 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/DPO_configs.json
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"output_dir": "Trained_Models/Jackson0018/Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent",
|
| 3 |
+
"overwrite_output_dir": null,
|
| 4 |
+
"do_train": false,
|
| 5 |
+
"do_eval": false,
|
| 6 |
+
"do_predict": false,
|
| 7 |
+
"eval_strategy": "no",
|
| 8 |
+
"prediction_loss_only": false,
|
| 9 |
+
"per_device_train_batch_size": 1,
|
| 10 |
+
"per_device_eval_batch_size": 4,
|
| 11 |
+
"per_gpu_train_batch_size": null,
|
| 12 |
+
"per_gpu_eval_batch_size": null,
|
| 13 |
+
"gradient_accumulation_steps": 32,
|
| 14 |
+
"eval_accumulation_steps": 2,
|
| 15 |
+
"eval_delay": 0,
|
| 16 |
+
"torch_empty_cache_steps": 250,
|
| 17 |
+
"learning_rate": 1e-05,
|
| 18 |
+
"weight_decay": 0.01,
|
| 19 |
+
"adam_beta1": 0.9,
|
| 20 |
+
"adam_beta2": 0.999,
|
| 21 |
+
"adam_epsilon": 1e-08,
|
| 22 |
+
"max_grad_norm": 0.6,
|
| 23 |
+
"num_train_epochs": 3.0,
|
| 24 |
+
"max_steps": -1,
|
| 25 |
+
"lr_scheduler_type": "linear",
|
| 26 |
+
"lr_scheduler_kwargs": {},
|
| 27 |
+
"warmup_ratio": 0.1,
|
| 28 |
+
"warmup_steps": 0,
|
| 29 |
+
"log_level": "passive",
|
| 30 |
+
"log_level_replica": "warning",
|
| 31 |
+
"log_on_each_node": true,
|
| 32 |
+
"logging_dir": "Trained_Models/Jackson0018/Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/runs/Sep28_23-51-45_ai21",
|
| 33 |
+
"logging_strategy": "steps",
|
| 34 |
+
"logging_first_step": false,
|
| 35 |
+
"logging_steps": 10,
|
| 36 |
+
"logging_nan_inf_filter": false,
|
| 37 |
+
"save_strategy": "steps",
|
| 38 |
+
"save_steps": 10000,
|
| 39 |
+
"save_total_limit": null,
|
| 40 |
+
"save_safetensors": true,
|
| 41 |
+
"save_on_each_node": false,
|
| 42 |
+
"save_only_model": false,
|
| 43 |
+
"restore_callback_states_from_checkpoint": false,
|
| 44 |
+
"no_cuda": false,
|
| 45 |
+
"use_cpu": false,
|
| 46 |
+
"use_mps_device": false,
|
| 47 |
+
"seed": 3407,
|
| 48 |
+
"data_seed": 3407,
|
| 49 |
+
"jit_mode_eval": false,
|
| 50 |
+
"use_ipex": false,
|
| 51 |
+
"bf16": true,
|
| 52 |
+
"fp16": false,
|
| 53 |
+
"fp16_opt_level": "O1",
|
| 54 |
+
"half_precision_backend": "auto",
|
| 55 |
+
"bf16_full_eval": false,
|
| 56 |
+
"fp16_full_eval": false,
|
| 57 |
+
"tf32": null,
|
| 58 |
+
"local_rank": 0,
|
| 59 |
+
"ddp_backend": null,
|
| 60 |
+
"tpu_num_cores": null,
|
| 61 |
+
"tpu_metrics_debug": false,
|
| 62 |
+
"debug": [],
|
| 63 |
+
"dataloader_drop_last": false,
|
| 64 |
+
"eval_steps": null,
|
| 65 |
+
"dataloader_num_workers": 0,
|
| 66 |
+
"dataloader_prefetch_factor": null,
|
| 67 |
+
"past_index": -1,
|
| 68 |
+
"run_name": null,
|
| 69 |
+
"disable_tqdm": false,
|
| 70 |
+
"remove_unused_columns": true,
|
| 71 |
+
"label_names": null,
|
| 72 |
+
"load_best_model_at_end": false,
|
| 73 |
+
"metric_for_best_model": null,
|
| 74 |
+
"greater_is_better": null,
|
| 75 |
+
"ignore_data_skip": false,
|
| 76 |
+
"fsdp": [],
|
| 77 |
+
"fsdp_min_num_params": 0,
|
| 78 |
+
"fsdp_config": {
|
| 79 |
+
"min_num_params": 0,
|
| 80 |
+
"xla": false,
|
| 81 |
+
"xla_fsdp_v2": false,
|
| 82 |
+
"xla_fsdp_grad_ckpt": false
|
| 83 |
+
},
|
| 84 |
+
"fsdp_transformer_layer_cls_to_wrap": null,
|
| 85 |
+
"accelerator_config": {
|
| 86 |
+
"split_batches": false,
|
| 87 |
+
"dispatch_batches": null,
|
| 88 |
+
"even_batches": true,
|
| 89 |
+
"use_seedable_sampler": true,
|
| 90 |
+
"non_blocking": false,
|
| 91 |
+
"gradient_accumulation_kwargs": null
|
| 92 |
+
},
|
| 93 |
+
"deepspeed": null,
|
| 94 |
+
"label_smoothing_factor": 0.0,
|
| 95 |
+
"optim": "adamw_8bit",
|
| 96 |
+
"optim_args": null,
|
| 97 |
+
"adafactor": false,
|
| 98 |
+
"group_by_length": false,
|
| 99 |
+
"length_column_name": "length",
|
| 100 |
+
"report_to": [],
|
| 101 |
+
"ddp_find_unused_parameters": null,
|
| 102 |
+
"ddp_bucket_cap_mb": null,
|
| 103 |
+
"ddp_broadcast_buffers": null,
|
| 104 |
+
"dataloader_pin_memory": true,
|
| 105 |
+
"dataloader_persistent_workers": false,
|
| 106 |
+
"skip_memory_metrics": true,
|
| 107 |
+
"use_legacy_prediction_loop": false,
|
| 108 |
+
"push_to_hub": false,
|
| 109 |
+
"resume_from_checkpoint": null,
|
| 110 |
+
"hub_model_id": null,
|
| 111 |
+
"hub_strategy": "every_save",
|
| 112 |
+
"hub_token": "<HUB_TOKEN>",
|
| 113 |
+
"hub_private_repo": null,
|
| 114 |
+
"hub_always_push": false,
|
| 115 |
+
"hub_revision": null,
|
| 116 |
+
"gradient_checkpointing": false,
|
| 117 |
+
"gradient_checkpointing_kwargs": null,
|
| 118 |
+
"include_inputs_for_metrics": false,
|
| 119 |
+
"include_for_metrics": [],
|
| 120 |
+
"eval_do_concat_batches": true,
|
| 121 |
+
"fp16_backend": "auto",
|
| 122 |
+
"push_to_hub_model_id": null,
|
| 123 |
+
"push_to_hub_organization": null,
|
| 124 |
+
"push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
|
| 125 |
+
"mp_parameters": "",
|
| 126 |
+
"auto_find_batch_size": true,
|
| 127 |
+
"full_determinism": false,
|
| 128 |
+
"torchdynamo": null,
|
| 129 |
+
"ray_scope": "last",
|
| 130 |
+
"ddp_timeout": 1800,
|
| 131 |
+
"torch_compile": false,
|
| 132 |
+
"torch_compile_backend": null,
|
| 133 |
+
"torch_compile_mode": null,
|
| 134 |
+
"include_tokens_per_second": false,
|
| 135 |
+
"include_num_input_tokens_seen": false,
|
| 136 |
+
"neftune_noise_alpha": null,
|
| 137 |
+
"optim_target_modules": null,
|
| 138 |
+
"batch_eval_metrics": false,
|
| 139 |
+
"eval_on_start": false,
|
| 140 |
+
"use_liger_kernel": false,
|
| 141 |
+
"liger_kernel_config": null,
|
| 142 |
+
"eval_use_gather_object": false,
|
| 143 |
+
"average_tokens_across_devices": false,
|
| 144 |
+
"model_init_kwargs": null,
|
| 145 |
+
"ref_model_init_kwargs": null,
|
| 146 |
+
"model_adapter_name": null,
|
| 147 |
+
"ref_adapter_name": null,
|
| 148 |
+
"force_use_ref_model": false,
|
| 149 |
+
"disable_dropout": true,
|
| 150 |
+
"use_logits_to_keep": false,
|
| 151 |
+
"dataset_num_proc": 2,
|
| 152 |
+
"padding_value": null,
|
| 153 |
+
"label_pad_token_id": -100,
|
| 154 |
+
"max_prompt_length": 2000,
|
| 155 |
+
"max_completion_length": 2000,
|
| 156 |
+
"max_length": 4000,
|
| 157 |
+
"truncation_mode": "keep_end",
|
| 158 |
+
"padding_free": false,
|
| 159 |
+
"precompute_ref_log_probs": false,
|
| 160 |
+
"precompute_ref_batch_size": null,
|
| 161 |
+
"tools": null,
|
| 162 |
+
"loss_type": "sigmoid",
|
| 163 |
+
"use_liger_loss": false,
|
| 164 |
+
"base_model_attribute_name": "model",
|
| 165 |
+
"beta": 0.1,
|
| 166 |
+
"f_divergence_type": "reverse_kl",
|
| 167 |
+
"f_alpha_divergence_coef": 1.0,
|
| 168 |
+
"reference_free": false,
|
| 169 |
+
"label_smoothing": 0.0,
|
| 170 |
+
"use_weighting": false,
|
| 171 |
+
"rpo_alpha": null,
|
| 172 |
+
"ld_alpha": null,
|
| 173 |
+
"discopop_tau": 0.05,
|
| 174 |
+
"loss_weights": null,
|
| 175 |
+
"sync_ref_model": false,
|
| 176 |
+
"ref_model_mixup_alpha": 0.6,
|
| 177 |
+
"ref_model_sync_steps": 512,
|
| 178 |
+
"generate_during_eval": false,
|
| 179 |
+
"vllm_sampling_params": null,
|
| 180 |
+
"unsloth_num_chunks": -1
|
| 181 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 3 |
+
library_name: transformers
|
| 4 |
+
model_name: method_agent
|
| 5 |
+
tags:
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- unsloth
|
| 8 |
+
- trl
|
| 9 |
+
- dpo
|
| 10 |
+
licence: license
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Model Card for method_agent
|
| 14 |
+
|
| 15 |
+
This model is a fine-tuned version of [unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit](https://huggingface.co/unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit).
|
| 16 |
+
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 17 |
+
|
| 18 |
+
## Quick start
|
| 19 |
+
|
| 20 |
+
```python
|
| 21 |
+
from transformers import pipeline
|
| 22 |
+
|
| 23 |
+
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
| 24 |
+
generator = pipeline("text-generation", model="None", device="cuda")
|
| 25 |
+
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
| 26 |
+
print(output["generated_text"])
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Training procedure
|
| 30 |
+
|
| 31 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jackson0530/ScientificPaperRetrieval_Train-Train_DPO_unsloth/runs/nd2l9u6s)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
|
| 35 |
+
|
| 36 |
+
### Framework versions
|
| 37 |
+
|
| 38 |
+
- TRL: 0.21.0
|
| 39 |
+
- Transformers: 4.55.0
|
| 40 |
+
- Pytorch: 2.7.1
|
| 41 |
+
- Datasets: 3.6.0
|
| 42 |
+
- Tokenizers: 0.21.4
|
| 43 |
+
|
| 44 |
+
## Citations
|
| 45 |
+
|
| 46 |
+
Cite DPO as:
|
| 47 |
+
|
| 48 |
+
```bibtex
|
| 49 |
+
@inproceedings{rafailov2023direct,
|
| 50 |
+
title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}},
|
| 51 |
+
author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn},
|
| 52 |
+
year = 2023,
|
| 53 |
+
booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
|
| 54 |
+
url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html},
|
| 55 |
+
editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
|
| 56 |
+
}
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
Cite TRL as:
|
| 60 |
+
|
| 61 |
+
```bibtex
|
| 62 |
+
@misc{vonwerra2022trl,
|
| 63 |
+
title = {{TRL: Transformer Reinforcement Learning}},
|
| 64 |
+
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
| 65 |
+
year = 2020,
|
| 66 |
+
journal = {GitHub repository},
|
| 67 |
+
publisher = {GitHub},
|
| 68 |
+
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 69 |
+
}
|
| 70 |
+
```
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/README.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 3 |
+
library_name: peft
|
| 4 |
+
pipeline_tag: text-generation
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit
|
| 7 |
+
- dpo
|
| 8 |
+
- lora
|
| 9 |
+
- transformers
|
| 10 |
+
- trl
|
| 11 |
+
- unsloth
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Model Card for Model ID
|
| 15 |
+
|
| 16 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
## Model Details
|
| 21 |
+
|
| 22 |
+
### Model Description
|
| 23 |
+
|
| 24 |
+
<!-- Provide a longer summary of what this model is. -->
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
- **Developed by:** [More Information Needed]
|
| 29 |
+
- **Funded by [optional]:** [More Information Needed]
|
| 30 |
+
- **Shared by [optional]:** [More Information Needed]
|
| 31 |
+
- **Model type:** [More Information Needed]
|
| 32 |
+
- **Language(s) (NLP):** [More Information Needed]
|
| 33 |
+
- **License:** [More Information Needed]
|
| 34 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
| 35 |
+
|
| 36 |
+
### Model Sources [optional]
|
| 37 |
+
|
| 38 |
+
<!-- Provide the basic links for the model. -->
|
| 39 |
+
|
| 40 |
+
- **Repository:** [More Information Needed]
|
| 41 |
+
- **Paper [optional]:** [More Information Needed]
|
| 42 |
+
- **Demo [optional]:** [More Information Needed]
|
| 43 |
+
|
| 44 |
+
## Uses
|
| 45 |
+
|
| 46 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
| 47 |
+
|
| 48 |
+
### Direct Use
|
| 49 |
+
|
| 50 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
| 51 |
+
|
| 52 |
+
[More Information Needed]
|
| 53 |
+
|
| 54 |
+
### Downstream Use [optional]
|
| 55 |
+
|
| 56 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
| 57 |
+
|
| 58 |
+
[More Information Needed]
|
| 59 |
+
|
| 60 |
+
### Out-of-Scope Use
|
| 61 |
+
|
| 62 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
| 63 |
+
|
| 64 |
+
[More Information Needed]
|
| 65 |
+
|
| 66 |
+
## Bias, Risks, and Limitations
|
| 67 |
+
|
| 68 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
| 69 |
+
|
| 70 |
+
[More Information Needed]
|
| 71 |
+
|
| 72 |
+
### Recommendations
|
| 73 |
+
|
| 74 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
| 75 |
+
|
| 76 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
| 77 |
+
|
| 78 |
+
## How to Get Started with the Model
|
| 79 |
+
|
| 80 |
+
Use the code below to get started with the model.
|
| 81 |
+
|
| 82 |
+
[More Information Needed]
|
| 83 |
+
|
| 84 |
+
## Training Details
|
| 85 |
+
|
| 86 |
+
### Training Data
|
| 87 |
+
|
| 88 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
| 89 |
+
|
| 90 |
+
[More Information Needed]
|
| 91 |
+
|
| 92 |
+
### Training Procedure
|
| 93 |
+
|
| 94 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
| 95 |
+
|
| 96 |
+
#### Preprocessing [optional]
|
| 97 |
+
|
| 98 |
+
[More Information Needed]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
#### Training Hyperparameters
|
| 102 |
+
|
| 103 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
| 104 |
+
|
| 105 |
+
#### Speeds, Sizes, Times [optional]
|
| 106 |
+
|
| 107 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
| 108 |
+
|
| 109 |
+
[More Information Needed]
|
| 110 |
+
|
| 111 |
+
## Evaluation
|
| 112 |
+
|
| 113 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
| 114 |
+
|
| 115 |
+
### Testing Data, Factors & Metrics
|
| 116 |
+
|
| 117 |
+
#### Testing Data
|
| 118 |
+
|
| 119 |
+
<!-- This should link to a Dataset Card if possible. -->
|
| 120 |
+
|
| 121 |
+
[More Information Needed]
|
| 122 |
+
|
| 123 |
+
#### Factors
|
| 124 |
+
|
| 125 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
| 126 |
+
|
| 127 |
+
[More Information Needed]
|
| 128 |
+
|
| 129 |
+
#### Metrics
|
| 130 |
+
|
| 131 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
| 132 |
+
|
| 133 |
+
[More Information Needed]
|
| 134 |
+
|
| 135 |
+
### Results
|
| 136 |
+
|
| 137 |
+
[More Information Needed]
|
| 138 |
+
|
| 139 |
+
#### Summary
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
## Model Examination [optional]
|
| 144 |
+
|
| 145 |
+
<!-- Relevant interpretability work for the model goes here -->
|
| 146 |
+
|
| 147 |
+
[More Information Needed]
|
| 148 |
+
|
| 149 |
+
## Environmental Impact
|
| 150 |
+
|
| 151 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
| 152 |
+
|
| 153 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
| 154 |
+
|
| 155 |
+
- **Hardware Type:** [More Information Needed]
|
| 156 |
+
- **Hours used:** [More Information Needed]
|
| 157 |
+
- **Cloud Provider:** [More Information Needed]
|
| 158 |
+
- **Compute Region:** [More Information Needed]
|
| 159 |
+
- **Carbon Emitted:** [More Information Needed]
|
| 160 |
+
|
| 161 |
+
## Technical Specifications [optional]
|
| 162 |
+
|
| 163 |
+
### Model Architecture and Objective
|
| 164 |
+
|
| 165 |
+
[More Information Needed]
|
| 166 |
+
|
| 167 |
+
### Compute Infrastructure
|
| 168 |
+
|
| 169 |
+
[More Information Needed]
|
| 170 |
+
|
| 171 |
+
#### Hardware
|
| 172 |
+
|
| 173 |
+
[More Information Needed]
|
| 174 |
+
|
| 175 |
+
#### Software
|
| 176 |
+
|
| 177 |
+
[More Information Needed]
|
| 178 |
+
|
| 179 |
+
## Citation [optional]
|
| 180 |
+
|
| 181 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
| 182 |
+
|
| 183 |
+
**BibTeX:**
|
| 184 |
+
|
| 185 |
+
[More Information Needed]
|
| 186 |
+
|
| 187 |
+
**APA:**
|
| 188 |
+
|
| 189 |
+
[More Information Needed]
|
| 190 |
+
|
| 191 |
+
## Glossary [optional]
|
| 192 |
+
|
| 193 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
| 194 |
+
|
| 195 |
+
[More Information Needed]
|
| 196 |
+
|
| 197 |
+
## More Information [optional]
|
| 198 |
+
|
| 199 |
+
[More Information Needed]
|
| 200 |
+
|
| 201 |
+
## Model Card Authors [optional]
|
| 202 |
+
|
| 203 |
+
[More Information Needed]
|
| 204 |
+
|
| 205 |
+
## Model Card Contact
|
| 206 |
+
|
| 207 |
+
[More Information Needed]
|
| 208 |
+
### Framework versions
|
| 209 |
+
|
| 210 |
+
- PEFT 0.17.0
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 64,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 64,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"k_proj",
|
| 29 |
+
"gate_proj",
|
| 30 |
+
"o_proj",
|
| 31 |
+
"down_proj",
|
| 32 |
+
"v_proj",
|
| 33 |
+
"q_proj",
|
| 34 |
+
"up_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "CAUSAL_LM",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ae24012a3ea8f7fce9a008de9f97181e12bbbaf0531c68a72541ea472974179
|
| 3 |
+
size 479005064
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/added_tokens.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</tool_call>": 151658,
|
| 3 |
+
"<tool_call>": 151657,
|
| 4 |
+
"<|box_end|>": 151649,
|
| 5 |
+
"<|box_start|>": 151648,
|
| 6 |
+
"<|endoftext|>": 151643,
|
| 7 |
+
"<|file_sep|>": 151664,
|
| 8 |
+
"<|fim_middle|>": 151660,
|
| 9 |
+
"<|fim_pad|>": 151662,
|
| 10 |
+
"<|fim_prefix|>": 151659,
|
| 11 |
+
"<|fim_suffix|>": 151661,
|
| 12 |
+
"<|im_end|>": 151645,
|
| 13 |
+
"<|im_start|>": 151644,
|
| 14 |
+
"<|image_pad|>": 151655,
|
| 15 |
+
"<|object_ref_end|>": 151647,
|
| 16 |
+
"<|object_ref_start|>": 151646,
|
| 17 |
+
"<|quad_end|>": 151651,
|
| 18 |
+
"<|quad_start|>": 151650,
|
| 19 |
+
"<|repo_name|>": 151663,
|
| 20 |
+
"<|video_pad|>": 151656,
|
| 21 |
+
"<|vision_end|>": 151653,
|
| 22 |
+
"<|vision_pad|>": 151654,
|
| 23 |
+
"<|vision_start|>": 151652
|
| 24 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/chat_template.jinja
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 4 |
+
{{- messages[0]['content'] }}
|
| 5 |
+
{%- else %}
|
| 6 |
+
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
| 7 |
+
{%- endif %}
|
| 8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 9 |
+
{%- for tool in tools %}
|
| 10 |
+
{{- "\n" }}
|
| 11 |
+
{{- tool | tojson }}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 14 |
+
{%- else %}
|
| 15 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
| 17 |
+
{%- else %}
|
| 18 |
+
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
| 19 |
+
{%- endif %}
|
| 20 |
+
{%- endif %}
|
| 21 |
+
{%- for message in messages %}
|
| 22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
| 23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 24 |
+
{%- elif message.role == "assistant" %}
|
| 25 |
+
{{- '<|im_start|>' + message.role }}
|
| 26 |
+
{%- if message.content %}
|
| 27 |
+
{{- '\n' + message.content }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{%- for tool_call in message.tool_calls %}
|
| 30 |
+
{%- if tool_call.function is defined %}
|
| 31 |
+
{%- set tool_call = tool_call.function %}
|
| 32 |
+
{%- endif %}
|
| 33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
| 34 |
+
{{- tool_call.name }}
|
| 35 |
+
{{- '", "arguments": ' }}
|
| 36 |
+
{{- tool_call.arguments | tojson }}
|
| 37 |
+
{{- '}\n</tool_call>' }}
|
| 38 |
+
{%- endfor %}
|
| 39 |
+
{{- '<|im_end|>\n' }}
|
| 40 |
+
{%- elif message.role == "tool" %}
|
| 41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
| 42 |
+
{{- '<|im_start|>user' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{{- '\n<tool_response>\n' }}
|
| 45 |
+
{{- message.content }}
|
| 46 |
+
{{- '\n</tool_response>' }}
|
| 47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 48 |
+
{{- '<|im_end|>\n' }}
|
| 49 |
+
{%- endif %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{%- if add_generation_prompt %}
|
| 53 |
+
{{- '<|im_start|>assistant\n' }}
|
| 54 |
+
{%- endif %}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:049d3edc908f4459bc1140a73475a4423172563d986ff594d44623c8f1032f04
|
| 3 |
+
size 243803397
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
|
| 3 |
+
size 14645
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d46728529080e683318d0592a9290e2f3e1dd31fa31190ac892162c71aa04cef
|
| 3 |
+
size 1465
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/special_tokens_map.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": "<|im_end|>"
|
| 25 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
| 3 |
+
size 11421896
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/tokenizer_config.json
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
}
|
| 181 |
+
},
|
| 182 |
+
"additional_special_tokens": [
|
| 183 |
+
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
+
],
|
| 197 |
+
"bos_token": null,
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"extra_special_tokens": {},
|
| 202 |
+
"model_max_length": 32768,
|
| 203 |
+
"pad_token": "<|im_end|>",
|
| 204 |
+
"padding_side": "right",
|
| 205 |
+
"split_special_tokens": false,
|
| 206 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 207 |
+
"unk_token": null
|
| 208 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/trainer_state.json
ADDED
|
@@ -0,0 +1,1024 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 669,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.0449438202247191,
|
| 14 |
+
"grad_norm": 7.2400288581848145,
|
| 15 |
+
"learning_rate": 1.3432835820895524e-06,
|
| 16 |
+
"logits/chosen": -0.6259505748748779,
|
| 17 |
+
"logits/rejected": -0.624777615070343,
|
| 18 |
+
"logps/chosen": -1442.914306640625,
|
| 19 |
+
"logps/rejected": -1479.676025390625,
|
| 20 |
+
"loss": 0.6947,
|
| 21 |
+
"rewards/accuracies": 0.41874998807907104,
|
| 22 |
+
"rewards/chosen": 0.0029697995632886887,
|
| 23 |
+
"rewards/margins": 0.0050926790572702885,
|
| 24 |
+
"rewards/rejected": -0.002122878096997738,
|
| 25 |
+
"step": 10
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.0898876404494382,
|
| 29 |
+
"grad_norm": 7.600191593170166,
|
| 30 |
+
"learning_rate": 2.835820895522388e-06,
|
| 31 |
+
"logits/chosen": -0.6153338551521301,
|
| 32 |
+
"logits/rejected": -0.6100883483886719,
|
| 33 |
+
"logps/chosen": -1482.649658203125,
|
| 34 |
+
"logps/rejected": -1481.495849609375,
|
| 35 |
+
"loss": 0.7043,
|
| 36 |
+
"rewards/accuracies": 0.4937500059604645,
|
| 37 |
+
"rewards/chosen": -0.029805928468704224,
|
| 38 |
+
"rewards/margins": -0.011171265505254269,
|
| 39 |
+
"rewards/rejected": -0.01863466389477253,
|
| 40 |
+
"step": 20
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.1348314606741573,
|
| 44 |
+
"grad_norm": 7.326368808746338,
|
| 45 |
+
"learning_rate": 4.3283582089552236e-06,
|
| 46 |
+
"logits/chosen": -0.6203653216362,
|
| 47 |
+
"logits/rejected": -0.6188796162605286,
|
| 48 |
+
"logps/chosen": -1467.7874755859375,
|
| 49 |
+
"logps/rejected": -1467.8643798828125,
|
| 50 |
+
"loss": 0.6889,
|
| 51 |
+
"rewards/accuracies": 0.518750011920929,
|
| 52 |
+
"rewards/chosen": -0.044243909418582916,
|
| 53 |
+
"rewards/margins": 0.018139898777008057,
|
| 54 |
+
"rewards/rejected": -0.062383800745010376,
|
| 55 |
+
"step": 30
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.1797752808988764,
|
| 59 |
+
"grad_norm": 7.233694553375244,
|
| 60 |
+
"learning_rate": 5.820895522388061e-06,
|
| 61 |
+
"logits/chosen": -0.603333592414856,
|
| 62 |
+
"logits/rejected": -0.6177260279655457,
|
| 63 |
+
"logps/chosen": -1480.260498046875,
|
| 64 |
+
"logps/rejected": -1457.1766357421875,
|
| 65 |
+
"loss": 0.693,
|
| 66 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 67 |
+
"rewards/chosen": -0.09666772186756134,
|
| 68 |
+
"rewards/margins": 0.009738157503306866,
|
| 69 |
+
"rewards/rejected": -0.10640586912631989,
|
| 70 |
+
"step": 40
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.2247191011235955,
|
| 74 |
+
"grad_norm": 8.360169410705566,
|
| 75 |
+
"learning_rate": 7.313432835820896e-06,
|
| 76 |
+
"logits/chosen": -0.6346372961997986,
|
| 77 |
+
"logits/rejected": -0.6143137812614441,
|
| 78 |
+
"logps/chosen": -1467.9993896484375,
|
| 79 |
+
"logps/rejected": -1486.096923828125,
|
| 80 |
+
"loss": 0.6953,
|
| 81 |
+
"rewards/accuracies": 0.512499988079071,
|
| 82 |
+
"rewards/chosen": -0.1760435253381729,
|
| 83 |
+
"rewards/margins": 0.0075997160747647285,
|
| 84 |
+
"rewards/rejected": -0.18364325165748596,
|
| 85 |
+
"step": 50
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.2696629213483146,
|
| 89 |
+
"grad_norm": 7.651188850402832,
|
| 90 |
+
"learning_rate": 8.805970149253732e-06,
|
| 91 |
+
"logits/chosen": -0.6099546551704407,
|
| 92 |
+
"logits/rejected": -0.603184700012207,
|
| 93 |
+
"logps/chosen": -1467.3143310546875,
|
| 94 |
+
"logps/rejected": -1464.572509765625,
|
| 95 |
+
"loss": 0.7023,
|
| 96 |
+
"rewards/accuracies": 0.47187501192092896,
|
| 97 |
+
"rewards/chosen": -0.21066781878471375,
|
| 98 |
+
"rewards/margins": -0.0032155998051166534,
|
| 99 |
+
"rewards/rejected": -0.2074522078037262,
|
| 100 |
+
"step": 60
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.3146067415730337,
|
| 104 |
+
"grad_norm": 7.600541114807129,
|
| 105 |
+
"learning_rate": 9.966777408637874e-06,
|
| 106 |
+
"logits/chosen": -0.6103735566139221,
|
| 107 |
+
"logits/rejected": -0.6086291074752808,
|
| 108 |
+
"logps/chosen": -1429.90673828125,
|
| 109 |
+
"logps/rejected": -1434.4637451171875,
|
| 110 |
+
"loss": 0.6975,
|
| 111 |
+
"rewards/accuracies": 0.534375011920929,
|
| 112 |
+
"rewards/chosen": -0.12626130878925323,
|
| 113 |
+
"rewards/margins": 0.006277655251324177,
|
| 114 |
+
"rewards/rejected": -0.13253895938396454,
|
| 115 |
+
"step": 70
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.3595505617977528,
|
| 119 |
+
"grad_norm": 7.452111721038818,
|
| 120 |
+
"learning_rate": 9.800664451827243e-06,
|
| 121 |
+
"logits/chosen": -0.6045969724655151,
|
| 122 |
+
"logits/rejected": -0.6059508919715881,
|
| 123 |
+
"logps/chosen": -1478.626953125,
|
| 124 |
+
"logps/rejected": -1493.902587890625,
|
| 125 |
+
"loss": 0.7037,
|
| 126 |
+
"rewards/accuracies": 0.503125011920929,
|
| 127 |
+
"rewards/chosen": -0.20764172077178955,
|
| 128 |
+
"rewards/margins": -0.005739938467741013,
|
| 129 |
+
"rewards/rejected": -0.20190176367759705,
|
| 130 |
+
"step": 80
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.4044943820224719,
|
| 134 |
+
"grad_norm": 7.821183681488037,
|
| 135 |
+
"learning_rate": 9.634551495016612e-06,
|
| 136 |
+
"logits/chosen": -0.624080240726471,
|
| 137 |
+
"logits/rejected": -0.6179949045181274,
|
| 138 |
+
"logps/chosen": -1482.8621826171875,
|
| 139 |
+
"logps/rejected": -1521.285888671875,
|
| 140 |
+
"loss": 0.6984,
|
| 141 |
+
"rewards/accuracies": 0.5375000238418579,
|
| 142 |
+
"rewards/chosen": -0.5030026435852051,
|
| 143 |
+
"rewards/margins": 0.013752209953963757,
|
| 144 |
+
"rewards/rejected": -0.5167548656463623,
|
| 145 |
+
"step": 90
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 0.449438202247191,
|
| 149 |
+
"grad_norm": 7.3397536277771,
|
| 150 |
+
"learning_rate": 9.468438538205981e-06,
|
| 151 |
+
"logits/chosen": -0.6049561500549316,
|
| 152 |
+
"logits/rejected": -0.593718945980072,
|
| 153 |
+
"logps/chosen": -1475.85498046875,
|
| 154 |
+
"logps/rejected": -1476.67431640625,
|
| 155 |
+
"loss": 0.6979,
|
| 156 |
+
"rewards/accuracies": 0.53125,
|
| 157 |
+
"rewards/chosen": -0.6247831583023071,
|
| 158 |
+
"rewards/margins": 0.01662410795688629,
|
| 159 |
+
"rewards/rejected": -0.641407310962677,
|
| 160 |
+
"step": 100
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"epoch": 0.4943820224719101,
|
| 164 |
+
"grad_norm": 7.887584686279297,
|
| 165 |
+
"learning_rate": 9.30232558139535e-06,
|
| 166 |
+
"logits/chosen": -0.6068257093429565,
|
| 167 |
+
"logits/rejected": -0.5953084826469421,
|
| 168 |
+
"logps/chosen": -1477.0902099609375,
|
| 169 |
+
"logps/rejected": -1462.1424560546875,
|
| 170 |
+
"loss": 0.6949,
|
| 171 |
+
"rewards/accuracies": 0.5625,
|
| 172 |
+
"rewards/chosen": -0.7456911206245422,
|
| 173 |
+
"rewards/margins": 0.02874937281012535,
|
| 174 |
+
"rewards/rejected": -0.7744405269622803,
|
| 175 |
+
"step": 110
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"epoch": 0.5393258426966292,
|
| 179 |
+
"grad_norm": 7.936194896697998,
|
| 180 |
+
"learning_rate": 9.136212624584718e-06,
|
| 181 |
+
"logits/chosen": -0.6176148653030396,
|
| 182 |
+
"logits/rejected": -0.6264122128486633,
|
| 183 |
+
"logps/chosen": -1426.8101806640625,
|
| 184 |
+
"logps/rejected": -1441.458251953125,
|
| 185 |
+
"loss": 0.6985,
|
| 186 |
+
"rewards/accuracies": 0.543749988079071,
|
| 187 |
+
"rewards/chosen": -0.7066472768783569,
|
| 188 |
+
"rewards/margins": 0.020474322140216827,
|
| 189 |
+
"rewards/rejected": -0.7271216511726379,
|
| 190 |
+
"step": 120
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"epoch": 0.5842696629213483,
|
| 194 |
+
"grad_norm": 7.374553680419922,
|
| 195 |
+
"learning_rate": 8.970099667774087e-06,
|
| 196 |
+
"logits/chosen": -0.6316522359848022,
|
| 197 |
+
"logits/rejected": -0.612767219543457,
|
| 198 |
+
"logps/chosen": -1424.370849609375,
|
| 199 |
+
"logps/rejected": -1446.091552734375,
|
| 200 |
+
"loss": 0.6898,
|
| 201 |
+
"rewards/accuracies": 0.5625,
|
| 202 |
+
"rewards/chosen": -0.5253143310546875,
|
| 203 |
+
"rewards/margins": 0.03358197957277298,
|
| 204 |
+
"rewards/rejected": -0.5588963031768799,
|
| 205 |
+
"step": 130
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.6292134831460674,
|
| 209 |
+
"grad_norm": 7.252725124359131,
|
| 210 |
+
"learning_rate": 8.803986710963456e-06,
|
| 211 |
+
"logits/chosen": -0.5971549153327942,
|
| 212 |
+
"logits/rejected": -0.6010578870773315,
|
| 213 |
+
"logps/chosen": -1466.6973876953125,
|
| 214 |
+
"logps/rejected": -1467.979248046875,
|
| 215 |
+
"loss": 0.7016,
|
| 216 |
+
"rewards/accuracies": 0.528124988079071,
|
| 217 |
+
"rewards/chosen": -0.5401462316513062,
|
| 218 |
+
"rewards/margins": 0.01154586486518383,
|
| 219 |
+
"rewards/rejected": -0.5516921281814575,
|
| 220 |
+
"step": 140
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 0.6741573033707865,
|
| 224 |
+
"grad_norm": 7.468507289886475,
|
| 225 |
+
"learning_rate": 8.637873754152825e-06,
|
| 226 |
+
"logits/chosen": -0.6183695793151855,
|
| 227 |
+
"logits/rejected": -0.6090121865272522,
|
| 228 |
+
"logps/chosen": -1442.530517578125,
|
| 229 |
+
"logps/rejected": -1445.8988037109375,
|
| 230 |
+
"loss": 0.6867,
|
| 231 |
+
"rewards/accuracies": 0.5718749761581421,
|
| 232 |
+
"rewards/chosen": -0.48295894265174866,
|
| 233 |
+
"rewards/margins": 0.03915649652481079,
|
| 234 |
+
"rewards/rejected": -0.5221154093742371,
|
| 235 |
+
"step": 150
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"epoch": 0.7191011235955056,
|
| 239 |
+
"grad_norm": 11.664162635803223,
|
| 240 |
+
"learning_rate": 8.471760797342193e-06,
|
| 241 |
+
"logits/chosen": -0.5982354283332825,
|
| 242 |
+
"logits/rejected": -0.5825232863426208,
|
| 243 |
+
"logps/chosen": -1513.6202392578125,
|
| 244 |
+
"logps/rejected": -1509.6234130859375,
|
| 245 |
+
"loss": 0.7117,
|
| 246 |
+
"rewards/accuracies": 0.503125011920929,
|
| 247 |
+
"rewards/chosen": -0.4665093421936035,
|
| 248 |
+
"rewards/margins": -0.006485844496637583,
|
| 249 |
+
"rewards/rejected": -0.4600234627723694,
|
| 250 |
+
"step": 160
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"epoch": 0.7640449438202247,
|
| 254 |
+
"grad_norm": 8.19553279876709,
|
| 255 |
+
"learning_rate": 8.305647840531562e-06,
|
| 256 |
+
"logits/chosen": -0.6178793907165527,
|
| 257 |
+
"logits/rejected": -0.6187315583229065,
|
| 258 |
+
"logps/chosen": -1486.3048095703125,
|
| 259 |
+
"logps/rejected": -1474.712890625,
|
| 260 |
+
"loss": 0.6918,
|
| 261 |
+
"rewards/accuracies": 0.5249999761581421,
|
| 262 |
+
"rewards/chosen": -0.09939844161272049,
|
| 263 |
+
"rewards/margins": 0.030898302793502808,
|
| 264 |
+
"rewards/rejected": -0.1302967518568039,
|
| 265 |
+
"step": 170
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"epoch": 0.8089887640449438,
|
| 269 |
+
"grad_norm": 7.092017650604248,
|
| 270 |
+
"learning_rate": 8.139534883720931e-06,
|
| 271 |
+
"logits/chosen": -0.6167936325073242,
|
| 272 |
+
"logits/rejected": -0.6189281940460205,
|
| 273 |
+
"logps/chosen": -1467.26904296875,
|
| 274 |
+
"logps/rejected": -1492.12890625,
|
| 275 |
+
"loss": 0.7104,
|
| 276 |
+
"rewards/accuracies": 0.484375,
|
| 277 |
+
"rewards/chosen": -0.042918525636196136,
|
| 278 |
+
"rewards/margins": -0.0062050605192780495,
|
| 279 |
+
"rewards/rejected": -0.03671346232295036,
|
| 280 |
+
"step": 180
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"epoch": 0.8539325842696629,
|
| 284 |
+
"grad_norm": 9.236512184143066,
|
| 285 |
+
"learning_rate": 7.9734219269103e-06,
|
| 286 |
+
"logits/chosen": -0.6129944920539856,
|
| 287 |
+
"logits/rejected": -0.6186620593070984,
|
| 288 |
+
"logps/chosen": -1476.5726318359375,
|
| 289 |
+
"logps/rejected": -1479.4190673828125,
|
| 290 |
+
"loss": 0.6976,
|
| 291 |
+
"rewards/accuracies": 0.5375000238418579,
|
| 292 |
+
"rewards/chosen": -0.11343568563461304,
|
| 293 |
+
"rewards/margins": 0.0183081217110157,
|
| 294 |
+
"rewards/rejected": -0.13174381852149963,
|
| 295 |
+
"step": 190
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"epoch": 0.898876404494382,
|
| 299 |
+
"grad_norm": 8.727457046508789,
|
| 300 |
+
"learning_rate": 7.807308970099668e-06,
|
| 301 |
+
"logits/chosen": -0.6297080516815186,
|
| 302 |
+
"logits/rejected": -0.6237484216690063,
|
| 303 |
+
"logps/chosen": -1487.1334228515625,
|
| 304 |
+
"logps/rejected": -1459.8831787109375,
|
| 305 |
+
"loss": 0.6815,
|
| 306 |
+
"rewards/accuracies": 0.550000011920929,
|
| 307 |
+
"rewards/chosen": -0.06383897364139557,
|
| 308 |
+
"rewards/margins": 0.05165405198931694,
|
| 309 |
+
"rewards/rejected": -0.1154930368065834,
|
| 310 |
+
"step": 200
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.9438202247191011,
|
| 314 |
+
"grad_norm": 8.576679229736328,
|
| 315 |
+
"learning_rate": 7.641196013289037e-06,
|
| 316 |
+
"logits/chosen": -0.6135516166687012,
|
| 317 |
+
"logits/rejected": -0.6387461423873901,
|
| 318 |
+
"logps/chosen": -1464.3004150390625,
|
| 319 |
+
"logps/rejected": -1465.748291015625,
|
| 320 |
+
"loss": 0.7012,
|
| 321 |
+
"rewards/accuracies": 0.4906249940395355,
|
| 322 |
+
"rewards/chosen": 0.04854054003953934,
|
| 323 |
+
"rewards/margins": 0.009796160273253918,
|
| 324 |
+
"rewards/rejected": 0.038744375109672546,
|
| 325 |
+
"step": 210
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"epoch": 0.9887640449438202,
|
| 329 |
+
"grad_norm": 7.046853542327881,
|
| 330 |
+
"learning_rate": 7.475083056478406e-06,
|
| 331 |
+
"logits/chosen": -0.6337479948997498,
|
| 332 |
+
"logits/rejected": -0.6330554485321045,
|
| 333 |
+
"logps/chosen": -1460.4818115234375,
|
| 334 |
+
"logps/rejected": -1454.669189453125,
|
| 335 |
+
"loss": 0.6871,
|
| 336 |
+
"rewards/accuracies": 0.550000011920929,
|
| 337 |
+
"rewards/chosen": -0.08278223127126694,
|
| 338 |
+
"rewards/margins": 0.04179311543703079,
|
| 339 |
+
"rewards/rejected": -0.12457535415887833,
|
| 340 |
+
"step": 220
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"epoch": 1.0314606741573034,
|
| 344 |
+
"grad_norm": 6.021562576293945,
|
| 345 |
+
"learning_rate": 7.308970099667775e-06,
|
| 346 |
+
"logits/chosen": -0.6283758282661438,
|
| 347 |
+
"logits/rejected": -0.6265605092048645,
|
| 348 |
+
"logps/chosen": -1488.2630615234375,
|
| 349 |
+
"logps/rejected": -1481.769775390625,
|
| 350 |
+
"loss": 0.5947,
|
| 351 |
+
"rewards/accuracies": 0.75,
|
| 352 |
+
"rewards/chosen": 0.006120601668953896,
|
| 353 |
+
"rewards/margins": 0.2608007490634918,
|
| 354 |
+
"rewards/rejected": -0.25468015670776367,
|
| 355 |
+
"step": 230
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"epoch": 1.0764044943820226,
|
| 359 |
+
"grad_norm": 8.284282684326172,
|
| 360 |
+
"learning_rate": 7.1428571428571436e-06,
|
| 361 |
+
"logits/chosen": -0.6591601371765137,
|
| 362 |
+
"logits/rejected": -0.652434229850769,
|
| 363 |
+
"logps/chosen": -1491.092529296875,
|
| 364 |
+
"logps/rejected": -1460.5125732421875,
|
| 365 |
+
"loss": 0.5582,
|
| 366 |
+
"rewards/accuracies": 0.8125,
|
| 367 |
+
"rewards/chosen": 0.1593593955039978,
|
| 368 |
+
"rewards/margins": 0.3331313133239746,
|
| 369 |
+
"rewards/rejected": -0.1737719029188156,
|
| 370 |
+
"step": 240
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"epoch": 1.1213483146067416,
|
| 374 |
+
"grad_norm": 8.415833473205566,
|
| 375 |
+
"learning_rate": 6.976744186046513e-06,
|
| 376 |
+
"logits/chosen": -0.6509793996810913,
|
| 377 |
+
"logits/rejected": -0.64632248878479,
|
| 378 |
+
"logps/chosen": -1457.5777587890625,
|
| 379 |
+
"logps/rejected": -1468.1063232421875,
|
| 380 |
+
"loss": 0.5393,
|
| 381 |
+
"rewards/accuracies": 0.8500000238418579,
|
| 382 |
+
"rewards/chosen": 0.19115446507930756,
|
| 383 |
+
"rewards/margins": 0.37512803077697754,
|
| 384 |
+
"rewards/rejected": -0.1839735507965088,
|
| 385 |
+
"step": 250
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"epoch": 1.1662921348314608,
|
| 389 |
+
"grad_norm": 6.646719932556152,
|
| 390 |
+
"learning_rate": 6.810631229235881e-06,
|
| 391 |
+
"logits/chosen": -0.6484296917915344,
|
| 392 |
+
"logits/rejected": -0.6388376951217651,
|
| 393 |
+
"logps/chosen": -1499.72998046875,
|
| 394 |
+
"logps/rejected": -1491.3359375,
|
| 395 |
+
"loss": 0.5073,
|
| 396 |
+
"rewards/accuracies": 0.862500011920929,
|
| 397 |
+
"rewards/chosen": -0.02073713205754757,
|
| 398 |
+
"rewards/margins": 0.46458253264427185,
|
| 399 |
+
"rewards/rejected": -0.48531967401504517,
|
| 400 |
+
"step": 260
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"epoch": 1.2112359550561798,
|
| 404 |
+
"grad_norm": 7.409927845001221,
|
| 405 |
+
"learning_rate": 6.64451827242525e-06,
|
| 406 |
+
"logits/chosen": -0.6177853345870972,
|
| 407 |
+
"logits/rejected": -0.6329953670501709,
|
| 408 |
+
"logps/chosen": -1474.36474609375,
|
| 409 |
+
"logps/rejected": -1496.6304931640625,
|
| 410 |
+
"loss": 0.5096,
|
| 411 |
+
"rewards/accuracies": 0.8812500238418579,
|
| 412 |
+
"rewards/chosen": -0.14079628884792328,
|
| 413 |
+
"rewards/margins": 0.45578232407569885,
|
| 414 |
+
"rewards/rejected": -0.5965785980224609,
|
| 415 |
+
"step": 270
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 1.256179775280899,
|
| 419 |
+
"grad_norm": 5.9450225830078125,
|
| 420 |
+
"learning_rate": 6.4784053156146185e-06,
|
| 421 |
+
"logits/chosen": -0.6525323987007141,
|
| 422 |
+
"logits/rejected": -0.645168662071228,
|
| 423 |
+
"logps/chosen": -1508.730712890625,
|
| 424 |
+
"logps/rejected": -1497.562744140625,
|
| 425 |
+
"loss": 0.517,
|
| 426 |
+
"rewards/accuracies": 0.8374999761581421,
|
| 427 |
+
"rewards/chosen": -0.2821454405784607,
|
| 428 |
+
"rewards/margins": 0.4555455148220062,
|
| 429 |
+
"rewards/rejected": -0.7376910448074341,
|
| 430 |
+
"step": 280
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"epoch": 1.301123595505618,
|
| 434 |
+
"grad_norm": 6.388819217681885,
|
| 435 |
+
"learning_rate": 6.312292358803988e-06,
|
| 436 |
+
"logits/chosen": -0.6514378190040588,
|
| 437 |
+
"logits/rejected": -0.6536304354667664,
|
| 438 |
+
"logps/chosen": -1464.6539306640625,
|
| 439 |
+
"logps/rejected": -1477.315185546875,
|
| 440 |
+
"loss": 0.5042,
|
| 441 |
+
"rewards/accuracies": 0.8531249761581421,
|
| 442 |
+
"rewards/chosen": -0.15214049816131592,
|
| 443 |
+
"rewards/margins": 0.48767295479774475,
|
| 444 |
+
"rewards/rejected": -0.6398134231567383,
|
| 445 |
+
"step": 290
|
| 446 |
+
},
|
| 447 |
+
{
|
| 448 |
+
"epoch": 1.346067415730337,
|
| 449 |
+
"grad_norm": 6.948678970336914,
|
| 450 |
+
"learning_rate": 6.146179401993356e-06,
|
| 451 |
+
"logits/chosen": -0.6365095973014832,
|
| 452 |
+
"logits/rejected": -0.6101225018501282,
|
| 453 |
+
"logps/chosen": -1467.804931640625,
|
| 454 |
+
"logps/rejected": -1457.0943603515625,
|
| 455 |
+
"loss": 0.5226,
|
| 456 |
+
"rewards/accuracies": 0.815625011920929,
|
| 457 |
+
"rewards/chosen": -0.07389238476753235,
|
| 458 |
+
"rewards/margins": 0.45187896490097046,
|
| 459 |
+
"rewards/rejected": -0.5257713794708252,
|
| 460 |
+
"step": 300
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"epoch": 1.3910112359550562,
|
| 464 |
+
"grad_norm": 6.829395771026611,
|
| 465 |
+
"learning_rate": 5.980066445182725e-06,
|
| 466 |
+
"logits/chosen": -0.6450862884521484,
|
| 467 |
+
"logits/rejected": -0.6496433615684509,
|
| 468 |
+
"logps/chosen": -1431.3052978515625,
|
| 469 |
+
"logps/rejected": -1472.2398681640625,
|
| 470 |
+
"loss": 0.5175,
|
| 471 |
+
"rewards/accuracies": 0.824999988079071,
|
| 472 |
+
"rewards/chosen": 0.06596250832080841,
|
| 473 |
+
"rewards/margins": 0.4646366536617279,
|
| 474 |
+
"rewards/rejected": -0.3986741900444031,
|
| 475 |
+
"step": 310
|
| 476 |
+
},
|
| 477 |
+
{
|
| 478 |
+
"epoch": 1.4359550561797754,
|
| 479 |
+
"grad_norm": 7.039060115814209,
|
| 480 |
+
"learning_rate": 5.8139534883720935e-06,
|
| 481 |
+
"logits/chosen": -0.6316220164299011,
|
| 482 |
+
"logits/rejected": -0.6179653406143188,
|
| 483 |
+
"logps/chosen": -1458.3851318359375,
|
| 484 |
+
"logps/rejected": -1472.72509765625,
|
| 485 |
+
"loss": 0.5126,
|
| 486 |
+
"rewards/accuracies": 0.8125,
|
| 487 |
+
"rewards/chosen": -0.2739182710647583,
|
| 488 |
+
"rewards/margins": 0.4745880663394928,
|
| 489 |
+
"rewards/rejected": -0.7485063076019287,
|
| 490 |
+
"step": 320
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 1.4808988764044944,
|
| 494 |
+
"grad_norm": 6.328307628631592,
|
| 495 |
+
"learning_rate": 5.647840531561463e-06,
|
| 496 |
+
"logits/chosen": -0.6379339098930359,
|
| 497 |
+
"logits/rejected": -0.6447880268096924,
|
| 498 |
+
"logps/chosen": -1484.3759765625,
|
| 499 |
+
"logps/rejected": -1490.268310546875,
|
| 500 |
+
"loss": 0.4878,
|
| 501 |
+
"rewards/accuracies": 0.84375,
|
| 502 |
+
"rewards/chosen": -0.1919710487127304,
|
| 503 |
+
"rewards/margins": 0.5375956296920776,
|
| 504 |
+
"rewards/rejected": -0.7295666933059692,
|
| 505 |
+
"step": 330
|
| 506 |
+
},
|
| 507 |
+
{
|
| 508 |
+
"epoch": 1.5258426966292133,
|
| 509 |
+
"grad_norm": 6.797203540802002,
|
| 510 |
+
"learning_rate": 5.481727574750831e-06,
|
| 511 |
+
"logits/chosen": -0.6540791392326355,
|
| 512 |
+
"logits/rejected": -0.6439436674118042,
|
| 513 |
+
"logps/chosen": -1445.4132080078125,
|
| 514 |
+
"logps/rejected": -1481.77294921875,
|
| 515 |
+
"loss": 0.4944,
|
| 516 |
+
"rewards/accuracies": 0.8500000238418579,
|
| 517 |
+
"rewards/chosen": -0.012084199115633965,
|
| 518 |
+
"rewards/margins": 0.51849764585495,
|
| 519 |
+
"rewards/rejected": -0.5305818319320679,
|
| 520 |
+
"step": 340
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 1.5707865168539326,
|
| 524 |
+
"grad_norm": 6.147586822509766,
|
| 525 |
+
"learning_rate": 5.3156146179402e-06,
|
| 526 |
+
"logits/chosen": -0.6325907707214355,
|
| 527 |
+
"logits/rejected": -0.6374093294143677,
|
| 528 |
+
"logps/chosen": -1445.081298828125,
|
| 529 |
+
"logps/rejected": -1476.953857421875,
|
| 530 |
+
"loss": 0.4924,
|
| 531 |
+
"rewards/accuracies": 0.8531249761581421,
|
| 532 |
+
"rewards/chosen": -0.1005701795220375,
|
| 533 |
+
"rewards/margins": 0.5271730422973633,
|
| 534 |
+
"rewards/rejected": -0.6277432441711426,
|
| 535 |
+
"step": 350
|
| 536 |
+
},
|
| 537 |
+
{
|
| 538 |
+
"epoch": 1.6157303370786518,
|
| 539 |
+
"grad_norm": 7.819535255432129,
|
| 540 |
+
"learning_rate": 5.149501661129569e-06,
|
| 541 |
+
"logits/chosen": -0.6434189081192017,
|
| 542 |
+
"logits/rejected": -0.6285640597343445,
|
| 543 |
+
"logps/chosen": -1475.154296875,
|
| 544 |
+
"logps/rejected": -1454.4254150390625,
|
| 545 |
+
"loss": 0.4818,
|
| 546 |
+
"rewards/accuracies": 0.8812500238418579,
|
| 547 |
+
"rewards/chosen": -0.3274011015892029,
|
| 548 |
+
"rewards/margins": 0.5485560297966003,
|
| 549 |
+
"rewards/rejected": -0.8759571313858032,
|
| 550 |
+
"step": 360
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"epoch": 1.6606741573033708,
|
| 554 |
+
"grad_norm": 6.248306751251221,
|
| 555 |
+
"learning_rate": 4.983388704318937e-06,
|
| 556 |
+
"logits/chosen": -0.64997398853302,
|
| 557 |
+
"logits/rejected": -0.645468533039093,
|
| 558 |
+
"logps/chosen": -1450.2716064453125,
|
| 559 |
+
"logps/rejected": -1451.9000244140625,
|
| 560 |
+
"loss": 0.4824,
|
| 561 |
+
"rewards/accuracies": 0.862500011920929,
|
| 562 |
+
"rewards/chosen": -0.3560061454772949,
|
| 563 |
+
"rewards/margins": 0.5591068863868713,
|
| 564 |
+
"rewards/rejected": -0.915113091468811,
|
| 565 |
+
"step": 370
|
| 566 |
+
},
|
| 567 |
+
{
|
| 568 |
+
"epoch": 1.7056179775280897,
|
| 569 |
+
"grad_norm": 7.458978176116943,
|
| 570 |
+
"learning_rate": 4.817275747508306e-06,
|
| 571 |
+
"logits/chosen": -0.6436145305633545,
|
| 572 |
+
"logits/rejected": -0.6464294195175171,
|
| 573 |
+
"logps/chosen": -1480.620849609375,
|
| 574 |
+
"logps/rejected": -1482.4827880859375,
|
| 575 |
+
"loss": 0.5012,
|
| 576 |
+
"rewards/accuracies": 0.84375,
|
| 577 |
+
"rewards/chosen": -0.3419143557548523,
|
| 578 |
+
"rewards/margins": 0.5034736394882202,
|
| 579 |
+
"rewards/rejected": -0.8453879356384277,
|
| 580 |
+
"step": 380
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"epoch": 1.750561797752809,
|
| 584 |
+
"grad_norm": 6.304107666015625,
|
| 585 |
+
"learning_rate": 4.651162790697675e-06,
|
| 586 |
+
"logits/chosen": -0.6337984800338745,
|
| 587 |
+
"logits/rejected": -0.6419006586074829,
|
| 588 |
+
"logps/chosen": -1457.7503662109375,
|
| 589 |
+
"logps/rejected": -1450.752197265625,
|
| 590 |
+
"loss": 0.4727,
|
| 591 |
+
"rewards/accuracies": 0.875,
|
| 592 |
+
"rewards/chosen": -0.1469828486442566,
|
| 593 |
+
"rewards/margins": 0.5898675322532654,
|
| 594 |
+
"rewards/rejected": -0.7368504405021667,
|
| 595 |
+
"step": 390
|
| 596 |
+
},
|
| 597 |
+
{
|
| 598 |
+
"epoch": 1.7955056179775282,
|
| 599 |
+
"grad_norm": 7.871026039123535,
|
| 600 |
+
"learning_rate": 4.4850498338870435e-06,
|
| 601 |
+
"logits/chosen": -0.6506339311599731,
|
| 602 |
+
"logits/rejected": -0.6489716172218323,
|
| 603 |
+
"logps/chosen": -1471.711181640625,
|
| 604 |
+
"logps/rejected": -1500.9840087890625,
|
| 605 |
+
"loss": 0.4883,
|
| 606 |
+
"rewards/accuracies": 0.8374999761581421,
|
| 607 |
+
"rewards/chosen": -0.3221575617790222,
|
| 608 |
+
"rewards/margins": 0.5669270753860474,
|
| 609 |
+
"rewards/rejected": -0.8890846371650696,
|
| 610 |
+
"step": 400
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"epoch": 1.8404494382022472,
|
| 614 |
+
"grad_norm": 7.770719528198242,
|
| 615 |
+
"learning_rate": 4.318936877076413e-06,
|
| 616 |
+
"logits/chosen": -0.633885383605957,
|
| 617 |
+
"logits/rejected": -0.635451078414917,
|
| 618 |
+
"logps/chosen": -1452.253662109375,
|
| 619 |
+
"logps/rejected": -1468.77392578125,
|
| 620 |
+
"loss": 0.4969,
|
| 621 |
+
"rewards/accuracies": 0.846875011920929,
|
| 622 |
+
"rewards/chosen": -0.414605051279068,
|
| 623 |
+
"rewards/margins": 0.5224484801292419,
|
| 624 |
+
"rewards/rejected": -0.9370535612106323,
|
| 625 |
+
"step": 410
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 1.8853932584269661,
|
| 629 |
+
"grad_norm": 6.401175022125244,
|
| 630 |
+
"learning_rate": 4.152823920265781e-06,
|
| 631 |
+
"logits/chosen": -0.6535300016403198,
|
| 632 |
+
"logits/rejected": -0.6600362658500671,
|
| 633 |
+
"logps/chosen": -1446.4879150390625,
|
| 634 |
+
"logps/rejected": -1480.5345458984375,
|
| 635 |
+
"loss": 0.4815,
|
| 636 |
+
"rewards/accuracies": 0.856249988079071,
|
| 637 |
+
"rewards/chosen": -0.5065728425979614,
|
| 638 |
+
"rewards/margins": 0.5753783583641052,
|
| 639 |
+
"rewards/rejected": -1.0819512605667114,
|
| 640 |
+
"step": 420
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"epoch": 1.9303370786516854,
|
| 644 |
+
"grad_norm": 6.45891809463501,
|
| 645 |
+
"learning_rate": 3.98671096345515e-06,
|
| 646 |
+
"logits/chosen": -0.6408799886703491,
|
| 647 |
+
"logits/rejected": -0.6440805792808533,
|
| 648 |
+
"logps/chosen": -1451.79638671875,
|
| 649 |
+
"logps/rejected": -1458.14794921875,
|
| 650 |
+
"loss": 0.477,
|
| 651 |
+
"rewards/accuracies": 0.840624988079071,
|
| 652 |
+
"rewards/chosen": -0.42773446440696716,
|
| 653 |
+
"rewards/margins": 0.591695249080658,
|
| 654 |
+
"rewards/rejected": -1.0194295644760132,
|
| 655 |
+
"step": 430
|
| 656 |
+
},
|
| 657 |
+
{
|
| 658 |
+
"epoch": 1.9752808988764046,
|
| 659 |
+
"grad_norm": 6.308041095733643,
|
| 660 |
+
"learning_rate": 3.8205980066445185e-06,
|
| 661 |
+
"logits/chosen": -0.660961389541626,
|
| 662 |
+
"logits/rejected": -0.6538819670677185,
|
| 663 |
+
"logps/chosen": -1483.5074462890625,
|
| 664 |
+
"logps/rejected": -1495.02734375,
|
| 665 |
+
"loss": 0.4829,
|
| 666 |
+
"rewards/accuracies": 0.8374999761581421,
|
| 667 |
+
"rewards/chosen": -0.2519773840904236,
|
| 668 |
+
"rewards/margins": 0.572425365447998,
|
| 669 |
+
"rewards/rejected": -0.8244028091430664,
|
| 670 |
+
"step": 440
|
| 671 |
+
},
|
| 672 |
+
{
|
| 673 |
+
"epoch": 2.0179775280898875,
|
| 674 |
+
"grad_norm": 6.544996738433838,
|
| 675 |
+
"learning_rate": 3.6544850498338876e-06,
|
| 676 |
+
"logits/chosen": -0.6604510545730591,
|
| 677 |
+
"logits/rejected": -0.6692507266998291,
|
| 678 |
+
"logps/chosen": -1480.155517578125,
|
| 679 |
+
"logps/rejected": -1485.3016357421875,
|
| 680 |
+
"loss": 0.425,
|
| 681 |
+
"rewards/accuracies": 0.9013158082962036,
|
| 682 |
+
"rewards/chosen": -0.029593899846076965,
|
| 683 |
+
"rewards/margins": 0.7573708891868591,
|
| 684 |
+
"rewards/rejected": -0.7869648337364197,
|
| 685 |
+
"step": 450
|
| 686 |
+
},
|
| 687 |
+
{
|
| 688 |
+
"epoch": 2.0629213483146067,
|
| 689 |
+
"grad_norm": 6.268301486968994,
|
| 690 |
+
"learning_rate": 3.4883720930232564e-06,
|
| 691 |
+
"logits/chosen": -0.6653432846069336,
|
| 692 |
+
"logits/rejected": -0.6545718908309937,
|
| 693 |
+
"logps/chosen": -1456.055419921875,
|
| 694 |
+
"logps/rejected": -1484.5994873046875,
|
| 695 |
+
"loss": 0.3421,
|
| 696 |
+
"rewards/accuracies": 0.981249988079071,
|
| 697 |
+
"rewards/chosen": 0.03443983197212219,
|
| 698 |
+
"rewards/margins": 0.9891597628593445,
|
| 699 |
+
"rewards/rejected": -0.9547199010848999,
|
| 700 |
+
"step": 460
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"epoch": 2.107865168539326,
|
| 704 |
+
"grad_norm": 6.637356281280518,
|
| 705 |
+
"learning_rate": 3.322259136212625e-06,
|
| 706 |
+
"logits/chosen": -0.669964611530304,
|
| 707 |
+
"logits/rejected": -0.6756006479263306,
|
| 708 |
+
"logps/chosen": -1436.8699951171875,
|
| 709 |
+
"logps/rejected": -1450.5247802734375,
|
| 710 |
+
"loss": 0.3371,
|
| 711 |
+
"rewards/accuracies": 0.984375,
|
| 712 |
+
"rewards/chosen": -0.20131292939186096,
|
| 713 |
+
"rewards/margins": 1.0167487859725952,
|
| 714 |
+
"rewards/rejected": -1.2180618047714233,
|
| 715 |
+
"step": 470
|
| 716 |
+
},
|
| 717 |
+
{
|
| 718 |
+
"epoch": 2.152808988764045,
|
| 719 |
+
"grad_norm": 5.162894248962402,
|
| 720 |
+
"learning_rate": 3.156146179401994e-06,
|
| 721 |
+
"logits/chosen": -0.6826899647712708,
|
| 722 |
+
"logits/rejected": -0.6783708333969116,
|
| 723 |
+
"logps/chosen": -1470.298583984375,
|
| 724 |
+
"logps/rejected": -1493.745849609375,
|
| 725 |
+
"loss": 0.3365,
|
| 726 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 727 |
+
"rewards/chosen": -0.25590771436691284,
|
| 728 |
+
"rewards/margins": 1.0207464694976807,
|
| 729 |
+
"rewards/rejected": -1.2766541242599487,
|
| 730 |
+
"step": 480
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 2.197752808988764,
|
| 734 |
+
"grad_norm": 5.52967643737793,
|
| 735 |
+
"learning_rate": 2.9900332225913626e-06,
|
| 736 |
+
"logits/chosen": -0.6881505250930786,
|
| 737 |
+
"logits/rejected": -0.682505190372467,
|
| 738 |
+
"logps/chosen": -1448.319580078125,
|
| 739 |
+
"logps/rejected": -1477.046630859375,
|
| 740 |
+
"loss": 0.3315,
|
| 741 |
+
"rewards/accuracies": 0.96875,
|
| 742 |
+
"rewards/chosen": -0.37032753229141235,
|
| 743 |
+
"rewards/margins": 1.0482890605926514,
|
| 744 |
+
"rewards/rejected": -1.4186166524887085,
|
| 745 |
+
"step": 490
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 2.242696629213483,
|
| 749 |
+
"grad_norm": 5.7678399085998535,
|
| 750 |
+
"learning_rate": 2.8239202657807313e-06,
|
| 751 |
+
"logits/chosen": -0.7020605206489563,
|
| 752 |
+
"logits/rejected": -0.6847004294395447,
|
| 753 |
+
"logps/chosen": -1463.9556884765625,
|
| 754 |
+
"logps/rejected": -1515.1861572265625,
|
| 755 |
+
"loss": 0.3295,
|
| 756 |
+
"rewards/accuracies": 0.971875011920929,
|
| 757 |
+
"rewards/chosen": -0.5403343439102173,
|
| 758 |
+
"rewards/margins": 1.0608083009719849,
|
| 759 |
+
"rewards/rejected": -1.6011426448822021,
|
| 760 |
+
"step": 500
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"epoch": 2.2876404494382023,
|
| 764 |
+
"grad_norm": 6.095832347869873,
|
| 765 |
+
"learning_rate": 2.6578073089701e-06,
|
| 766 |
+
"logits/chosen": -0.6875178217887878,
|
| 767 |
+
"logits/rejected": -0.6721357107162476,
|
| 768 |
+
"logps/chosen": -1503.990234375,
|
| 769 |
+
"logps/rejected": -1523.0850830078125,
|
| 770 |
+
"loss": 0.3239,
|
| 771 |
+
"rewards/accuracies": 0.981249988079071,
|
| 772 |
+
"rewards/chosen": -0.7074209451675415,
|
| 773 |
+
"rewards/margins": 1.0821547508239746,
|
| 774 |
+
"rewards/rejected": -1.7895758152008057,
|
| 775 |
+
"step": 510
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"epoch": 2.3325842696629215,
|
| 779 |
+
"grad_norm": 5.140925884246826,
|
| 780 |
+
"learning_rate": 2.4916943521594684e-06,
|
| 781 |
+
"logits/chosen": -0.6903117299079895,
|
| 782 |
+
"logits/rejected": -0.6828970909118652,
|
| 783 |
+
"logps/chosen": -1494.531494140625,
|
| 784 |
+
"logps/rejected": -1525.615966796875,
|
| 785 |
+
"loss": 0.3329,
|
| 786 |
+
"rewards/accuracies": 0.956250011920929,
|
| 787 |
+
"rewards/chosen": -0.7834824323654175,
|
| 788 |
+
"rewards/margins": 1.0618258714675903,
|
| 789 |
+
"rewards/rejected": -1.845308542251587,
|
| 790 |
+
"step": 520
|
| 791 |
+
},
|
| 792 |
+
{
|
| 793 |
+
"epoch": 2.3775280898876403,
|
| 794 |
+
"grad_norm": 5.589333534240723,
|
| 795 |
+
"learning_rate": 2.3255813953488376e-06,
|
| 796 |
+
"logits/chosen": -0.6674150824546814,
|
| 797 |
+
"logits/rejected": -0.6766713857650757,
|
| 798 |
+
"logps/chosen": -1438.752197265625,
|
| 799 |
+
"logps/rejected": -1477.069091796875,
|
| 800 |
+
"loss": 0.3327,
|
| 801 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 802 |
+
"rewards/chosen": -0.8180882334709167,
|
| 803 |
+
"rewards/margins": 1.0446465015411377,
|
| 804 |
+
"rewards/rejected": -1.8627347946166992,
|
| 805 |
+
"step": 530
|
| 806 |
+
},
|
| 807 |
+
{
|
| 808 |
+
"epoch": 2.4224719101123595,
|
| 809 |
+
"grad_norm": 5.4515814781188965,
|
| 810 |
+
"learning_rate": 2.1594684385382063e-06,
|
| 811 |
+
"logits/chosen": -0.6770123839378357,
|
| 812 |
+
"logits/rejected": -0.6860142350196838,
|
| 813 |
+
"logps/chosen": -1475.977294921875,
|
| 814 |
+
"logps/rejected": -1495.72216796875,
|
| 815 |
+
"loss": 0.3264,
|
| 816 |
+
"rewards/accuracies": 0.965624988079071,
|
| 817 |
+
"rewards/chosen": -0.7763740420341492,
|
| 818 |
+
"rewards/margins": 1.0760669708251953,
|
| 819 |
+
"rewards/rejected": -1.8524410724639893,
|
| 820 |
+
"step": 540
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"epoch": 2.4674157303370787,
|
| 824 |
+
"grad_norm": 6.506235122680664,
|
| 825 |
+
"learning_rate": 1.993355481727575e-06,
|
| 826 |
+
"logits/chosen": -0.6790199279785156,
|
| 827 |
+
"logits/rejected": -0.677984356880188,
|
| 828 |
+
"logps/chosen": -1483.64013671875,
|
| 829 |
+
"logps/rejected": -1499.1666259765625,
|
| 830 |
+
"loss": 0.3352,
|
| 831 |
+
"rewards/accuracies": 0.971875011920929,
|
| 832 |
+
"rewards/chosen": -0.6030625104904175,
|
| 833 |
+
"rewards/margins": 1.0407390594482422,
|
| 834 |
+
"rewards/rejected": -1.6438014507293701,
|
| 835 |
+
"step": 550
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 2.512359550561798,
|
| 839 |
+
"grad_norm": 4.6371331214904785,
|
| 840 |
+
"learning_rate": 1.8272425249169438e-06,
|
| 841 |
+
"logits/chosen": -0.6902989149093628,
|
| 842 |
+
"logits/rejected": -0.6996210217475891,
|
| 843 |
+
"logps/chosen": -1499.473388671875,
|
| 844 |
+
"logps/rejected": -1493.6162109375,
|
| 845 |
+
"loss": 0.3142,
|
| 846 |
+
"rewards/accuracies": 0.971875011920929,
|
| 847 |
+
"rewards/chosen": -0.38727277517318726,
|
| 848 |
+
"rewards/margins": 1.1097298860549927,
|
| 849 |
+
"rewards/rejected": -1.4970027208328247,
|
| 850 |
+
"step": 560
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"epoch": 2.5573033707865167,
|
| 854 |
+
"grad_norm": 5.494938850402832,
|
| 855 |
+
"learning_rate": 1.6611295681063126e-06,
|
| 856 |
+
"logits/chosen": -0.6864480972290039,
|
| 857 |
+
"logits/rejected": -0.6946516036987305,
|
| 858 |
+
"logps/chosen": -1488.853515625,
|
| 859 |
+
"logps/rejected": -1484.619384765625,
|
| 860 |
+
"loss": 0.3085,
|
| 861 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 862 |
+
"rewards/chosen": -0.39798271656036377,
|
| 863 |
+
"rewards/margins": 1.1385451555252075,
|
| 864 |
+
"rewards/rejected": -1.5365278720855713,
|
| 865 |
+
"step": 570
|
| 866 |
+
},
|
| 867 |
+
{
|
| 868 |
+
"epoch": 2.602247191011236,
|
| 869 |
+
"grad_norm": 5.5001959800720215,
|
| 870 |
+
"learning_rate": 1.4950166112956813e-06,
|
| 871 |
+
"logits/chosen": -0.689426600933075,
|
| 872 |
+
"logits/rejected": -0.6980553865432739,
|
| 873 |
+
"logps/chosen": -1462.147705078125,
|
| 874 |
+
"logps/rejected": -1470.9658203125,
|
| 875 |
+
"loss": 0.309,
|
| 876 |
+
"rewards/accuracies": 0.981249988079071,
|
| 877 |
+
"rewards/chosen": -0.42978817224502563,
|
| 878 |
+
"rewards/margins": 1.1281216144561768,
|
| 879 |
+
"rewards/rejected": -1.5579097270965576,
|
| 880 |
+
"step": 580
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"epoch": 2.647191011235955,
|
| 884 |
+
"grad_norm": 5.16894006729126,
|
| 885 |
+
"learning_rate": 1.32890365448505e-06,
|
| 886 |
+
"logits/chosen": -0.6979320645332336,
|
| 887 |
+
"logits/rejected": -0.6933678984642029,
|
| 888 |
+
"logps/chosen": -1492.7177734375,
|
| 889 |
+
"logps/rejected": -1465.2548828125,
|
| 890 |
+
"loss": 0.3171,
|
| 891 |
+
"rewards/accuracies": 0.96875,
|
| 892 |
+
"rewards/chosen": -0.3818231225013733,
|
| 893 |
+
"rewards/margins": 1.1230003833770752,
|
| 894 |
+
"rewards/rejected": -1.5048235654830933,
|
| 895 |
+
"step": 590
|
| 896 |
+
},
|
| 897 |
+
{
|
| 898 |
+
"epoch": 2.692134831460674,
|
| 899 |
+
"grad_norm": 5.261245250701904,
|
| 900 |
+
"learning_rate": 1.1627906976744188e-06,
|
| 901 |
+
"logits/chosen": -0.6973509788513184,
|
| 902 |
+
"logits/rejected": -0.6787486672401428,
|
| 903 |
+
"logps/chosen": -1474.0703125,
|
| 904 |
+
"logps/rejected": -1494.2147216796875,
|
| 905 |
+
"loss": 0.3119,
|
| 906 |
+
"rewards/accuracies": 0.981249988079071,
|
| 907 |
+
"rewards/chosen": -0.23206683993339539,
|
| 908 |
+
"rewards/margins": 1.1345573663711548,
|
| 909 |
+
"rewards/rejected": -1.366624116897583,
|
| 910 |
+
"step": 600
|
| 911 |
+
},
|
| 912 |
+
{
|
| 913 |
+
"epoch": 2.737078651685393,
|
| 914 |
+
"grad_norm": 4.852719783782959,
|
| 915 |
+
"learning_rate": 9.966777408637875e-07,
|
| 916 |
+
"logits/chosen": -0.7001398205757141,
|
| 917 |
+
"logits/rejected": -0.6987439393997192,
|
| 918 |
+
"logps/chosen": -1424.858154296875,
|
| 919 |
+
"logps/rejected": -1409.1744384765625,
|
| 920 |
+
"loss": 0.3177,
|
| 921 |
+
"rewards/accuracies": 0.9593750238418579,
|
| 922 |
+
"rewards/chosen": -0.19675633311271667,
|
| 923 |
+
"rewards/margins": 1.1072434186935425,
|
| 924 |
+
"rewards/rejected": -1.303999662399292,
|
| 925 |
+
"step": 610
|
| 926 |
+
},
|
| 927 |
+
{
|
| 928 |
+
"epoch": 2.7820224719101123,
|
| 929 |
+
"grad_norm": 6.0668745040893555,
|
| 930 |
+
"learning_rate": 8.305647840531563e-07,
|
| 931 |
+
"logits/chosen": -0.7101883888244629,
|
| 932 |
+
"logits/rejected": -0.7109605073928833,
|
| 933 |
+
"logps/chosen": -1480.221435546875,
|
| 934 |
+
"logps/rejected": -1482.60546875,
|
| 935 |
+
"loss": 0.3195,
|
| 936 |
+
"rewards/accuracies": 0.965624988079071,
|
| 937 |
+
"rewards/chosen": -0.19247181713581085,
|
| 938 |
+
"rewards/margins": 1.0987659692764282,
|
| 939 |
+
"rewards/rejected": -1.2912375926971436,
|
| 940 |
+
"step": 620
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 2.8269662921348315,
|
| 944 |
+
"grad_norm": 5.055160999298096,
|
| 945 |
+
"learning_rate": 6.64451827242525e-07,
|
| 946 |
+
"logits/chosen": -0.7024872899055481,
|
| 947 |
+
"logits/rejected": -0.6898430585861206,
|
| 948 |
+
"logps/chosen": -1450.166015625,
|
| 949 |
+
"logps/rejected": -1479.9564208984375,
|
| 950 |
+
"loss": 0.3166,
|
| 951 |
+
"rewards/accuracies": 0.9781249761581421,
|
| 952 |
+
"rewards/chosen": -0.2648771405220032,
|
| 953 |
+
"rewards/margins": 1.1128193140029907,
|
| 954 |
+
"rewards/rejected": -1.3776965141296387,
|
| 955 |
+
"step": 630
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 2.8719101123595507,
|
| 959 |
+
"grad_norm": 5.179624557495117,
|
| 960 |
+
"learning_rate": 4.983388704318938e-07,
|
| 961 |
+
"logits/chosen": -0.7008506059646606,
|
| 962 |
+
"logits/rejected": -0.6875109672546387,
|
| 963 |
+
"logps/chosen": -1489.664306640625,
|
| 964 |
+
"logps/rejected": -1495.178466796875,
|
| 965 |
+
"loss": 0.3158,
|
| 966 |
+
"rewards/accuracies": 0.981249988079071,
|
| 967 |
+
"rewards/chosen": -0.3089246153831482,
|
| 968 |
+
"rewards/margins": 1.1327073574066162,
|
| 969 |
+
"rewards/rejected": -1.4416319131851196,
|
| 970 |
+
"step": 640
|
| 971 |
+
},
|
| 972 |
+
{
|
| 973 |
+
"epoch": 2.9168539325842695,
|
| 974 |
+
"grad_norm": 6.70164680480957,
|
| 975 |
+
"learning_rate": 3.322259136212625e-07,
|
| 976 |
+
"logits/chosen": -0.6998602151870728,
|
| 977 |
+
"logits/rejected": -0.7018693685531616,
|
| 978 |
+
"logps/chosen": -1458.418212890625,
|
| 979 |
+
"logps/rejected": -1455.3675537109375,
|
| 980 |
+
"loss": 0.3231,
|
| 981 |
+
"rewards/accuracies": 0.9624999761581421,
|
| 982 |
+
"rewards/chosen": -0.26613983511924744,
|
| 983 |
+
"rewards/margins": 1.1134045124053955,
|
| 984 |
+
"rewards/rejected": -1.3795442581176758,
|
| 985 |
+
"step": 650
|
| 986 |
+
},
|
| 987 |
+
{
|
| 988 |
+
"epoch": 2.9617977528089887,
|
| 989 |
+
"grad_norm": 5.529868125915527,
|
| 990 |
+
"learning_rate": 1.6611295681063126e-07,
|
| 991 |
+
"logits/chosen": -0.7056189775466919,
|
| 992 |
+
"logits/rejected": -0.7010918259620667,
|
| 993 |
+
"logps/chosen": -1448.5848388671875,
|
| 994 |
+
"logps/rejected": -1449.268310546875,
|
| 995 |
+
"loss": 0.3193,
|
| 996 |
+
"rewards/accuracies": 0.965624988079071,
|
| 997 |
+
"rewards/chosen": -0.2537583112716675,
|
| 998 |
+
"rewards/margins": 1.1171658039093018,
|
| 999 |
+
"rewards/rejected": -1.3709241151809692,
|
| 1000 |
+
"step": 660
|
| 1001 |
+
}
|
| 1002 |
+
],
|
| 1003 |
+
"logging_steps": 10,
|
| 1004 |
+
"max_steps": 669,
|
| 1005 |
+
"num_input_tokens_seen": 0,
|
| 1006 |
+
"num_train_epochs": 3,
|
| 1007 |
+
"save_steps": 10000,
|
| 1008 |
+
"stateful_callbacks": {
|
| 1009 |
+
"TrainerControl": {
|
| 1010 |
+
"args": {
|
| 1011 |
+
"should_epoch_stop": false,
|
| 1012 |
+
"should_evaluate": false,
|
| 1013 |
+
"should_log": false,
|
| 1014 |
+
"should_save": true,
|
| 1015 |
+
"should_training_stop": true
|
| 1016 |
+
},
|
| 1017 |
+
"attributes": {}
|
| 1018 |
+
}
|
| 1019 |
+
},
|
| 1020 |
+
"total_flos": 0.0,
|
| 1021 |
+
"train_batch_size": 1,
|
| 1022 |
+
"trial_name": null,
|
| 1023 |
+
"trial_params": null
|
| 1024 |
+
}
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d65df17b30d9c0fee0b16beb563e4fd4ae603d6a11683ef06705512d4b6bf93
|
| 3 |
+
size 7057
|
Preference_Set_Qwen2.5-3B-Instruct_INFV_ref_as_gt_True_IterRet_individual_recall_True_top_k_30/Qwen/Qwen2.5-3B-Instruct/method_agent/checkpoint-669/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|