MartinJYHuang committed on
Commit
cfd7e17
·
verified ·
1 Parent(s): e2f5089

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -38,10 +38,10 @@ This model was trained with DPO, a method introduced in [Direct Preference Optim
38
  ### Framework versions
39
 
40
  - PEFT 0.17.1
41
- - TRL: 0.23.1
42
- - Transformers: 4.57.0
43
- - Pytorch: 2.8.0
44
- - Datasets: 4.1.1
45
  - Tokenizers: 0.22.1
46
 
47
  ## Citations
 
38
  ### Framework versions
39
 
40
  - PEFT 0.17.1
41
+ - TRL: 0.24.0
42
+ - Transformers: 4.57.1
43
+ - Pytorch: 2.9.0
44
+ - Datasets: 4.2.0
45
  - Tokenizers: 0.22.1
46
 
47
  ## Citations
adapter_config.json CHANGED
@@ -25,8 +25,8 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "v_proj",
29
- "q_proj"
30
  ],
31
  "target_parameters": null,
32
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "q_proj",
29
+ "v_proj"
30
  ],
31
  "target_parameters": null,
32
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6db4bd01e3ba164b19acfbcde1b1672b273d4c4922cb841ef72cb84d54a3c9f
3
  size 20992792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22fe8e2ca50c8fdfaeadbee04d86b2bd4084c70d1135882db04e89e9fb2db999
3
  size 20992792
trainer_state.json CHANGED
@@ -4,128 +4,23 @@
4
  "best_model_checkpoint": null,
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 72,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
- {
13
- "epoch": 0.4166666666666667,
14
- "grad_norm": 5.378512382507324,
15
- "learning_rate": 8.750000000000001e-06,
16
- "logits/chosen": -1.844266653060913,
17
- "logits/rejected": -1.6856653690338135,
18
- "logps/chosen": -486.0516662597656,
19
- "logps/rejected": -451.41192626953125,
20
- "loss": 0.6871,
21
- "rewards/accuracies": 0.49000000953674316,
22
- "rewards/chosen": 0.025482425466179848,
23
- "rewards/margins": 0.01759222522377968,
24
- "rewards/rejected": 0.007890196517109871,
25
- "step": 10
26
- },
27
- {
28
- "epoch": 0.8333333333333334,
29
- "grad_norm": 6.4273881912231445,
30
- "learning_rate": 7.361111111111112e-06,
31
- "logits/chosen": -1.8168048858642578,
32
- "logits/rejected": -1.6364498138427734,
33
- "logps/chosen": -486.64166259765625,
34
- "logps/rejected": -463.81634521484375,
35
- "loss": 0.6185,
36
- "rewards/accuracies": 0.8799999952316284,
37
- "rewards/chosen": 0.10465441644191742,
38
- "rewards/margins": 0.16221235692501068,
39
- "rewards/rejected": -0.05755792185664177,
40
- "step": 20
41
- },
42
- {
43
- "epoch": 1.25,
44
- "grad_norm": 5.65267276763916,
45
- "learning_rate": 5.972222222222222e-06,
46
- "logits/chosen": -1.8458976745605469,
47
- "logits/rejected": -1.6958911418914795,
48
- "logps/chosen": -486.38079833984375,
49
- "logps/rejected": -460.72991943359375,
50
- "loss": 0.5097,
51
- "rewards/accuracies": 0.9899999499320984,
52
- "rewards/chosen": 0.2503821849822998,
53
- "rewards/margins": 0.4185231626033783,
54
- "rewards/rejected": -0.1681409627199173,
55
- "step": 30
56
- },
57
- {
58
- "epoch": 1.6666666666666665,
59
- "grad_norm": 4.828741550445557,
60
- "learning_rate": 4.583333333333333e-06,
61
- "logits/chosen": -1.7737739086151123,
62
- "logits/rejected": -1.615502953529358,
63
- "logps/chosen": -503.5326232910156,
64
- "logps/rejected": -456.4029235839844,
65
- "loss": 0.418,
66
- "rewards/accuracies": 1.0,
67
- "rewards/chosen": 0.41943544149398804,
68
- "rewards/margins": 0.6772049069404602,
69
- "rewards/rejected": -0.2577693462371826,
70
- "step": 40
71
- },
72
- {
73
- "epoch": 2.0833333333333335,
74
- "grad_norm": 4.443270683288574,
75
- "learning_rate": 3.1944444444444443e-06,
76
- "logits/chosen": -1.8288230895996094,
77
- "logits/rejected": -1.6355512142181396,
78
- "logps/chosen": -478.97039794921875,
79
- "logps/rejected": -471.06170654296875,
80
- "loss": 0.3435,
81
- "rewards/accuracies": 1.0,
82
- "rewards/chosen": 0.5154451727867126,
83
- "rewards/margins": 0.9199325442314148,
84
- "rewards/rejected": -0.4044874608516693,
85
- "step": 50
86
- },
87
- {
88
- "epoch": 2.5,
89
- "grad_norm": 3.9862306118011475,
90
- "learning_rate": 1.8055555555555557e-06,
91
- "logits/chosen": -1.787440538406372,
92
- "logits/rejected": -1.6402308940887451,
93
- "logps/chosen": -483.3511657714844,
94
- "logps/rejected": -451.0846252441406,
95
- "loss": 0.2905,
96
- "rewards/accuracies": 1.0,
97
- "rewards/chosen": 0.6469835042953491,
98
- "rewards/margins": 1.1269721984863281,
99
- "rewards/rejected": -0.4799886643886566,
100
- "step": 60
101
- },
102
- {
103
- "epoch": 2.9166666666666665,
104
- "grad_norm": 3.802476406097412,
105
- "learning_rate": 4.1666666666666667e-07,
106
- "logits/chosen": -1.8231675624847412,
107
- "logits/rejected": -1.6182845830917358,
108
- "logps/chosen": -482.34552001953125,
109
- "logps/rejected": -476.08245849609375,
110
- "loss": 0.2487,
111
- "rewards/accuracies": 1.0,
112
- "rewards/chosen": 0.7560733556747437,
113
- "rewards/margins": 1.309217929840088,
114
- "rewards/rejected": -0.553144633769989,
115
- "step": 70
116
- },
117
  {
118
  "epoch": 3.0,
119
- "step": 72,
120
  "total_flos": 0.0,
121
- "train_loss": 0.43935056610239875,
122
- "train_runtime": 212.525,
123
- "train_samples_per_second": 3.317,
124
- "train_steps_per_second": 0.339
125
  }
126
  ],
127
  "logging_steps": 10,
128
- "max_steps": 72,
129
  "num_input_tokens_seen": 0,
130
  "num_train_epochs": 3,
131
  "save_steps": 100,
 
4
  "best_model_checkpoint": null,
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 6,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 3.0,
14
+ "step": 6,
15
  "total_flos": 0.0,
16
+ "train_loss": 0.6918749014536539,
17
+ "train_runtime": 10.2523,
18
+ "train_samples_per_second": 2.341,
19
+ "train_steps_per_second": 0.585
20
  }
21
  ],
22
  "logging_steps": 10,
23
+ "max_steps": 6,
24
  "num_input_tokens_seen": 0,
25
  "num_train_epochs": 3,
26
  "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3651298d483387dd8e41a7f96f9b2df27290e8b94f31024d88b04779a9c64fdc
3
- size 6865
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a9e1331ba9c68f2f41bad01edc382f75c5e0577212aa2083afc36604e5bfba
3
+ size 6929