wukeming11 committed on
Commit
b623eb7
·
verified ·
1 Parent(s): 06167c9

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +59 -236
config.json CHANGED
@@ -1,238 +1,61 @@
1
  {
2
- "data_config": {
3
- "train_json_list": [
4
- "dataset/train_250917_v3_preference_overall_num199394_shuflle_tie_case_processed.json"
5
- ],
6
- "val_json_list": [
7
- "/path/to/dataset/meta_data.json"
8
- ],
9
- "test_json_list": [
10
- [
11
- "Valid Set 1",
12
- [
13
- "dataset/valid_set2_modified.json"
14
- ]
15
- ],
16
- [
17
- "Valid Set 2",
18
- [
19
- "dataset/valid_aurora_human_ratings_pairwise_modified.json"
20
- ]
21
- ]
22
- ],
23
- "soft_label": false,
24
- "confidence_threshold": null,
25
- "max_pixels": 200704,
26
- "min_pixels": 200704,
27
- "with_instruction": true,
28
- "tied_threshold": null,
29
- "reward_dim": "overall_detail"
30
- },
31
- "training_args": {
32
- "output_dir": "",
33
- "overwrite_output_dir": false,
34
- "do_train": false,
35
- "do_eval": true,
36
- "do_predict": false,
37
- "eval_strategy": "steps",
38
- "prediction_loss_only": false,
39
- "per_device_train_batch_size": 2,
40
- "per_device_eval_batch_size": 8,
41
- "per_gpu_train_batch_size": null,
42
- "per_gpu_eval_batch_size": null,
43
- "gradient_accumulation_steps": 4,
44
- "eval_accumulation_steps": null,
45
- "eval_delay": 0,
46
- "torch_empty_cache_steps": null,
47
- "learning_rate": 2e-06,
48
- "weight_decay": 0.0,
49
- "adam_beta1": 0.9,
50
- "adam_beta2": 0.999,
51
- "adam_epsilon": 1e-08,
52
- "max_grad_norm": 1.0,
53
- "num_train_epochs": 10,
54
- "max_steps": -1,
55
- "lr_scheduler_type": "constant_with_warmup",
56
- "lr_scheduler_kwargs": {},
57
- "warmup_ratio": 0.05,
58
- "warmup_steps": 0,
59
- "log_level": "passive",
60
- "log_level_replica": "warning",
61
- "log_on_each_node": true,
62
- "logging_dir": "",
63
- "logging_strategy": "steps",
64
- "logging_first_step": false,
65
- "logging_steps": 31,
66
- "logging_nan_inf_filter": true,
67
- "save_strategy": "steps",
68
- "save_steps": 31,
69
- "save_total_limit": null,
70
- "save_safetensors": true,
71
- "save_on_each_node": false,
72
- "save_only_model": true,
73
- "restore_callback_states_from_checkpoint": false,
74
- "no_cuda": false,
75
- "use_cpu": false,
76
- "use_mps_device": false,
77
- "seed": 42,
78
- "data_seed": null,
79
- "jit_mode_eval": false,
80
- "use_ipex": false,
81
- "bf16": true,
82
- "fp16": false,
83
- "fp16_opt_level": "O1",
84
- "half_precision_backend": "auto",
85
- "bf16_full_eval": false,
86
- "fp16_full_eval": false,
87
- "tf32": null,
88
- "ddp_backend": null,
89
- "tpu_num_cores": null,
90
- "tpu_metrics_debug": false,
91
- "debug": [],
92
- "dataloader_drop_last": false,
93
- "eval_steps": 31,
94
- "dataloader_num_workers": 8,
95
- "dataloader_prefetch_factor": null,
96
- "past_index": -1,
97
- "run_name": null,
98
- "disable_tqdm": false,
99
- "remove_unused_columns": false,
100
- "label_names": null,
101
- "load_best_model_at_end": false,
102
- "metric_for_best_model": null,
103
- "greater_is_better": null,
104
- "ignore_data_skip": false,
105
- "fsdp": [],
106
- "fsdp_min_num_params": 0,
107
- "fsdp_config": {
108
- "min_num_params": 0,
109
- "xla": false,
110
- "xla_fsdp_v2": false,
111
- "xla_fsdp_grad_ckpt": false
112
- },
113
- "fsdp_transformer_layer_cls_to_wrap": null,
114
- "accelerator_config": {
115
- "split_batches": false,
116
- "dispatch_batches": null,
117
- "even_batches": true,
118
- "use_seedable_sampler": true,
119
- "non_blocking": false,
120
- "gradient_accumulation_kwargs": null,
121
- "use_configured_state": false
122
- },
123
- "parallelism_config": null,
124
- "deepspeed": "hpsv3/config/ds_config/zero2.json",
125
- "label_smoothing_factor": 0.0,
126
- "optim": "adamw_torch",
127
- "optim_args": null,
128
- "adafactor": false,
129
- "group_by_length": false,
130
- "length_column_name": "length",
131
- "report_to": [
132
- "tensorboard"
133
- ],
134
- "ddp_find_unused_parameters": null,
135
- "ddp_bucket_cap_mb": null,
136
- "ddp_broadcast_buffers": null,
137
- "dataloader_pin_memory": true,
138
- "dataloader_persistent_workers": false,
139
- "skip_memory_metrics": true,
140
- "use_legacy_prediction_loop": false,
141
- "push_to_hub": false,
142
- "resume_from_checkpoint": null,
143
- "hub_model_id": null,
144
- "hub_strategy": "every_save",
145
- "hub_token": null,
146
- "hub_private_repo": null,
147
- "hub_always_push": false,
148
- "hub_revision": null,
149
- "gradient_checkpointing": true,
150
- "gradient_checkpointing_kwargs": {
151
- "use_reentrant": false
152
- },
153
- "include_inputs_for_metrics": false,
154
- "include_for_metrics": [],
155
- "eval_do_concat_batches": true,
156
- "fp16_backend": "auto",
157
- "push_to_hub_model_id": null,
158
- "push_to_hub_organization": null,
159
- "push_to_hub_token": null,
160
- "mp_parameters": "",
161
- "auto_find_batch_size": false,
162
- "full_determinism": false,
163
- "torchdynamo": null,
164
- "ray_scope": "last",
165
- "ddp_timeout": 1800,
166
- "torch_compile": false,
167
- "torch_compile_backend": null,
168
- "torch_compile_mode": null,
169
- "include_tokens_per_second": false,
170
- "include_num_input_tokens_seen": false,
171
- "neftune_noise_alpha": null,
172
- "optim_target_modules": null,
173
- "batch_eval_metrics": false,
174
- "eval_on_start": false,
175
- "use_liger_kernel": false,
176
- "liger_kernel_config": null,
177
- "eval_use_gather_object": false,
178
- "average_tokens_across_devices": true,
179
- "dataset_num_proc": null,
180
- "center_rewards_coefficient": null,
181
- "disable_flash_attn2": false,
182
- "disable_dropout": false,
183
- "vision_lr": null,
184
- "merger_lr": null,
185
- "rm_head_lr": null,
186
- "special_token_lr": 2e-06,
187
- "conduct_eval": true,
188
- "load_from_pretrained": null,
189
- "load_from_pretrained_step": null,
190
- "logging_epochs": 0.01,
191
- "eval_epochs": 0.01,
192
- "save_epochs": 0.01,
193
- "save_full_model": true,
194
- "visualization_steps": 100,
195
- "max_viz_samples": 4
196
- },
197
- "model_config": {
198
- "model_name_or_path": "Qwen2.5-VL-7B-Instruct",
199
- "model_revision": "main",
200
- "rm_head_type": "ranknet_multi_head",
201
- "rm_head_kwargs": null,
202
- "pooling_strategy": "mean",
203
- "output_dim": 2,
204
- "use_special_tokens": true,
205
- "freeze_vision_tower": false,
206
- "freeze_llm": false,
207
- "tune_merger": true,
208
- "trainable_visual_layers": -1,
209
- "torch_dtype": "bfloat16",
210
- "trust_remote_code": false,
211
- "attn_implementation": null,
212
- "load_in_8bit": false,
213
- "load_in_4bit": false,
214
- "bnb_4bit_quant_type": "nf4",
215
- "use_bnb_nested_quant": false,
216
- "reward_token": "special",
217
- "loss_type": "uncertainty",
218
- "loss_hyperparameters": {},
219
- "checkpoint_path": null
220
- },
221
- "peft_lora_config": {
222
- "lora_enable": false,
223
- "vision_lora": false,
224
- "lora_r": 512,
225
- "lora_alpha": 1024,
226
- "lora_dropout": 0.05,
227
- "lora_target_modules": null,
228
- "lora_namespan_exclude": [
229
- "lm_head",
230
- "rm_head",
231
- "embed_tokens"
232
- ],
233
- "lora_modules_to_save": null,
234
- "lora_task_type": "CAUSAL_LM",
235
- "use_rslora": false,
236
- "num_lora_modules": -1
237
- }
238
  }
 
1
  {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "vision_start_token_id": 151652,
9
+ "vision_end_token_id": 151653,
10
+ "vision_token_id": 151654,
11
+ "image_token_id": 151655,
12
+ "video_token_id": 151656,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 3584,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 18944,
17
+ "max_position_embeddings": 128000,
18
+ "max_window_layers": 28,
19
+ "model_type": "qwen2_5_vl",
20
+ "num_attention_heads": 28,
21
+ "num_hidden_layers": 28,
22
+ "num_key_value_heads": 4,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_theta": 1000000.0,
25
+ "sliding_window": 32768,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.41.2",
29
+ "use_cache": true,
30
+ "use_sliding_window": false,
31
+ "vision_config": {
32
+ "depth": 32,
33
+ "hidden_act": "silu",
34
+ "hidden_size": 1280,
35
+ "intermediate_size": 3420,
36
+ "num_heads": 16,
37
+ "in_chans": 3,
38
+ "out_hidden_size": 3584,
39
+ "patch_size": 14,
40
+ "spatial_merge_size": 2,
41
+ "spatial_patch_size": 14,
42
+ "window_size": 112,
43
+ "fullatt_block_indexes": [
44
+ 7,
45
+ 15,
46
+ 23,
47
+ 31
48
+ ],
49
+ "tokens_per_second": 2,
50
+ "temporal_patch_size": 2
51
+ },
52
+ "rope_scaling": {
53
+ "type": "mrope",
54
+ "mrope_section": [
55
+ 16,
56
+ 24,
57
+ 24
58
+ ]
59
+ },
60
+ "vocab_size": 152064
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }