metascroy committed on
Commit
a791890
·
verified ·
1 Parent(s): 16a3c28

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. README.md +1 -0
  2. config.json +161 -0
  3. generation_config.json +13 -0
  4. pytorch_model.bin +3 -0
README.md CHANGED
@@ -6,6 +6,7 @@ tags:
6
  - unsloth
7
  - qwen3
8
  - trl
 
9
  license: apache-2.0
10
  language:
11
  - en
 
6
  - unsloth
7
  - qwen3
8
  - trl
9
+ - sft
10
  license: apache-2.0
11
  language:
12
  - en
config.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 40960,
53
+ "max_window_layers": 36,
54
+ "model_type": "qwen3",
55
+ "num_attention_heads": 32,
56
+ "num_hidden_layers": 36,
57
+ "num_key_value_heads": 8,
58
+ "pad_token_id": 151654,
59
+ "quantization_config": {
60
+ "include_input_output_embeddings": true,
61
+ "modules_to_not_convert": [],
62
+ "quant_method": "torchao",
63
+ "quant_type": {
64
+ "default": {
65
+ "_data": {
66
+ "module_fqn_to_config": {
67
+ "_default": {
68
+ "_data": {
69
+ "act_mapping_type": {
70
+ "_data": "ASYMMETRIC",
71
+ "_type": "MappingType"
72
+ },
73
+ "intx_choose_qparams_algorithm": {
74
+ "_data": "AFFINE",
75
+ "_type": "IntxChooseQParamsAlgorithm"
76
+ },
77
+ "intx_packing_format": {
78
+ "_data": "UNPACKED_TO_INT8",
79
+ "_type": "IntxPackingFormat"
80
+ },
81
+ "layout": {
82
+ "_data": {},
83
+ "_type": "QDQLayout",
84
+ "_version": 1
85
+ },
86
+ "weight_dtype": {
87
+ "_data": "int4",
88
+ "_type": "torch.dtype"
89
+ },
90
+ "weight_granularity": {
91
+ "_data": {
92
+ "group_size": 32
93
+ },
94
+ "_type": "PerGroup",
95
+ "_version": 1
96
+ },
97
+ "weight_mapping_type": {
98
+ "_data": "SYMMETRIC",
99
+ "_type": "MappingType"
100
+ },
101
+ "weight_scale_dtype": null
102
+ },
103
+ "_type": "Int8DynamicActivationIntxWeightConfig",
104
+ "_version": 2
105
+ },
106
+ "model.embed_tokens": {
107
+ "_data": {
108
+ "granularity": {
109
+ "_data": {
110
+ "axis": 0
111
+ },
112
+ "_type": "PerAxis",
113
+ "_version": 1
114
+ },
115
+ "intx_choose_qparams_algorithm": {
116
+ "_data": "AFFINE",
117
+ "_type": "IntxChooseQParamsAlgorithm"
118
+ },
119
+ "intx_packing_format": {
120
+ "_data": "UNPACKED_TO_INT8",
121
+ "_type": "IntxPackingFormat"
122
+ },
123
+ "layout": {
124
+ "_data": {},
125
+ "_type": "QDQLayout",
126
+ "_version": 1
127
+ },
128
+ "mapping_type": {
129
+ "_data": "SYMMETRIC",
130
+ "_type": "MappingType"
131
+ },
132
+ "scale_dtype": null,
133
+ "weight_dtype": {
134
+ "_data": "int8",
135
+ "_type": "torch.dtype"
136
+ }
137
+ },
138
+ "_type": "IntxWeightOnlyConfig",
139
+ "_version": 2
140
+ }
141
+ }
142
+ },
143
+ "_type": "ModuleFqnToConfig",
144
+ "_version": 1
145
+ }
146
+ },
147
+ "quant_type_kwargs": {},
148
+ "untie_embedding_weights": false
149
+ },
150
+ "rms_norm_eps": 1e-06,
151
+ "rope_scaling": null,
152
+ "rope_theta": 1000000,
153
+ "sliding_window": null,
154
+ "tie_word_embeddings": false,
155
+ "transformers_version": "4.57.2",
156
+ "unsloth_fixed": true,
157
+ "unsloth_version": "2025.11.6",
158
+ "use_cache": true,
159
+ "use_sliding_window": false,
160
+ "vocab_size": 151936
161
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "max_length": 40960,
8
+ "pad_token_id": 151654,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.2"
13
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02229c45dbf9c67cc84c972c837e62fd94300415cefdcba89ecd77d7ff9d220
3
+ size 4789478103