Upload folder using huggingface_hub
Browse files
- README.md +2 -1
- config.json +7 -7
- pytorch_model-00001-of-00002.bin +3 -0
- pytorch_model-00002-of-00002.bin +3 -0
- pytorch_model.bin.index.json +0 -0
README.md
CHANGED
|
@@ -146,7 +146,8 @@ print(processor.decode(out[0], skip_special_tokens=True))
|
|
| 146 |
|
| 147 |
##### In 8-bit precision (`int8`)
|
| 148 |
>**Important:** Paper results only use int8 for the LLM weights while this loads all weights in int8.
|
| 149 |
-
> We see that this gives slightly worse results but currently
|
|
|
|
| 150 |
<details>
|
| 151 |
<summary> Click to expand </summary>
|
| 152 |
|
|
|
|
| 146 |
|
| 147 |
##### In 8-bit precision (`int8`)
|
| 148 |
>**Important:** Paper results only use int8 for the LLM weights while this loads all weights in int8.
|
| 149 |
+
> We see that this gives slightly worse results but currently int8 for some model parts is not supported by HuggingFace.
|
| 150 |
+
|
| 151 |
<details>
|
| 152 |
<summary> Click to expand </summary>
|
| 153 |
|
config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"_commit_hash": "
|
| 3 |
"architectures": [
|
| 4 |
-
"
|
| 5 |
],
|
| 6 |
"initializer_factor": 1.0,
|
| 7 |
"initializer_range": 0.02,
|
|
@@ -82,13 +82,13 @@
|
|
| 82 |
"top_p": 1.0,
|
| 83 |
"torch_dtype": null,
|
| 84 |
"torchscript": false,
|
| 85 |
-
"transformers_version": "4.
|
| 86 |
"typical_p": 1.0,
|
| 87 |
"use_bfloat16": false,
|
| 88 |
"vocab_size": 30522
|
| 89 |
},
|
| 90 |
"text_config": {
|
| 91 |
-
"_name_or_path": "",
|
| 92 |
"add_cross_attention": false,
|
| 93 |
"architectures": [
|
| 94 |
"MT5ForConditionalGeneration"
|
|
@@ -164,7 +164,7 @@
|
|
| 164 |
"top_p": 1.0,
|
| 165 |
"torch_dtype": "float32",
|
| 166 |
"torchscript": false,
|
| 167 |
-
"transformers_version": "4.
|
| 168 |
"typical_p": 1.0,
|
| 169 |
"use_bfloat16": false,
|
| 170 |
"use_cache": true,
|
|
@@ -211,7 +211,7 @@
|
|
| 211 |
"LABEL_0": 0,
|
| 212 |
"LABEL_1": 1
|
| 213 |
},
|
| 214 |
-
"layer_norm_eps": 1e-
|
| 215 |
"length_penalty": 1.0,
|
| 216 |
"max_length": 20,
|
| 217 |
"min_length": 0,
|
|
@@ -249,7 +249,7 @@
|
|
| 249 |
"top_p": 1.0,
|
| 250 |
"torch_dtype": null,
|
| 251 |
"torchscript": false,
|
| 252 |
-
"transformers_version": "4.
|
| 253 |
"typical_p": 1.0,
|
| 254 |
"use_bfloat16": false
|
| 255 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_commit_hash": "cc2bb7bce2f7d4d1c37753c7e9c05a443a226614",
|
| 3 |
"architectures": [
|
| 4 |
+
"mBLIP"
|
| 5 |
],
|
| 6 |
"initializer_factor": 1.0,
|
| 7 |
"initializer_range": 0.02,
|
|
|
|
| 82 |
"top_p": 1.0,
|
| 83 |
"torch_dtype": null,
|
| 84 |
"torchscript": false,
|
| 85 |
+
"transformers_version": "4.31.0",
|
| 86 |
"typical_p": 1.0,
|
| 87 |
"use_bfloat16": false,
|
| 88 |
"vocab_size": 30522
|
| 89 |
},
|
| 90 |
"text_config": {
|
| 91 |
+
"_name_or_path": "/media/gregor/DATA/projects/wuerzburg/mblip/checkpoints/mt0-xl/07_24_2023_08_03_33-1-79282",
|
| 92 |
"add_cross_attention": false,
|
| 93 |
"architectures": [
|
| 94 |
"MT5ForConditionalGeneration"
|
|
|
|
| 164 |
"top_p": 1.0,
|
| 165 |
"torch_dtype": "float32",
|
| 166 |
"torchscript": false,
|
| 167 |
+
"transformers_version": "4.31.0",
|
| 168 |
"typical_p": 1.0,
|
| 169 |
"use_bfloat16": false,
|
| 170 |
"use_cache": true,
|
|
|
|
| 211 |
"LABEL_0": 0,
|
| 212 |
"LABEL_1": 1
|
| 213 |
},
|
| 214 |
+
"layer_norm_eps": 1e-06,
|
| 215 |
"length_penalty": 1.0,
|
| 216 |
"max_length": 20,
|
| 217 |
"min_length": 0,
|
|
|
|
| 249 |
"top_p": 1.0,
|
| 250 |
"torch_dtype": null,
|
| 251 |
"torchscript": false,
|
| 252 |
+
"transformers_version": "4.31.0",
|
| 253 |
"typical_p": 1.0,
|
| 254 |
"use_bfloat16": false
|
| 255 |
}
|
pytorch_model-00001-of-00002.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eff61f2f4a6d9f018d6b114f3266bd528dee8c8d902d4d8bdfd709d1f6f934ff
|
| 3 |
+
size 9960301947
|
pytorch_model-00002-of-00002.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d26f5efafa5b50329ad9212aa899f6a283404c0d07a8bffc0b0b94c42128b815
|
| 3 |
+
size 9381409638
|
pytorch_model.bin.index.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|