Update context length
configuration_llama.py (+2 -2)
@@ -114,7 +114,7 @@ class LlamaConfig(PretrainedConfig):
         num_attention_heads=40,
         num_key_value_heads=40,
         hidden_act="silu",
-        max_position_embeddings=
+        max_position_embeddings=32768,
         initializer_range=0.02,
         rms_norm_eps=1e-05,
         use_cache=True,
@@ -124,7 +124,7 @@ class LlamaConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000,
-        rope_scaling=
+        rope_scaling=None,
         attention_bias=False,
         **kwargs,
     ):
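For context, a minimal sketch (not part of the commit) of how the updated values look when the config is constructed with the transformers library; every parameter value below is taken from the diff above, and any argument not listed keeps its library default.

from transformers import LlamaConfig

# Minimal sketch: build a LlamaConfig with the values shown in the diff.
config = LlamaConfig(
    num_attention_heads=40,
    num_key_value_heads=40,
    hidden_act="silu",
    max_position_embeddings=32768,  # updated context length
    initializer_range=0.02,
    rms_norm_eps=1e-05,
    rope_theta=10000,
    rope_scaling=None,              # no RoPE scaling after this change
    attention_bias=False,
)

print(config.max_position_embeddings)  # 32768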