irodkin committed on
Commit
a2897c4
·
verified ·
1 Parent(s): d85808f

Training checkpoint at step 1000

Browse files
Files changed (1) hide show
  1. config.json +6 -6
config.json CHANGED
@@ -7,11 +7,11 @@
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
- "base_model_name": "HuggingFaceTB/SmolLM2-360M",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
- "dtype": "float32",
15
  "freeze_mem": false,
16
  "gating": false,
17
  "layers_attr": "model.layers",
@@ -21,12 +21,12 @@
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
- "segment_size": 256,
25
- "sliding_window": false,
26
  "time_penalty": 0.0,
27
- "transformers_version": "4.57.1",
28
  "use_denom": true,
29
- "use_sink": false,
30
  "wrap_layers": null,
31
  "wrap_pos": false,
32
  "auto_map": {
 
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
+ "base_model_name": "meta-llama/Llama-3.2-1B",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
+ "dtype": "bfloat16",
15
  "freeze_mem": false,
16
  "gating": false,
17
  "layers_attr": "model.layers",
 
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
+ "segment_size": 1024,
25
+ "sliding_window": true,
26
  "time_penalty": 0.0,
27
+ "transformers_version": "4.57.3",
28
  "use_denom": true,
29
+ "use_sink": true,
30
  "wrap_layers": null,
31
  "wrap_pos": false,
32
  "auto_map": {