irodkin
/

InnerLoopARMTForCausalLM_run_20

irodkin committed on Dec 16, 2025

Commit

a2897c4

verified ·

1 Parent(s): d85808f

Training checkpoint at step 1000

Files changed (1) hide show

config.json CHANGED Viewed

@@ -7,11 +7,11 @@
   ],
   "attend_to_previous_input": false,
   "base_model_config": null,
-  "base_model_name": "HuggingFaceTB/SmolLM2-360M",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
-  "dtype": "float32",
   "freeze_mem": false,
   "gating": false,
   "layers_attr": "model.layers",
@@ -21,12 +21,12 @@
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
-  "segment_size": 256,
-  "sliding_window": false,
   "time_penalty": 0.0,
-  "transformers_version": "4.57.1",
   "use_denom": true,
-  "use_sink": false,
   "wrap_layers": null,
   "wrap_pos": false,
   "auto_map": {

   ],
   "attend_to_previous_input": false,
   "base_model_config": null,
+  "base_model_name": "meta-llama/Llama-3.2-1B",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
+  "dtype": "bfloat16",
   "freeze_mem": false,
   "gating": false,
   "layers_attr": "model.layers",
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
+  "segment_size": 1024,
+  "sliding_window": true,
   "time_penalty": 0.0,
+  "transformers_version": "4.57.3",
   "use_denom": true,
+  "use_sink": true,
   "wrap_layers": null,
   "wrap_pos": false,
   "auto_map": {