irodkin's picture
Training checkpoint at step 1000
a2897c4 verified
raw
history blame contribute delete
876 Bytes
{
"act_format": "linear",
"act_on": false,
"act_type": "associative",
"architectures": [
"InnerLoopARMTForCausalLM"
],
"attend_to_previous_input": false,
"base_model_config": null,
"base_model_name": "meta-llama/Llama-3.2-1B",
"constant_depth": false,
"correction": true,
"d_mem": 64,
"dtype": "bfloat16",
"freeze_mem": false,
"gating": false,
"layers_attr": "model.layers",
"max_hop": 4,
"model_type": "armt",
"n_heads": 1,
"noisy_halting": false,
"num_mem_tokens": 32,
"segment_alignment": "left",
"segment_size": 1024,
"sliding_window": true,
"time_penalty": 0.0,
"transformers_version": "4.57.3",
"use_denom": true,
"use_sink": true,
"wrap_layers": null,
"wrap_pos": false,
"auto_map": {
"AutoConfig": "modeling_armt.ARMTConfig",
"AutoModelForCausalLM": "modeling_armt.InnerLoopARMTForCausalLM"
}
}