blahblahthron-1.1b / config.json
{
"_notes": "Extra training-only keys kept below; HF ignores them but they\u2019re here for provenance.",
"architectures": [
"MyCustomModelForCausalLM"
],
"auto_map": {
"AutoConfig": "blahblahtron_1_1B.HFWrapperConfig",
"AutoModelForCausalLM": "blahblahtron_1_1B.MyCustomModelForCausalLM"
},
"bos_token_id": 50256,
"dropout": 0.1,
"embedding_dim": 2048,
"eos_token_id": 50256,
"ffn_dim_multiplier": 4.0,
"learning_rate": 0.0003,
"context_length": 1024,
"model_type": "shallowmind",
"num_heads": 16,
"num_kv_heads": 4,
"num_layers": 22,
"pad_token_id": 50256,
"rms_norm_eps": 1e-06,
"tokenizer_name_or_path": "gpt2",
"torch_dtype": "bfloat16",
"transformers_version": "4.55.2",
"use_flash_attention_2": true,
"vocab_size": 50257,
"weight_decay": 0.1
}
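
Because auto_map routes AutoConfig and AutoModelForCausalLM to the custom classes in blahblahtron_1_1B, this checkpoint has to be loaded with trust_remote_code=True. Below is a minimal loading sketch, assuming the repo id is abeat/blahblahthron-1.1b (inferred from this page's path; adjust if the actual Hub path differs):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "abeat/blahblahthron-1.1b"  # assumed repo id, for illustration only

# auto_map points at custom classes shipped inside the repo
# (blahblahtron_1_1B.HFWrapperConfig / .MyCustomModelForCausalLM),
# so Transformers needs explicit permission to run that remote code.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in this config
)

# tokenizer_name_or_path is "gpt2"; if the repo does not ship its own
# tokenizer files, the stock GPT-2 tokenizer is one reasonable fallback.
tokenizer = AutoTokenizer.from_pretrained("gpt2")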