{
  "_notes": "Extra training-only keys kept below; HF ignores them but they\u2019re here for provenance.",
  "architectures": [
    "MyCustomModelForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "blahblahtron_1_1B.HFWrapperConfig",
    "AutoModelForCausalLM": "blahblahtron_1_1B.MyCustomModelForCausalLM"
  },
  "bos_token_id": 50256,
  "context_length": 1024,
  "dropout": 0.1,
  "embedding_dim": 2048,
  "eos_token_id": 50256,
  "ffn_dim_multiplier": 4.0,
  "learning_rate": 0.0003,
  "model_type": "shallowmind",
  "num_heads": 16,
  "num_kv_heads": 4,
  "num_layers": 22,
  "pad_token_id": 50256,
  "rms_norm_eps": 1e-06,
  "tokenizer_name_or_path": "gpt2",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.55.2",
  "use_flash_attention_2": true,
  "vocab_size": 50257,
  "weight_decay": 0.1
}
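
Since `auto_map` routes `AutoConfig` and `AutoModelForCausalLM` to the custom classes in `blahblahtron_1_1B.py`, loading requires `trust_remote_code=True`. Here is a minimal loading sketch; the repo id `your-org/blahblahtron-1.1B` is a hypothetical placeholder for the actual checkpoint path. It also shows that the extra training-only keys survive the round trip: `PretrainedConfig` sets unknown JSON keys as plain attributes, which is why they can be kept for provenance even though HF ignores them.

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo = "your-org/blahblahtron-1.1B"  # hypothetical repo id, substitute yours

# trust_remote_code=True is required because auto_map points the Auto*
# classes at custom code shipped alongside the weights.
cfg = AutoConfig.from_pretrained(repo, trust_remote_code=True)
print(cfg.model_type)      # "shallowmind"
print(cfg.learning_rate)   # 0.0003 -- ignored by HF, kept for provenance

# Tokenizer is plain GPT-2 per tokenizer_name_or_path; bos/eos/pad are
# all 50256, matching the token-id fields above.
tok = AutoTokenizer.from_pretrained(cfg.tokenizer_name_or_path)

model = AutoModelForCausalLM.from_pretrained(
    repo,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16"
)
```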