{"vocab_size": 128000, "emb_dim": 1024, "n_heads": 16, "head_dim": 64, "n_kv_groups": 4, "n_layers": 12, "hidden_dim": 4096, "context_length": 32768, "rope_base": 10000.0, "qk_norm": true, "query_pre_attn_scalar": null, "dtype": "torch.bfloat16"}