{ "model_type": "moe_transformer", "vocab_size": 32000, "d_model": 512, "nhead": 8, "num_experts": 4, "num_layers": 6, "max_seq_len": 256, "languages": [ "en", "fr", "hi", "bn" ], "training_stage": "stage1_pretraining", "final_loss": 2.02175643123963, "final_balance_loss": 0.010806717754429852 }