arka7's picture
Upload Stage 1 model - Loss: 2.0218
780b318 verified
{
"model_type": "moe_transformer",
"vocab_size": 32000,
"d_model": 512,
"nhead": 8,
"num_experts": 4,
"num_layers": 6,
"max_seq_len": 256,
"languages": [
"en",
"fr",
"hi",
"bn"
],
"training_stage": "stage1_pretraining",
"final_loss": 2.02175643123963,
"final_balance_loss": 0.010806717754429852
}