{
  "model_type": "moe_transformer",
  "vocab_size": 32000,
  "d_model": 512,
  "nhead": 8,
  "num_experts": 4,
  "num_layers": 6,
  "max_seq_len": 256,
  "languages": [
    "en",
    "fr",
    "hi",
    "bn"
  ],
  "training_stage": "stage1_pretraining",
  "final_loss": 2.02175643123963,
  "final_balance_loss": 0.010806717754429852
}
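
For context, below is a minimal sketch of how a config like this might drive a mixture-of-experts feed-forward layer with a Switch-style load-balancing auxiliary loss, plausibly the source of the final_balance_loss value above. Everything in the sketch is an assumption: the config.json path, the MoEFeedForward class, the hidden size d_ff, and the top-1 routing scheme are illustrative and not necessarily this repo's actual implementation.

import json
import torch
import torch.nn as nn
import torch.nn.functional as F

# Path is an assumption; adjust to wherever the config above is stored.
with open("config.json") as f:
    cfg = json.load(f)

class MoEFeedForward(nn.Module):
    """Top-1 gated mixture-of-experts FFN. A sketch only: the repo's
    actual routing and expert architecture are not shown in the config."""

    def __init__(self, d_model: int, num_experts: int, d_ff: int = 2048):
        super().__init__()
        self.gate = nn.Linear(d_model, num_experts)
        self.experts = nn.ModuleList(
            nn.Sequential(
                nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model)
            )
            for _ in range(num_experts)
        )

    def forward(self, x):
        # x: (tokens, d_model), batch and sequence dims already flattened.
        probs = F.softmax(self.gate(x), dim=-1)   # (tokens, num_experts)
        top_p, top_idx = probs.max(dim=-1)        # top-1 routing decision
        out = torch.zeros_like(x)
        for e, expert in enumerate(self.experts):
            mask = top_idx == e
            if mask.any():
                # Scale each expert's output by its gate probability.
                out[mask] = top_p[mask].unsqueeze(-1) * expert(x[mask])
        # Switch-style balance loss: num_experts * sum over experts of
        # (fraction of tokens routed to expert e) * (mean gate prob of e).
        # This equals 1.0 under perfectly uniform routing; in training it
        # is typically scaled by a small coefficient before being added
        # to the language-modeling loss.
        frac = F.one_hot(top_idx, probs.size(-1)).float().mean(dim=0)
        balance_loss = probs.size(-1) * (frac * probs.mean(dim=0)).sum()
        return out, balance_loss

# Instantiate directly from the config fields shown above.
moe = MoEFeedForward(cfg["d_model"], cfg["num_experts"])
x = torch.randn(8, cfg["d_model"])
y, bal = moe(x)
print(y.shape, float(bal))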