{
  "model_type": "moe_transformer",
  "vocab_size": 32000,
  "d_model": 512,
  "nhead": 8,
  "num_experts": 4,
  "num_layers": 6,
  "max_seq_len": 256,
  "languages": [
    "en",
    "fr",
    "hi",
    "bn"
  ],
  "training_stage": "stage1_pretraining",
  "final_loss": 2.02175643123963,
  "final_balance_loss": 0.010806717754429852
}
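
For reference, a minimal sketch of how a consumer script might load and sanity-check this config. The MoEConfig dataclass, the load_config helper, and the "config.json" filename are illustrative assumptions, not artifacts from this repository; only Python's standard library is used.

import json
from dataclasses import dataclass


# Hypothetical schema mirroring the JSON fields above; the names are
# chosen for illustration and are not defined in this repository.
@dataclass
class MoEConfig:
    model_type: str
    vocab_size: int
    d_model: int
    nhead: int
    num_experts: int
    num_layers: int
    max_seq_len: int
    languages: list[str]
    training_stage: str
    final_loss: float
    final_balance_loss: float

    def __post_init__(self) -> None:
        # d_model must split evenly across the attention heads
        assert self.d_model % self.nhead == 0, "d_model must be divisible by nhead"


def load_config(path: str) -> MoEConfig:
    """Read the JSON file above into a typed config object."""
    with open(path) as f:
        return MoEConfig(**json.load(f))


if __name__ == "__main__":
    cfg = load_config("config.json")  # assumed filename
    print(f"{cfg.num_layers} layers x {cfg.num_experts} experts, "
          f"head dim {cfg.d_model // cfg.nhead}, langs: {cfg.languages}")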