| # generation hyper-parameters | |
| max_len: 512 | |
| penalty_alpha: 0.6 | |
| top_k: 10 | |
| top_p: 0.7 | |
| random_prefix_len: 5 | |
| sample_num: 2 | |
| decoding_method: sampling | |
| generate_len: 512 | |
| # lora hyper-parameters | |
| lora_r: 32 | |
| lora_alpha: 32 | |
| lora_dropout: 0.1 | |
| # training configuration; additional settings can be found in the dsconfig folder | |
| train: | |
| seed: 0 | |
| warmup_rate: 0.1 | |
| epochs: 2 | |
| max_length: 1024 | |
| max_shard_size: 10GB | |