{
  "adapter_path": "adapters/disarm_ew_llama3_lora_memory_optimized",
  "batch_size": 1,
  "config": "scripts/mlx_finetune_config_memory_optimized.yaml",
  "data": "Data/training",
  "eval_interval": 50,
  "eval_iters": 10,
  "fine_tune_type": "lora",
  "grad_checkpoint": false,
  "gradient_accumulation_steps": 4,
  "iters": 600,
  "learning_rate": 0.0003,
  "log_interval": 10,
  "lora_alpha": 16,
  "lora_dropout": 0.0,
  "lora_parameters": {
    "rank": 8,
    "dropout": 0.0,
    "scale": 20.0
  },
  "lora_rank": 16,
  "lr_schedule": null,
  "lr_scheduler": "cosine",
  "mask_prompt": false,
  "max_seq_length": 2048,
  "model": "ArapCheruiyot/disarm_ew-llama3",
  "num_layers": 4,
  "optimizer": "adam",
  "optimizer_config": {
    "adam": {},
    "adamw": {},
    "muon": {},
    "sgd": {},
    "adafactor": {}
  },
  "output_dir": "outputs/mlx_finetune_memory_optimized",
  "quantize": false,
  "resume_adapter_file": null,
  "save_every": 100,
  "save_interval": 100,
  "seed": 0,
  "steps_per_eval": 200,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 500,
  "train": true,
  "use_metal": true,
  "use_metal_float16": true,
  "use_wired_memory": true,
  "val_batches": 25,
  "wandb": null,
  "warmup_steps": 50,
  "weight_decay": 0.01,
  "wired_memory_limit_mb": 16384
}