LLaMA2

Runtime error

Alexvatti committed on Feb 17

Commit

e06bfd0

verified ·

1 Parent(s): 9f1f2e7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import spaces
 import gradio as gr
 import os
 from huggingface_hub import login
@@ -13,9 +13,14 @@ model_name = "meta-llama/Meta-Llama-3-8B"  # Change to 13B or 70B if needed
 tokenizer = AutoTokenizer.from_pretrained(model_name,use_auth_token=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float16,  # Enable FP16
     device_map="auto"  # Automatically place model on GPU
 )

 import torch
 import spaces
 import gradio as gr
+from transformers import BitsAndBytesConfig
 import os
 from huggingface_hub import login
 tokenizer = AutoTokenizer.from_pretrained(model_name,use_auth_token=True)
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # 4-bit quantization; for 8-bit, use `load_in_8bit=True` instead
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True
+)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    quantization_config=quantization_config,
     device_map="auto"  # Automatically place model on GPU
 )