Alexvatti committed on
Commit
e06bfd0
·
verified ·
1 Parent(s): 9f1f2e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -2,7 +2,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import torch
3
  import spaces
4
  import gradio as gr
5
-
6
  import os
7
  from huggingface_hub import login
8
 
@@ -13,9 +13,14 @@ model_name = "meta-llama/Meta-Llama-3-8B" # Change to 13B or 70B if needed
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_name,use_auth_token=True)
15
 
 
 
 
 
 
16
  model = AutoModelForCausalLM.from_pretrained(
17
  model_name,
18
- torch_dtype=torch.float16, # Enable FP16
19
  device_map="auto" # Automatically place model on GPU
20
  )
21
 
 
2
  import torch
3
  import spaces
4
  import gradio as gr
5
+ from transformers import BitsAndBytesConfig
6
  import os
7
  from huggingface_hub import login
8
 
 
13
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_name,use_auth_token=True)
15
 
16
+ quantization_config = BitsAndBytesConfig(
17
+ load_in_4bit=True, # Set `True` for 4-bit, `False` for 8-bit
18
+ bnb_4bit_compute_dtype=torch.float16,
19
+ bnb_4bit_use_double_quant=True
20
+ )
21
  model = AutoModelForCausalLM.from_pretrained(
22
  model_name,
23
+ quantization_config=quantization_config,
24
  device_map="auto" # Automatically place model on GPU
25
  )
26