LLaMA2

Runtime error

Alexvatti committed on Feb 17

Commit

d0f20d3

verified ·

1 Parent(s): a29c461

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import spaces
+import gradio as gr
+# Model and Tokenizer
+model_name = "meta-llama/Llama-2-7b-hf"  # Change to 13B or 70B if needed
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,  # Enable FP16
+    device_map="auto"  # Automatically place model on GPU
+)
+# Inference Function
+@spaces.GPU
+def generate_text(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    output = model.generate(**inputs, max_new_tokens=100)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+# Example Usage
+@spaces.GPU
+def chat_with_llama(prompt):
+    return generate_text(prompt)
+gr.Interface(fn=chat_with_llama, inputs="text", outputs="text", title="LLaMA 2 Chatbot").launch()