# llama-321b / app.py
import torch
from transformers import pipeline, set_seed
import gradio as gr
# ---------------------------
# Model setup
# ---------------------------
MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"
print("Downloading and loading model...")
chatbot = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)
set_seed(42)  # fixed seed so sampled replies are reproducible across restarts
print("✅ Chatbot is ready!")
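# A minimal smoke-test sketch (not part of the original app): uncomment to
# verify generation before wiring up the UI. The prompt text here is an
# arbitrary example.
#
#   print(chatbot("Hello, how are you?", max_new_tokens=20)[0]["generated_text"])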
# ---------------------------
# Chat prediction
# ---------------------------
def chat_with_model(user_input):
    """Generate a single assistant reply for `user_input`."""
    if not user_input.strip():
        return "Please enter a message."

    # Render the prompt with the model's chat template; add_generation_prompt
    # appends the assistant header so the model continues in that role.
    messages = [{"role": "user", "content": user_input}]
    prompt = chatbot.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=256,
        top_k=50,
        temperature=0.2,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
    )

    # The pipeline echoes the prompt in generated_text, so slice it off and
    # keep only the newly generated reply.
    full_text = response[0]["generated_text"]
    return full_text[len(prompt):].strip()
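# Hypothetical usage example: chat_with_model("Tell me a joke") returns only
# the reply text, ready to display in the Gradio textbox below.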
# ---------------------------
# Gradio interface
# ---------------------------
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Enter your message"),
    outputs=gr.Textbox(label="AI Reply"),
    title="Tiny Chatbot 500M",
    description=(
        "Lightweight ~500M-parameter chat model, suited to free "
        "Hugging Face CPU Spaces or n8n message handling."
    ),
)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
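# ---------------------------
# Calling the app (sketch)
# ---------------------------
# A minimal sketch of querying this app once it is running, using the
# gradio_client package. The localhost URL assumes the default launch above,
# and "/predict" is the default endpoint name for a single gr.Interface.
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860")
#   reply = client.predict("Hello there!", api_name="/predict")
#   print(reply)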