import torch
from transformers import pipeline, set_seed
import gradio as gr

# ---------------------------
# Model setup
# ---------------------------
MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"

print("Downloading and loading model...")
chatbot = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1  # GPU if available, else CPU
)
set_seed(42)  # reproducible sampling
print("✅ Chatbot is ready!")

# ---------------------------
# Chat prediction
# ---------------------------
def chat_with_model(user_input):
    if not user_input.strip():
        return "Please enter a message."

    # Build a single-turn conversation; the empty assistant turn cues the
    # chat template to leave room for the model's reply.
    messages = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": ""}
    ]
    prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)

    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=256,
        top_k=50,
        temperature=0.2,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
        min_new_tokens=0
    )

    # The pipeline returns the prompt plus the completion; strip the prompt
    # so only the newly generated reply is returned.
    full_text = response[0]["generated_text"]
    bot_response = full_text[len(prompt):].strip()
    return bot_response

# ---------------------------
# Gradio interface
# ---------------------------
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Enter your message"),
    outputs=gr.Textbox(label="AI Reply"),
    title="Tiny Chatbot 500M",
    description="Lightweight ~500M-parameter chat model, ideal for free Hugging Face CPU Spaces or n8n message handling."
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
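
# ---------------------------
# Example: calling the running app from Python
# ---------------------------
# A minimal sketch, assuming the app is serving on localhost:7860 and that
# the `gradio_client` package is installed (it is not a dependency of the
# script above). A single-function gr.Interface exposes its endpoint under
# Gradio's default name "/predict".
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860/")               # connect to the local app
#   reply = client.predict("Hello!", api_name="/predict")   # send one message
#   print(reply)                                            # the model's text reply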