import torch
from transformers import pipeline, set_seed
import gradio as gr

# ---------------------------
# Model setup
# ---------------------------
MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"

print("Downloading and loading model...")
chatbot = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1  # GPU if available, else CPU
)
set_seed(42)  # reproducible sampling
print("✅ Chatbot is ready!")

# ---------------------------
# Chat prediction
# ---------------------------
def chat_with_model(user_input):
    if not user_input.strip():
        return "Please enter a message."

    # Build a single-turn conversation; the empty assistant turn cues the
    # chat template to leave room for the model's reply.
    messages = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": ""}
    ]
    prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)

    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=256,
        top_k=50,
        temperature=0.2,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
        min_new_tokens=0
    )

    # The pipeline returns the prompt plus the completion; strip the prompt
    # so only the newly generated reply is returned.
    full_text = response[0]["generated_text"]
    bot_response = full_text[len(prompt):].strip()
    return bot_response

# ---------------------------
# Gradio interface
# ---------------------------
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Enter your message"),
    outputs=gr.Textbox(label="AI Reply"),
    title="Tiny Chatbot 500M",
    description="Lightweight ~500M-parameter chat model, ideal for free Hugging Face CPU Spaces or n8n message handling."
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
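
# ---------------------------
# Example: calling the running app from Python
# ---------------------------
# A minimal sketch, assuming the app is serving on localhost:7860 and that
# the `gradio_client` package is installed (it is not a dependency of the
# script above). A single-function gr.Interface exposes its endpoint under
# Gradio's default name "/predict".
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860/")               # connect to the local app
#   reply = client.predict("Hello!", api_name="/predict")   # send one message
#   print(reply)                                            # the model's text reply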