import torch
from transformers import pipeline, set_seed
import gradio as gr

# ---------------------------
# Model setup
# ---------------------------
MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"

print("Downloading and loading model...")
chatbot = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)
set_seed(42)  # fixed seed so sampled replies are reproducible across restarts
print("Chatbot is ready!")
# ---------------------------
# Chat prediction
# ---------------------------
def chat_with_model(user_input):
    if not user_input.strip():
        return "Please enter a message."

    # Append an empty assistant turn so the chat template ends exactly where
    # the model should start writing its reply.
    messages = [
        {"role": "user", "content": user_input},
        {"role": "assistant", "content": ""},
    ]
    prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)

    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=256,
        top_k=50,
        temperature=0.2,          # low temperature keeps replies focused
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
        return_full_text=False,   # return only the newly generated text,
                                  # avoiding fragile prompt-length slicing
    )
    return response[0]["generated_text"].strip()
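
# Quick sanity check, not in the original app (commented out so it does not
# run on Space startup; the prompt string is purely illustrative):
#
#   print(chat_with_model("Hello! What can you do?"))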
# ---------------------------
# Gradio interface
# ---------------------------
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Enter your message"),
    outputs=gr.Textbox(label="AI Reply"),
    title="Tiny Chatbot 500M",
    description="Lightweight chat model under 500MB, ideal for free Hugging Face CPU Spaces or n8n message handling.",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
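
# ---------------------------
# Calling the Space remotely (sketch)
# ---------------------------
# A hedged example, assuming the app is reachable at localhost:7860: a running
# gr.Interface exposes a "/predict" endpoint that gradio_client can call (the
# URL and message below are assumptions, not part of the original app):
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860")
#   reply = client.predict("Hello!", api_name="/predict")
#   print(reply)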