# llama-321b / app.py
import torch
from transformers import pipeline, set_seed
import gradio as gr
# ---------------------------
# Model setup
# ---------------------------
MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"
print("Downloading and loading model...")
chatbot = pipeline(
    "text-generation",
    model=MODEL_NAME,
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
)
set_seed(42)  # fixed seed so sampled replies are reproducible across restarts
print("✅ Chatbot is ready!")
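# A minimal smoke-test sketch (not part of the original app): uncomment to
# verify generation before wiring up the UI. The prompt text here is an
# arbitrary example.
#
#   print(chatbot("Hello, how are you?", max_new_tokens=20)[0]["generated_text"])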
# ---------------------------
# Chat prediction
# ---------------------------
def chat_with_model(user_input):
    """Generate a single assistant reply for `user_input`."""
    if not user_input.strip():
        return "Please enter a message."

    # Render the prompt with the model's chat template; add_generation_prompt
    # appends the assistant header so the model continues in that role.
    messages = [{"role": "user", "content": user_input}]
    prompt = chatbot.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    response = chatbot(
        prompt,
        do_sample=True,
        max_new_tokens=256,
        top_k=50,
        temperature=0.2,
        num_return_sequences=1,
        repetition_penalty=1.1,
        pad_token_id=chatbot.tokenizer.eos_token_id,
    )

    # The pipeline echoes the prompt in generated_text, so slice it off and
    # keep only the newly generated reply.
    full_text = response[0]["generated_text"]
    return full_text[len(prompt):].strip()
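# Hypothetical usage example: chat_with_model("Tell me a joke") returns only
# the reply text, ready to display in the Gradio textbox below.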
# ---------------------------
# Gradio interface
# ---------------------------
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(label="Enter your message"),
    outputs=gr.Textbox(label="AI Reply"),
    title="Tiny Chatbot 500M",
    description=(
        "Lightweight ~500M-parameter chat model, suited to free "
        "Hugging Face CPU Spaces or n8n message handling."
    ),
)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
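# ---------------------------
# Calling the app (sketch)
# ---------------------------
# A minimal sketch of querying this app once it is running, using the
# gradio_client package. The localhost URL assumes the default launch above,
# and "/predict" is the default endpoint name for a single gr.Interface.
#
#   from gradio_client import Client
#
#   client = Client("http://localhost:7860")
#   reply = client.predict("Hello there!", api_name="/predict")
#   print(reply)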