ewssbd committed (verified)
Commit 1b32362 · 1 Parent(s): 7ae4e4a

Update app.py

Files changed (1)
  1. app.py +44 -37
app.py CHANGED
@@ -1,55 +1,62 @@
-import os
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from huggingface_hub import hf_hub_download
+from transformers import pipeline, set_seed
 import gradio as gr

 # ---------------------------
-# Step 1: Define model info
+# Model setup
 # ---------------------------
-REPO_ID = "amusktweewt/tiny-model-500M-chat-v2"
-CACHE_DIR = "/tmp" # store model temporarily
+MODEL_NAME = "amusktweewt/tiny-model-500M-chat-v2"

-# ---------------------------
-# Step 2: Download model at runtime
-# ---------------------------
-print("===== Application Startup =====")
-print("Checking for model...")
-model_path = os.path.join(CACHE_DIR, REPO_ID.replace("/", "_"))
+print("Downloading and loading model...")
+chatbot = pipeline(
+    "text-generation",
+    model=MODEL_NAME,
+    device=0 if torch.cuda.is_available() else -1
+)

-if not os.path.exists(model_path):
-    print("Downloading model...")
-    model_path = hf_hub_download(repo_id=REPO_ID, cache_dir=CACHE_DIR)
-    print("Download completed!")
-else:
-    print("Model already cached!")
+set_seed(42)
+print(" Chatbot is ready!")

 # ---------------------------
-# Step 3: Load model and tokenizer
+# Chat prediction
 # ---------------------------
-print("Loading model and tokenizer...")
-device = "cuda" if torch.cuda.is_available() else "cpu"
-tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
-model = AutoModelForCausalLM.from_pretrained(REPO_ID, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32).to(device)
+def chat_with_model(user_input):
+    if not user_input.strip():
+        return "Please enter a message."

-# ---------------------------
-# Step 4: Define prediction function
-# ---------------------------
-def predict(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    outputs = model.generate(**inputs, max_length=200, temperature=0.7)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
+    messages = [
+        {"role": "user", "content": user_input},
+        {"role": "assistant", "content": ""}
+    ]
+
+    prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)
+
+    response = chatbot(
+        prompt,
+        do_sample=True,
+        max_new_tokens=256,
+        top_k=50,
+        temperature=0.2,
+        num_return_sequences=1,
+        repetition_penalty=1.1,
+        pad_token_id=chatbot.tokenizer.eos_token_id,
+        min_new_tokens=0
+    )
+
+    full_text = response[0]["generated_text"]
+    bot_response = full_text[len(prompt):].strip()
+
+    return bot_response

 # ---------------------------
-# Step 5: Gradio interface
+# Gradio interface
 # ---------------------------
 iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Textbox(label="User Message"),
-    outputs=gr.Textbox(label="AI Response"),
-    title="Tiny Model 500M Chat v2",
-    description="A lightweight AI chat model that runs free on CPU."
+    fn=chat_with_model,
+    inputs=gr.Textbox(label="Enter your message"),
+    outputs=gr.Textbox(label="AI Reply"),
+    title="Tiny Chatbot 500M",
+    description="Lightweight chat model under 500MB, ideal for free Hugging Face CPU Spaces or n8n message handling."
 )

 if __name__ == "__main__":
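
For a quick local check of the updated version, the new function can be exercised directly, without going through the Gradio UI. The following is a minimal sketch, not part of the commit; it assumes the updated app.py is importable from the working directory and that importing it runs the module-level pipeline() setup, downloading and loading the model first (the hunk ends at the __main__ guard, so the actual launch call is not shown in this diff).

    # Minimal smoke test (sketch, not part of the commit): importing app runs
    # the module-level pipeline() call, so the model is downloaded and loaded
    # before chat_with_model is available.
    from app import chat_with_model

    reply = chat_with_model("Hello! What can you do?")
    print(reply)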