Deadmon committed on
Commit a8960ed · verified · 1 Parent(s): 4354af8

Create app.py

Files changed (1)
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+import gradio as gr
+import os
+from huggingface_hub import InferenceClient
+from typing import Generator
+
+# --- Model Configuration ---
+# The ID of the model we want to use from the Hugging Face Hub.
+MODEL_ID = "Deadmon/Orion-zhen-Qwen2.5-7B-Instruct-Uncensored"
+
+# --- Hugging Face Token ---
+# The Gradio app will automatically use the Hugging Face token of the
+# logged-in user if the Space is private. We can also explicitly use
+# a token stored in the Space's secrets.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+# --- Initialize the Inference Client ---
+# The client will be used to make API calls to the model.
+# We assume the model is served via a compatible Inference API endpoint,
+# which is standard for providers on the Hub.
+try:
+    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+except Exception as e:
+    # If the client fails to initialize, we'll show an error.
+    # This can happen if the token is missing or invalid for a private model.
+    print(f"Error initializing InferenceClient: {e}")
+    client = None
+
+# --- Model Prediction Function ---
+# This function is called by the Gradio ChatInterface.
+# It takes the user's message and the conversation history,
+# and returns the model's response as a streaming generator.
+def predict(message: str, history: list[list[str]]) -> Generator[str, None, None]:
+    if client is None:
+        yield "Error: Could not connect to the model. Please check the server logs."
+        return
+
+    # Format the conversation history for the model.
+    # Most models expect a list of dictionaries with "role" and "content".
+    messages = []
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+
+    try:
+        # Use the client to generate a streaming response.
+        # This provides a much better user experience than waiting for the full response.
+        response_stream = client.chat_completion(
+            messages=messages,
+            max_tokens=1024,  # You can adjust this value
+            stream=True
+        )
+
+        # Yield each token from the stream as it arrives.
+        full_response = ""
+        for token in response_stream:
+            if token.choices and token.choices[0].delta.content:
+                chunk = token.choices[0].delta.content
+                full_response += chunk
+                yield full_response
+
+    except Exception as e:
+        print(f"An error occurred during model inference: {e}")
+        yield f"Sorry, an error occurred: {e}"
+
+# --- Gradio Interface Setup ---
+with gr.Blocks(theme=gr.themes.Slate(), fill_height=True) as demo:
+    with gr.Sidebar():
+        gr.Markdown("<h1>Inference Provider</h1>")
+        gr.Markdown(
+            "This Space showcases the <strong>Orion-zhen/Qwen2.5-7B-Instruct-Uncensored</strong> model. "
+            "The backend is an explicit Gradio app for API stability."
+        )
+        gr.Markdown("---")
+        gr.Markdown("⚙️ **Backend Status:** Running explicit `gr.ChatInterface`.")
+
+    gr.ChatInterface(
+        fn=predict,
+        title="Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
+        description="A stable chat interface for the Orion-zhen model.",
+        examples=[
+            ["What is the capital of Pakistan?"],
+            ["Tell me a joke about calculus."],
+            ["Explain gravity to a 5-year-old."],
+        ],
+        cache_examples=False,
+    )
+
+# --- Launch the Application ---
+if __name__ == "__main__":
+    demo.launch()
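
For reference, a minimal sketch of the history-to-messages conversion that predict performs before calling chat_completion. The example history and variable names below are illustrative only (they are not part of the committed file); the shape assumes the [user, assistant] pair format that gr.ChatInterface passes to the function by default.

    # Hypothetical one-turn history in the [user, assistant] pair format.
    example_history = [
        ["Hi", "Hello! How can I help?"],
    ]
    new_message = "What is the capital of Pakistan?"

    # Rebuild the role/content message list exactly as predict does.
    messages = []
    for user_msg, bot_msg in example_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": new_message})

    # messages is now:
    # [{"role": "user", "content": "Hi"},
    #  {"role": "assistant", "content": "Hello! How can I help?"},
    #  {"role": "user", "content": "What is the capital of Pakistan?"}]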