Deadmon committed
Commit 3b165d3 · verified · 1 Parent(s): 566608f

Update app.py

Files changed (1)
  1. app.py +6 -87
app.py CHANGED
@@ -1,91 +1,10 @@
 import gradio as gr
-import os
-from huggingface_hub import InferenceClient
-from typing import Generator
 
-# --- Model Configuration ---
-# The ID of the model we want to use from the Hugging Face Hub.
-MODEL_ID = "Deadmon/Orion-zhen-Qwen2.5-7B-Instruct-Uncensored"
-
-# --- Hugging Face Token ---
-# The Gradio app will automatically use the Hugging Face token of the
-# logged-in user if the Space is private. We can also explicitly use
-# a token stored in the Space's secrets.
-HF_TOKEN = os.environ.get("HF_TOKEN")
-
-# --- Initialize the Inference Client ---
-# The client will be used to make API calls to the model.
-# We assume the model is served via a compatible Inference API endpoint,
-# which is standard for providers on the Hub.
-try:
-    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
-except Exception as e:
-    # If the client fails to initialize, we'll show an error.
-    # This can happen if the token is missing or invalid for a private model.
-    print(f"Error initializing InferenceClient: {e}")
-    client = None
-
-# --- Model Prediction Function ---
-# This function is called by the Gradio ChatInterface.
-# It takes the user's message and the conversation history,
-# and returns the model's response as a streaming generator.
-def predict(message: str, history: list[list[str]]) -> Generator[str, None, None]:
-    if client is None:
-        yield "Error: Could not connect to the model. Please check the server logs."
-        return
-
-    # Format the conversation history for the model.
-    # Most models expect a list of dictionaries with "role" and "content".
-    messages = []
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-    messages.append({"role": "user", "content": message})
-
-    try:
-        # Use the client to generate a streaming response.
-        # This provides a much better user experience than waiting for the full response.
-        response_stream = client.chat_completion(
-            messages=messages,
-            max_tokens=1024,  # You can adjust this value
-            stream=True
-        )
-
-        # Yield each token from the stream as it arrives.
-        full_response = ""
-        for token in response_stream:
-            if token.choices and token.choices[0].delta.content:
-                chunk = token.choices[0].delta.content
-                full_response += chunk
-                yield full_response
-
-    except Exception as e:
-        print(f"An error occurred during model inference: {e}")
-        yield f"Sorry, an error occurred: {e}"
-
-# --- Gradio Interface Setup ---
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
-        gr.Markdown("<h1>Inference Provider</h1>")
-        gr.Markdown(
-            "This Space showcases the <strong>Orion-zhen/Qwen2.5-7B-Instruct-Uncensored</strong> model. "
-            "The backend is an explicit Gradio app for API stability."
-        )
-        gr.Markdown("---")
-        gr.Markdown("⚙️ **Backend Status:** Running explicit `gr.ChatInterface`.")
-
-    gr.ChatInterface(
-        fn=predict,
-        title="Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
-        description="A stable chat interface for the Orion-zhen model.",
-        examples=[
-            ["What is the capital of Pakistan?"],
-            ["Tell me a joke about calculus."],
-            ["Explain gravity to a 5-year-old."],
-        ],
-        cache_examples=False,
-    )
-
-# --- Launch the Application ---
-if __name__ == "__main__":
-    demo.launch()
+        gr.Markdown("# Inference Provider")
+        gr.Markdown("This Space showcases the Orion-zhen/Qwen2.5-7B-Instruct-Uncensored model, served by the featherless-ai API. Sign in with your Hugging Face account to use this API.")
+        button = gr.LoginButton("Sign in")
+        gr.load("models/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", accept_token=button, provider="featherless-ai")
+
+demo.launch()
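
For reference, here is the complete app.py as it stands after this commit, reconstructed from the context and added lines above. Indentation is inferred from the Blocks/Sidebar structure; the comments are my gloss on what accept_token and provider do in recent Gradio releases, not part of the committed file:

import gradio as gr

with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Inference Provider")
        gr.Markdown("This Space showcases the Orion-zhen/Qwen2.5-7B-Instruct-Uncensored model, served by the featherless-ai API. Sign in with your Hugging Face account to use this API.")
        # LoginButton exposes the visiting user's Hugging Face token after sign-in.
        button = gr.LoginButton("Sign in")
        # gr.load() builds the chat UI for the Hub model; accept_token=button forwards
        # the signed-in user's token, and provider routes inference calls to featherless-ai.
        gr.load("models/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", accept_token=button, provider="featherless-ai")

demo.launch()

The net effect of the commit: the hand-rolled InferenceClient backend, history formatting, and streaming predict() function are all replaced by a single gr.load() call, which delegates token handling, provider routing, and the chat UI to Gradio.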