Deadmon committed on
Commit a8960ed · verified · 1 Parent(s): 4354af8

Create app.py

Files changed (1)
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+import gradio as gr
+import os
+from huggingface_hub import InferenceClient
+from typing import Generator
+
+# --- Model Configuration ---
+# The ID of the model we want to use from the Hugging Face Hub.
+MODEL_ID = "Deadmon/Orion-zhen-Qwen2.5-7B-Instruct-Uncensored"
+
+# --- Hugging Face Token ---
+# The Gradio app will automatically use the Hugging Face token of the
+# logged-in user if the Space is private. We can also explicitly use
+# a token stored in the Space's secrets.
+HF_TOKEN = os.environ.get("HF_TOKEN")
+
+# --- Initialize the Inference Client ---
+# The client will be used to make API calls to the model.
+# We assume the model is served via a compatible Inference API endpoint,
+# which is standard for providers on the Hub.
+try:
+    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+except Exception as e:
+    # If the client fails to initialize, we'll show an error.
+    # This can happen if the token is missing or invalid for a private model.
+    print(f"Error initializing InferenceClient: {e}")
+    client = None
+
+# --- Model Prediction Function ---
+# This function is called by the Gradio ChatInterface.
+# It takes the user's message and the conversation history,
+# and returns the model's response as a streaming generator.
+def predict(message: str, history: list[list[str]]) -> Generator[str, None, None]:
+    if client is None:
+        yield "Error: Could not connect to the model. Please check the server logs."
+        return
+
+    # Format the conversation history for the model.
+    # Most models expect a list of dictionaries with "role" and "content".
+    messages = []
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+
+    try:
+        # Use the client to generate a streaming response.
+        # This provides a much better user experience than waiting for the full response.
+        response_stream = client.chat_completion(
+            messages=messages,
+            max_tokens=1024,  # You can adjust this value
+            stream=True
+        )
+
+        # Yield each token from the stream as it arrives.
+        full_response = ""
+        for token in response_stream:
+            if token.choices and token.choices[0].delta.content:
+                chunk = token.choices[0].delta.content
+                full_response += chunk
+                yield full_response
+
+    except Exception as e:
+        print(f"An error occurred during model inference: {e}")
+        yield f"Sorry, an error occurred: {e}"
+
+# --- Gradio Interface Setup ---
+with gr.Blocks(theme=gr.themes.Slate(), fill_height=True) as demo:
+    with gr.Sidebar():
+        gr.Markdown("<h1>Inference Provider</h1>")
+        gr.Markdown(
+            "This Space showcases the <strong>Orion-zhen/Qwen2.5-7B-Instruct-Uncensored</strong> model. "
+            "The backend is an explicit Gradio app for API stability."
+        )
+        gr.Markdown("---")
+        gr.Markdown("⚙️ **Backend Status:** Running explicit `gr.ChatInterface`.")
+
+    gr.ChatInterface(
+        fn=predict,
+        title="Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
+        description="A stable chat interface for the Orion-zhen model.",
+        examples=[
+            ["What is the capital of Pakistan?"],
+            ["Tell me a joke about calculus."],
+            ["Explain gravity to a 5-year-old."],
+        ],
+        cache_examples=False,
+    )
+
+# --- Launch the Application ---
+if __name__ == "__main__":
+    demo.launch()
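
For reference, a minimal sketch of the history-to-messages conversion that predict performs before calling chat_completion. The example history and variable names below are illustrative only (they are not part of the committed file); the shape assumes the [user, assistant] pair format that gr.ChatInterface passes to the function by default.

    # Hypothetical one-turn history in the [user, assistant] pair format.
    example_history = [
        ["Hi", "Hello! How can I help?"],
    ]
    new_message = "What is the capital of Pakistan?"

    # Rebuild the role/content message list exactly as predict does.
    messages = []
    for user_msg, bot_msg in example_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": new_message})

    # messages is now:
    # [{"role": "user", "content": "Hi"},
    #  {"role": "assistant", "content": "Hello! How can I help?"},
    #  {"role": "user", "content": "What is the capital of Pakistan?"}]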