Orion-zhen-Qwen2.5-7B-Instruct-Uncensored

Sleeping

App Files Community

Deadmon committed on Oct 7

Commit

3b165d3

verified ·

1 Parent(s): 566608f

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -87

app.py CHANGED Viewed

@@ -1,91 +1,10 @@
 import gradio as gr
-import os
-from huggingface_hub import InferenceClient
-from typing import Generator
-# --- Model Configuration ---
-# The ID of the model we want to use from the Hugging Face Hub.
-MODEL_ID = "Deadmon/Orion-zhen-Qwen2.5-7B-Instruct-Uncensored"
-# --- Hugging Face Token ---
-# The Gradio app will automatically use the Hugging Face token of the
-# logged-in user if the Space is private. We can also explicitly use
-# a token stored in the Space's secrets.
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# --- Initialize the Inference Client ---
-# The client will be used to make API calls to the model.
-# We assume the model is served via a compatible Inference API endpoint,
-# which is standard for providers on the Hub.
-try:
-    client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
-except Exception as e:
-    # If the client fails to initialize, we'll show an error.
-    # This can happen if the token is missing or invalid for a private model.
-    print(f"Error initializing InferenceClient: {e}")
-    client = None
-# --- Model Prediction Function ---
-# This function is called by the Gradio ChatInterface.
-# It takes the user's message and the conversation history,
-# and returns the model's response as a streaming generator.
-def predict(message: str, history: list[list[str]]) -> Generator[str, None, None]:
-    if client is None:
-        yield "Error: Could not connect to the model. Please check the server logs."
-        return
-    # Format the conversation history for the model.
-    # Most models expect a list of dictionaries with "role" and "content".
-    messages = []
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-    messages.append({"role": "user", "content": message})
-    try:
-        # Use the client to generate a streaming response.
-        # This provides a much better user experience than waiting for the full response.
-        response_stream = client.chat_completion(
-            messages=messages,
-            max_tokens=1024, # You can adjust this value
-            stream=True
-        )
-        # Yield each token from the stream as it arrives.
-        full_response = ""
-        for token in response_stream:
-            if token.choices and token.choices[0].delta.content:
-                chunk = token.choices[0].delta.content
-                full_response += chunk
-                yield full_response
-    except Exception as e:
-        print(f"An error occurred during model inference: {e}")
-        yield f"Sorry, an error occurred: {e}"
-# --- Gradio Interface Setup ---
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
-        gr.Markdown("<h1>Inference Provider</h1>")
-        gr.Markdown(
-            "This Space showcases the <strong>Orion-zhen/Qwen2.5-7B-Instruct-Uncensored</strong> model. "
-            "The backend is an explicit Gradio app for API stability."
-        )
-        gr.Markdown("---")
-        gr.Markdown("⚙️ **Backend Status:** Running explicit `gr.ChatInterface`.")
-    gr.ChatInterface(
-        fn=predict,
-        title="Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
-        description="A stable chat interface for the Orion-zhen model.",
-        examples=[
-            ["What is the capital of Pakistan?"],
-            ["Tell me a joke about calculus."],
-            ["Explain gravity to a 5-year-old."],
-        ],
-        cache_examples=False,
-    )
-# --- Launch the Application ---
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
+        gr.Markdown("# Inference Provider")
+        gr.Markdown("This Space showcases the Orion-zhen/Qwen2.5-7B-Instruct-Uncensored model, served by the featherless-ai API. Sign in with your Hugging Face account to use this API.")
+        button = gr.LoginButton("Sign in")
+    gr.load("models/Orion-zhen/Qwen2.5-7B-Instruct-Uncensored", accept_token=button, provider="featherless-ai")
+demo.launch()