shubhamrooter committed on
Commit 2373c44 · verified · 1 Parent(s): 3a4d669

Update app.py

Files changed (1)
app.py +121 -122
app.py CHANGED
@@ -1,150 +1,149 @@
 import gradio as gr
-from transformers import pipeline
-import threading
-import time

 # Configuration
-MODEL_NAME = "trendmicro-ailab/Llama-Primus-Reasoning"

-class FastModelHandler:
-    def __init__(self):
-        self.pipe = None
-        self.loaded = False
-        self.loading = False
-        self.status = "🔄 Model loading..."
-        # Auto-start loading
-        self.start_loading()

-    def start_loading(self):
-        """Load the model in the background"""
-        def load_in_background():
-            self.loading = True
-            self.status = "🔄 Model loading..."
-            try:
-                print("🚀 Model loading started...")
-                # Simple pipeline - fastest approach
-                self.pipe = pipeline(
-                    "text-generation",
-                    model=MODEL_NAME,
-                    device_map="auto"
-                )
-                self.loaded = True
-                self.loading = False
-                self.status = "✅ Model ready! Type a prompt and generate"
-                print("✅ Model loaded successfully!")
-            except Exception as e:
-                print(f"❌ GPU Error: {e}")
-                # Fallback to CPU
-                try:
-                    self.pipe = pipeline(
-                        "text-generation",
-                        model=MODEL_NAME,
-                        device="cpu"
-                    )
-                    self.loaded = True
-                    self.loading = False
-                    self.status = "✅ Model loaded on CPU!"
-                    print("✅ Model loaded on CPU!")
-                except Exception as e2:
-                    print(f"❌ CPU loading failed: {e2}")
-                    self.status = "❌ Model loading failed"
-
-        thread = threading.Thread(target=load_in_background, daemon=True)
-        thread.start()

-    def get_status(self):
-        """Return the current status"""
-        return self.status

-    def generate_response(self, prompt, max_length=150):
-        """Generate a quick response"""
-        if not self.loaded:
-            return f"{self.status}\n\nPlease wait a moment..."
-
-        try:
-            start_time = time.time()
-
-            result = self.pipe(
-                prompt,
-                max_length=max_length,
-                temperature=0.7,
-                do_sample=True,
-                num_return_sequences=1,
-                pad_token_id=50256
-            )
-
-            response = result[0]['generated_text']
-            time_taken = time.time() - start_time
-
-            return f"{response}\n\n⏱️ Time: {time_taken:.1f}s"
-
-        except Exception as e:
-            return f"❌ Error: {str(e)}"
-
-# Auto initialize
-model = FastModelHandler()

-# Simple UI - No complex setup
-with gr.Blocks(title="Fast Reasoning AI") as demo:
     gr.Markdown("""
-    # ⚡ Fast Reasoning Model
-    **The model loads automatically!**

-    👇 **Just type a prompt → press Generate → you'll get a response!**
     """)

-    # Status display
-    status_text = gr.Textbox(
-        label="Status",
-        value=model.get_status(),
-        interactive=False
-    )
-
     with gr.Row():
-        with gr.Column():
-            prompt_box = gr.Textbox(
-                label="Your Question",
-                placeholder="Type your question here...",
-                lines=3
             )

-            with gr.Row():
-                length_control = gr.Slider(50, 250, value=150, label="Response Length")
-                generate_btn = gr.Button("🚀 Generate", variant="primary")
-
-        with gr.Column():
-            response_box = gr.Textbox(
-                label="AI Response",
-                lines=6,
                 show_copy_button=True
             )

-    # Quick examples
-    gr.Examples(
         examples=[
-            ["How do I solve 15 + 27 × 3?"],
-            ["Socrates is a human, all humans are mortal. Socrates?"],
-            ["If a train travels at 60 mph for 2 hours, what distance does it cover?"],
-            ["Give a simple reasoning example"]
         ],
-        inputs=prompt_box,
-        label="Quick Examples - click one"
     )

-    # Refresh button for status
-    refresh_btn = gr.Button("🔄 Refresh Status", variant="secondary")
-
-    # Events
     generate_btn.click(
-        model.generate_response,
-        inputs=[prompt_box, length_control],
-        outputs=response_box
     )

-    refresh_btn.click(
-        model.get_status,
-        outputs=status_text
     )

 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
 import gradio as gr
+import requests
+import os

 # Configuration
+MODEL_REPO = "AlicanKiraz0/Cybersecurity-BaronLLM_Offensive_Security_LLM_Q6_K_GGUF"
+API_URL = f"https://api-inference.huggingface.co/models/{MODEL_REPO}"
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
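+# (HF_TOKEN is read from the environment; on a Space it would typically be
+# provided as a repository secret so that API requests are authenticated.)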

+headers = {
+    "Authorization": f"Bearer {HF_TOKEN}",
+    "Content-Type": "application/json"
+}
+
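+# Note: while the model is cold-starting, the serverless Inference API
+# responds with HTTP 503, which raise_for_status() below converts into the
+# "API request failed" error dict rather than generated text.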
+def query_model(payload):
+    """
+    Query the model using the Hugging Face Inference API
+    """
+    try:
+        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.RequestException as e:
+        return {"error": f"API request failed: {str(e)}"}
+    except Exception as e:
+        return {"error": f"Unexpected error: {str(e)}"}
+
+def generate_response(prompt, max_tokens=150, temperature=0.7):
+    """
+    Generate a response using the model
+    """
+    if not prompt.strip():
+        return "Please enter a prompt."

+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": 0.9,
+            "do_sample": True,
+            "return_full_text": False
+        }
+    }

+    result = query_model(payload)

+    if "error" in result:
+        error_msg = result["error"]
+        if "loading" in error_msg.lower():
+            return f"Model is currently loading. Please wait a moment and try again.\n\nError details: {error_msg}"
+        return f"Error: {error_msg}"
+
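+    # The text-generation task normally returns [{"generated_text": "..."}]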
+    if isinstance(result, list) and len(result) > 0:
+        if "generated_text" in result[0]:
+            return result[0]["generated_text"]
+        elif "text" in result[0]:
+            return result[0]["text"]
+
+    return "No response generated. Please try again."

+# Create Gradio interface
+with gr.Blocks(title="Cybersecurity BaronLLM", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
+    # 🔒 Cybersecurity BaronLLM
+    **Offensive Security Language Model**

+    This interface uses the Cybersecurity BaronLLM model via the Hugging Face Inference API.
     """)

     with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### Configuration")
+            max_tokens = gr.Slider(
+                minimum=32,
+                maximum=512,
+                value=150,
+                step=32,
+                label="Max Tokens",
+                info="Maximum length of response"
             )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature",
+                info="Higher values = more creative, lower values = more focused"
+            )
+
+            gr.Markdown("""
+            ### Example Prompts
+            - Explain SQL injection techniques
+            - What are common penetration testing methodologies?
+            - How to detect XSS attacks?
+            - Describe network security principles
+            """)

+        with gr.Column(scale=2):
+            prompt = gr.Textbox(
+                label="Enter your cybersecurity question or prompt:",
+                placeholder="Explain SQL injection techniques and prevention methods...",
+                lines=5,
+                max_lines=10
+            )
+
+            generate_btn = gr.Button("🔒 Generate Response", variant="primary", size="lg")
+
+            output = gr.Textbox(
+                label="Model Response",
+                lines=8,
                 show_copy_button=True
             )

+    # Examples
+    examples = gr.Examples(
         examples=[
+            ["What are the most common web application vulnerabilities and how can they be exploited?"],
+            ["Explain the difference between white hat, black hat, and gray hat hackers."],
+            ["Describe the steps involved in a penetration testing engagement."],
+            ["How does a buffer overflow attack work and what are modern defenses against it?"],
+            ["What are the key components of a cybersecurity risk assessment?"]
         ],
+        inputs=prompt,
+        outputs=output,
+        fn=generate_response,
+        cache_examples=False
     )

+    # Event handlers
     generate_btn.click(
+        fn=generate_response,
+        inputs=[prompt, max_tokens, temperature],
+        outputs=output
     )

+    # Also generate on Enter key
+    prompt.submit(
+        fn=generate_response,
+        inputs=[prompt, max_tokens, temperature],
+        outputs=output
+    )

 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )
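
For a quick sanity check outside Gradio, a minimal sketch of the same Inference API call that the new app.py makes (assumes HF_TOKEN is set in the environment; the prompt string is illustrative):

import os
import requests

# Same endpoint and payload shape that query_model()/generate_response() use
API_URL = "https://api-inference.huggingface.co/models/AlicanKiraz0/Cybersecurity-BaronLLM_Offensive_Security_LLM_Q6_K_GGUF"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # KeyError if the token is unset
payload = {
    "inputs": "Explain SQL injection techniques and prevention methods.",
    "parameters": {"max_new_tokens": 150, "temperature": 0.7, "return_full_text": False},
}

resp = requests.post(API_URL, headers=headers, json=payload, timeout=30)
resp.raise_for_status()
print(resp.json())  # on success: [{"generated_text": "..."}]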