# Hugging Face Space app.py — uploaded by shubhamrooter ("Update app.py", commit 2373c44, verified).
import gradio as gr
import requests
import os
# Configuration
# Model repository on the Hugging Face Hub (a Q6_K GGUF quantized build).
MODEL_REPO = "AlicanKiraz0/Cybersecurity-BaronLLM_Offensive_Security_LLM_Q6_K_GGUF"
# Serverless Inference API endpoint derived from the repo id.
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_REPO}"
# Access token from the environment; defaults to "" when unset, in which
# case requests go out with an empty Bearer token (unauthenticated).
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Headers shared by every API call.
headers = {
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json"
}
def query_model(payload):
    """
    Send *payload* to the Hugging Face Inference API and decode the reply.

    Every failure mode — connection trouble, a non-2xx status, or a body
    that is not valid JSON — is converted into an {"error": "..."} dict,
    so callers always receive data and never see an exception.
    """
    try:
        resp = requests.post(API_URL, headers=headers, json=payload, timeout=30)
        resp.raise_for_status()
        body = resp.json()
    except requests.exceptions.RequestException as exc:
        # Network problems and bad HTTP statuses both land here.
        return {"error": f"API request failed: {str(exc)}"}
    except Exception as exc:
        # Anything else (e.g. an undecodable JSON body) is reported the same way.
        return {"error": f"Unexpected error: {str(exc)}"}
    return body
def generate_response(prompt, max_tokens=150, temperature=0.7):
    """
    Generate a model response for *prompt* via the Inference API.

    Parameters:
        prompt: User prompt text; blank/whitespace-only input short-circuits
            without calling the API.
        max_tokens: Upper bound on generated tokens. Coerced to int because
            Gradio sliders deliver floats (e.g. 150.0), which the API's
            integer ``max_new_tokens`` parameter rejects.
        temperature: Sampling temperature; coerced to float for the same reason.

    Returns:
        The generated text on success, otherwise a human-readable
        error/status message (never raises).
    """
    if not prompt.strip():
        return "Please enter a prompt."
    payload = {
        "inputs": prompt,
        "parameters": {
            # int() cast fixes the float-from-slider issue described above.
            "max_new_tokens": int(max_tokens),
            "temperature": float(temperature),
            "top_p": 0.9,
            "do_sample": True,
            "return_full_text": False
        }
    }
    result = query_model(payload)
    # query_model reports all failures as a dict with an "error" key;
    # check the type explicitly so a successful list result is never
    # confused with an error payload.
    if isinstance(result, dict) and "error" in result:
        error_msg = result["error"]
        if "loading" in error_msg.lower():
            return f"Model is currently loading. Please wait a moment and try again.\n\nError details: {error_msg}"
        return f"Error: {error_msg}"
    # Successful text-generation responses arrive as a list of dicts.
    if isinstance(result, list) and len(result) > 0:
        first = result[0]
        if isinstance(first, dict):
            if "generated_text" in first:
                return first["generated_text"]
            if "text" in first:
                return first["text"]
    return "No response generated. Please try again."
# Create Gradio interface
with gr.Blocks(title="Cybersecurity BaronLLM", theme=gr.themes.Soft()) as demo:
    # Intro banner. NOTE(review): "πŸ”’" looks like a mojibake-encoded lock
    # emoji (UTF-8 bytes read as cp1252) — confirm the intended glyph.
    gr.Markdown("""
# πŸ”’ Cybersecurity BaronLLM
**Offensive Security Language Model**
This interface uses the Cybersecurity BaronLLM model via Hugging Face Inference API.
""")
    with gr.Row():
        # Left column: generation settings and prompt suggestions.
        with gr.Column(scale=1):
            gr.Markdown("### Configuration")
            # Forwarded to the API's max_new_tokens parameter.
            max_tokens = gr.Slider(
                minimum=32,
                maximum=512,
                value=150,
                step=32,
                label="Max Tokens",
                info="Maximum length of response"
            )
            # Sampling temperature forwarded to the API.
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="Higher values = more creative, lower values = more focused"
            )
            gr.Markdown("""
### Example Prompts
- Explain SQL injection techniques
- What are common penetration testing methodologies?
- How to detect XSS attacks?
- Describe network security principles
""")
        # Right column: prompt input, trigger button, and model output.
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Enter your cybersecurity question or prompt:",
                placeholder="Explain SQL injection techniques and prevention methods...",
                lines=5,
                max_lines=10
            )
            generate_btn = gr.Button("πŸ”’ Generate Response", variant="primary", size="lg")
            output = gr.Textbox(
                label="Model Response",
                lines=8,
                show_copy_button=True
            )
    # Examples
    # Clickable example prompts; cache_examples=False means each click
    # calls the live API instead of serving precomputed outputs. Only the
    # prompt is wired in, so sliders fall back to fn defaults here.
    examples = gr.Examples(
        examples=[
            ["What are the most common web application vulnerabilities and how can they be exploited?"],
            ["Explain the difference between white hat, black hat, and gray hat hackers."],
            ["Describe the steps involved in a penetration testing engagement."],
            ["How does a buffer overflow attack work and what are modern defenses against it?"],
            ["What are the key components of a cybersecurity risk assessment?"]
        ],
        inputs=prompt,
        outputs=output,
        fn=generate_response,
        cache_examples=False
    )
    # Event handlers
    # Button click runs generation with the current slider settings.
    generate_btn.click(
        fn=generate_response,
        inputs=[prompt, max_tokens, temperature],
        outputs=output
    )
    # Also generate on Enter key
    prompt.submit(
        fn=generate_response,
        inputs=[prompt, max_tokens, temperature],
        outputs=output
    )
if __name__ == "__main__":
    # Bind to all interfaces on the standard Spaces port; no public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)