"""Gradio UI for The DETERMINATOR agent with MCP server support.""" import os from collections.abc import AsyncGenerator from typing import Any import gradio as gr import numpy as np from gradio.components.multimodal_textbox import MultimodalPostprocess # Try to import HuggingFace support (may not be available in all pydantic-ai versions) # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires # pydantic-ai with huggingface extra or pydantic-ai-slim[huggingface] # There are two ways to use HuggingFace: # 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally) # 2. Local models: Would use transformers directly (not via pydantic-ai) try: from huggingface_hub import AsyncInferenceClient from pydantic_ai.models.huggingface import HuggingFaceModel from pydantic_ai.providers.huggingface import HuggingFaceProvider _HUGGINGFACE_AVAILABLE = True except ImportError: HuggingFaceModel = None # type: ignore[assignment, misc] HuggingFaceProvider = None # type: ignore[assignment, misc] AsyncInferenceClient = None # type: ignore[assignment, misc] _HUGGINGFACE_AVAILABLE = False from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler from src.orchestrator_factory import create_orchestrator from src.services.audio_processing import get_audio_service from src.services.multimodal_processing import get_multimodal_service import structlog from src.tools.clinicaltrials import ClinicalTrialsTool from src.tools.europepmc import EuropePMCTool from src.tools.pubmed import PubMedTool from src.tools.search_handler import SearchHandler from src.tools.neo4j_search import Neo4jSearchTool from src.utils.config import settings from src.utils.models import AgentEvent, OrchestratorConfig logger = structlog.get_logger() def configure_orchestrator( use_mock: bool = False, mode: str = "simple", oauth_token: str | None = None, hf_model: str | None = None, hf_provider: str | None = None, graph_mode: str | None = None, use_graph: bool = True, ) -> tuple[Any, str]: """ Create an orchestrator instance. Args: use_mock: If True, use MockJudgeHandler (no API key needed) mode: Orchestrator mode ("simple", "advanced", "iterative", "deep", or "auto") oauth_token: Optional OAuth token from HuggingFace login hf_model: Selected HuggingFace model ID hf_provider: Selected inference provider graph_mode: Graph research mode ("iterative", "deep", or "auto") - used when mode is graph-based use_graph: Whether to use graph execution (True) or agent chains (False) Returns: Tuple of (Orchestrator instance, backend_name) """ # Create orchestrator config config = OrchestratorConfig( max_iterations=10, max_results_per_tool=10, ) # Create search tools with RAG enabled # Pass OAuth token to SearchHandler so it can be used by RAG service tools = [Neo4jSearchTool(),PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()] # Add web search tool if available from src.tools.web_search_factory import create_web_search_tool web_search_tool = create_web_search_tool() if web_search_tool is not None: tools.append(web_search_tool) logger.info("Web search tool added to search handler", provider=web_search_tool.name) search_handler = SearchHandler( tools=tools, timeout=config.search_timeout, include_rag=True, auto_ingest_to_rag=True, oauth_token=oauth_token, ) # Create judge (mock, real, or free tier) judge_handler: JudgeHandler | MockJudgeHandler | HFInferenceJudgeHandler backend_info = "Unknown" # 1. Forced Mock (Unit Testing) if use_mock: judge_handler = MockJudgeHandler() backend_info = "Mock (Testing)" # 2. API Key (OAuth or Env) - HuggingFace only (OAuth provides HF token) # Priority: oauth_token > env vars # On HuggingFace Spaces, OAuth token is available via request.oauth_token effective_api_key = oauth_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY") if effective_api_key: # We have an API key (OAuth or env) - use pydantic-ai with JudgeHandler # This uses HuggingFace's own inference API, not third-party providers model: Any | None = None # Use selected model or fall back to env var/settings model_name = ( hf_model or os.getenv("HF_MODEL") or settings.huggingface_model or "Qwen/Qwen3-Next-80B-A3B-Thinking" ) if not _HUGGINGFACE_AVAILABLE: raise ImportError( "HuggingFace models are not available in this version of pydantic-ai. " "Please install with: uv add 'pydantic-ai[huggingface]' to use HuggingFace inference providers." ) # Inference API - uses HuggingFace Inference API # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider # HuggingFaceProvider accepts api_key parameter directly # This is consistent with usage in src/utils/llm_factory.py and src/agent_factory/judges.py provider = HuggingFaceProvider(api_key=effective_api_key) # type: ignore[misc] model = HuggingFaceModel(model_name, provider=provider) # type: ignore[misc] backend_info = "API (HuggingFace OAuth)" if oauth_token else "API (Env Config)" judge_handler = JudgeHandler(model=model) # 3. Free Tier (HuggingFace Inference) - NO API KEY AVAILABLE else: # No API key available - use HFInferenceJudgeHandler with public models # HFInferenceJudgeHandler will use HF_TOKEN from env if available, otherwise public models # Note: OAuth token should have been caught in effective_api_key check above # If we reach here, we truly have no API key, so use public models judge_handler = HFInferenceJudgeHandler( model_id=hf_model if hf_model else None, api_key=None, # Will use HF_TOKEN from env if available, otherwise public models ) model_display = hf_model.split("/")[-1] if hf_model else "Default (Public Models)" backend_info = f"Free Tier ({model_display} - Public Models Only)" # Determine effective mode # If mode is already iterative/deep/auto, use it directly # If mode is "graph" or "simple", use graph_mode if provided effective_mode = mode if mode in ("graph", "simple") and graph_mode: effective_mode = graph_mode elif mode == "graph" and not graph_mode: effective_mode = "auto" # Default to auto if graph mode but no graph_mode specified orchestrator = create_orchestrator( search_handler=search_handler, judge_handler=judge_handler, config=config, mode=effective_mode, # type: ignore oauth_token=oauth_token, ) return orchestrator, backend_info def _is_file_path(text: str) -> bool: """Check if text appears to be a file path. Args: text: Text to check Returns: True if text looks like a file path """ import os # Check for common file extensions file_extensions = ['.md', '.pdf', '.txt', '.json', '.csv', '.xlsx', '.docx', '.html'] text_lower = text.lower().strip() # Check if it ends with a file extension if any(text_lower.endswith(ext) for ext in file_extensions): # Check if it's a valid path (absolute or relative) if os.path.sep in text or '/' in text or '\\' in text: return True # Or if it's just a filename with extension if '.' in text and len(text.split('.')) == 2: return True # Check if it's an absolute path if os.path.isabs(text): return True return False def _get_file_name(file_path: str) -> str: """Extract filename from file path. Args: file_path: Full file path Returns: Filename with extension """ import os return os.path.basename(file_path) def event_to_chat_message(event: AgentEvent) -> dict[str, Any]: """ Convert AgentEvent to gr.ChatMessage with metadata for accordion display. Args: event: The AgentEvent to convert Returns: ChatMessage with metadata for collapsible accordion """ # Map event types to accordion titles and determine if pending event_configs: dict[str, dict[str, Any]] = { "started": {"title": "🚀 Starting Research", "status": "done", "icon": "🚀"}, "searching": {"title": "🔍 Searching Literature", "status": "pending", "icon": "🔍"}, "search_complete": {"title": "📚 Search Results", "status": "done", "icon": "📚"}, "judging": {"title": "🧠 Evaluating Evidence", "status": "pending", "icon": "🧠"}, "judge_complete": {"title": "✅ Evidence Assessment", "status": "done", "icon": "✅"}, "looping": {"title": "🔄 Research Iteration", "status": "pending", "icon": "🔄"}, "synthesizing": {"title": "📝 Synthesizing Report", "status": "pending", "icon": "📝"}, "hypothesizing": {"title": "đŸ”Ŧ Generating Hypothesis", "status": "pending", "icon": "đŸ”Ŧ"}, "analyzing": {"title": "📊 Statistical Analysis", "status": "pending", "icon": "📊"}, "analysis_complete": {"title": "📈 Analysis Results", "status": "done", "icon": "📈"}, "streaming": {"title": "📡 Processing", "status": "pending", "icon": "📡"}, "complete": {"title": None, "status": "done", "icon": "🎉"}, # Main response, no accordion "error": {"title": "❌ Error", "status": "done", "icon": "❌"}, } config = event_configs.get( event.type, {"title": f"â€ĸ {event.type}", "status": "done", "icon": "â€ĸ"} ) # For complete events, return main response without accordion if event.type == "complete": # Check if event contains file information content = event.message files: list[str] | None = None # Check event.data for file paths if event.data and isinstance(event.data, dict): # Support both "files" (list) and "file" (single path) keys if "files" in event.data: files = event.data["files"] if isinstance(files, str): files = [files] elif not isinstance(files, list): files = None else: # Filter to only valid file paths files = [f for f in files if isinstance(f, str) and _is_file_path(f)] elif "file" in event.data: file_path = event.data["file"] if isinstance(file_path, str) and _is_file_path(file_path): files = [file_path] # Also check if message itself is a file path (less common, but possible) if not files and isinstance(event.message, str) and _is_file_path(event.message): files = [event.message] # Keep message as text description content = "Report generated. Download available below." # Return as dict format for Gradio Chatbot compatibility result: dict[str, Any] = { "role": "assistant", "content": content, } # Add files if present # Gradio Chatbot supports file paths in content as markdown links # The links will be clickable and downloadable if files: # Validate files exist before including them import os valid_files = [f for f in files if os.path.exists(f)] if valid_files: # Format files for Gradio: include as markdown download links # Gradio ChatInterface automatically renders file links as downloadable files import os file_links = [] for f in valid_files: file_name = _get_file_name(f) try: file_size = os.path.getsize(f) # Format file size (bytes to KB/MB) if file_size < 1024: size_str = f"{file_size} B" elif file_size < 1024 * 1024: size_str = f"{file_size / 1024:.1f} KB" else: size_str = f"{file_size / (1024 * 1024):.1f} MB" file_links.append(f"📎 [Download: {file_name} ({size_str})]({f})") except OSError: # If we can't get file size, just show the name file_links.append(f"📎 [Download: {file_name}]({f})") result["content"] = f"{content}\n\n" + "\n\n".join(file_links) # Also store in metadata for potential future use if "metadata" not in result: result["metadata"] = {} result["metadata"]["files"] = valid_files return result # Build metadata for accordion according to Gradio ChatMessage spec # Metadata keys: title (str), status ("pending"|"done"), log (str), duration (float) # See: https://www.gradio.app/guides/agents-and-tool-usage metadata: dict[str, Any] = {} # Title is required for accordion display - must be string if config["title"]: metadata["title"] = str(config["title"]) # Set status (pending shows spinner, done is collapsed) # Must be exactly "pending" or "done" per Gradio spec if config["status"] == "pending": metadata["status"] = "pending" elif config["status"] == "done": metadata["status"] = "done" # Add duration if available in data (must be float) if event.data and isinstance(event.data, dict) and "duration" in event.data: duration = event.data["duration"] if isinstance(duration, int | float): metadata["duration"] = float(duration) # Add log info (iteration number, etc.) - must be string log_parts: list[str] = [] if event.iteration > 0: log_parts.append(f"Iteration {event.iteration}") if event.data and isinstance(event.data, dict): if "tool" in event.data: log_parts.append(f"Tool: {event.data['tool']}") if "results_count" in event.data: log_parts.append(f"Results: {event.data['results_count']}") if log_parts: metadata["log"] = " | ".join(log_parts) # Return as dict format for Gradio Chatbot compatibility # According to Gradio docs: https://www.gradio.app/guides/agents-and-tool-usage # ChatMessage format: {"role": "assistant", "content": "...", "metadata": {...}} # Metadata must have "title" key for accordion display # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float) result: dict[str, Any] = { "role": "assistant", "content": event.message, } # Only add metadata if it has a title (required for accordion display) # Ensure metadata values match Gradio's expected types if metadata and metadata.get("title"): # Ensure status is valid if present if "status" in metadata: status = metadata["status"] if status not in ("pending", "done"): metadata["status"] = "done" # Default to "done" if invalid result["metadata"] = metadata return result def extract_oauth_info(request: gr.Request | None) -> tuple[str | None, str | None]: """ Extract OAuth token and username from Gradio request. Args: request: Gradio request object containing OAuth information Returns: Tuple of (oauth_token, oauth_username) """ oauth_token: str | None = None oauth_username: str | None = None if request is None: return oauth_token, oauth_username # Try multiple ways to access OAuth token (Gradio API may vary) # Pattern 1: request.oauth_token.token if hasattr(request, "oauth_token") and request.oauth_token is not None: if hasattr(request.oauth_token, "token"): oauth_token = request.oauth_token.token elif isinstance(request.oauth_token, str): oauth_token = request.oauth_token # Pattern 2: request.headers (fallback) elif hasattr(request, "headers"): # OAuth token might be in headers auth_header = request.headers.get("authorization") or request.headers.get("Authorization") if auth_header and auth_header.startswith("Bearer "): oauth_token = auth_header.replace("Bearer ", "") # Access username from request if hasattr(request, "username") and request.username: oauth_username = request.username # Also try accessing via oauth_profile if available elif hasattr(request, "oauth_profile") and request.oauth_profile is not None: if hasattr(request.oauth_profile, "username"): oauth_username = request.oauth_profile.username elif hasattr(request.oauth_profile, "name"): oauth_username = request.oauth_profile.name return oauth_token, oauth_username async def yield_auth_messages( oauth_username: str | None, oauth_token: str | None, has_huggingface: bool, mode: str, ) -> AsyncGenerator[dict[str, Any], None]: """ Yield authentication and mode status messages. Args: oauth_username: OAuth username if available oauth_token: OAuth token if available has_huggingface: Whether HuggingFace credentials are available mode: Orchestrator mode Yields: ChatMessage objects with authentication status """ # Show user greeting if logged in via OAuth if oauth_username: yield { "role": "assistant", "content": f"👋 **Welcome, {oauth_username}!** Using your HuggingFace account.\n\n", } # Advanced mode is not currently supported with HuggingFace inference # For now, we only support simple mode with HuggingFace if mode == "advanced": yield { "role": "assistant", "content": ( "âš ī¸ **Note**: Advanced mode is not available with HuggingFace inference providers. " "Falling back to simple mode.\n\n" ), } # Inform user about authentication status if oauth_token: yield { "role": "assistant", "content": ( "🔐 **Using HuggingFace OAuth token** - " "Authenticated via your HuggingFace account.\n\n" ), } elif not has_huggingface: # No keys at all - will use FREE HuggingFace Inference (public models) yield { "role": "assistant", "content": ( "🤗 **Free Tier**: Using HuggingFace Inference (Llama 3.1 / Mistral) for AI analysis.\n" "For premium models or higher rate limits, sign in with HuggingFace above.\n\n" ), } async def handle_orchestrator_events( orchestrator: Any, message: str, ) -> AsyncGenerator[dict[str, Any], None]: """ Handle orchestrator events and yield ChatMessages. Args: orchestrator: The orchestrator instance message: The research question Yields: ChatMessage objects from orchestrator events """ # Track pending accordions for real-time updates pending_accordions: dict[str, str] = {} # title -> accumulated content async for event in orchestrator.run(message): # Convert event to ChatMessage with metadata chat_msg = event_to_chat_message(event) # Handle complete events (main response) if event.type == "complete": # Close any pending accordions first if pending_accordions: for title, content in pending_accordions.items(): yield { "role": "assistant", "content": content.strip(), "metadata": {"title": title, "status": "done"}, } pending_accordions.clear() # Yield final response (no accordion for main response) # chat_msg is already a dict from event_to_chat_message yield chat_msg continue # Handle events with metadata (accordions) # chat_msg is always a dict from event_to_chat_message metadata: dict[str, Any] = chat_msg.get("metadata", {}) if metadata: msg_title: str | None = metadata.get("title") msg_status: str | None = metadata.get("status") if msg_title: # For pending operations, accumulate content and show spinner if msg_status == "pending": if msg_title not in pending_accordions: pending_accordions[msg_title] = "" # chat_msg is always a dict, so access content via key content = chat_msg.get("content", "") pending_accordions[msg_title] += content + "\n" # Yield updated accordion with accumulated content yield { "role": "assistant", "content": pending_accordions[msg_title].strip(), "metadata": chat_msg.get("metadata", {}), } elif msg_title in pending_accordions: # Combine pending content with final content # chat_msg is always a dict, so access content via key content = chat_msg.get("content", "") final_content = pending_accordions[msg_title] + content del pending_accordions[msg_title] yield { "role": "assistant", "content": final_content.strip(), "metadata": {"title": msg_title, "status": "done"}, } else: # New done accordion (no pending state) yield chat_msg else: # No title, yield as-is yield chat_msg else: # No metadata, yield as plain message yield chat_msg async def research_agent( message: str | MultimodalPostprocess, history: list[dict[str, Any]], mode: str = "simple", hf_model: str | None = None, hf_provider: str | None = None, graph_mode: str = "auto", use_graph: bool = True, enable_image_input: bool = True, enable_audio_input: bool = True, tts_voice: str = "af_heart", tts_speed: float = 1.0, oauth_token: gr.OAuthToken | None = None, oauth_profile: gr.OAuthProfile | None = None, ) -> AsyncGenerator[dict[str, Any] | tuple[dict[str, Any], tuple[int, np.ndarray] | None], None]: """ Gradio chat function that runs the research agent. Args: message: User's research question (str or MultimodalPostprocess with text/files) history: Chat history (Gradio format) mode: Orchestrator mode ("simple" or "advanced") hf_model: Selected HuggingFace model ID (from dropdown) hf_provider: Selected inference provider (from dropdown) oauth_token: Gradio OAuth token (None if user not logged in) oauth_profile: Gradio OAuth profile (None if user not logged in) Yields: ChatMessage objects with metadata for accordion display, optionally with audio output """ import structlog logger = structlog.get_logger() # REQUIRE LOGIN BEFORE USE # Extract OAuth token and username using Gradio's OAuth types # According to Gradio docs: OAuthToken and OAuthProfile are None if user not logged in token_value: str | None = None username: str | None = None if oauth_token is not None: # OAuthToken has a .token attribute containing the access token if hasattr(oauth_token, "token"): token_value = oauth_token.token elif isinstance(oauth_token, str): # Handle case where oauth_token is already a string (shouldn't happen but defensive) token_value = oauth_token else: token_value = None if oauth_profile is not None: # OAuthProfile has .username, .name, .profile_image attributes username = ( oauth_profile.username if hasattr(oauth_profile, "username") and oauth_profile.username else ( oauth_profile.name if hasattr(oauth_profile, "name") and oauth_profile.name else None ) ) # Check if user is logged in (OAuth token or env var) # Fallback to env vars for local development or Spaces with HF_TOKEN secret has_authentication = bool( token_value or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY") ) if not has_authentication: yield { "role": "assistant", "content": ( "🔐 **Authentication Required**\n\n" "Please **sign in with HuggingFace** using the login button at the top of the page " "before using this application.\n\n" "The login button is required to access the AI models and research tools." ), }, None return # Process multimodal input (text + images + audio) processed_text = "" audio_input_data: tuple[int, np.ndarray] | None = None if isinstance(message, dict): # MultimodalPostprocess format: {"text": str, "files": list[FileData], "audio": tuple | None} processed_text = message.get("text", "") or "" files = message.get("files", []) # Check for audio input in message (Gradio may include it as a separate field) audio_input_data = message.get("audio") or None # Process multimodal input (images, audio files, audio input) # Process if we have files (and image input enabled) or audio input (and audio input enabled) # Use UI settings from function parameters if (files and enable_image_input) or (audio_input_data is not None and enable_audio_input): try: multimodal_service = get_multimodal_service() # Prepend audio/image text to original text (prepend_multimodal=True) # Filter files and audio based on UI settings processed_text = await multimodal_service.process_multimodal_input( processed_text, files=files if enable_image_input else [], audio_input=audio_input_data if enable_audio_input else None, hf_token=token_value, prepend_multimodal=True, # Prepend audio/image text to text input ) except Exception as e: logger.warning("multimodal_processing_failed", error=str(e)) # Continue with text-only input else: # Plain string message processed_text = str(message) if message else "" if not processed_text.strip(): yield { "role": "assistant", "content": "Please enter a research question or provide an image/audio input.", }, None return # Check available keys (use token_value instead of oauth_token) has_huggingface = bool(os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY") or token_value) # Adjust mode if needed effective_mode = mode if mode == "advanced": effective_mode = "simple" # Yield authentication and mode status messages async for msg in yield_auth_messages(username, token_value, has_huggingface, mode): yield msg # Run the agent and stream events try: # use_mock=False - let configure_orchestrator decide based on available keys # It will use: OAuth token > Env vars > HF Inference (free tier) # Convert empty strings from Textbox to None for defaults model_id = hf_model if hf_model and hf_model.strip() else None provider_name = hf_provider if hf_provider and hf_provider.strip() else None orchestrator, backend_name = configure_orchestrator( use_mock=False, # Never use mock in production - HF Inference is the free fallback mode=effective_mode, oauth_token=token_value, # Use extracted token value hf_model=model_id, # None will use defaults in configure_orchestrator hf_provider=provider_name, # None will use defaults in configure_orchestrator graph_mode=graph_mode if graph_mode else None, use_graph=use_graph, ) yield { "role": "assistant", "content": f"🧠 **Backend**: {backend_name}\n\n", } # Handle orchestrator events and generate audio output audio_output_data: tuple[int, np.ndarray] | None = None final_message = "" async for msg in handle_orchestrator_events(orchestrator, processed_text): # Track final message for TTS if isinstance(msg, dict) and msg.get("role") == "assistant": content = msg.get("content", "") metadata = msg.get("metadata", {}) # This is the main response (not an accordion) if no title in metadata if content and not metadata.get("title"): final_message = content # Yield without audio for intermediate messages yield msg, None # Generate audio output for final response if final_message and settings.enable_audio_output: try: audio_service = get_audio_service() # Use UI-configured voice and speed, fallback to settings defaults audio_output_data = await audio_service.generate_audio_output( final_message, voice=tts_voice or settings.tts_voice, speed=tts_speed if tts_speed else settings.tts_speed, ) except Exception as e: logger.warning("audio_synthesis_failed", error=str(e)) # Continue without audio output # If we have audio output, we need to yield it with the final message # Note: The final message was already yielded above, so we yield None, audio_output_data # This will update the audio output component if audio_output_data is not None: yield None, audio_output_data except Exception as e: # Return error message without metadata to avoid issues during example caching # Metadata can cause validation errors when Gradio caches examples # Gradio Chatbot requires plain text - remove all markdown and special characters error_msg = str(e).replace("**", "").replace("*", "").replace("`", "") # Ensure content is a simple string without any special formatting yield { "role": "assistant", "content": f"Error: {error_msg}. Please check your configuration and try again.", }, None def create_demo() -> gr.Blocks: """ Create the Gradio demo interface with MCP support and OAuth login. Returns: Configured Gradio Blocks interface with MCP server and OAuth enabled """ with gr.Blocks(title="đŸ”Ŧ The DETERMINATOR", fill_height=True) as demo: # Add sidebar with login button and information # Reference: Working implementation pattern from Gradio docs with gr.Sidebar(): gr.Markdown("# 🔐 Authentication") gr.Markdown( "**Sign in with Hugging Face** to access AI models and research tools.\n\n" "This application requires authentication to use the inference API." ) gr.LoginButton("Sign in with Hugging Face") gr.Markdown("---") gr.Markdown("### â„šī¸ About") # noqa: RUF001 gr.Markdown( "**The DETERMINATOR** - Generalist Deep Research Agent\n\n" "A powerful research agent that stops at nothing until finding precise answers to complex questions.\n\n" "**Available Sources**:\n" "- Web Search (general knowledge)\n" "- PubMed (biomedical literature)\n" "- ClinicalTrials.gov (clinical trials)\n" "- Europe PMC (preprints & papers)\n" "- RAG (semantic search)\n\n" "**Automatic Detection**: Automatically determines if medical knowledge sources are needed for your query.\n\n" "âš ī¸ **Research tool only** - Synthesizes evidence but cannot provide medical advice." ) gr.Markdown("---") # Settings Section - Organized in Accordions gr.Markdown("## âš™ī¸ Settings") # Research Configuration Accordion with gr.Accordion("đŸ”Ŧ Research Configuration", open=True): mode_radio = gr.Radio( choices=["simple", "advanced", "iterative", "deep", "auto"], value="simple", label="Orchestrator Mode", info=( "Simple: Linear search-judge loop | " "Advanced: Multi-agent (OpenAI) | " "Iterative: Knowledge-gap driven | " "Deep: Parallel sections | " "Auto: Smart routing" ), ) graph_mode_radio = gr.Radio( choices=["iterative", "deep", "auto"], value="auto", label="Graph Research Mode", info="Iterative: Single loop | Deep: Parallel sections | Auto: Detect from query", ) use_graph_checkbox = gr.Checkbox( value=True, label="Use Graph Execution", info="Enable graph-based workflow execution", ) # Model and Provider selection gr.Markdown("### 🤖 Model & Provider") # Popular models list popular_models = [ "", # Empty = use default "Qwen/Qwen3-Next-80B-A3B-Thinking", "Qwen/Qwen3-235B-A22B-Instruct-2507", "zai-org/GLM-4.5-Air", "meta-llama/Llama-3.1-8B-Instruct", "meta-llama/Llama-3.1-70B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2", "google/gemma-2-9b-it", ] hf_model_dropdown = gr.Dropdown( choices=popular_models, value="", # Empty string - will be converted to None in research_agent label="Reasoning Model", info="Select a HuggingFace model (leave empty for default)", allow_custom_value=True, # Allow users to type custom model IDs ) # Provider list from README providers = [ "", # Empty string = auto-select "nebius", "together", "scaleway", "hyperbolic", "novita", "nscale", "sambanova", "ovh", "fireworks", ] hf_provider_dropdown = gr.Dropdown( choices=providers, value="", # Empty string - will be converted to None in research_agent label="Inference Provider", info="Select inference provider (leave empty for auto-select)", ) # Multimodal Input Configuration Accordion with gr.Accordion("📷 Multimodal Input", open=False): enable_image_input_checkbox = gr.Checkbox( value=settings.enable_image_input, label="Enable Image Input (OCR)", info="Extract text from uploaded images using OCR", ) enable_audio_input_checkbox = gr.Checkbox( value=settings.enable_audio_input, label="Enable Audio Input (STT)", info="Transcribe audio recordings using speech-to-text", ) # Audio/TTS Configuration Accordion with gr.Accordion("🔊 Audio Output", open=False): enable_audio_output_checkbox = gr.Checkbox( value=settings.enable_audio_output, label="Enable Audio Output", info="Generate audio responses using TTS", ) tts_voice_dropdown = gr.Dropdown( choices=[ "af_heart", "af_bella", "af_nicole", "af_aoede", "af_kore", "af_sarah", "af_nova", "af_sky", "af_alloy", "af_jessica", "af_river", "am_michael", "am_fenrir", "am_puck", "am_echo", "am_eric", "am_liam", "am_onyx", "am_santa", "am_adam", ], value=settings.tts_voice, label="TTS Voice", info="Select TTS voice (American English voices: af_*, am_*)", ) tts_speed_slider = gr.Slider( minimum=0.5, maximum=2.0, value=settings.tts_speed, step=0.1, label="TTS Speech Speed", info="Adjust TTS speech speed (0.5x to 2.0x)", ) tts_gpu_dropdown = gr.Dropdown( choices=["T4", "A10", "A100", "L4", "L40S"], value=settings.tts_gpu or "T4", label="TTS GPU Type", info="Modal GPU type for TTS (T4 is cheapest, A100 is fastest). Note: GPU changes require app restart.", visible=settings.modal_available, interactive=False, # GPU type set at function definition time, requires restart ) # Audio output component (for TTS response) - moved to sidebar audio_output = gr.Audio( label="🔊 Audio Response", visible=settings.enable_audio_output, ) # Update TTS component visibility based on enable_audio_output_checkbox # This must be after audio_output is defined def update_tts_visibility(enabled: bool) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: """Update visibility of TTS components based on enable checkbox.""" return ( gr.update(visible=enabled), gr.update(visible=enabled), gr.update(visible=enabled), ) enable_audio_output_checkbox.change( fn=update_tts_visibility, inputs=[enable_audio_output_checkbox], outputs=[tts_voice_dropdown, tts_speed_slider, audio_output], ) # Chat interface with multimodal support # Examples are provided but will NOT run at startup (cache_examples=False) # Users must log in first before using examples or submitting queries gr.ChatInterface( fn=research_agent, multimodal=True, # Enable multimodal input (text + images + audio) title="đŸ”Ŧ The DETERMINATOR", description=( "*Generalist Deep Research Agent — stops at nothing until finding precise answers to complex questions*\n\n" "---\n" "**The DETERMINATOR** uses iterative search-and-judge loops to comprehensively investigate any research question. " "It automatically determines if medical knowledge sources (PubMed, ClinicalTrials.gov) are needed and adapts its search strategy accordingly.\n\n" "**Key Features**:\n" "- 🔍 Multi-source search (Web, PubMed, ClinicalTrials.gov, Europe PMC, RAG)\n" "- 🧠 Automatic medical knowledge detection\n" "- 🔄 Iterative refinement until precise answers are found\n" "- âšī¸ Stops only at configured limits (budget, time, iterations)\n" "- 📊 Evidence synthesis with citations\n\n" "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`\n\n" "**📷🎤 Multimodal Input Support**:\n" "- **Images**: Click the 📷 image icon in the textbox to upload images (OCR)\n" "- **Audio**: Click the 🎤 microphone icon in the textbox to record audio (STT)\n" "- **Files**: Drag & drop or click to upload image/audio files\n" "- **Text**: Type your research questions directly\n\n" "💡 **Tip**: Look for the 📷 and 🎤 icons in the text input box below!\n\n" "Configure multimodal inputs in the sidebar settings.\n\n" "**âš ī¸ Authentication Required**: Please **sign in with HuggingFace** above before using this application." ), examples=[ # When additional_inputs are provided, examples must be lists of lists # Each inner list: [message, mode, hf_model, hf_provider, graph_mode, multimodal_enabled] # Using actual model IDs and provider names from inference_models.py # Note: Provider is optional - if empty, HF will auto-select # These examples will NOT run at startup - users must click them after logging in # All examples require deep iterative search and information retrieval across multiple sources [ # Medical research example (only one medical example) "Create a comprehensive report on Long COVID treatments including clinical trials, mechanisms, and safety.", "deep", "zai-org/GLM-4.5-Air", "nebius", "deep", True, ], [ # Technical/Engineering example requiring deep research "Analyze the current state of quantum computing architectures: compare different qubit technologies, error correction methods, and scalability challenges across major platforms including IBM, Google, and IonQ.", "deep", "Qwen/Qwen3-Next-80B-A3B-Thinking", "", "deep", True, ], [ # Business/Scientific example requiring iterative search "Investigate the economic and environmental impact of renewable energy transition: analyze cost trends, grid integration challenges, policy frameworks, and market dynamics across solar, wind, and battery storage technologies, in china", "deep", "Qwen/Qwen3-235B-A22B-Instruct-2507", "", "deep", True, ], ], cache_examples=False, # CRITICAL: Disable example caching to prevent examples from running at startup # Examples will only run when user explicitly clicks them (after login) # Note: additional_inputs_accordion is not a valid parameter in Gradio 6.0 ChatInterface # Components will be displayed in the order provided additional_inputs=[ mode_radio, hf_model_dropdown, hf_provider_dropdown, graph_mode_radio, use_graph_checkbox, enable_image_input_checkbox, enable_audio_input_checkbox, tts_voice_dropdown, tts_speed_slider, # Note: gr.OAuthToken and gr.OAuthProfile are automatically passed as function parameters # when user is logged in - they should NOT be added to additional_inputs ], additional_outputs=[audio_output], # Add audio output for TTS ) return demo # type: ignore[no-any-return] def main() -> None: """Run the Gradio app with MCP server enabled.""" demo = create_demo() demo.launch( # server_name="0.0.0.0", # server_port=7860, # share=False, mcp_server=True, # Enable MCP server for Claude Desktop integration ssr_mode=False, # Fix for intermittent loading/hydration issues in HF Spaces ) if __name__ == "__main__": main()