| | """Application configuration using Pydantic Settings.""" |
| |
|
| | import logging |
| | from typing import Literal |
| |
|
| | import structlog |
| | from pydantic import Field |
| | from pydantic_settings import BaseSettings, SettingsConfigDict |
| |
|
| | from src.config.domain import ResearchDomain |
| | from src.utils.exceptions import ConfigurationError |
| |
|
| |
|
class Settings(BaseSettings):
    """Strongly-typed application settings.

    Values are loaded from environment variables and an optional ``.env``
    file; variable names are matched case-insensitively and unknown
    variables are ignored (see ``model_config``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Active research domain; drives domain-specific behavior elsewhere.
    research_domain: ResearchDomain = ResearchDomain.SEXUAL_HEALTH

    # --- LLM provider selection ---
    openai_api_key: str | None = Field(default=None, description="OpenAI API key")
    llm_provider: Literal["openai", "huggingface"] = Field(
        default="openai", description="Which LLM provider to use"
    )
    openai_model: str = Field(default="gpt-5", description="OpenAI model name")

    huggingface_model: str | None = Field(
        default="Qwen/Qwen2.5-7B-Instruct", description="HuggingFace model name"
    )
    # Read from the HF_TOKEN environment variable via the alias.
    hf_token: str | None = Field(
        default=None, alias="HF_TOKEN", description="HuggingFace API token"
    )

    # --- Embedding models ---
    openai_embedding_model: str = Field(
        default="text-embedding-3-small",
        description="OpenAI embedding model (used by LlamaIndex RAG)",
    )
    local_embedding_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Local sentence-transformers model (used by EmbeddingService)",
    )

    # --- External services ---
    ncbi_api_key: str | None = Field(
        default=None, description="NCBI API key for higher rate limits"
    )

    # --- Orchestration limits ---
    max_iterations: int = Field(default=10, ge=1, le=50)
    advanced_max_rounds: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Max coordination rounds for Advanced mode (default 5 for faster demos)",
    )
    advanced_timeout: float = Field(
        default=600.0,
        ge=60.0,
        le=900.0,
        description="Timeout for Advanced mode in seconds (default 10 min)",
    )
    search_timeout: int = Field(default=30, description="Seconds to wait for search")

    # --- Logging ---
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = "INFO"

    # --- Storage ---
    chroma_db_path: str = Field(default="./chroma_db", description="ChromaDB storage path")

    def get_api_key(self) -> str:
        """Get the API key for the configured provider.

        Returns:
            The API key for ``llm_provider``.

        Raises:
            ConfigurationError: If the provider's key is unset or the
                provider is not one of the supported values.
        """
        provider_lower = self.llm_provider.lower() if self.llm_provider else ""

        if provider_lower == "openai":
            if not self.openai_api_key:
                raise ConfigurationError("OPENAI_API_KEY not set")
            return self.openai_api_key

        # FIX: "huggingface" is a declared provider (see the llm_provider
        # Literal) but previously fell through to "Unknown LLM provider".
        if provider_lower == "huggingface":
            if not self.hf_token:
                raise ConfigurationError("HF_TOKEN not set")
            return self.hf_token

        raise ConfigurationError(f"Unknown LLM provider: {self.llm_provider}")

    def get_openai_api_key(self) -> str:
        """Get OpenAI API key (required for Magentic function calling).

        Raises:
            ConfigurationError: If ``openai_api_key`` is unset.
        """
        if not self.openai_api_key:
            raise ConfigurationError(
                "OPENAI_API_KEY not set. Magentic mode requires OpenAI for function calling. "
                "Use mode='simple' for other providers."
            )
        return self.openai_api_key

    @property
    def has_openai_key(self) -> bool:
        """Check if OpenAI API key is available."""
        return bool(self.openai_api_key)

    @property
    def has_huggingface_key(self) -> bool:
        """Check if HuggingFace token is available."""
        return bool(self.hf_token)

    @property
    def has_any_llm_key(self) -> bool:
        """Check if any LLM API key is available."""
        return self.has_openai_key or self.has_huggingface_key
| |
|
| |
|
def get_settings() -> Settings:
    """Build and return a fresh ``Settings`` instance.

    Kept as a factory (rather than handing out the module-level singleton)
    so tests can patch this function or construct their own settings.
    """
    return Settings()
| |
|
| |
|
def configure_logging(settings: Settings) -> None:
    """Configure structured logging with the configured log level."""
    # Route stdlib logging at the requested threshold. The bare "%(message)s"
    # format is deliberate: structlog's JSON renderer below produces the
    # final payload, so the stdlib formatter must not add its own decoration.
    level = getattr(logging, settings.log_level)
    logging.basicConfig(level=level, format="%(message)s")

    # Processor order matters: filter first, enrich with logger name and
    # level, stamp an ISO timestamp, then render everything as JSON.
    processor_chain = [
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer(),
    ]
    structlog.configure(
        processors=processor_chain,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
    )
| |
|
| |
|
| | |
# Module-level singleton for convenient imports; tests that need isolation
# should call get_settings() to obtain their own instance instead.
settings = get_settings()
| |
|