import os


# Default backend order: Hugging Face Inference API, then local Ollama.
def run_llm(prompt: str) -> str:
    """
    Route a prompt to an available LLM backend.

    - Prefers the Hugging Face Inference API if HF_API_TOKEN is set
    - Otherwise falls back to a local Ollama server (model from OLLAMA_MODEL,
      default "llama2")
    - Returns a static placeholder message if neither backend responds
    """
    # Hugging Face Inference API
    hf_token = os.getenv("HF_API_TOKEN")
    hf_model = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
    if hf_token:
        try:
            import requests

            response = requests.post(
                f"https://api-inference.huggingface.co/models/{hf_model}",
                headers={"Authorization": f"Bearer {hf_token}"},
                json={"inputs": prompt},
                timeout=30,
            )
            if response.status_code == 200:
                # The text-generation endpoint returns a list of results.
                return response.json()[0]["generated_text"]
            return f"⚠️ HF error {response.status_code}: {response.text}"
        except Exception as e:
            return f"⚠️ HF backend error: {e}"

    # Ollama API (local server)
    ollama_model = os.getenv("OLLAMA_MODEL", "llama2")
    try:
        import json
        import requests

        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": ollama_model, "prompt": prompt},
            stream=True,
            timeout=30,
        )
        text = ""
        # Ollama streams newline-delimited JSON objects; accumulate the
        # "response" field from each chunk until the final "done" message.
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode("utf-8"))
            text += data.get("response", "")
            if data.get("done"):
                break
        return text if text else "⚠️ Ollama returned no output"
    except Exception:
        pass

    # Static fallback when no backend is reachable
    return "⚠️ No LLM backend configured. Please set HF_API_TOKEN or run Ollama."
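

# A minimal usage sketch, assuming this module is executed directly and the
# environment variables read by run_llm above (HF_API_TOKEN, HF_MODEL,
# OLLAMA_MODEL) are configured as needed; the prompt text is illustrative.
if __name__ == "__main__":
    demo_prompt = "Explain what a vector database is in one sentence."
    print(run_llm(demo_prompt))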