import os


# Default backend order: Hugging Face Inference API, then local Ollama.
def run_llm(prompt: str) -> str:
    """
    Route a prompt to an available LLM backend.

    - Prefers the Hugging Face Inference API if HF_API_TOKEN is set
    - Otherwise falls back to a local Ollama server (model from OLLAMA_MODEL,
      default "llama2")
    - Returns a static placeholder message if neither backend responds
    """
    # Hugging Face Inference API
    hf_token = os.getenv("HF_API_TOKEN")
    hf_model = os.getenv("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
    if hf_token:
        try:
            import requests

            response = requests.post(
                f"https://api-inference.huggingface.co/models/{hf_model}",
                headers={"Authorization": f"Bearer {hf_token}"},
                json={"inputs": prompt},
                timeout=30,
            )
            if response.status_code == 200:
                # The text-generation endpoint returns a list of results.
                return response.json()[0]["generated_text"]
            return f"⚠️ HF error {response.status_code}: {response.text}"
        except Exception as e:
            return f"⚠️ HF backend error: {e}"

    # Ollama API (local server)
    ollama_model = os.getenv("OLLAMA_MODEL", "llama2")
    try:
        import json
        import requests

        response = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": ollama_model, "prompt": prompt},
            stream=True,
            timeout=30,
        )
        text = ""
        # Ollama streams newline-delimited JSON objects; accumulate the
        # "response" field from each chunk until the final "done" message.
        for line in response.iter_lines():
            if not line:
                continue
            data = json.loads(line.decode("utf-8"))
            text += data.get("response", "")
            if data.get("done"):
                break
        return text if text else "⚠️ Ollama returned no output"
    except Exception:
        pass

    # Static fallback when no backend is reachable
    return "⚠️ No LLM backend configured. Please set HF_API_TOKEN or run Ollama."
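

# A minimal usage sketch, assuming this module is executed directly and the
# environment variables read by run_llm above (HF_API_TOKEN, HF_MODEL,
# OLLAMA_MODEL) are configured as needed; the prompt text is illustrative.
if __name__ == "__main__":
    demo_prompt = "Explain what a vector database is in one sentence."
    print(run_llm(demo_prompt))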