{ "overall_accuracy": 0.9047619047619048, "total_tests": 84, "total_failures": 8, "failures_by_category": { "General Questions (Non-Bond LLM)": [ { "query": "What's the weather today", "expected_intent": "non_bond_llm", "expected_route": "llm", "got_intent": "non_bond_search", "got_route": "search", "confidence": 0.8000989556312561 }, { "query": "What is machine learning", "expected_intent": "non_bond_llm", "expected_route": "llm", "got_intent": "non_bond_search", "got_route": "search", "confidence": 0.5214624404907227 } ], "Hard Negatives (Confusing Non-Bond)": [ { "query": "How do ionic bonds work", "expected_intent": "non_bond_search", "expected_route": "search", "got_intent": "non_bond_llm", "got_route": "llm", "confidence": 0.6707904934883118 }, { "query": "How to bond with my family", "expected_intent": "non_bond_search", "expected_route": "search", "got_intent": "non_bond_llm", "got_route": "llm", "confidence": 0.8499083518981934 }, { "query": "Team bonding activities", "expected_intent": "non_bond_search", "expected_route": "search", "got_intent": "non_bond_llm", "got_route": "llm", "confidence": 0.7155904769897461 }, { "query": "Strengthen emotional bonds", "expected_intent": "non_bond_llm", "expected_route": "llm", "got_intent": "hedge_volatility", "got_route": "bond", "confidence": 0.5427064895629883 } ], "Mixed Context (Portfolio Questions)": [ { "query": "I have 70% stocks and 30% bonds, should I rebalance", "expected_intent": "non_bond_llm", "expected_route": "llm", "got_intent": "sector_rebalance", "got_route": "bond", "confidence": 0.48324716091156006 }, { "query": "My portfolio has bonds and stocks", "expected_intent": "non_bond_llm", "expected_route": "llm", "got_intent": "portfolio_analysis", "got_route": "bond", "confidence": 0.8792734146118164 } ] } }