| { | |
| "overall_accuracy": 0.9047619047619048, | |
| "total_tests": 84, | |
| "total_failures": 8, | |
| "failures_by_category": { | |
| "General Questions (Non-Bond LLM)": [ | |
| { | |
| "query": "What's the weather today", | |
| "expected_intent": "non_bond_llm", | |
| "expected_route": "llm", | |
| "got_intent": "non_bond_search", | |
| "got_route": "search", | |
| "confidence": 0.8000989556312561 | |
| }, | |
| { | |
| "query": "What is machine learning", | |
| "expected_intent": "non_bond_llm", | |
| "expected_route": "llm", | |
| "got_intent": "non_bond_search", | |
| "got_route": "search", | |
| "confidence": 0.5214624404907227 | |
| } | |
| ], | |
| "Hard Negatives (Confusing Non-Bond)": [ | |
| { | |
| "query": "How do ionic bonds work", | |
| "expected_intent": "non_bond_search", | |
| "expected_route": "search", | |
| "got_intent": "non_bond_llm", | |
| "got_route": "llm", | |
| "confidence": 0.6707904934883118 | |
| }, | |
| { | |
| "query": "How to bond with my family", | |
| "expected_intent": "non_bond_search", | |
| "expected_route": "search", | |
| "got_intent": "non_bond_llm", | |
| "got_route": "llm", | |
| "confidence": 0.8499083518981934 | |
| }, | |
| { | |
| "query": "Team bonding activities", | |
| "expected_intent": "non_bond_search", | |
| "expected_route": "search", | |
| "got_intent": "non_bond_llm", | |
| "got_route": "llm", | |
| "confidence": 0.7155904769897461 | |
| }, | |
| { | |
| "query": "Strengthen emotional bonds", | |
| "expected_intent": "non_bond_llm", | |
| "expected_route": "llm", | |
| "got_intent": "hedge_volatility", | |
| "got_route": "bond", | |
| "confidence": 0.5427064895629883 | |
| } | |
| ], | |
| "Mixed Context (Portfolio Questions)": [ | |
| { | |
| "query": "I have 70% stocks and 30% bonds, should I rebalance", | |
| "expected_intent": "non_bond_llm", | |
| "expected_route": "llm", | |
| "got_intent": "sector_rebalance", | |
| "got_route": "bond", | |
| "confidence": 0.48324716091156006 | |
| }, | |
| { | |
| "query": "My portfolio has bonds and stocks", | |
| "expected_intent": "non_bond_llm", | |
| "expected_route": "llm", | |
| "got_intent": "portfolio_analysis", | |
| "got_route": "bond", | |
| "confidence": 0.8792734146118164 | |
| } | |
| ] | |
| } | |
| } |