bonds-query-classifier-v2 / evaluation_failures.json
aryannzzz's picture
Upload trained bond classifier with 15 intents (v2)
12a887a verified
{
"overall_accuracy": 0.9047619047619048,
"total_tests": 84,
"total_failures": 8,
"failures_by_category": {
"General Questions (Non-Bond LLM)": [
{
"query": "What's the weather today",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "non_bond_search",
"got_route": "search",
"confidence": 0.8000989556312561
},
{
"query": "What is machine learning",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "non_bond_search",
"got_route": "search",
"confidence": 0.5214624404907227
}
],
"Hard Negatives (Confusing Non-Bond)": [
{
"query": "How do ionic bonds work",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.6707904934883118
},
{
"query": "How to bond with my family",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.8499083518981934
},
{
"query": "Team bonding activities",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.7155904769897461
},
{
"query": "Strengthen emotional bonds",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "hedge_volatility",
"got_route": "bond",
"confidence": 0.5427064895629883
}
],
"Mixed Context (Portfolio Questions)": [
{
"query": "I have 70% stocks and 30% bonds, should I rebalance",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "sector_rebalance",
"got_route": "bond",
"confidence": 0.48324716091156006
},
{
"query": "My portfolio has bonds and stocks",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "portfolio_analysis",
"got_route": "bond",
"confidence": 0.8792734146118164
}
]
}
}