File size: 2,316 Bytes
12a887a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
{
"overall_accuracy": 0.9047619047619048,
"total_tests": 84,
"total_failures": 8,
"failures_by_category": {
"General Questions (Non-Bond LLM)": [
{
"query": "What's the weather today",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "non_bond_search",
"got_route": "search",
"confidence": 0.8000989556312561
},
{
"query": "What is machine learning",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "non_bond_search",
"got_route": "search",
"confidence": 0.5214624404907227
}
],
"Hard Negatives (Confusing Non-Bond)": [
{
"query": "How do ionic bonds work",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.6707904934883118
},
{
"query": "How to bond with my family",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.8499083518981934
},
{
"query": "Team bonding activities",
"expected_intent": "non_bond_search",
"expected_route": "search",
"got_intent": "non_bond_llm",
"got_route": "llm",
"confidence": 0.7155904769897461
},
{
"query": "Strengthen emotional bonds",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "hedge_volatility",
"got_route": "bond",
"confidence": 0.5427064895629883
}
],
"Mixed Context (Portfolio Questions)": [
{
"query": "I have 70% stocks and 30% bonds, should I rebalance",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "sector_rebalance",
"got_route": "bond",
"confidence": 0.48324716091156006
},
{
"query": "My portfolio has bonds and stocks",
"expected_intent": "non_bond_llm",
"expected_route": "llm",
"got_intent": "portfolio_analysis",
"got_route": "bond",
"confidence": 0.8792734146118164
}
]
}
} |