Commit
·
98d21b9
1
Parent(s):
332c22e
feat(tests): Add end-to-end smoke tests (formatted)
Browse files
- Makefile +15 -5
- tests/e2e/test_smoke.py +65 -0
Makefile
CHANGED
|
@@ -28,9 +28,19 @@ format:
|
|
| 28 |
typecheck:
|
| 29 |
uv run mypy src
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
clean:
|
| 35 |
-
rm -rf .pytest_cache .mypy_cache .ruff_cache __pycache__ .coverage htmlcov
|
| 36 |
-
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
|
|
|
| 28 |
typecheck:
|
| 29 |
uv run mypy src
|
| 30 |
|
| 31 |
+
# These targets run commands and never produce a file of the same name.
# Declaring them phony keeps a stray file or directory called e.g. `smoke`
# or `check` from making `make` consider the target already up to date.
.PHONY: check smoke smoke-free smoke-paid

# Run all checks (lint, typecheck, test)
check: lint typecheck test

# Smoke tests - run against real APIs (slow, not for CI)
smoke-free:
	@echo "Running Free Tier smoke test..."
	uv run python -m pytest tests/e2e/test_smoke.py::test_free_tier_synthesis -v -s

# The test itself skips when OPENAI_API_KEY is not set in the environment.
smoke-paid:
	@echo "Running Paid Tier smoke test (requires OPENAI_API_KEY)..."
	uv run python -m pytest tests/e2e/test_smoke.py::test_paid_tier_synthesis -v -s

smoke: smoke-free # Default to free tier
|
| 44 |
+
|
| 45 |
+
# Clean up cache and artifacts
|
| 46 |
|
|
|
|
|
|
|
|
|
tests/e2e/test_smoke.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Smoke tests for regression prevention.
|
| 3 |
+
|
| 4 |
+
These tests run against REAL APIs and verify end-to-end functionality.
|
| 5 |
+
They are slow (2-5 minutes) and should NOT run in CI.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
make smoke-free # Test Free Tier (HuggingFace)
|
| 9 |
+
make smoke-paid # Test Paid Tier (OpenAI BYOK)
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import pytest
|
| 13 |
+
|
| 14 |
+
from src.orchestrators.advanced import AdvancedOrchestrator
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@pytest.mark.e2e
@pytest.mark.timeout(600)  # 10 minute timeout for Free Tier
async def test_free_tier_synthesis():
    """Verify Free Tier produces actual synthesis (not just 'Research complete.').

    Drives an ``AdvancedOrchestrator`` built without an API key, keeps every
    event whose ``type`` is ``"complete"``, and checks that the last one
    carries a substantive message rather than the bare completion signal.
    """
    # Use a simple query that is likely to yield results quickly
    orch = AdvancedOrchestrator(max_rounds=2)

    events = []
    print("\nRunning Free Tier Smoke Test...")
    async for event in orch.run("What is libido?"):
        if event.type == "complete":
            events.append(event)
            print(f"Received complete event: {event.message[:50]}...")

    # MUST have a complete event
    assert events, "No complete event received"

    # Complete event MUST have substantive content (not just signal)
    message = events[-1].message

    # P1 Bug Regression Check: Ensure we got actual text.
    # This has to run BEFORE the length check below: once a message is known
    # to be longer than 100 chars, the `len(message) > 50` escape hatch is
    # always true and this assertion could never fail.
    assert "Research complete." not in message or len(message) > 50, (
        "Got empty synthesis signal instead of actual report"
    )

    # P2 Bug Regression Check: Ensure content isn't just "Research complete."
    assert len(message) > 100, f"Synthesis too short: {len(message)} chars"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@pytest.mark.e2e
@pytest.mark.timeout(300)  # 5 minute timeout for Paid Tier
async def test_paid_tier_synthesis():
    """Check that the Paid Tier (BYOK) run ends with a substantive synthesis.

    Skipped when ``OPENAI_API_KEY`` is missing or empty in the environment.
    Otherwise the key is handed to ``AdvancedOrchestrator`` and the last
    ``"complete"`` event must carry a message longer than 100 characters.
    """
    import os

    openai_key = os.getenv("OPENAI_API_KEY")
    if not openai_key:
        pytest.skip("OPENAI_API_KEY not set")

    orchestrator = AdvancedOrchestrator(max_rounds=2, api_key=openai_key)

    print("\nRunning Paid Tier Smoke Test...")
    # Keep only the completion events; everything else the run yields is ignored.
    completions = [
        evt
        async for evt in orchestrator.run("What is libido?")
        if evt.type == "complete"
    ]

    assert completions, "No complete event received"
    assert len(completions[-1].message) > 100, "Synthesis too short"
|