-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate-optimizations.sh
More file actions
executable file
·99 lines (86 loc) · 3.16 KB
/
validate-optimizations.sh
File metadata and controls
executable file
·99 lines (86 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env bash
#
# validate-optimizations.sh — quick sanity check that the AI latency
# optimizations have been applied to the codebase.
#
# Verifies: the modified files exist, connection pooling and reduced
# context settings are present in ollama_service.py, timing
# instrumentation is in place, the frontend timeout was raised, and the
# required Python modules are importable.
#
# This is a report, not a gate: it always exits 0 and prints its
# findings to stdout.

set -u  # error on unset variables; no -e so one failed probe can't abort the report

# The file most checks grep through; hoisted so a rename touches one line.
readonly OLLAMA_SERVICE="backend/services/ollama_service.py"

# check_file <path>
#   Print a ✓ line if <path> is a regular file, an ✗ line otherwise.
check_file() {
  local path=$1
  if [[ -f "$path" ]]; then
    printf '%s\n' " ✓ $path"
  else
    printf '%s\n' " ✗ $path NOT FOUND"
  fi
}

# check_grep <pattern> <file> <ok-line> <fail-line>
#   Print <ok-line> if <pattern> matches in <file>, <fail-line> otherwise.
#   A missing <file> counts as "no match" (stderr suppressed) instead of
#   leaking "grep: ...: No such file or directory" into the report.
#   Patterns are kept as basic regexes to match the original probes
#   ('.' and '()' are loose but harmless here).
check_grep() {
  local pattern=$1 file=$2 ok=$3 fail=$4
  if grep -q -- "$pattern" "$file" 2>/dev/null; then
    printf '%s\n' "$ok"
  else
    printf '%s\n' "$fail"
  fi
}

# check_python_import <label> <python-statement>
#   Report whether <python-statement> (an import) succeeds under python3.
#   A missing python3 interpreter is reported as "NOT available" rather
#   than erroring.
check_python_import() {
  local label=$1 stmt=$2
  if command -v python3 >/dev/null 2>&1 && python3 -c "$stmt" 2>/dev/null; then
    printf '%s\n' " ✓ $label available"
  else
    printf '%s\n' " ✗ $label NOT available"
  fi
}

main() {
  echo "🔍 Validating AI Latency Optimizations..."
  echo ""

  # 1. Presence of every file the optimization work touched.
  echo "1. Checking modified files..."
  local -a files=(
    "$OLLAMA_SERVICE"
    "backend/services/whisper_service.py"
    "backend/services/performance.py"
    "backend/routes/ask.py"
    "frontend/src/services/api.js"
    "optimize-network.sh"
    "test_performance.py"
  )
  local file
  for file in "${files[@]}"; do
    check_file "$file"
  done

  # 2. HTTP connection reuse (requests.Session + HTTPAdapter pool).
  echo ""
  echo "2. Checking for connection pooling implementation..."
  check_grep "requests.Session" "$OLLAMA_SERVICE" \
    " ✓ Connection pooling found in ollama_service.py" \
    " ✗ Connection pooling NOT found in ollama_service.py"
  check_grep "HTTPAdapter" "$OLLAMA_SERVICE" \
    " ✓ HTTPAdapter configuration found" \
    " ✗ HTTPAdapter NOT found"

  # 3. Smaller model context / prediction budget for faster responses.
  echo ""
  echo "3. Checking for optimized context settings..."
  check_grep '"num_ctx": 1024' "$OLLAMA_SERVICE" \
    " ✓ Reduced context window (1024)" \
    " ⚠ Context window might not be optimized"
  check_grep '"num_predict": 200' "$OLLAMA_SERVICE" \
    " ✓ Reduced prediction limit (200)" \
    " ⚠ Prediction limit might not be optimized"

  # 4. Timing instrumentation in the Ollama service layer.
  echo ""
  echo "4. Checking for performance monitoring..."
  check_grep "time.time()" "$OLLAMA_SERVICE" \
    " ✓ Timing instrumentation found" \
    " ✗ Timing instrumentation NOT found"
  check_grep "timing" "$OLLAMA_SERVICE" \
    " ✓ Timing data returned in response" \
    " ⚠ Timing data might not be included"

  # 5. Frontend request timeout raised to 45 s.
  echo ""
  echo "5. Checking frontend timeout..."
  check_grep "timeout: 45000" "frontend/src/services/api.js" \
    " ✓ Frontend timeout optimized (45s)" \
    " ⚠ Frontend timeout might not be optimized"

  # 6. Python modules the backend optimizations rely on.
  echo ""
  echo "6. Checking Python dependencies..."
  check_python_import "requests module" "import requests"
  check_python_import "urllib3 module" "from urllib3.util.retry import Retry"

  # Summary / next steps.
  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "✅ Validation complete!"
  echo ""
  echo "Next steps:"
  echo " 1. Run: ./optimize-network.sh (optional, requires sudo)"
  echo " 2. Restart backend: cd backend && uvicorn main:app --reload"
  echo " 3. Test: python3 test_performance.py"
  echo ""
  echo "Expected improvement: 5-10x faster AI requests! 🚀"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
}

main "$@"