Skip to content

feat: Production CI/CD Pipeline with Docker Compose Integration Testing #26

feat: Production CI/CD Pipeline with Docker Compose Integration Testing

feat: Production CI/CD Pipeline with Docker Compose Integration Testing #26

# Production integration testing pipeline.
name: Production Integration Tests

# Triggers: pushes to the long-lived branches, pull requests targeting the
# listed branch patterns, and manual dispatch.
on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop
      - "feature/*"
      - "refactor/*"
  workflow_dispatch:

# A newer run for the same workflow and ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Token scope: read-only access to repository contents and Actions metadata.
permissions:
  contents: read
  actions: read
# Single job definition; the matrix below fans it out into one run per
# test type, and each step selects its branch with an `if:` guard on
# `matrix.test-type`.
jobs:
tests:
strategy:
fail-fast: true # Cancel all jobs immediately if any job fails
matrix:
# Two variants: `unit` (runner-only) and `integration` (Docker Compose stack).
test-type: [unit, integration]
runs-on: ubuntu-latest
# Upper bound, in minutes, on the run time of each matrix job.
timeout-minutes: 20
steps:
# Fetch the repository; a depth of 1 retrieves only the triggering commit.
- uses: actions/checkout@v4
  name: Checkout Code
  with:
    fetch-depth: 1
# ---- Unit test branch: fast feedback, no infrastructure ----
# Installs the interpreter on the runner and enables the action's pip cache.
- name: Set up Python
  uses: actions/setup-python@v5
  if: matrix.test-type == 'unit'
  with:
    # Quoted so the version is read as the string "3.10", not the float 3.1.
    python-version: "3.10"
    cache: pip
# Install the backend requirements on the runner (unit branch only).
- name: Install Python Dependencies
  if: matrix.test-type == 'unit'
  # The requirements file lives under src/, so run from there.
  working-directory: src
  run: pip install -r requirements.txt
# Run only the tests carrying the `unit` marker; unknown markers are an
# error because of --strict-markers.
- name: Run Unit Tests
  if: matrix.test-type == 'unit'
  working-directory: src
  run: |
    echo "Running unit tests (no infrastructure required)..."
    pytest ctutor_backend/tests/ \
      -m unit \
      -v \
      --tb=short \
      --strict-markers
# ---- Integration test branch: full production environment ----
# Provision a Buildx builder using the pinned BuildKit image and host
# networking.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3
  if: matrix.test-type == 'integration'
  with:
    driver-opts: |
      image=moby/buildkit:buildx-stable-1
      network=host
# Persist /tmp/docker-images, the directory where the "Load/Pull Base Images"
# step stores its image tarballs.
- name: Cache Docker Base Images
  uses: actions/cache@v4
  if: matrix.test-type == 'integration'
  with:
    path: /tmp/docker-images
    # SECURITY: Only cache base images (python:3.10-slim, node:20) - no application code
    # The exact key follows the Dockerfiles' contents; restore-keys lets the
    # most recent entry with the same prefix be reused when there is no
    # exact match.
    key: ${{ runner.os }}-base-images-${{ hashFiles('docker/*/Dockerfile') }}
    restore-keys: |
      ${{ runner.os }}-base-images-
# Persist /tmp/buildkit-cache, the local cache directory used by the
# `docker buildx build` commands in "Build Production Images".
- name: Cache Docker BuildKit Layers
  uses: actions/cache@v4
  if: matrix.test-type == 'integration'
  with:
    path: /tmp/buildkit-cache
    # The key is unique per commit, so an exact hit only happens when the
    # same commit is built again; otherwise restore-keys falls back to the
    # most recent entry with the prefix and a new entry is saved under this
    # commit's key.
    key: ${{ runner.os }}-buildkit-${{ github.sha }}
    restore-keys: |
      ${{ runner.os }}-buildkit-
# Persist the runner's pip download cache for the integration branch.
# NOTE(review): no step visible in this workflow runs pip on the runner host
# in the integration branch (pip runs inside containers) - confirm this path
# is ever populated.
- name: Cache Python Dependencies
  uses: actions/cache@v4
  if: matrix.test-type == 'integration'
  with:
    path: ~/.cache/pip
    # SECURITY: Only cache pip dependencies.
    # The exact key changes with requirements.txt / setup.py; restore-keys
    # still allows an older entry with the same prefix to be restored.
    key: ${{ runner.os }}-pip-${{ hashFiles('src/requirements.txt', 'src/setup.py') }}
    restore-keys: |
      ${{ runner.os }}-pip-
# Persist the npm cache and the frontend's node_modules directory.
# NOTE(review): no step visible in this workflow runs npm or yarn on the
# runner host in the integration branch - confirm these paths are ever
# populated.
- name: Cache Node Dependencies
  uses: actions/cache@v4
  if: matrix.test-type == 'integration'
  with:
    path: |
      ~/.npm
      frontend/node_modules
    # SECURITY: Only cache npm dependencies.
    # The exact key changes with either lock file; restore-keys still allows
    # an older entry with the same prefix to be restored.
    key: ${{ runner.os }}-node-${{ hashFiles('frontend/package-lock.json', 'frontend/yarn.lock') }}
    restore-keys: |
      ${{ runner.os }}-node-
# Prepare the on-disk deployment tree under /tmp/codeability and seed it
# with the optional defaults directory and Keycloak realm file.
- name: Create Required Directories
  if: matrix.test-type == 'integration'
  run: |
    deploy_root=/tmp/codeability
    shared_dir="$deploy_root/execution-backend/shared"
    keycloak_dir="$deploy_root/keycloak"

    # One mkdir call creates every deployment directory (-p also creates
    # the missing parents).
    sudo mkdir -p \
      "$deploy_root/postgres" \
      "$deploy_root/temporal-postgres" \
      "$deploy_root/redis" \
      "$deploy_root/redis-data" \
      "$deploy_root/minio/data" \
      "$shared_dir" \
      "$shared_dir/documents" \
      "$shared_dir/courses" \
      "$shared_dir/course-contents" \
      "$shared_dir/defaults" \
      "$shared_dir/repositories" \
      "$keycloak_dir/imports" \
      "$keycloak_dir/themes"

    # Hand the tree to the runner user and open it up so containers running
    # under other UIDs can write to it.
    sudo chown -R "$USER:$USER" "$deploy_root"
    sudo chmod -R 777 "$deploy_root"

    # Both seed files are optional; copy each one only when it exists.
    if [[ -d src/defaults ]]; then
      cp -r src/defaults "$shared_dir/"
    fi

    realm_file=data/keycloak/computor-realm.json
    if [[ -f "$realm_file" ]]; then
      cp "$realm_file" "$keycloak_dir/imports/"
    fi
# Fail early when .env.prod is missing or leaves a required variable empty,
# then print a summary of the non-secret connection settings.
- name: Validate Environment Configuration
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Validating .env.prod configuration ==="

    if [[ ! -f .env.prod ]]; then
      echo "ERROR: .env.prod file not found"
      exit 1
    fi

    # `set -a` exports every variable the file assigns.
    set -a
    source .env.prod
    set +a

    # Stop at the first required variable that is unset or empty;
    # ${!key} is bash indirect expansion (value of the variable named $key).
    for key in \
      SYSTEM_DEPLOYMENT_PATH \
      POSTGRES_HOST POSTGRES_PORT POSTGRES_USER POSTGRES_PASSWORD POSTGRES_DB \
      REDIS_HOST REDIS_PORT REDIS_PASSWORD \
      TEMPORAL_HOST TEMPORAL_PORT \
      TOKEN_SECRET
    do
      if [[ -n "${!key}" ]]; then
        continue
      fi
      echo "ERROR: Required environment variable $key is not set"
      exit 1
    done

    echo "Environment configuration validated"
    echo "System deployment path: $SYSTEM_DEPLOYMENT_PATH"
    echo "Database: $POSTGRES_USER@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB"
    echo "Redis: $REDIS_HOST:$REDIS_PORT"
    echo "Temporal: $TEMPORAL_HOST:$TEMPORAL_PORT"
# Make python:3.10-slim and node:20 available to the Docker daemon, using
# the tarballs in /tmp/docker-images when present and creating them
# otherwise.
- name: Load/Pull Base Images
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Loading/Pulling Base Images ==="
    image_dir=/tmp/docker-images
    mkdir -p "$image_dir"

    # ensure_image IMAGE TARBALL
    # Load IMAGE from TARBALL if the file exists; otherwise pull it and
    # save it to TARBALL.
    ensure_image() {
      local image=$1
      local tarball=$2
      if [[ -f "$tarball" ]]; then
        echo "Loading cached $image"
        docker load < "$tarball"
        return
      fi
      echo "Pulling $image"
      docker pull "$image"
      docker save "$image" -o "$tarball"
    }

    ensure_image python:3.10-slim "$image_dir/python-3.10-slim.tar"
    ensure_image node:20 "$image_dir/node-20.tar"

    echo "Base images ready"
# Build the four application images with Buildx, importing the local layer
# cache from /tmp/buildkit-cache and exporting a fresh one for the next run.
- name: Build Production Images
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Building Production Docker Images ==="

    # Export everything defined in .env.prod to the build commands.
    set -a
    source .env.prod
    set +a

    export DOCKER_BUILDKIT=1

    # Year-week stamp: changes once a week, so layers that depend on the
    # CACHE_BUST build argument are rebuilt weekly.
    CACHE_BUST=$(date +%Y-%W)

    # The cache is imported from cache_root but exported to cache_next.
    # A type=local exporter does not prune old entries, so writing back into
    # the restored directory would make the cached directory grow on every
    # run. After all builds succeed, cache_next replaces cache_root.
    cache_root=/tmp/buildkit-cache
    cache_next=/tmp/buildkit-cache-new
    mkdir -p "$cache_root"
    rm -rf "$cache_next"
    mkdir -p "$cache_next"

    # build_image NAME DOCKERFILE TAG CACHE_MODE [EXTRA_BUILD_ARGS...]
    # NAME selects the per-image cache subdirectory; CACHE_MODE is the
    # exporter mode (min or max); extra arguments are passed to
    # `docker buildx build` unchanged. The result is loaded into the local
    # Docker daemon (--load) so Docker Compose can use it.
    build_image() {
      local name=$1
      local dockerfile=$2
      local tag=$3
      local cache_mode=$4
      shift 4
      docker buildx build \
        --file "$dockerfile" \
        --tag "$tag" \
        --cache-from="type=local,src=${cache_root}/${name}" \
        --cache-to="type=local,dest=${cache_next}/${name},mode=${cache_mode}" \
        "$@" \
        --load .
    }

    echo "Building with persistent Docker layer caching..."

    # API image (exported with mode=min, as before).
    build_image api docker/api/Dockerfile \
      computor-fullstack-uvicorn:latest min \
      --build-arg "CACHE_BUST=$CACHE_BUST"

    # Frontend image.
    build_image frontend docker/frontend/Dockerfile \
      computor-fullstack-frontend:latest max \
      --build-arg REACT_APP_BACKEND_URL=http://localhost:8000

    # Temporal worker images.
    build_image temporal-worker docker/temporal-worker-dev/Dockerfile \
      computor-fullstack-temporal-worker:latest max \
      --build-arg "CACHE_BUST=$CACHE_BUST"

    build_image temporal-worker-python docker/temporal-worker-python/Dockerfile \
      computor-fullstack-temporal-worker-python:latest max \
      --build-arg "CACHE_BUST=$CACHE_BUST"

    # Rotate the cache: only what this run exported is kept.
    rm -rf "$cache_root"
    mv "$cache_next" "$cache_root"

    echo "Production images built successfully"
    echo "Docker layer caching: pip/npm dependencies cached until requirements change"
# Bring up the infrastructure part of the Compose stack in the background.
- name: Start Production Services
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Starting Production Docker Compose Stack ==="

    # Export everything defined in .env.prod for Docker Compose.
    set -a
    source .env.prod
    set +a

    # Infrastructure services only (MATLAB is excluded).
    infra_services=(
      traefik
      redis
      postgres
      temporal-postgres
      temporal
      temporal-ui
      minio
      static-server
    )
    docker compose -f docker-compose-prod.yaml up -d "${infra_services[@]}"

    echo "Infrastructure services started"
# Poll each infrastructure container until its probe command succeeds; the
# first service that never becomes healthy fails the step.
- name: Wait for Infrastructure Health
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Waiting for Infrastructure Services ==="

    # wait_for_service LABEL PROBE [TIMEOUT_SECONDS]
    # Evaluates PROBE (a shell command string) every 5 seconds until it
    # succeeds. Returns 0 on success, 1 once TIMEOUT_SECONDS (default 60)
    # of sleep time have been used up.
    wait_for_service() {
      local label=$1
      local probe=$2
      local limit=${3:-60}
      local pause=5
      local waited

      echo "Waiting for $label to be healthy..."
      for ((waited = 0; waited < limit; waited += pause)); do
        if eval "$probe"; then
          echo "$label is healthy"
          return 0
        fi
        echo "$label not ready yet... (${waited}s/${limit}s)"
        sleep $pause
      done

      echo "$label failed to become healthy within ${limit}s"
      return 1
    }

    # Main PostgreSQL database.
    wait_for_service "PostgreSQL Main" \
      "docker exec computor-fullstack-postgres-1 pg_isready -U postgres" \
      120

    # PostgreSQL instance backing Temporal.
    wait_for_service "Temporal PostgreSQL" \
      "docker exec temporal-postgres pg_isready -U temporal" \
      60

    # Redis.
    wait_for_service "Redis" \
      "docker exec computor-fullstack-redis-1 redis-cli -a redis_password ping" \
      60

    # Temporal server: considered up once either marker shows in its logs.
    wait_for_service "Temporal Server" \
      "docker logs temporal 2>&1 | grep -q 'rpc server listen succeeded' || docker logs temporal 2>&1 | grep -q 'Started'" \
      90

    # MinIO: relies on the container's own health status.
    wait_for_service "MinIO" \
      "docker inspect computor-minio --format='{{.State.Health.Status}}' | grep -q healthy" \
      60

    echo "All infrastructure services are healthy"
# Bring up the application part of the Compose stack in the background.
- name: Start Application Services
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Starting Application Services ==="

    # Export everything defined in .env.prod for Docker Compose.
    set -a
    source .env.prod
    set +a

    app_services=(
      uvicorn
      frontend
      temporal-worker
      temporal-worker-python
    )
    docker compose -f docker-compose-prod.yaml up -d "${app_services[@]}"

    echo "Application services started"
# Poll the HTTP endpoints of the application services until each one
# answers with a non-error status; the first endpoint that never does
# fails the step.
- name: Wait for Application Health
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Waiting for Application Services ==="

    # wait_for_http NAME URL [TIMEOUT_SECONDS]
    # Requests URL every 5 seconds until curl succeeds (-f treats HTTP
    # status >= 400 as failure). Returns 0 on success, 1 once
    # TIMEOUT_SECONDS (default 60) of wall-clock time have passed.
    wait_for_http() {
      local name=$1
      local url=$2
      local timeout=${3:-60}
      local interval=5
      # SECONDS is bash's running wall-clock counter; measuring against it
      # keeps the timeout honest even when a request itself takes time.
      local started=$SECONDS
      local elapsed=0

      echo "Waiting for $name at $url..."
      while [ "$elapsed" -lt "$timeout" ]; do
        # Bound every request: without --connect-timeout/--max-time a
        # hanging connection would block far past the advertised timeout.
        if curl -f -s --connect-timeout 5 --max-time 10 "$url" > /dev/null 2>&1; then
          echo "$name is responding"
          return 0
        fi
        echo "$name not responding... (${elapsed}s/${timeout}s)"
        sleep "$interval"
        elapsed=$((SECONDS - started))
      done

      echo "$name failed to respond within ${timeout}s"
      return 1
    }

    # Backend API
    wait_for_http "Backend API" "http://localhost:8000/docs" 120
    # Frontend
    wait_for_http "Frontend" "http://localhost:3000" 60
    # Temporal UI
    wait_for_http "Temporal UI" "http://localhost:8088" 30

    echo "All application services are responding"
# Wait until the application database accepts queries, install the test
# tooling into the backend container, and apply the Alembic migrations.
- name: Initialize Database Schema
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Initializing Database Schema ==="

    # Export everything defined in .env.prod.
    set -a
    source .env.prod
    set +a

    db_container=computor-fullstack-postgres-1
    api_container=computor-fullstack-uvicorn-1

    # Up to 30 probes, 2 seconds apart.
    echo "Waiting for database to be ready..."
    max_attempts=30
    db_ready=no
    for ((attempt = 0; attempt < max_attempts; attempt++)); do
      if docker exec "$db_container" psql -U postgres -d codeability -c "SELECT 1;" >/dev/null 2>&1; then
        echo "Database is ready"
        db_ready=yes
        break
      fi
      echo "Database not ready, waiting... ($attempt/$max_attempts)"
      sleep 2
    done

    if [[ $db_ready != yes ]]; then
      echo "Database failed to become ready"
      exit 1
    fi

    # The test runner packages are installed into the running container.
    echo "Installing test dependencies..."
    docker exec "$api_container" pip install pytest pytest-env pytest-asyncio

    echo "Running database migrations..."
    docker exec "$api_container" sh -c "cd /home/uvicorn/src/ctutor_backend && alembic upgrade head"

    echo "Database schema initialized"
# Run the pytest suite marked `integration` inside the backend container,
# passing it the in-network connection settings for PostgreSQL and Temporal.
- name: Run Real Integration Tests
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Running Real Integration Tests ==="

    # Export everything defined in .env.prod.
    set -a
    source .env.prod
    set +a

    # Connection settings for the test run; these override whatever
    # .env.prod assigned to the same names.
    export POSTGRES_HOST=postgres
    export POSTGRES_PORT=5437
    export POSTGRES_USER=postgres
    export POSTGRES_PASSWORD=postgres_secret
    export POSTGRES_DB=codeability
    export SKIP_TEMPORAL_TESTS=false
    export TEMPORAL_HOST=temporal
    export TEMPORAL_PORT=7233

    api_container=computor-fullstack-uvicorn-1

    echo "Running pytest integration tests..."

    # Diagnostic listing of what the container actually contains.
    echo "Checking container structure..."
    docker exec "$api_container" find /home/uvicorn -name "test.sh" -o -name "*.py" | head -10
    docker exec "$api_container" ls -la /home/uvicorn/

    # Turn the variables above into `-e NAME=value` flags for docker exec;
    # ${!key} is bash indirect expansion (value of the variable named $key).
    env_flags=()
    for key in \
      POSTGRES_HOST POSTGRES_PORT POSTGRES_USER POSTGRES_PASSWORD POSTGRES_DB \
      SKIP_TEMPORAL_TESTS TEMPORAL_HOST TEMPORAL_PORT
    do
      env_flags+=(-e "$key=${!key}")
    done

    # pytest is invoked directly rather than through a test.sh wrapper.
    docker exec "${env_flags[@]}" "$api_container" \
      bash -c "cd /home/uvicorn/src && pytest ctutor_backend/tests/ -m integration -v"

    echo "Real integration tests completed"
# Smoke-test the three HTTP front doors by fetching each page and looking
# for an expected marker string in the body.
- name: Run Basic Service Health Tests
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Running Basic Service Health Tests ==="

    # expect_page URL OK_MESSAGE FAIL_MESSAGE MARKER...
    # Fetches URL and succeeds when the body contains at least one MARKER.
    # -f makes HTTP status >= 400 count as a failure, so an HTML error page
    #    cannot satisfy a "<html" marker.
    # The timeouts keep an unresponsive service from stalling the step.
    # `|| body=""` stops a failed curl from aborting the script (the shell
    #    runs with -e) before FAIL_MESSAGE has been printed.
    expect_page() {
      local url=$1
      local ok_message=$2
      local fail_message=$3
      shift 3

      local body marker
      body=$(curl -f -s --connect-timeout 5 --max-time 15 "$url") || body=""

      for marker in "$@"; do
        if [[ "$body" == *"$marker"* ]]; then
          echo "$ok_message"
          return 0
        fi
      done

      echo "$fail_message"
      return 1
    }

    echo "Testing Backend API..."
    expect_page http://localhost:8000/docs \
      "Backend API serving documentation" \
      "Backend API documentation not accessible" \
      "FastAPI" "Swagger"

    echo "Testing Frontend..."
    expect_page http://localhost:3000 \
      "Frontend serving content" \
      "Frontend not serving content properly" \
      "<html" "react"

    echo "Testing Temporal UI..."
    expect_page http://localhost:8088 \
      "Temporal UI accessible" \
      "Temporal UI not accessible" \
      "<html"

    echo "Basic service health tests passed"
# Check from inside the backend container that PostgreSQL and Redis are
# reachable over the Compose network. The checks are advisory: a failure
# does not fail the step, but it is raised as a warning annotation so it
# is visible on the run summary instead of being buried in the log.
- name: Test Service Communication
  if: matrix.test-type == 'integration'
  run: |
    echo "=== Testing Service Communication ==="

    echo "Testing Docker network..."
    docker network ls

    echo "Testing container communication..."

    # report_failure MESSAGE
    # Prints MESSAGE and repeats it as a GitHub Actions warning annotation.
    report_failure() {
      echo "$1"
      echo "::warning title=Service communication::$1"
    }

    # Backend -> PostgreSQL
    docker exec computor-fullstack-uvicorn-1 sh -c \
      "python -c 'import psycopg2; psycopg2.connect(host=\"postgres\", port=5437, user=\"postgres\", password=\"postgres_secret\", database=\"codeability\"); print(\"Backend → PostgreSQL OK\")'" || \
      report_failure "Backend → PostgreSQL failed"

    # Backend -> Redis
    docker exec computor-fullstack-uvicorn-1 sh -c \
      "python -c 'import redis; r=redis.Redis(host=\"redis\", port=6379, password=\"redis_password\"); r.ping(); print(\"Backend → Redis OK\")'" || \
      report_failure "Backend → Redis failed"

    echo "Service communication tests completed"
# Always print a status snapshot of the stack (integration branch only),
# whether or not earlier steps succeeded.
- name: Show Service Status
  if: ${{ always() && matrix.test-type == 'integration' }}
  run: |
    echo "=== Service Status Summary ==="

    # Best effort: a missing .env.prod must not stop the report.
    set -a
    source .env.prod || true
    set +a

    echo "Docker Compose Services:"
    docker compose -f docker-compose-prod.yaml ps
    echo ""

    echo "Container Resource Usage:"
    docker stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemUsage}}"
    echo ""

    # One printf call prints each argument on its own line.
    printf '%s\n' \
      "Service URLs:" \
      " Backend API: http://localhost:8000/docs" \
      " Frontend: http://localhost:3000" \
      " Temporal UI: http://localhost:8088" \
      " Traefik Dashboard: http://localhost:8080" \
      " MinIO Console: http://localhost:9001"
# On failure, capture the last 100 log lines of every Compose service.
# Each capture is written to logs/<service>.log and also printed in a
# collapsible group, because no step visible in this workflow publishes
# the logs/ directory - without the printout the files would only exist
# on the runner.
- name: Collect Service Logs
  if: matrix.test-type == 'integration' && failure()
  run: |
    echo "=== Collecting Service Logs for Debugging ==="

    # Best effort: a missing .env.prod must not stop log collection.
    set -a
    source .env.prod || true
    set +a

    mkdir -p logs

    services=(traefik redis postgres temporal-postgres temporal temporal-ui uvicorn frontend temporal-worker temporal-worker-python minio static-server)
    for service in "${services[@]}"; do
      echo "Collecting logs for $service..."
      log_file="logs/${service}.log"
      # `|| true`: a service that was never created must not abort the loop.
      docker compose -f docker-compose-prod.yaml logs --tail=100 "$service" > "$log_file" 2>&1 || true

      # ::group:: / ::endgroup:: fold the output in the Actions log viewer.
      echo "::group::Last 100 log lines: $service"
      cat "$log_file" || true
      echo "::endgroup::"
    done

    # Short, always-expanded excerpts for the two services most likely to
    # explain a failure.
    echo "Recent Backend Logs:"
    docker compose -f docker-compose-prod.yaml logs --tail=20 uvicorn || true
    echo ""
    echo "Recent Frontend Logs:"
    docker compose -f docker-compose-prod.yaml logs --tail=20 frontend || true
# Always tear the stack down (integration branch only); every command is
# best effort so cleanup itself cannot fail the job.
- name: Cleanup Services
  if: ${{ always() && matrix.test-type == 'integration' }}
  run: |
    echo "=== Cleaning Up Services ==="

    # Best effort: a missing .env.prod must not stop the cleanup.
    set -a
    source .env.prod || true
    set +a

    # Remove containers, their volumes, and containers for services that
    # are not defined in the Compose file.
    docker compose -f docker-compose-prod.yaml down --volumes --remove-orphans || true

    # Remove the deployment tree.
    sudo rm -rf /tmp/codeability || true

    # List whatever is left; nothing is expected.
    echo "Remaining containers:"
    docker ps --all --filter "name=computor" || true

    echo "Cleanup completed"