@@ -217,3 +217,122 @@ jobs:
217217 if : always()
218218 run : |
219219 helm uninstall "$RELEASE_NAME" || true
220+
# Observability Tests job.
# Starts a K3s cluster, deploys eoAPI via ./scripts/deploy.sh, upgrades the
# release with the monitoring and autoscaling values enabled, and then runs
# the observability and autoscaling pytest suites against the "eoapi"
# namespace. Runs after the k3s-integration-tests job.
observability-tests:
  name: Observability Tests
  # Skipped unless the pull request head repository is this repository.
  if: github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    contents: read
    # NOTE(review): no step in this job visibly requests an OIDC token;
    # confirm id-token: write is still required.
    id-token: write
  needs: k3s-integration-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Start K3s cluster
      uses: jupyterhub/action-k3s-helm@v4
      with:
        k3s-channel: latest
        helm-version: ${{ env.HELM_VERSION }}
        metrics-enabled: false
        docker-enabled: true

    # Release name is "eoapi-" followed by the first 8 characters of the
    # commit SHA; it is exported through GITHUB_ENV for the later steps.
    - name: Set release name
      run: echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV"

    - name: Wait for K3s to be fully ready
      run: |
        echo "=== Waiting for K3s to be fully ready ==="
        kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s
        kubectl get nodes
        kubectl get pods --all-namespaces
        sleep 10
        echo "✅ K3s is ready"

    - name: Deploy eoAPI with monitoring
      run: |
        echo "=== Deploying eoAPI with monitoring stack ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export PGO_VERSION="${{ env.PGO_VERSION }}"
        export GITHUB_SHA="${{ github.sha }}"
        ./scripts/deploy.sh --ci

        # Enable monitoring components
        # NOTE(review): --set is passed without --reuse-values, so Helm
        # recomputes values from the chart defaults plus the flags below.
        # Any overrides applied by deploy.sh --ci would be dropped here;
        # confirm that is intended.
        helm upgrade "$RELEASE_NAME" ./charts/eoapi \
          --set monitoring.prometheus.enabled=true \
          --set monitoring.prometheusAdapter.enabled=true \
          --set monitoring.kube-state-metrics.enabled=true \
          --set monitoring.prometheus-node-exporter.enabled=true \
          --set observability.grafana.enabled=true \
          --set stac.autoscaling.enabled=true \
          --set raster.autoscaling.enabled=true \
          --set vector.autoscaling.enabled=true \
          --namespace eoapi \
          --wait --timeout=10m

    # Each wait is best-effort: a timeout only prints a message and the
    # step continues, so the test step below is what reports real failures.
    - name: Wait for monitoring stack to be ready
      run: |
        echo "=== Waiting for monitoring components ==="

        # Wait for Prometheus
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n eoapi --timeout=300s || echo "Prometheus not ready"

        # Wait for Grafana
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n eoapi --timeout=300s || echo "Grafana not ready"

        # Wait for prometheus-adapter
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus-adapter -n eoapi --timeout=300s || echo "prometheus-adapter not ready"

        # Wait for HPA to be created
        sleep 30

        echo "=== Final monitoring stack status ==="
        # Select on app.kubernetes.io/name, the same label the waits above
        # and the failure-debug step use, so this lists the same pods.
        kubectl get pods -n eoapi -l 'app.kubernetes.io/name in (prometheus,grafana,prometheus-adapter)' || true
        kubectl get hpa -n eoapi || true

    - name: Run observability tests
      run: |
        echo "=== Running observability test suite ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export NAMESPACE="eoapi"

        # Install python dependencies for testing
        python -m pip install --upgrade pip
        pip install pytest requests

        # Run observability tests
        python -m pytest .github/workflows/tests/test_observability.py -v --tb=short

        # Run autoscaling tests (tests marked "slow" are deselected)
        python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow"

    # Diagnostics only: every command is followed by "|| true" so a failing
    # probe never masks the original failure.
    - name: Debug observability stack on failure
      if: failure()
      run: |
        echo "=== Observability Debug Information ==="

        echo "=== Monitoring Pods Status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/name in (prometheus,grafana,prometheus-adapter)' -o wide || true

        echo "=== HPA Status ==="
        kubectl get hpa -n eoapi -o wide || true
        kubectl describe hpa -n eoapi || true

        echo "=== Custom Metrics API ==="
        kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" || true

        echo "=== Pod Metrics ==="
        # NOTE(review): the cluster is started with metrics-enabled: false;
        # presumably "kubectl top" has no metrics source then - confirm.
        kubectl top pods -n eoapi || true

        echo "=== Recent Events ==="
        kubectl get events -n eoapi --sort-by='.lastTimestamp' | tail -20 || true

        echo "=== Component Logs ==="
        kubectl logs -l app.kubernetes.io/name=prometheus-adapter -n eoapi --tail=50 || true
        kubectl logs -l app.kubernetes.io/name=grafana -n eoapi --tail=30 || true

    - name: Cleanup observability test
      if: always()
      run: |
        # Target the namespace the release was upgraded in; without it the
        # uninstall looks in the current-context namespace and the error is
        # hidden by "|| true".
        helm uninstall "$RELEASE_NAME" --namespace eoapi || true
0 commit comments