@@ -153,3 +153,122 @@ jobs:
153153        if : always() 
154154        run : | 
155155          helm uninstall "$RELEASE_NAME" || true  
156+ 
# Observability job: starts a K3s cluster, deploys eoAPI, enables the
# monitoring/autoscaling chart options via `helm upgrade`, then runs the
# observability and autoscaling pytest suites against the `eoapi` namespace.
observability-tests:
  name: Observability Tests
  # Run only when the pull request's head repository is this repository.
  if: github.event.pull_request.head.repo.full_name == github.repository
  permissions:
    contents: read
    id-token: write
  # Starts only after the `integration` job has completed successfully.
  needs: integration
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Start K3s cluster
      uses: jupyterhub/action-k3s-helm@v4
      with:
        k3s-channel: latest
        helm-version: ${{ env.HELM_VERSION }}
        metrics-enabled: false
        docker-enabled: true

    # Release name is "eoapi-" plus the first 8 characters of the commit SHA;
    # it is written to GITHUB_ENV so that every later step sees RELEASE_NAME.
    - name: Set release name
      run: echo "RELEASE_NAME=eoapi-$(echo "${{ github.sha }}" | cut -c1-8)" >> "$GITHUB_ENV"

    - name: Wait for K3s to be fully ready
      run: |
        echo "=== Waiting for K3s to be fully ready ==="
        kubectl wait --for=condition=Ready pod -l k8s-app=kube-dns -n kube-system --timeout=300s
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=traefik -n kube-system --timeout=300s
        kubectl get nodes
        kubectl get pods --all-namespaces
        sleep 10
        echo "✅ K3s is ready"

    # NOTE(review): the `helm upgrade` below passes --set flags without
    # --reuse-values, so values supplied by deploy.sh may not be carried over
    # to the upgraded release -- confirm this is intended.
    - name: Deploy eoAPI with monitoring
      run: |
        echo "=== Deploying eoAPI with monitoring stack ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export PGO_VERSION="${{ env.PGO_VERSION }}"
        export GITHUB_SHA="${{ github.sha }}"
        ./scripts/deploy.sh --ci

        # Enable monitoring components
        helm upgrade "$RELEASE_NAME" ./charts/eoapi \
          --set monitoring.prometheus.enabled=true \
          --set monitoring.prometheusAdapter.enabled=true \
          --set monitoring.kube-state-metrics.enabled=true \
          --set monitoring.prometheus-node-exporter.enabled=true \
          --set observability.grafana.enabled=true \
          --set stac.autoscaling.enabled=true \
          --set raster.autoscaling.enabled=true \
          --set vector.autoscaling.enabled=true \
          --namespace eoapi \
          --wait --timeout=10m

    # Each wait is best-effort: a timeout only prints a message and the step
    # continues, so readiness problems surface in the test step instead.
    - name: Wait for monitoring stack to be ready
      run: |
        echo "=== Waiting for monitoring components ==="

        # Wait for Prometheus
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n eoapi --timeout=300s || echo "Prometheus not ready"

        # Wait for Grafana
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n eoapi --timeout=300s || echo "Grafana not ready"

        # Wait for prometheus-adapter
        kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus-adapter -n eoapi --timeout=300s || echo "prometheus-adapter not ready"

        # Wait for HPA to be created
        sleep 30

        echo "=== Final monitoring stack status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/component in (server,grafana,prometheus-adapter)' || true
        kubectl get hpa -n eoapi || true

    - name: Run observability tests
      run: |
        echo "=== Running observability test suite ==="
        export RELEASE_NAME="$RELEASE_NAME"
        export NAMESPACE="eoapi"

        # Install python dependencies for testing
        python -m pip install --upgrade pip
        pip install pytest requests

        # Run observability tests
        python -m pytest .github/workflows/tests/test_observability.py -v --tb=short

        # Run autoscaling tests
        python -m pytest .github/workflows/tests/test_autoscaling.py -v --tb=short -m "not slow"

    # Diagnostics only; every command is allowed to fail so that the dump
    # never masks the original failure.
    - name: Debug observability stack on failure
      if: failure()
      run: |
        echo "=== Observability Debug Information ==="

        echo "=== Monitoring Pods Status ==="
        kubectl get pods -n eoapi -l 'app.kubernetes.io/name in (prometheus,grafana,prometheus-adapter)' -o wide || true

        echo "=== HPA Status ==="
        kubectl get hpa -n eoapi -o wide || true
        kubectl describe hpa -n eoapi || true

        echo "=== Custom Metrics API ==="
        kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" || true

        echo "=== Pod Metrics ==="
        kubectl top pods -n eoapi || true

        echo "=== Recent Events ==="
        kubectl get events -n eoapi --sort-by='.lastTimestamp' | tail -20 || true

        echo "=== Component Logs ==="
        kubectl logs -l app.kubernetes.io/name=prometheus-adapter -n eoapi --tail=50 || true
        kubectl logs -l app.kubernetes.io/name=grafana -n eoapi --tail=30 || true

    # The release is upgraded in the `eoapi` namespace above, so the uninstall
    # must target the same namespace; without it Helm looks in the default
    # namespace and the failure is hidden by `|| true`.
    - name: Cleanup observability test
      if: always()
      run: |
        helm uninstall "$RELEASE_NAME" --namespace eoapi || true
0 commit comments