diff --git a/.github/workflows/cd-aws-eks.yml b/.github/workflows/cd-aws-eks.yml new file mode 100644 index 0000000..f1c00a5 --- /dev/null +++ b/.github/workflows/cd-aws-eks.yml @@ -0,0 +1,459 @@ +name: CD-AWS-EKS # Builds every AEX image, pushes it to ECR, then rolls it out to the staging and production EKS clusters + +on: + push: + tags: + - 'v*' # any tag starting with "v" starts the pipeline + workflow_dispatch: + inputs: + environment: + description: 'Deployment environment' + required: true + default: 'staging' + type: choice + options: + - staging + - production + +env: + GO_VERSION: "1.22" # NOTE(review): not referenced by any step in this workflow + AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }} # repository variable with a us-east-1 fallback + CLUSTER_NAME_STAGING: aex-eks-staging + CLUSTER_NAME_PRODUCTION: aex-eks + ENVIRONMENT_NAME: aex # ECR repository prefix: <registry>/aex/<image> + +jobs: + # ============================================================ + # Build and Push Images to ECR + # ============================================================ + build-and-push: + name: Build and Push to ECR + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # required for the OIDC role assumption below + + outputs: + version: ${{ steps.version.outputs.version }} # image tag consumed by both deploy jobs + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Determine version # tag name for tag pushes, otherwise sha-<first 8 characters of the commit SHA> + id: version + run: | + if [[ "$GITHUB_REF" == refs/tags/* ]]; then # read the ref from the environment instead of templating it into the script + VERSION=${GITHUB_REF#refs/tags/} + else + VERSION=sha-${GITHUB_SHA::8} + fi + [[ "$VERSION" =~ ^[A-Za-z0-9_][A-Za-z0-9._-]{0,127}$ ]] || { echo "::error::'$VERSION' is not a valid image tag"; exit 1; }; echo "version=$VERSION" >> "$GITHUB_OUTPUT" + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ env.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push AEX core services # each image is built from src/<service>/Dockerfile with src/ as the build context + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ steps.version.outputs.version }} + run: | + services=( + "aex-gateway" + "aex-work-publisher" + "aex-bid-gateway" + "aex-bid-evaluator" + "aex-contract-engine" + "aex-provider-registry" + "aex-trust-broker" + "aex-identity" + "aex-settlement" + "aex-telemetry" + ) + + for service in 
"${services[@]}"; do + echo "Building and pushing $service..." + docker buildx build \ + --platform linux/amd64 \ + -f "src/$service/Dockerfile" \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$service:$VERSION" \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$service:latest" \ + --push \ + src/ + done + + - name: Build and push code review demo agents # one shared Dockerfile; AGENT_DIR is set to the agent name + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ steps.version.outputs.version }} + run: | + agents=("code-reviewer-a" "code-reviewer-b" "code-reviewer-c" "orchestrator") # NOTE(review): build path aligned with the GKE workflow (demo/code_review/agents) - confirm against the repo layout + + for agent in "${agents[@]}"; do + echo "Building and pushing $agent..." + docker buildx build \ + --platform linux/amd64 \ + -f demo/code_review/agents/Dockerfile \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$agent:$VERSION" \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$agent:latest" \ + --build-arg "AGENT_DIR=$agent" \ + --push \ + demo/code_review/agents/ + done + + - name: Build and push payment agents # same Dockerfile and build context as the review agents + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ steps.version.outputs.version }} + run: | + agents=("payment-devpay" "payment-codeauditpay" "payment-securitypay") + + for agent in "${agents[@]}"; do + echo "Building and pushing $agent..." + docker buildx build \ + --platform linux/amd64 \ + -f demo/code_review/agents/Dockerfile \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$agent:$VERSION" \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$agent:latest" \ + --build-arg "AGENT_DIR=$agent" \ + --push \ + demo/code_review/agents/ + done + + - name: Build and push Demo UI + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ steps.version.outputs.version }} + run: | + echo "Building and pushing demo-ui-nicegui..." 
+ docker buildx build \ + --platform linux/amd64 \ + -f demo/code_review/ui/Dockerfile \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/demo-ui-nicegui:$VERSION" \ + -t "$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/demo-ui-nicegui:latest" \ + --push \ + demo/code_review/ui/ # NOTE(review): path aligned with the GKE workflow (demo/code_review/ui) - confirm against the repo layout + + # ============================================================ + # Deploy to EKS Staging + # ============================================================ + deploy-staging: # runs for a manual "staging" dispatch and for every tag push + name: Deploy to EKS Staging + runs-on: ubuntu-latest + needs: build-and-push + if: github.event.inputs.environment == 'staging' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) + environment: eks-staging + permissions: + contents: read + id-token: write # required for the OIDC role assumption below + + steps: + - name: Checkout code # the Kustomize manifests under deploy/k8s are read from the checkout + uses: actions/checkout@v4 + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ env.AWS_REGION }} + + - name: Login to Amazon ECR # its registry output is used below to build image references + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Configure kubectl for EKS + run: | + aws eks update-kubeconfig \ + --name ${{ env.CLUSTER_NAME_STAGING }} \ + --region ${{ env.AWS_REGION }} + + - name: Apply Kustomize manifests (staging) + env: # NOTE(review): the script below does not read ECR_REGISTRY or VERSION + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ needs.build-and-push.outputs.version }} + run: | + # Prefer the staging overlay; fall back to the base manifests (nothing is applied when neither directory exists) + if [ -d "deploy/k8s/overlays/staging" ]; then + echo "Applying staging overlay..." + kubectl apply -k deploy/k8s/overlays/staging + elif [ -d "deploy/k8s/base" ]; then + echo "Applying base manifests..." 
+ kubectl apply -k deploy/k8s/base + fi + + - name: Update deployment images # points each existing deployment at the freshly pushed image tag + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ needs.build-and-push.outputs.version }} + run: | + NAMESPACE=aex + + # AEX core services; deployments missing from the namespace are skipped, and the container is addressed by the deployment name + core_services=( + "aex-gateway" + "aex-work-publisher" + "aex-bid-gateway" + "aex-bid-evaluator" + "aex-contract-engine" + "aex-provider-registry" + "aex-trust-broker" + "aex-identity" + "aex-settlement" + "aex-telemetry" + ) + + for service in "${core_services[@]}"; do + if kubectl get deployment "$service" -n "$NAMESPACE" &>/dev/null; then + echo "Updating $service to $VERSION..." + kubectl set image deployment/"$service" \ + "$service=$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$service:$VERSION" \ + -n "$NAMESPACE" + fi + done + + # Demo agents, payment agents and the demo UI + demo_services=( + "code-reviewer-a" + "code-reviewer-b" + "code-reviewer-c" + "orchestrator" + "payment-devpay" + "payment-codeauditpay" + "payment-securitypay" + "demo-ui-nicegui" + ) + + for service in "${demo_services[@]}"; do + if kubectl get deployment "$service" -n "$NAMESPACE" &>/dev/null; then + echo "Updating $service to $VERSION..." + kubectl set image deployment/"$service" \ + "$service=$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$service:$VERSION" \ + -n "$NAMESPACE" + fi + done + + - name: Wait for staging rollout # a failed or timed-out rollout fails this job, which keeps the tag-push production deploy from running + run: | + NAMESPACE=aex + TIMEOUT=300 + + critical_services=( + "aex-gateway" + "aex-provider-registry" + "aex-work-publisher" + ) + + for service in "${critical_services[@]}"; do + if kubectl get deployment "$service" -n "$NAMESPACE" &>/dev/null; then + echo "Waiting for $service rollout..." + kubectl rollout status deployment/"$service" \ + -n "$NAMESPACE" --timeout="${TIMEOUT}s" || { echo "::error::$service did not finish rolling out within ${TIMEOUT}s"; exit 1; } + fi + done + + echo "Staging deployment complete." + + - name: Run smoke tests + run: | + NAMESPACE=aex + + echo "Running smoke tests..." + + # List pod status for the job log (informational only) + echo "Checking pod status..." 
+ kubectl get pods -n "$NAMESPACE" + + # Probe the gateway health endpoint from inside the pod; a failed probe only prints the "skipped" message + echo "Checking gateway health..." + kubectl exec -n "$NAMESPACE" deploy/aex-gateway -- \ + wget -qO- http://localhost:8080/health 2>/dev/null || \ + echo "Gateway health check skipped (wget not available in container)" + + # Report deployments with no available replicas (warning only; the step still succeeds) + echo "Checking deployment availability..." + UNAVAILABLE=$(kubectl get deployments -n "$NAMESPACE" -o json | \ + jq -r '.items[] | select(.status.availableReplicas == 0 or .status.availableReplicas == null) | .metadata.name' 2>/dev/null || echo "") + + if [ -n "$UNAVAILABLE" ]; then + echo "Warning: The following deployments have no available replicas:" + echo "$UNAVAILABLE" + else + echo "All deployments have available replicas." + fi + + echo "Smoke tests complete." + + # ============================================================ + # Deploy to EKS Production + # ============================================================ + deploy-production: # runs after a successful staging deploy on "v" tag pushes, or directly for a manual "production" dispatch + name: Deploy to EKS Production + runs-on: ubuntu-latest + needs: [build-and-push, deploy-staging] + if: ${{ !cancelled() && needs.build-and-push.result == 'success' && (needs.deploy-staging.result == 'success' || needs.deploy-staging.result == 'skipped') && (github.event.inputs.environment == 'production' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v'))) }} # a status function is required: without one, a skipped deploy-staging would skip this job too + environment: eks-production + permissions: + contents: write # the "Create release notes" step creates or updates a GitHub release + id-token: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Configure AWS credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_ROLE_ARN }} + aws-region: ${{ env.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Configure kubectl for EKS + run: | + aws eks update-kubeconfig \ + --name ${{ env.CLUSTER_NAME_PRODUCTION }} \ + --region ${{ env.AWS_REGION }} + + - name: Apply Kustomize manifests (production) + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ needs.build-and-push.outputs.version }} + run: | + # Check if 
production overlay exists + if [ -d "deploy/k8s/overlays/production" ]; then + echo "Applying production overlay..." + kubectl apply -k deploy/k8s/overlays/production + elif [ -d "deploy/k8s/base" ]; then + echo "Applying base manifests..." + kubectl apply -k deploy/k8s/base + fi + + - name: Update deployment images (rolling update) # deployments missing from the namespace are skipped + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + VERSION: ${{ needs.build-and-push.outputs.version }} + run: | + NAMESPACE=aex + + all_services=( + "aex-gateway" + "aex-work-publisher" + "aex-bid-gateway" + "aex-bid-evaluator" + "aex-contract-engine" + "aex-provider-registry" + "aex-trust-broker" + "aex-identity" + "aex-settlement" + "aex-telemetry" + "code-reviewer-a" + "code-reviewer-b" + "code-reviewer-c" + "orchestrator" + "payment-devpay" + "payment-codeauditpay" + "payment-securitypay" + "demo-ui-nicegui" + ) + + for service in "${all_services[@]}"; do + if kubectl get deployment "$service" -n "$NAMESPACE" &>/dev/null; then + echo "Updating $service to $VERSION (production)..." + kubectl set image deployment/"$service" \ + "$service=$ECR_REGISTRY/${{ env.ENVIRONMENT_NAME }}/$service:$VERSION" \ + -n "$NAMESPACE" + fi + done + + - name: Wait for production rollout # a failed or timed-out rollout fails the job instead of being reported as a successful deploy + run: | + NAMESPACE=aex + TIMEOUT=600 + + critical_services=( + "aex-gateway" + "aex-provider-registry" + "aex-work-publisher" + "aex-bid-gateway" + "aex-contract-engine" + ) + + for service in "${critical_services[@]}"; do + if kubectl get deployment "$service" -n "$NAMESPACE" &>/dev/null; then + echo "Waiting for $service rollout..." + kubectl rollout status deployment/"$service" \ + -n "$NAMESPACE" --timeout="${TIMEOUT}s" || { echo "::error::$service did not finish rolling out within ${TIMEOUT}s"; exit 1; } + fi + done + + echo "Production deployment complete." 
+ + - name: Create release notes # NOTE(review): updating or creating a release needs the job token to carry contents: write + if: startsWith(github.ref, 'refs/tags/v') + uses: actions/github-script@v7 + with: + script: | + const tag = context.ref.replace('refs/tags/', ''); + + try { + const { data: existingRelease } = await github.rest.repos.getReleaseByTag({ + owner: context.repo.owner, + repo: context.repo.repo, + tag: tag + }); + + await github.rest.repos.updateRelease({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: existingRelease.id, + body: (existingRelease.body || '') + `\n\n**AWS EKS Deployment:** Deployed to production EKS cluster.` + }); + console.log(`Updated release: ${existingRelease.html_url}`); + } catch (e) { if (e.status !== 404) throw e; // only a missing release (404) falls through to creation; other errors are re-thrown + const { data: release } = await github.rest.repos.createRelease({ + owner: context.repo.owner, + repo: context.repo.repo, + tag_name: tag, + name: `Release ${tag}`, + body: `Automated release for ${tag}\n\nDeployed to AWS EKS production.`, + draft: false, + prerelease: false + }); + console.log(`Created release: ${release.html_url}`); + } + + # ============================================================ + # Notify + # ============================================================ + notify: + name: Notify EKS Deployment + runs-on: ubuntu-latest + needs: [build-and-push, deploy-staging, deploy-production] + if: always() # write the summary even when an earlier job failed or was skipped + + steps: + - name: Deployment Summary # appends a Markdown table of job results to the run summary + run: | + echo "## AWS EKS Deployment Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Step | Status |" >> $GITHUB_STEP_SUMMARY + echo "|------|--------|" >> $GITHUB_STEP_SUMMARY + echo "| Build & Push | ${{ needs.build-and-push.result }} |" >> $GITHUB_STEP_SUMMARY + echo "| Deploy EKS Staging | ${{ needs.deploy-staging.result }} |" >> $GITHUB_STEP_SUMMARY + echo "| Deploy EKS Production | ${{ needs.deploy-production.result }} |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Version:** ${{ needs.build-and-push.outputs.version }}" >> $GITHUB_STEP_SUMMARY + echo "**Region:** ${{ env.AWS_REGION }}" >> 
$GITHUB_STEP_SUMMARY + echo "**Orchestration:** Kubernetes (EKS)" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/cd-gcp-gke.yml b/.github/workflows/cd-gcp-gke.yml new file mode 100644 index 0000000..afa9ca9 --- /dev/null +++ b/.github/workflows/cd-gcp-gke.yml @@ -0,0 +1,411 @@ +name: CD - GKE # Builds every AEX image, pushes it to Artifact Registry, then deploys to GKE: staging, smoke tests, production + +on: + push: + tags: + - 'v*' # any tag starting with "v" starts the pipeline + workflow_dispatch: + inputs: + environment: + description: 'Deployment environment' + required: true + default: 'staging' + type: choice + options: + - staging + - production + skip_tests: + description: 'Skip smoke tests' + required: false + default: false + type: boolean + +env: + GO_VERSION: "1.22" # NOTE(review): not referenced by any step in this workflow + GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} + GCP_REGION: ${{ secrets.GKE_CLUSTER_REGION || 'us-central1' }} + GKE_CLUSTER: ${{ secrets.GKE_CLUSTER_NAME || 'aex-cluster' }} + ARTIFACT_REGISTRY: ${{ secrets.GKE_CLUSTER_REGION || 'us-central1' }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/aex # <region>-docker.pkg.dev/<project>/aex + NAMESPACE: aex # Kubernetes namespace used by every deploy and test step + +jobs: + # ============================================================ + # Build and Push Docker Images + # ============================================================ + build-and-push: + name: Build and Push Images + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # required for workload identity federation + + outputs: + version: ${{ steps.version.outputs.version }} # image tag consumed by the deploy jobs + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Determine version # tag name for tag pushes, otherwise sha-<first 8 characters of the commit SHA> + id: version + run: | + if [[ "$GITHUB_REF" == refs/tags/* ]]; then # read the ref from the environment instead of templating it into the script + VERSION=${GITHUB_REF#refs/tags/} + else + VERSION=sha-${GITHUB_SHA::8} + fi + [[ "$VERSION" =~ ^[A-Za-z0-9_][A-Za-z0-9._-]{0,127}$ ]] || { echo "::error::'$VERSION' is not a valid image tag"; exit 1; }; echo "version=$VERSION" >> "$GITHUB_OUTPUT" + + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Configure Docker for Artifact Registry + run: | + gcloud auth configure-docker ${{ 
env.GCP_REGION }}-docker.pkg.dev + + - name: Set up Docker Buildx # NOTE(review): the build steps below invoke plain "docker build" and "docker push" + uses: docker/setup-buildx-action@v3 + + - name: Build and push AEX core services # one image per service from src/<service>/Dockerfile, with src/ as the build context + run: | + VERSION=${{ steps.version.outputs.version }} + REGISTRY=${{ env.ARTIFACT_REGISTRY }} + + services=( + "aex-gateway" + "aex-work-publisher" + "aex-bid-gateway" + "aex-bid-evaluator" + "aex-contract-engine" + "aex-provider-registry" + "aex-trust-broker" + "aex-identity" + "aex-settlement" + "aex-telemetry" + "aex-credentials-provider" + ) + + for service in "${services[@]}"; do + echo "::group::Building $service" + docker build \ + -f "src/$service/Dockerfile" \ + -t "$REGISTRY/$service:$VERSION" \ + -t "$REGISTRY/$service:latest" \ + src/ + + docker push "$REGISTRY/$service:$VERSION" + docker push "$REGISTRY/$service:latest" + echo "::endgroup::" + done + + - name: Build and push code review demo agents # review agents, orchestrator and payment agents share one Dockerfile; AGENT_DIR is set to the agent name + run: | + VERSION=${{ steps.version.outputs.version }} + REGISTRY=${{ env.ARTIFACT_REGISTRY }} + + agents=( + "code-reviewer-a" + "code-reviewer-b" + "code-reviewer-c" + "orchestrator" + "payment-devpay" + "payment-codeauditpay" + "payment-securitypay" + ) + + for agent in "${agents[@]}"; do + echo "::group::Building $agent" + docker build \ + -f "demo/code_review/agents/Dockerfile" \ + --build-arg "AGENT_DIR=$agent" \ + -t "$REGISTRY/$agent:$VERSION" \ + -t "$REGISTRY/$agent:latest" \ + demo/code_review/agents/ + + docker push "$REGISTRY/$agent:$VERSION" + docker push "$REGISTRY/$agent:latest" + echo "::endgroup::" + done + + - name: Build and push demo UI # pushed under both the computed version tag and latest + run: | + VERSION=${{ steps.version.outputs.version }} + REGISTRY=${{ env.ARTIFACT_REGISTRY }} + + echo "::group::Building demo-ui-nicegui" + docker build \ + -f "demo/code_review/ui/Dockerfile" \ + -t "$REGISTRY/demo-ui-nicegui:$VERSION" \ + -t "$REGISTRY/demo-ui-nicegui:latest" \ + demo/code_review/ui/ + + docker push "$REGISTRY/demo-ui-nicegui:$VERSION" + docker push "$REGISTRY/demo-ui-nicegui:latest" + echo "::endgroup::" + + # 
============================================================ + # Deploy to Staging + # ============================================================ + deploy-staging: # runs for a manual "staging" dispatch and for every tag push + name: Deploy to GKE Staging + runs-on: ubuntu-latest + needs: build-and-push + if: github.event.inputs.environment == 'staging' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) + environment: staging + permissions: + contents: read + id-token: write + + steps: + - name: Checkout code # the manifests under deploy/k8s are read from the checkout + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Get GKE credentials # NOTE(review): cluster, location and project use the same workflow-level env expressions as the production job - confirm staging and production resolve to different targets + uses: google-github-actions/get-gke-credentials@v2 + with: + cluster_name: ${{ env.GKE_CLUSTER }} + location: ${{ env.GCP_REGION }} + project_id: ${{ env.GCP_PROJECT_ID }} + + - name: Deploy to GKE (Staging) # renders manifests with kustomize, replaces the literal ${REGISTRY} and ${TAG} placeholders with sed, then applies them + run: | + VERSION=${{ needs.build-and-push.outputs.version }} + REGISTRY=${{ env.ARTIFACT_REGISTRY }} + + # Ensure namespace exists + kubectl get namespace ${{ env.NAMESPACE }} || kubectl create namespace ${{ env.NAMESPACE }} + + # Apply manifests using kustomize with image overrides + KUSTOMIZE_DIR="deploy/k8s/base" + if [ -d "deploy/k8s/overlays/staging" ] && [ -f "deploy/k8s/overlays/staging/kustomization.yaml" ]; then + KUSTOMIZE_DIR="deploy/k8s/overlays/staging" + fi + + kubectl kustomize "$KUSTOMIZE_DIR" | \ + sed "s|\${REGISTRY}|$REGISTRY|g" | \ + sed "s|\${TAG}|$VERSION|g" | \ + kubectl apply -n ${{ env.NAMESPACE }} -f - + + # Wait for critical deployments + echo "Waiting for deployments to be ready..." + deployments=( + "aex-gateway" + "aex-work-publisher" + "aex-provider-registry" + "aex-bid-gateway" + "aex-settlement" + ) + + for deploy in "${deployments[@]}"; do + echo "Waiting for $deploy..." 
+ kubectl rollout status deployment/"$deploy" \ + -n ${{ env.NAMESPACE }} \ + --timeout=300s || { echo "::error::$deploy did not finish rolling out within 300s"; exit 1; } + done + + echo "" + echo "=== Staging Deployment Status ===" + kubectl get pods -n ${{ env.NAMESPACE }} + + # ============================================================ + # Smoke Tests + # ============================================================ + smoke-tests: # skipped when the manual skip_tests input is true, or when a needed job did not succeed + name: Run Smoke Tests + runs-on: ubuntu-latest + needs: [build-and-push, deploy-staging] + if: github.event.inputs.skip_tests != 'true' + permissions: + contents: read + id-token: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Get GKE credentials + uses: google-github-actions/get-gke-credentials@v2 + with: + cluster_name: ${{ env.GKE_CLUSTER }} + location: ${{ env.GCP_REGION }} + project_id: ${{ env.GCP_PROJECT_ID }} + + - name: Run smoke tests # reaches the gateway through the ingress IP when one exists, otherwise through a local port-forward + run: | + echo "Running smoke tests against GKE staging..." + + # Look up the external IP of the ingress-nginx controller service; empty when missing or not yet assigned + INGRESS_IP=$(kubectl get svc ingress-nginx-controller \ + -n ingress-nginx \ + -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") + + if [ -z "$INGRESS_IP" ]; then + echo "No ingress IP found. Using port-forward for tests..." + + # Port-forward the gateway in the background and give it 5 seconds to come up + kubectl port-forward -n ${{ env.NAMESPACE }} svc/aex-gateway 8080:8080 & + PF_PID=$! + sleep 5 + + GATEWAY_URL="http://localhost:8080" + else + GATEWAY_URL="http://$INGRESS_IP" + fi + + # Health check + echo "Testing gateway health..." 
+ HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "$GATEWAY_URL/health" || true) # curl already prints 000 when the request fails, so no fallback echo is needed + if [ "$HTTP_CODE" = "200" ]; then + echo "Gateway health check: PASSED" + else + echo "Gateway health check: FAILED (HTTP $HTTP_CODE)" + echo "Note: Gateway may still be starting up" + fi + + # Check pod status + echo "" + echo "Pod status:" + kubectl get pods -n ${{ env.NAMESPACE }} -o wide + + # Check for crash loops (grep -c prints 0 itself when nothing matches; "|| true" only absorbs its exit status) + CRASHES=$(kubectl get pods -n ${{ env.NAMESPACE }} --no-headers | grep -c "CrashLoopBackOff" || true) + if [ "$CRASHES" -gt "0" ]; then + echo "" + echo "WARNING: $CRASHES pod(s) in CrashLoopBackOff" + kubectl get pods -n ${{ env.NAMESPACE }} | grep "CrashLoopBackOff" + fi + + # Cleanup port-forward if used + if [ -n "${PF_PID:-}" ]; then + kill "$PF_PID" 2>/dev/null || true + fi + + echo "" + echo "Smoke tests completed" + + # ============================================================ + # Deploy to Production + # ============================================================ + deploy-production: # runs after staging and smoke tests on "v" tag pushes, or directly for a manual "production" dispatch + name: Deploy to GKE Production + runs-on: ubuntu-latest + needs: [build-and-push, deploy-staging, smoke-tests] + if: ${{ !cancelled() && needs.build-and-push.result == 'success' && (needs.deploy-staging.result == 'success' || needs.deploy-staging.result == 'skipped') && (needs.smoke-tests.result == 'success' || needs.smoke-tests.result == 'skipped') && (github.event.inputs.environment == 'production' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v'))) }} # a status function is required: without one, a skipped dependency would skip this job too + environment: production + permissions: + contents: write # the "Create release notes" step creates a GitHub release + id-token: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + - name: Get GKE credentials + uses: google-github-actions/get-gke-credentials@v2 + with: + cluster_name: ${{ env.GKE_CLUSTER }} + location: ${{ env.GCP_REGION }} + project_id: ${{ env.GCP_PROJECT_ID }} + + - name: Deploy to GKE (Production) + run: | + VERSION=${{ needs.build-and-push.outputs.version 
}} + REGISTRY=${{ env.ARTIFACT_REGISTRY }} + + # Ensure namespace exists + kubectl get namespace ${{ env.NAMESPACE }} || kubectl create namespace ${{ env.NAMESPACE }} + + # Render with kustomize (production overlay when it has a kustomization.yaml, else base), substitute the ${REGISTRY} and ${TAG} placeholders, then apply + KUSTOMIZE_DIR="deploy/k8s/base" + if [ -d "deploy/k8s/overlays/production" ] && [ -f "deploy/k8s/overlays/production/kustomization.yaml" ]; then + KUSTOMIZE_DIR="deploy/k8s/overlays/production" + fi + + kubectl kustomize "$KUSTOMIZE_DIR" | \ + sed "s|\${REGISTRY}|$REGISTRY|g" | \ + sed "s|\${TAG}|$VERSION|g" | \ + kubectl apply -n ${{ env.NAMESPACE }} -f - + + # Wait for every deployment in the namespace; the first failed or timed-out rollout fails the step + echo "Waiting for deployments to be ready..." + kubectl get deployments -n ${{ env.NAMESPACE }} -o name | while read -r deploy; do + echo "Waiting for $deploy..." + kubectl rollout status "$deploy" \ + -n ${{ env.NAMESPACE }} \ + --timeout=600s || { echo "::error::$deploy did not finish rolling out within 600s"; exit 1; } + done + + echo "" + echo "=== Production Deployment Status ===" + kubectl get pods -n ${{ env.NAMESPACE }} -o wide + echo "" + kubectl get svc -n ${{ env.NAMESPACE }} + + - name: Get service URLs # writes the deployment details and the service list to the run summary + run: | + INGRESS_IP=$(kubectl get svc ingress-nginx-controller \ + -n ingress-nginx \ + -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "pending") + + echo "## GKE Production Deployment" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Version:** ${{ needs.build-and-push.outputs.version }}" >> $GITHUB_STEP_SUMMARY + echo "**Cluster:** ${{ env.GKE_CLUSTER }}" >> $GITHUB_STEP_SUMMARY + echo "**Region:** ${{ env.GCP_REGION }}" >> $GITHUB_STEP_SUMMARY + echo "**Ingress IP:** ${INGRESS_IP:-pending}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Services" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + kubectl get svc -n ${{ env.NAMESPACE }} >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + + - name: Create release notes # NOTE(review): creating a release needs the job token to carry contents: write + if: startsWith(github.ref, 'refs/tags/v') + uses: actions/github-script@v7 + with: + script: | + const tag = 
context.ref.replace('refs/tags/', ''); + const release = await github.rest.repos.createRelease({ + owner: context.repo.owner, + repo: context.repo.repo, + tag_name: tag, + name: `Release ${tag} (GKE)`, + body: `Automated release for ${tag}\n\nDeployed to GKE production cluster.`, + draft: false, + prerelease: false + }).catch((e) => { if (e.status !== 422) throw e; return null; }); // 422: a release for this tag already exists (for example, one created by the EKS workflow) + console.log(release ? `Created release: ${release.data.html_url}` : `Release for ${tag} already exists; leaving it unchanged`); diff --git a/demo/README.md b/demo/README.md index 4f0a371..0078e0b 100644 --- a/demo/README.md +++ b/demo/README.md @@ -1,17 +1,27 @@ -# Agent Exchange (AEX) Demo +# Agent Exchange (AEX) Demos -A complete demonstration of the Agent Exchange platform showcasing **A2A Protocol** (Agent-to-Agent communication) and **AP2 Protocol** (Agent Payments Protocol) integration. +Complete demonstrations of the Agent Exchange platform showcasing **A2A Protocol** (Agent-to-Agent communication) and **AP2 Protocol** (Agent Payments Protocol) integration. -## What This Demo Shows +## Available Demos + +| Demo | Domain | Directory | Description | +|------|--------|-----------|-------------| +| **Legal Agents** | Legal document review | [`demo/aex/`](aex/) | 3 legal agents + 3 payment agents review contracts, NDAs, compliance docs | +| **Code Review** | Software development | [`demo/code_review/`](code_review/) | 3 code review agents + 3 payment agents review code for bugs, security, architecture | +| **Moltbot** | Payment integration | [`demo/moltbot_integration/`](moltbot_integration/) | Moltbot + AEX + AP2 payment flow | + +All demos share the same **7-step AEX workflow** and **AP2 payment settlement** — only the domain and agents differ. + +## Common 7-Step Workflow ``` -User Request: "Review this NDA for potential risks" +User submits request via NiceGUI UI (:8502) │ ▼ ┌─────────────────────────────────────────────────────────────────────────┐ -│ 7-STEP WORKFLOW │ +│ 7-STEP WORKFLOW (same for all demos) │ │ │ -│ 1. 
COLLECT BIDS Domain agents compete with pricing offers │ │ │ │ │ ▼ │ │ 2. EVALUATE BIDS Score bids by price, trust, confidence │ @@ -20,7 +30,7 @@ User Request: "Review this NDA for potential risks" │ 3. AWARD CONTRACT Best agent wins, contract created │ │ │ │ │ ▼ │ -│ 4. EXECUTE (A2A) Winner processes request via JSON-RPC │ +│ 4. EXECUTE (A2A) Winner processes request via JSON-RPC 2.0 │ │ │ │ │ ▼ │ │ 5. AP2 SELECT Payment providers bid on transaction │ @@ -39,88 +49,101 @@ User Request: "Review this NDA for potential risks" ### Prerequisites - Docker & Docker Compose -- Anthropic API Key (for LLM-powered agents) +- Anthropic API Key (for LLM-powered agents; demos work without it using mock responses) -### 1. Configure Environment +### Legal Agents Demo ```bash -cd demo -cp .env.example .env -# Edit .env and add your ANTHROPIC_API_KEY +cd demo/aex +cp .env.example .env # Add ANTHROPIC_API_KEY +docker compose up --build +open http://localhost:8502 ``` -### 2. Start Everything +### Code Review Demo ```bash -docker-compose up -d --build +cd demo/code_review +cp .env.example .env # Add ANTHROPIC_API_KEY +docker compose up --build +open http://localhost:8502 ``` -### 3. Open the Demo UI - -**NiceGUI (Recommended)**: http://localhost:8502 -Mesop (Legacy): http://localhost:8501 - ## Architecture +All demos share the same AEX core services. Only the domain agents and payment agents differ. 
+ ``` ┌─────────────────────────────────────────────────────────────────────────────┐ -│ DEMO COMPONENTS │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────────────────────────────────────────────────────────────────┐ │ -│ │ AEX CORE SERVICES │ │ -│ │ │ │ -│ │ Gateway ─── Work Publisher ─── Bid Gateway ─── Bid Evaluator │ │ -│ │ :8080 :8081 :8082 :8083 │ │ -│ │ │ │ -│ │ Contract Engine ─── Provider Registry ─── Trust Broker ─── Identity │ │ -│ │ :8084 :8085 :8086 :8087 │ │ -│ │ │ │ -│ │ Settlement ─── Credentials Provider (AP2) │ │ -│ │ :8088 :8090 │ │ -│ └──────────────────────────────────────────────────────────────────────┘ │ +│ AEX CORE SERVICES (shared) │ │ │ -│ ┌──────────────────────────────────────────────────────────────────────┐ │ -│ │ LEGAL AGENTS (Providers) │ │ -│ │ │ │ -│ │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ │ -│ │ │ Budget Legal │ │ Standard Legal │ │ Premium Legal │ │ │ -│ │ │ $5 + $2/pg │ │ $15 + $0.50/pg │ │ $30 + $0.20/pg │ │ │ -│ │ │ :8100 │ │ :8101 │ │ :8102 │ │ │ -│ │ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ │ -│ └──────────────────────────────────────────────────────────────────────┘ │ +│ Gateway ─── Work Publisher ─── Bid Gateway ─── Bid Evaluator │ +│ :8080 :8081 :8082 :8083 │ │ │ -│ ┌──────────────────────────────────────────────────────────────────────┐ │ -│ │ PAYMENT AGENTS (AP2 Providers) │ │ -│ │ │ │ -│ │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ │ -│ │ │ LegalPay │ │ ContractPay │ │ CompliancePay │ │ │ -│ │ │ 2% - 1% = 1% │ │ 2.5% - 3% = - │ │ 3% - 4% = -1% │ │ │ -│ │ │ :8200 │ │ :8201 │ │ :8202 │ │ │ -│ │ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ │ -│ │ fee 0.5% CASHBACK 1% CASHBACK │ │ -│ └──────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────┐ ┌──────────────────┐ ┌────────────────────────┐ │ -│ │ Orchestrator │ │ Demo UI (Mesop) │ │ Demo UI 
(NiceGUI) │ │ -│ │ :8103 │ │ :8501 │ │ :8502 (Recommended) │ │ -│ └──────────────────┘ └──────────────────┘ └────────────────────────┘ │ +│ Contract Engine ─── Provider Registry ─── Trust Broker ─── Identity │ +│ :8084 :8085 :8086 :8087 │ │ │ +│ Settlement ─── Telemetry ─── Credentials Provider (AP2) │ +│ :8088 :8089 :8090 │ └─────────────────────────────────────────────────────────────────────────────┘ ``` -## Demo Workflow Explained +### Legal Agents Demo (`demo/aex/`) + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ DOMAIN AGENTS PAYMENT AGENTS (AP2) │ +│ │ +│ Budget Legal $5 + $2/pg LegalPay 2% fee / 1% │ +│ :8100 :8200 │ +│ Standard Legal $15 + $0.50/pg ContractPay 2.5% / 3% CB │ +│ :8101 :8201 │ +│ Premium Legal $30 + $0.20/pg CompliancePay 3% / 4% CB │ +│ :8102 :8202 │ +│ │ +│ Orchestrator :8103 NiceGUI UI :8502 │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Code Review Demo (`demo/code_review/`) + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ DOMAIN AGENTS PAYMENT AGENTS (AP2) │ +│ │ +│ QuickReview AI $3 + $1/file DevPay 2% fee / 1% │ +│ :8100 :8200 │ +│ CodeGuard AI $10 + $3/file CodeAuditPay 2.5% / 3% CB │ +│ :8101 :8201 │ +│ ArchitectAI $25 + $5/file SecurityPay 3% / 4% CB │ +│ :8102 :8202 │ +│ │ +│ Orchestrator :8103 NiceGUI UI :8502 │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Workflow Explained ### Step 1: Collect Bids -Legal agents receive the work request and submit bids based on their pricing model: +Domain agents receive the work request and submit bids based on their pricing model: -| Agent | Tier | Pricing Formula | 10-page doc | -|-------|------|-----------------|-------------| +**Legal Demo:** + +| Agent | Tier | Pricing | 10-page doc | +|-------|------|---------|-------------| | Budget Legal AI | VERIFIED | $5 + $2/page | **$25** | | Standard Legal AI | TRUSTED | $15 + $0.50/page | **$20** | | Premium Legal AI 
| PREFERRED | $30 + $0.20/page | **$32** | +**Code Review Demo:** + +| Agent | Tier | Pricing | 5-file review | +|-------|------|---------|---------------| +| QuickReview AI | VERIFIED | $3 + $1/file | **$8** | +| CodeGuard AI | TRUSTED | $10 + $3/file | **$25** | +| ArchitectAI | PREFERRED | $25 + $5/file | **$50** | + ### Step 2: Evaluate Bids Bids are scored using the selected strategy: @@ -129,7 +152,7 @@ Bids are scored using the selected strategy: |----------|-------|-------|------------|----------| | **Balanced** | 40% | 35% | 25% | General use | | **Lowest Price** | 70% | 20% | 10% | Budget-conscious | -| **Best Quality** | 20% | 50% | 30% | Critical documents | +| **Best Quality** | 20% | 50% | 30% | Critical work | ### Step 3: Award Contract @@ -149,7 +172,7 @@ Direct agent-to-agent call using JSON-RPC 2.0: "params": { "message": { "role": "user", - "parts": [{"type": "text", "text": "Review this NDA..."}] + "parts": [{"type": "text", "text": "Review this code for security issues..."}] } } } @@ -159,31 +182,44 @@ Direct agent-to-agent call using JSON-RPC 2.0: Payment agents compete for the transaction: -| Provider | Base Fee | Reward | Net Fee | Specialization | -|----------|----------|--------|---------|----------------| +**Legal Demo:** + +| Provider | Fee | Reward | Net | Specialization | +|----------|-----|--------|-----|----------------| | LegalPay | 2.0% | 1.0% | **1.0%** | General legal | -| ContractPay | 2.5% | 3.0% | **-0.5%** | Contracts/Real Estate | -| CompliancePay | 3.0% | 4.0% | **-1.0%** | Compliance/Regulatory | +| ContractPay | 2.5% | 3.0% | **-0.5% CB** | Contracts | +| CompliancePay | 3.0% | 4.0% | **-1.0% CB** | Compliance | + +**Code Review Demo:** + +| Provider | Fee | Reward | Net | Specialization | +|----------|-----|--------|-----|----------------| +| DevPay | 2.0% | 1.0% | **1.0%** | General dev | +| CodeAuditPay | 2.5% | 3.0% | **-0.5% CB** | Code audit | +| SecurityPay | 3.0% | 4.0% | **-1.0% CB** | Security audit | **Negative 
net fee = You earn CASHBACK!** ### Step 6: AP2 Payment Processing -The AP2 protocol processes the payment: +The AP2 protocol processes the payment through a 4-mandate chain: -1. **Cart Mandate** - Items and total amount -2. **Payment Mandate** - Selected payment method -3. **Payment Receipt** - Transaction confirmation +1. **Intent Mandate** - Payment intent declared +2. **Cart Mandate** - Items and total amount +3. **Payment Mandate** - Selected payment method +4. **Payment Receipt** - Transaction confirmation ### Step 7: Settlement Final distribution: -- **Platform Fee**: 10% of agreed price -- **Provider Payout**: 90% to winning agent +- **Platform Fee**: 10-15% of agreed price +- **Provider Payout**: Remainder to winning agent - **Ledger Updated**: All transactions recorded ## Port Reference +### AEX Core Services (shared) + | Service | Port | Description | |---------|------|-------------| | AEX Gateway | 8080 | Main API endpoint | @@ -197,18 +233,24 @@ Final distribution: | Settlement | 8088 | Payment settlement with AP2 | | Telemetry | 8089 | Platform telemetry | | Credentials Provider | 8090 | AP2 payment methods | -| Legal Agent A | 8100 | Budget tier | -| Legal Agent B | 8101 | Standard tier | -| Legal Agent C | 8102 | Premium tier | -| LegalPay | 8200 | Payment provider | -| ContractPay | 8201 | Payment provider | -| CompliancePay | 8202 | Payment provider | -| Orchestrator | 8103 | Consumer orchestrator | -| Demo UI (Mesop) | 8501 | Legacy interface | -| **Demo UI (NiceGUI)** | **8502** | **Real-time WebSocket UI** | + +### Demo Agents (same ports, different agents per demo) + +| Port | Legal Demo | Code Review Demo | +|------|-----------|-----------------| +| 8100 | Budget Legal AI | QuickReview AI | +| 8101 | Standard Legal AI | CodeGuard AI | +| 8102 | Premium Legal AI | ArchitectAI | +| 8103 | Orchestrator | Orchestrator | +| 8200 | LegalPay | DevPay | +| 8201 | ContractPay | CodeAuditPay | +| 8202 | CompliancePay | SecurityPay | +| 8502 | NiceGUI UI 
| NiceGUI UI | ## API Examples +These work with any running demo (legal or code review). + ### Check Registered Agents ```bash @@ -218,12 +260,17 @@ curl http://localhost:8085/v1/providers | jq ### Get Agent Card (A2A Standard) ```bash +# Legal demo +curl http://localhost:8100/.well-known/agent.json | jq + +# Code review demo curl http://localhost:8100/.well-known/agent.json | jq ``` ### Direct A2A Call ```bash +# Code review agent curl -X POST http://localhost:8100/a2a \ -H "Content-Type: application/json" \ -d '{ @@ -233,25 +280,7 @@ curl -X POST http://localhost:8100/a2a \ "params": { "message": { "role": "user", - "parts": [{"type": "text", "text": "Review this NDA clause..."}] - } - } - }' | jq -``` - -### Request Bid from Agent - -```bash -curl -X POST http://localhost:8100/a2a \ - -H "Content-Type: application/json" \ - -d '{ - "jsonrpc": "2.0", - "method": "message/send", - "id": "bid-request", - "params": { - "message": { - "role": "user", - "parts": [{"type": "text", "text": "{\"action\": \"get_bid\", \"document_pages\": 10}"}] + "parts": [{"type": "text", "text": "Review this Python function for bugs"}] } } }' | jq @@ -269,7 +298,7 @@ curl -X POST http://localhost:8200/a2a \ "params": { "message": { "role": "user", - "parts": [{"type": "text", "text": "{\"action\": \"bid\", \"amount\": 25.00, \"work_category\": \"contracts\"}"}] + "parts": [{"type": "text", "text": "{\"action\": \"bid\", \"amount\": 25.00, \"work_category\": \"code_review\"}"}] } } }' | jq @@ -279,27 +308,50 @@ curl -X POST http://localhost:8200/a2a \ ``` demo/ -├── agents/ -│ ├── common/ # Shared utilities -│ │ ├── aex_client.py # AEX integration client -│ │ ├── agent_card.py # A2A agent card generation -│ │ └── config.py # Configuration management -│ ├── legal-agent-a/ # Budget tier ($5 + $2/page) -│ ├── legal-agent-b/ # Standard tier ($15 + $0.50/page) -│ ├── legal-agent-c/ # Premium tier ($30 + $0.20/page) -│ ├── payment-legalpay/ # Payment provider (1% fee) -│ ├── 
payment-contractpay/ # Payment provider (0.5% cashback) -│ ├── payment-compliancepay/ # Payment provider (1% cashback) -│ └── orchestrator/ # Consumer orchestrator -├── ui/ -│ ├── main.py # Mesop UI (legacy) -│ ├── nicegui_app.py # NiceGUI UI (recommended) -│ ├── Dockerfile # Mesop container -│ └── Dockerfile.nicegui # NiceGUI container -├── docker-compose.yml # All services -└── README.md # This file +├── aex/ # Legal Agents Demo +│ ├── agents/ +│ │ ├── common/ # Shared agent framework (BaseAgent, AEXClient, A2A, AP2) +│ │ ├── legal-agent-a/ # Budget tier ($5 + $2/page) +│ │ ├── legal-agent-b/ # Standard tier ($15 + $0.50/page) +│ │ ├── legal-agent-c/ # Premium tier ($30 + $0.20/page) +│ │ ├── payment-legalpay/ # Payment provider (1% fee) +│ │ ├── payment-contractpay/ # Payment provider (0.5% cashback) +│ │ ├── payment-compliancepay/ # Payment provider (1% cashback) +│ │ └── orchestrator/ # Consumer orchestrator +│ ├── ui/ # NiceGUI + Mesop UIs +│ └── docker-compose.yml +│ +├── code_review/ # Code Review Demo +│ ├── agents/ +│ │ ├── common -> ../../aex/agents/common # Symlink to shared framework +│ │ ├── code-reviewer-a/ # QuickReview ($3 + $1/file) +│ │ ├── code-reviewer-b/ # CodeGuard ($10 + $3/file) +│ │ ├── code-reviewer-c/ # ArchitectAI ($25 + $5/file) +│ │ ├── payment-devpay/ # DevPay (2% fee / 1% reward) +│ │ ├── payment-codeauditpay/ # CodeAuditPay (2.5% / 3% cashback) +│ │ ├── payment-securitypay/ # SecurityPay (3% / 4% cashback) +│ │ └── orchestrator/ # Code review orchestrator +│ ├── ui/ # NiceGUI UI +│ └── docker-compose.yml +│ +├── moltbot_integration/ # Moltbot Payment Flow Demo +│ +└── README.md # This file ``` +## Creating a New Demo + +The agent framework is domain-agnostic. To create a new demo in any domain: + +1. `cp -r demo/code_review demo/your_domain` +2. Update agent system prompts in each `agent.py` +3. Update skills and pricing in each `config.yaml` +4. Update payment agent reward categories +5. Update `docker-compose.yml` service names +6. 
Update `ui/nicegui_app.py` sample inputs + +The shared `common/` library (BaseAgent, BasePaymentAgent, AEXClient, A2AServer) handles all AEX registration, bidding, A2A execution, and AP2 payment flows automatically. + ## Troubleshooting ### Services Not Starting @@ -340,33 +392,35 @@ docker logs aex-settlement | grep -i ap2 ## Development -### Adding a New Legal Agent +### Adding a New Agent to an Existing Demo -1. Copy existing agent: `cp -r agents/legal-agent-a agents/legal-agent-d` -2. Update `config.yaml` with new pricing/capabilities -3. Add to `docker-compose.yml` -4. Agent auto-registers with AEX on startup +1. Copy existing agent: `cp -r agents/code-reviewer-a agents/code-reviewer-d` +2. Update `agent.py` with new system prompt and behavior +3. Update `config.yaml` with new pricing/capabilities +4. Add to `docker-compose.yml` +5. Agent auto-registers with AEX on startup ### Adding a New Payment Agent -1. Copy existing: `cp -r agents/payment-legalpay agents/payment-newpay` -2. Update `config.yaml` with fee structure -3. Add to `docker-compose.yml` -4. Ensure capabilities include `payment` +1. Copy existing: `cp -r agents/payment-devpay agents/payment-newpay` +2. Update `agent.py` with fee structure and reward categories +3. Update `config.yaml` with capabilities +4. Add to `docker-compose.yml` +5. Ensure capabilities include `payment` ### Running Locally (Development) ```bash -# Terminal 1: Start AEX services +# Terminal 1: Start AEX core services cd .. 
&& make docker-up -# Terminal 2: Start a legal agent -cd agents/legal-agent-a +# Terminal 2: Start an agent +cd demo/code_review/agents/code-reviewer-a pip install -r requirements.txt python main.py # Terminal 3: Start the NiceGUI UI -cd ui +cd demo/code_review/ui pip install -r requirements.txt python nicegui_app.py ``` @@ -385,8 +439,30 @@ python nicegui_app.py - **Mandates**: Intent -> Cart -> Payment -> Receipt - **Extension URI**: `https://github.com/google-agentic-commerce/ap2/v1` +## Deployment + +### Local (Docker Compose) +Each demo directory has its own `docker-compose.yml`. + +### Kubernetes +K8s manifests at [`deploy/k8s/`](../deploy/k8s/) with Kustomize overlays for dev/staging/production. + +```bash +# Kind (local K8s) +kind create cluster --config deploy/k8s/kind-config.yaml +kubectl apply -k deploy/k8s/overlays/dev/ +``` + +### Cloud +- **AWS EKS**: [`deploy/aws/deploy-eks.sh`](../deploy/aws/deploy-eks.sh) +- **GCP GKE**: [`deploy/gcp/deploy-gke.sh`](../deploy/gcp/deploy-gke.sh) +- **AWS ECS**: [`deploy/aws/deploy.sh`](../deploy/aws/deploy.sh) +- **GCP Cloud Run**: [`deploy/gcp/deploy.sh`](../deploy/gcp/deploy.sh) + ## Related Documentation - [AEX A2A Integration](../docs/a2a-integration/) - [AP2 Integration](../docs/AP2_INTEGRATION.md) +- [Kubernetes Deployment](../deploy/k8s/README.md) - [AWS Deployment](../deploy/aws/README.md) +- [GCP Deployment](../deploy/gcp/README.md) diff --git a/demo/aex/.env.example b/demo/aex/.env.example index 79e0a10..bc17e94 100644 --- a/demo/aex/.env.example +++ b/demo/aex/.env.example @@ -15,3 +15,12 @@ LEGAL_AGENT_B_PORT=8101 LEGAL_AGENT_C_PORT=8102 ORCHESTRATOR_PORT=8103 DEMO_UI_PORT=8501 + +# Payment Agent Ports (AP2 Protocol) +# Payment agents compete for transactions with different fee/reward structures: +# LegalPay: 2.0% fee / 1.0% reward (general legal payments) +# ContractPay: 2.5% fee / 3.0% reward (CASHBACK on contracts!) +# CompliancePay: 3.0% fee / 4.0% reward (CASHBACK on compliance!) 
+LEGALPAY_PORT=8200 +CONTRACTPAY_PORT=8201 +COMPLIANCEPAY_PORT=8202 diff --git a/demo/code_review/.env.example b/demo/code_review/.env.example new file mode 100644 index 0000000..eba9f62 --- /dev/null +++ b/demo/code_review/.env.example @@ -0,0 +1,26 @@ +# API Key for LLM provider +# Claude - used by all agents (A, B, C) and Orchestrator +ANTHROPIC_API_KEY=sk-ant-api03-... + +# AEX Configuration +AEX_GATEWAY_URL=http://localhost:8080 + +# Agent Ports (optional, defaults in config.yaml) +# Code Review Agents with tiered pricing: +# A (QuickReview): $3 base + $1/file - Budget, fast basic review +# B (CodeGuard): $10 base + $3/file - Standard, security-focused +# C (ArchitectAI): $25 base + $5/file - Premium, deep architecture review +CODE_REVIEWER_A_PORT=8100 +CODE_REVIEWER_B_PORT=8101 +CODE_REVIEWER_C_PORT=8102 +ORCHESTRATOR_PORT=8103 +DEMO_UI_PORT=8502 + +# Payment Agent Ports (AP2 Protocol) +# Payment agents compete for transactions with different fee/reward structures: +# DevPay: 2.0% fee / 1.0% reward (general dev payments) +# CodeAuditPay: 2.5% fee / 3.0% reward (CASHBACK on code reviews!) +# SecurityPay: 3.0% fee / 4.0% reward (CASHBACK on security audits!) +DEVPAY_PORT=8200 +CODEAUDITPAY_PORT=8201 +SECURITYPAY_PORT=8202 diff --git a/demo/code_review/README.md b/demo/code_review/README.md new file mode 100644 index 0000000..ade9478 --- /dev/null +++ b/demo/code_review/README.md @@ -0,0 +1,71 @@ +# AEX Code Review Demo + +Claude-powered code review agents competing through the Agent Exchange marketplace with AP2 payment settlement. 
+ +## Architecture + +``` +User pastes code via NiceGUI UI (:8502) + -> Orchestrator submits work to AEX + -> AEX broadcasts to 3 code review agents + -> QuickReview AI (:8100) - Budget: $3 + $1/file, fast basic review + -> CodeGuard AI (:8101) - Standard: $10 + $3/file, security-focused + -> ArchitectAI (:8102) - Premium: $25 + $5/file, deep architecture review + -> AEX evaluates bids, awards contract + -> Winner executes review via A2A (Claude API) + -> AP2 Payment flow with 3 payment agents: + -> DevPay (:8200) - 2% fee / 1% reward + -> CodeAuditPay (:8201) - 2.5% fee / 3% reward (CASHBACK on code review!) + -> SecurityPay (:8202) - 3% fee / 4% reward (CASHBACK on security audits!) + -> Settlement: 15% platform fee, provider payout +``` + +## Quick Start + +```bash +# 1. Set your Anthropic API key +cp .env.example .env +# Edit .env and add your ANTHROPIC_API_KEY + +# 2. Build and run +docker compose up --build + +# 3. Open the UI +open http://localhost:8502 +``` + +## Services + +| Service | Port | Description | +|---------|------|-------------| +| AEX Gateway | 8080 | API Gateway | +| Work Publisher | 8081 | Work spec management | +| Bid Gateway | 8082 | Bid collection | +| Bid Evaluator | 8083 | Bid scoring | +| Contract Engine | 8084 | Contract lifecycle | +| Provider Registry | 8085 | Agent discovery | +| Trust Broker | 8086 | Trust scores | +| Identity | 8087 | Auth & keys | +| Settlement | 8088 | Billing + AP2 | +| Telemetry | 8089 | Metrics | +| Credentials Provider | 8090 | AP2 credentials | +| **QuickReview AI** | **8100** | Budget code review | +| **CodeGuard AI** | **8101** | Security-focused review | +| **ArchitectAI** | **8102** | Architecture review | +| **Orchestrator** | **8103** | Workflow coordination | +| **DevPay** | **8200** | General dev payments | +| **CodeAuditPay** | **8201** | Code audit payments | +| **SecurityPay** | **8202** | Security payments | +| **NiceGUI UI** | **8502** | Web dashboard | + +## Usage + +1. 
Open http://localhost:8502 +2. Select a sample code snippet or paste your own +3. Choose a bid strategy (balanced / lowest_price / best_quality) +4. Click "Run Code Review" +5. Watch the 7-step workflow: Bids -> Evaluate -> Award -> Execute -> AP2 Select -> AP2 Pay -> Settle + +## Without API Key + +The demo works without an Anthropic API key — agents will return mock responses. Set `ANTHROPIC_API_KEY` in `.env` for real Claude-powered reviews. diff --git a/demo/code_review/agents/Dockerfile b/demo/code_review/agents/Dockerfile new file mode 100644 index 0000000..6af78ff --- /dev/null +++ b/demo/code_review/agents/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.11-slim + +ARG AGENT_DIR + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Copy common utilities first +COPY common/ /app/common/ + +# Install common dependencies first +RUN pip install --no-cache-dir -r /app/common/requirements.txt + +# Copy agent-specific files +COPY ${AGENT_DIR}/ /app/ + +# Install agent-specific dependencies (skip the -r ../common/requirements.txt line) +RUN grep -v "^-r " requirements.txt > agent_requirements.txt || true && \ + pip install --no-cache-dir -r agent_requirements.txt || true + +# Run the agent +CMD ["python", "main.py"] diff --git a/demo/code_review/agents/code-reviewer-a/agent.py b/demo/code_review/agents/code-reviewer-a/agent.py new file mode 100644 index 0000000..84e490a --- /dev/null +++ b/demo/code_review/agents/code-reviewer-a/agent.py @@ -0,0 +1,155 @@ +"""Code Reviewer A (Budget) - Fast, affordable code review using Claude.""" + +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Optional + +from langchain_anthropic import ChatAnthropic +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.graph import StateGraph, END + +import sys +sys.path.insert(0, 
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.base_agent import BaseAgent, AgentState +from common.config import AgentConfig + +logger = logging.getLogger(__name__) + +# Budget tier prompts - concise and fast +CODE_REVIEW_PROMPT = """You are a quick code reviewer providing CONCISE feedback. +Keep responses SHORT and focused on the most critical issues only. + +Provide: +1. 3-5 key issues (one line each) +2. Top 3 action items +3. Overall code quality rating (Poor/Fair/Good/Excellent) + +Be direct. No lengthy explanations. Speed is priority.""" + +LINTING_PROMPT = """You are a code linter providing QUICK style and formatting feedback. +Keep responses SHORT and actionable. + +Provide: +1. Style violations (bullet points) +2. Naming convention issues +3. Immediate fixes needed + +Be concise. No detailed explanations.""" + + +@dataclass +class CodeReviewerA(BaseAgent): + """Budget Code Reviewer using Claude for fast, affordable analysis.""" + + llm: Optional[ChatAnthropic] = field(default=None, init=False) + + def _setup_llm(self): + """Initialize Claude LLM.""" + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + logger.warning("ANTHROPIC_API_KEY not set, using mock responses") + self.llm = None + return + + self.llm = ChatAnthropic( + model=self.config.llm.model, + temperature=self.config.llm.temperature, + max_tokens=self.config.llm.max_tokens, + anthropic_api_key=api_key, + ) + logger.info(f"Initialized Claude LLM (Budget): {self.config.llm.model}") + + def _build_graph(self): + """Build the LangGraph workflow.""" + self._graph = StateGraph(AgentState) + + def _detect_skill(self, content: str) -> str: + """Detect which skill to use based on content.""" + content_lower = content.lower() + lint_keywords = ["lint", "style", "format", "naming", "convention", "pep8", "eslint"] + if any(kw in content_lower for kw in lint_keywords): + return "linting" + return "code_review" + + async def process(self, state: AgentState) -> 
AgentState: + """Process the code review request through Claude (fast mode).""" + messages = state["messages"] + if not messages: + state["result"] = "No message provided." + return state + + user_content = messages[-1].get("content", "") + skill = self._detect_skill(user_content) + + system_prompt = ( + CODE_REVIEW_PROMPT if skill == "code_review" + else LINTING_PROMPT + ) + + if self.llm is None: + state["result"] = self._mock_response(skill, user_content) + state["artifacts"] = [{ + "name": f"{skill}_quick_analysis.txt", + "parts": [{"type": "text", "text": state["result"]}], + }] + return state + + try: + response = await self.llm.ainvoke([ + SystemMessage(content=system_prompt), + HumanMessage(content=user_content), + ]) + + result = response.content + state["result"] = result + state["artifacts"] = [{ + "name": f"{skill}_quick_analysis.txt", + "parts": [{"type": "text", "text": result}], + }] + + except Exception as e: + logger.exception(f"Error calling Claude: {e}") + state["result"] = f"Error processing request: {str(e)}" + + return state + + def _mock_response(self, skill: str, content: str) -> str: + """Generate mock response for testing (budget tier - concise).""" + if skill == "code_review": + return """## Quick Code Review + +**Code Quality: FAIR** + +### Key Issues: +- Missing error handling in main function +- Variable naming inconsistent (camelCase vs snake_case) +- No input validation on user parameters +- Hardcoded configuration values +- Missing type hints on public functions + +### Action Items: +1. Add try/except blocks around I/O operations +2. Standardize naming convention to snake_case +3. 
Extract config values to environment variables + +*Budget review - $5 | ~2 min*""" + else: + return """## Quick Lint Report + +### Style Violations: +- Line 12: line too long (120 > 88 chars) +- Line 25: missing blank line after function +- Line 38: trailing whitespace + +### Naming Issues: +- `getData` should be `get_data` (PEP 8) +- `processItem` should be `process_item` + +### Fixes Needed: +1. Run black formatter +2. Fix variable names +3. Add missing docstrings + +*Budget lint - $3 | ~1 min*""" diff --git a/demo/code_review/agents/code-reviewer-a/config.yaml b/demo/code_review/agents/code-reviewer-a/config.yaml new file mode 100644 index 0000000..f69301d --- /dev/null +++ b/demo/code_review/agents/code-reviewer-a/config.yaml @@ -0,0 +1,57 @@ +agent: + name: "QuickReview AI" + description: "Fast, affordable code review - basic analysis at low cost" + version: "1.0.0" + provider: + organization: "QuickReview Inc" + url: "https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8100 + +llm: + provider: "anthropic" + model: "claude-sonnet-4-20250514" + temperature: 0.3 + max_tokens: 2048 + +characteristics: + tier: "budget" + response_style: "concise" + detail_level: "basic" + turnaround: "fast" + +skills: + - id: "code_review" + name: "Code Review" + description: "Quick code review highlighting major issues" + tags: ["code", "review", "development"] + examples: + - "Review this code quickly" + - "What are the main issues?" 
+ + - id: "linting" + name: "Code Linting" + description: "Basic style and formatting checks" + tags: ["code", "lint", "style"] + examples: + - "Check code style" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: true + auto_bid: true + trust_tier: "VERIFIED" + trust_score: 0.70 + pricing: + base_rate: 3.00 + per_page_rate: 1.00 + max_pages_optimal: 5 + currency: "USD" + description: "Best for quick reviews of small code files (1-5 files)" + bidding: + confidence: 0.75 + estimated_time_minutes: 2 + max_document_pages: 10 diff --git a/demo/code_review/agents/code-reviewer-a/main.py b/demo/code_review/agents/code-reviewer-a/main.py new file mode 100644 index 0000000..ca59262 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-a/main.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +"""Code Reviewer A - Main entry point.""" + +import asyncio +import logging +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.a2a_server import A2AServer +from common.config import load_config +from agent import CodeReviewerA + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +async def main(): + """Start the Code Reviewer A server.""" + config_path = os.environ.get("CONFIG_PATH", "config.yaml") + config = load_config(config_path) + + logger.info(f"Starting {config.name}") + logger.info(f"Skills: {[s.id for s in config.skills]}") + + agent = CodeReviewerA(config=config) + + hostname = os.environ.get("AGENT_HOSTNAME", "localhost") + base_url = f"http://{hostname}:{config.server.port}" + + agent_card = config.get_agent_card(base_url) + + server = A2AServer( + agent_card=agent_card, + handler=agent, + require_auth=False, + ) + + if config.aex.enabled and config.aex.auto_register: + try: + await agent.register_with_aex(base_url) + except Exception as e: + logger.warning(f"Could not 
register with AEX: {e}") + + logger.info(f"Agent Card: {base_url}/.well-known/agent-card.json") + logger.info(f"A2A Endpoint: {base_url}/a2a") + await server.run_async(host=config.server.host, port=config.server.port) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/demo/code_review/agents/code-reviewer-a/requirements.txt b/demo/code_review/agents/code-reviewer-a/requirements.txt new file mode 100644 index 0000000..5ea8624 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-a/requirements.txt @@ -0,0 +1,5 @@ +# Code Reviewer A (Budget) dependencies +-r ../common/requirements.txt + +# Uses Claude via langchain-anthropic (included in common) +anthropic>=0.34.0 diff --git a/demo/code_review/agents/code-reviewer-b/agent.py b/demo/code_review/agents/code-reviewer-b/agent.py new file mode 100644 index 0000000..ac76032 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-b/agent.py @@ -0,0 +1,268 @@ +"""Code Reviewer B (Standard) - Security-focused code review using Claude.""" + +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Optional + +from langchain_anthropic import ChatAnthropic +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.graph import StateGraph, END + +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.base_agent import BaseAgent, AgentState +from common.config import AgentConfig + +logger = logging.getLogger(__name__) + +# Standard tier prompts - balanced detail with security focus +CODE_REVIEW_PROMPT = """You are an experienced code reviewer with a strong security background. + +Provide a COMPREHENSIVE review including: + +1. **Executive Summary** (2-3 sentences) +2. **Code Quality Assessment** + - Readability and maintainability + - Error handling coverage + - Test coverage gaps +3. 
**Security Analysis** + - Input validation issues + - Authentication/authorization flaws + - Data exposure risks + - Injection vulnerabilities +4. **Bug Detection** + - Logic errors + - Edge cases not handled + - Race conditions + - Memory/resource leaks +5. **Recommendations** + - Prioritized fixes (Critical/High/Medium/Low) + - Suggested improvements + - Security hardening steps + +Be thorough but organized. Use tables where helpful.""" + +SECURITY_AUDIT_PROMPT = """You are a security specialist performing a detailed code security audit. + +Provide a COMPREHENSIVE security assessment: + +1. **Vulnerability Summary** + - Critical vulnerabilities found + - OWASP Top 10 mapping + - CVSS severity scores +2. **Injection Analysis** + - SQL injection vectors + - XSS vulnerabilities + - Command injection risks + - Path traversal issues +3. **Authentication & Authorization** + - Auth bypass possibilities + - Session management flaws + - Privilege escalation risks +4. **Data Security** + - Sensitive data exposure + - Encryption weaknesses + - Logging of secrets +5. 
**Remediation Plan** + - Prioritized fix list + - Code examples for fixes + - Timeline recommendations + +Be specific about which lines and functions are affected.""" + + +@dataclass +class CodeReviewerB(BaseAgent): + """Standard Code Reviewer using Claude for security-focused analysis.""" + + llm: Optional[ChatAnthropic] = field(default=None, init=False) + + def _setup_llm(self): + """Initialize Claude LLM.""" + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + logger.warning("ANTHROPIC_API_KEY not set, using mock responses") + self.llm = None + return + + self.llm = ChatAnthropic( + model=self.config.llm.model, + temperature=self.config.llm.temperature, + max_tokens=self.config.llm.max_tokens, + api_key=api_key, + ) + logger.info(f"Initialized Claude LLM (Standard): {self.config.llm.model}") + + def _build_graph(self): + """Build the LangGraph workflow.""" + self._graph = StateGraph(AgentState) + + def _detect_skill(self, content: str) -> str: + """Detect which skill to use based on content.""" + content_lower = content.lower() + + security_keywords = [ + "security", "vulnerability", "exploit", "injection", + "xss", "csrf", "auth", "owasp", "audit", "penetration", + "cve", "attack", "threat" + ] + if any(kw in content_lower for kw in security_keywords): + return "security_audit" + + return "code_review" + + async def process(self, state: AgentState) -> AgentState: + """Process the code review request through Claude (standard mode).""" + messages = state["messages"] + if not messages: + state["result"] = "No message provided." 
+ return state + + user_content = messages[-1].get("content", "") + skill = self._detect_skill(user_content) + + prompts = { + "code_review": CODE_REVIEW_PROMPT, + "security_audit": SECURITY_AUDIT_PROMPT, + } + system_prompt = prompts.get(skill, CODE_REVIEW_PROMPT) + + if self.llm is None: + state["result"] = self._mock_response(skill, user_content) + state["artifacts"] = [{ + "name": f"{skill}_standard_report.txt", + "parts": [{"type": "text", "text": state["result"]}], + }] + return state + + try: + response = await self.llm.ainvoke([ + SystemMessage(content=system_prompt), + HumanMessage(content=user_content), + ]) + + result = response.content + state["result"] = result + state["artifacts"] = [{ + "name": f"{skill}_standard_report.txt", + "parts": [{"type": "text", "text": result}], + }] + + except Exception as e: + logger.exception(f"Error calling Claude: {e}") + state["result"] = f"Error processing request: {str(e)}" + + return state + + def _mock_response(self, skill: str, content: str) -> str: + """Generate mock response for testing (standard tier - security focused).""" + if skill == "code_review": + return """## Code Review Report (Security Focus) + +### Executive Summary +This codebase has several security concerns that need immediate attention. While the general code quality is acceptable, input validation and error handling gaps create exploitable attack vectors. 
+ +### Code Quality Assessment + +| Aspect | Rating | Notes | +|--------|--------|-------| +| **Readability** | Good | Clear naming, decent structure | +| **Maintainability** | Fair | Some functions too long (>50 lines) | +| **Error Handling** | Poor | Missing try/catch in 3 critical paths | +| **Test Coverage** | Fair | ~60% coverage, no security tests | + +### Security Analysis + +| Vulnerability | Severity | Location | OWASP Category | +|--------------|----------|----------|----------------| +| SQL Injection | **Critical** | `db.query()` line 45 | A03:2021 | +| XSS (Reflected) | **High** | `render()` line 78 | A03:2021 | +| Hardcoded Secret | **High** | `config.py` line 12 | A02:2021 | +| Missing Auth Check | **Medium** | `api/admin.py` line 30 | A01:2021 | +| Verbose Error Messages | **Low** | `handler.py` line 92 | A09:2021 | + +### Bug Detection + +- **Logic Error**: Off-by-one in pagination (`page * size` should be `(page-1) * size`) +- **Edge Case**: Null user input crashes `process_data()` at line 67 +- **Race Condition**: Concurrent writes to shared cache without locking +- **Resource Leak**: Database connection not closed in error path (line 52) + +### Recommendations + +**Critical (Fix Immediately):** +1. Parameterize all SQL queries - use prepared statements +2. Sanitize user input before rendering HTML output +3. Move secrets to environment variables + +**High (Fix This Sprint):** +4. Add authentication middleware to admin endpoints +5. Close DB connections in finally blocks + +**Medium (Fix Next Sprint):** +6. Add input validation on all public API endpoints +7. 
Implement rate limiting on authentication endpoints + +*Standard review - $16 | ~5 min*""" + else: + return """## Security Audit Report + +### Vulnerability Summary + +**Overall Security Posture: HIGH RISK** + +| Severity | Count | Status | +|----------|-------|--------| +| Critical | 2 | Requires immediate fix | +| High | 3 | Fix within 48 hours | +| Medium | 4 | Fix within 1 week | +| Low | 2 | Fix in next release | + +### Injection Analysis + +#### SQL Injection (CRITICAL) +- **Location**: `models/user.py:45` - `db.query(f"SELECT * FROM users WHERE id={user_id}")` +- **Impact**: Full database access, data exfiltration +- **Fix**: Use parameterized queries: `db.query("SELECT * FROM users WHERE id=?", [user_id])` + +#### XSS - Cross-Site Scripting (HIGH) +- **Location**: `views/profile.py:78` - `return f"

<h1>Welcome {username}</h1>

"` +- **Impact**: Session hijacking, credential theft +- **Fix**: HTML-encode all user input: `html.escape(username)` + +#### Command Injection (HIGH) +- **Location**: `utils/file_handler.py:23` - `os.system(f"convert {filename}")` +- **Impact**: Arbitrary command execution on server +- **Fix**: Use `subprocess.run()` with shell=False and argument list + +### Authentication & Authorization + +| Issue | Risk | Details | +|-------|------|---------| +| No CSRF tokens | **High** | State-changing operations unprotected | +| Session fixation | **Medium** | Session ID not rotated after login | +| Weak password policy | **Medium** | No minimum length or complexity | +| Missing rate limiting | **Medium** | Brute force attacks possible | + +### Data Security + +- **Hardcoded API Key**: `config.py:12` - AWS key in source code +- **Sensitive Logging**: `auth.py:34` - Password logged in plaintext +- **No Encryption**: User PII stored in plaintext in database +- **Insecure Cookie**: Session cookie missing Secure and HttpOnly flags + +### Remediation Plan + +| Priority | Action | Effort | Deadline | +|----------|--------|--------|----------| +| 1 | Fix SQL injection vectors | 2 hours | Immediate | +| 2 | Implement input sanitization | 4 hours | Day 1 | +| 3 | Remove hardcoded secrets | 1 hour | Day 1 | +| 4 | Add CSRF protection | 3 hours | Day 2 | +| 5 | Implement rate limiting | 2 hours | Day 3 | +| 6 | Add encryption for PII | 4 hours | Week 1 | +| 7 | Security test suite | 8 hours | Week 2 | + +*Standard security audit - $16 | ~5 min*""" diff --git a/demo/code_review/agents/code-reviewer-b/config.yaml b/demo/code_review/agents/code-reviewer-b/config.yaml new file mode 100644 index 0000000..81cd613 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-b/config.yaml @@ -0,0 +1,59 @@ +agent: + name: "CodeGuard AI" + description: "Thorough code review with security focus - finds vulnerabilities and bugs" + version: "1.0.0" + provider: + organization: "CodeGuard Security" + 
url: "https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8101 + +llm: + provider: "anthropic" + model: "claude-sonnet-4-20250514" + temperature: 0.4 + max_tokens: 4096 + +# Agent characteristics for bidding +characteristics: + tier: "standard" + response_style: "detailed" + detail_level: "thorough" + turnaround: "moderate" + +skills: + - id: "code_review" + name: "Code Review" + description: "Comprehensive code review with security analysis and bug detection" + tags: ["code", "review", "security"] + examples: + - "Review this code for security issues" + - "Find bugs and vulnerabilities" + + - id: "security_audit" + name: "Security Audit" + description: "Detailed security vulnerability assessment with OWASP mapping" + tags: ["security", "audit", "vulnerability"] + examples: + - "Perform a security audit on this code" + - "Check for injection vulnerabilities" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: true + auto_bid: true + trust_tier: "TRUSTED" + trust_score: 0.85 + pricing: + base_rate: 10.00 + per_page_rate: 3.00 + max_pages_optimal: 15 + currency: "USD" + description: "Best for security-sensitive code reviews (5-15 files)" + bidding: + confidence: 0.85 + estimated_time_minutes: 5 + max_document_pages: 30 diff --git a/demo/code_review/agents/code-reviewer-b/main.py b/demo/code_review/agents/code-reviewer-b/main.py new file mode 100644 index 0000000..08dd639 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-b/main.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Code Reviewer B - Main entry point.""" + +import asyncio +import logging +import os +import sys + +# Add parent directory to path for common imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.a2a_server import A2AServer +from common.config import load_config +from agent import CodeReviewerB + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - 
%(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +async def main(): + """Start the Code Reviewer B server.""" + # Load configuration + config_path = os.environ.get("CONFIG_PATH", "config.yaml") + config = load_config(config_path) + + logger.info(f"Starting {config.name}") + logger.info(f"Skills: {[s.id for s in config.skills]}") + + # Create agent + agent = CodeReviewerB(config=config) + + # Generate agent card + # Use AGENT_HOSTNAME env var for Docker, fallback to localhost + hostname = os.environ.get("AGENT_HOSTNAME", "localhost") + base_url = f"http://{hostname}:{config.server.port}" + + agent_card = config.get_agent_card(base_url) + + # Create A2A server + server = A2AServer( + agent_card=agent_card, + handler=agent, + require_auth=False, # For demo, don't require auth + ) + + # Register with AEX if enabled + if config.aex.enabled and config.aex.auto_register: + try: + await agent.register_with_aex(base_url) + except Exception as e: + logger.warning(f"Could not register with AEX: {e}") + + # Run server + logger.info(f"Agent Card: {base_url}/.well-known/agent-card.json") + logger.info(f"A2A Endpoint: {base_url}/a2a") + await server.run_async(host=config.server.host, port=config.server.port) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/demo/code_review/agents/code-reviewer-b/requirements.txt b/demo/code_review/agents/code-reviewer-b/requirements.txt new file mode 100644 index 0000000..795dc55 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-b/requirements.txt @@ -0,0 +1,5 @@ +# Code Reviewer B (Standard) dependencies +-r ../common/requirements.txt + +# Uses Claude via langchain-anthropic (included in common) +anthropic>=0.34.0 diff --git a/demo/code_review/agents/code-reviewer-c/agent.py b/demo/code_review/agents/code-reviewer-c/agent.py new file mode 100644 index 0000000..925525f --- /dev/null +++ b/demo/code_review/agents/code-reviewer-c/agent.py @@ -0,0 +1,576 @@ +"""Code Reviewer C (Premium) - 
Expert-level architectural review using Claude.""" + +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Optional + +from langchain_anthropic import ChatAnthropic +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.graph import StateGraph, END + +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.base_agent import BaseAgent, AgentState +from common.config import AgentConfig + +logger = logging.getLogger(__name__) + +# Premium tier prompts - exhaustive expert analysis +CODE_REVIEW_PROMPT = """You are a principal software engineer at a top tech company providing EXHAUSTIVE code analysis. + +Deliver EXPERT-LEVEL review including: + +1. **Executive Summary** - Strategic overview for engineering leadership +2. **Architecture Assessment** + - System design evaluation + - Component coupling analysis + - Dependency graph concerns + - Scalability implications +3. **Code Quality Deep Dive** + - SOLID principles adherence + - DRY/KISS/YAGNI compliance + - Cyclomatic complexity hotspots + - Test coverage and quality +4. **Design Pattern Analysis** + - Patterns currently used + - Anti-patterns detected + - Recommended pattern improvements + - Pattern migration strategy +5. **Performance Analysis** + - Time complexity of critical paths + - Space complexity concerns + - Database query optimization + - Caching opportunities +6. **Maintainability Score** + - Technical debt assessment + - Documentation quality + - API design evaluation + - Breaking change risks +7. **Refactoring Roadmap** + - Prioritized refactoring items + - Effort estimates + - Risk/reward analysis + - Migration strategies +8. **Strategic Recommendations** + - Short-term improvements + - Long-term architecture evolution + - Team skill development areas + +This is staff-engineer-level analysis. Be exhaustive. 
Miss nothing.""" + +ARCHITECTURE_REVIEW_PROMPT = """You are a chief architect providing EXHAUSTIVE architectural analysis. + +Deliver EXPERT-LEVEL architecture assessment: + +1. **Executive Summary** - Board-level architecture status +2. **System Architecture Evaluation** + - Current architecture style (monolith/microservices/serverless) + - Component boundaries and cohesion + - Service communication patterns + - Data flow analysis +3. **Design Pattern Assessment** + - Patterns in use and their effectiveness + - Anti-patterns and technical debt + - Pattern recommendations with examples + - Migration path for pattern changes +4. **Scalability Analysis** + - Horizontal vs vertical scaling readiness + - Bottleneck identification + - Load distribution concerns + - Database scaling strategy +5. **Reliability & Resilience** + - Single points of failure + - Fault tolerance mechanisms + - Circuit breaker patterns + - Disaster recovery readiness +6. **Performance Architecture** + - Critical path analysis + - Caching strategy evaluation + - Async processing opportunities + - Resource utilization optimization +7. **Evolution Roadmap** + - Current state assessment + - Target architecture vision + - Migration phases with milestones + - Investment requirements + - Risk mitigation strategies +8. **Technology Stack Review** + - Current stack evaluation + - Upgrade recommendations + - Emerging technology opportunities + - Vendor lock-in assessment + +This is for architecture review board. 
Be exhaustive.""" + + +@dataclass +class CodeReviewerC(BaseAgent): + """Premium Code Reviewer using Claude for exhaustive, expert-level analysis.""" + + llm: Optional[ChatAnthropic] = field(default=None, init=False) + + def _setup_llm(self): + """Initialize Claude LLM.""" + api_key = os.environ.get("ANTHROPIC_API_KEY") + if not api_key: + logger.warning("ANTHROPIC_API_KEY not set, using mock responses") + self.llm = None + return + + self.llm = ChatAnthropic( + model=self.config.llm.model, + temperature=self.config.llm.temperature, + max_tokens=self.config.llm.max_tokens, + api_key=api_key, + ) + logger.info(f"Initialized Claude LLM (Premium): {self.config.llm.model}") + + def _build_graph(self): + """Build the LangGraph workflow.""" + self._graph = StateGraph(AgentState) + + def _detect_skill(self, content: str) -> str: + """Detect which skill to use based on content.""" + content_lower = content.lower() + + architecture_keywords = [ + "architecture", "design pattern", "refactor", "scalability", + "microservice", "monolith", "coupling", "cohesion", + "system design", "component", "module", "dependency" + ] + if any(kw in content_lower for kw in architecture_keywords): + return "architecture_review" + + return "code_review" + + async def process(self, state: AgentState) -> AgentState: + """Process the code review request through Claude (premium mode).""" + messages = state["messages"] + if not messages: + state["result"] = "No message provided." 
+ return state + + user_content = messages[-1].get("content", "") + skill = self._detect_skill(user_content) + + prompts = { + "code_review": CODE_REVIEW_PROMPT, + "architecture_review": ARCHITECTURE_REVIEW_PROMPT, + } + system_prompt = prompts.get(skill, CODE_REVIEW_PROMPT) + + if self.llm is None: + state["result"] = self._mock_response(skill, user_content) + state["artifacts"] = [{ + "name": f"{skill}_premium_report.txt", + "parts": [{"type": "text", "text": state["result"]}], + }] + return state + + try: + response = await self.llm.ainvoke([ + SystemMessage(content=system_prompt), + HumanMessage(content=user_content), + ]) + + result = response.content + state["result"] = result + state["artifacts"] = [{ + "name": f"{skill}_premium_report.txt", + "parts": [{"type": "text", "text": result}], + }] + + except Exception as e: + logger.exception(f"Error calling Claude: {e}") + state["result"] = f"Error processing request: {str(e)}" + + return state + + def _mock_response(self, skill: str, content: str) -> str: + """Generate mock response for testing (premium tier - exhaustive).""" + if skill == "code_review": + return """## Premium Code Analysis Report +### Prepared by ArchitectAI Labs + +--- + +## 1. Executive Summary + +This codebase exhibits a **moderate architectural maturity level** with significant opportunities for improvement. The primary concerns are tight coupling between modules, inconsistent application of design patterns, and accumulating technical debt in the data layer. **Recommendation: Prioritize refactoring before adding new features.** + +--- + +## 2. 
Architecture Assessment + +### 2.1 Component Coupling Analysis + +| Component Pair | Coupling Type | Severity | Impact | +|---------------|---------------|----------|--------| +| UserService <-> Database | Tight (direct SQL) | **High** | Hard to test, migrate | +| Controller <-> BusinessLogic | Moderate | **Medium** | Mixed concerns | +| API Layer <-> Validation | Loose | **Low** | Well-structured | +| Config <-> All Modules | Global state | **High** | Testing nightmare | + +### 2.2 Dependency Graph Concerns +- **Circular dependency**: `auth` -> `user` -> `permissions` -> `auth` +- **God object**: `AppContext` class has 23 methods, 15 dependencies +- **Missing abstraction**: Direct database calls in 4 controller files + +### 2.3 Scalability Implications +- Current architecture supports ~1000 concurrent users +- Database layer is the primary bottleneck (no connection pooling) +- Stateful session management prevents horizontal scaling + +--- + +## 3. Code Quality Deep Dive + +### 3.1 SOLID Principles Adherence + +| Principle | Score | Issues Found | +|-----------|-------|-------------| +| **S**ingle Responsibility | 4/10 | UserService handles auth + profile + notifications | +| **O**pen/Closed | 6/10 | Some extension points, but many switch statements | +| **L**iskov Substitution | 8/10 | Good interface compliance | +| **I**nterface Segregation | 5/10 | Fat interfaces in data layer | +| **D**ependency Inversion | 3/10 | Concrete dependencies everywhere | + +### 3.2 Complexity Hotspots + +| Function | Cyclomatic Complexity | Risk | Recommendation | +|----------|----------------------|------|----------------| +| `process_order()` | 24 | **Critical** | Split into 4-5 functions | +| `validate_input()` | 18 | **High** | Use strategy pattern | +| `generate_report()` | 15 | **High** | Extract report builders | +| `handle_request()` | 12 | **Medium** | Simplify branching | + +--- + +## 4. 
Design Pattern Analysis + +### 4.1 Patterns Currently Used +- **Repository Pattern**: Partially implemented (3 of 8 models) +- **MVC**: Present but controller layer is bloated +- **Singleton**: Overused (config, logger, cache, db - all singletons) + +### 4.2 Anti-Patterns Detected + +| Anti-Pattern | Location | Severity | Fix | +|-------------|----------|----------|-----| +| God Class | `AppContext` | **Critical** | Decompose into focused services | +| Spaghetti Code | `process_order()` | **High** | Apply chain of responsibility | +| Magic Numbers | Throughout | **Medium** | Extract to named constants | +| Copy-Paste Code | `validators/` | **Medium** | Create base validator class | +| Premature Optimization | `cache_layer.py` | **Low** | Remove unused cache logic | + +### 4.3 Recommended Pattern Improvements +1. **Strategy Pattern** for validation logic (eliminate switch statements) +2. **Factory Pattern** for service instantiation (remove manual wiring) +3. **Observer Pattern** for event handling (decouple notification system) +4. **Repository Pattern** completion for all data models + +--- + +## 5. 
Performance Analysis + +### 5.1 Critical Path Analysis + +| Operation | Current Latency | Bottleneck | Optimized Target | +|-----------|----------------|------------|-----------------| +| User login | 450ms | Password hashing + DB | 120ms | +| List items | 800ms | N+1 query problem | 150ms | +| Generate report | 3.2s | Sequential processing | 800ms | +| File upload | 2.1s | Synchronous processing | 200ms (async) | + +### 5.2 Database Query Issues +- **N+1 Queries**: 6 endpoints fetch related data in loops +- **Missing Indexes**: `orders.user_id`, `products.category_id` not indexed +- **Full Table Scans**: Search endpoint scans entire products table +- **No Connection Pooling**: New connection per request (~50ms overhead) + +### 5.3 Caching Opportunities +- User session data: **Save ~200ms/request** with Redis +- Product catalog: **Save ~500ms** with 5-minute TTL cache +- Report generation: **Save ~2s** with pre-computation + +--- + +## 6. Maintainability Score + +### Overall: 42/100 (Needs Improvement) + +| Dimension | Score | Details | +|-----------|-------|---------| +| Code readability | 55/100 | Inconsistent style, missing docs | +| Test quality | 35/100 | Low coverage, no integration tests | +| Documentation | 30/100 | Outdated README, no API docs | +| API design | 50/100 | Inconsistent naming, missing versioning | +| Technical debt | 40/100 | Significant accumulated debt | + +### Technical Debt Inventory + +| Item | Effort | Business Impact | Priority | +|------|--------|----------------|----------| +| Database abstraction | 3 weeks | Enables migration | **P1** | +| Test suite expansion | 2 weeks | Reduces bug rate 40% | **P1** | +| Service decomposition | 4 weeks | Enables scaling | **P2** | +| API versioning | 1 week | Client stability | **P2** | +| Documentation overhaul | 1 week | Onboarding speed 2x | **P3** | + +--- + +## 7. Refactoring Roadmap + +### Phase 1: Foundation (Weeks 1-2) - Risk: Low +1. Add dependency injection container +2. 
Extract configuration to typed config classes +3. Standardize error handling with custom exceptions +4. Add comprehensive logging + +### Phase 2: Data Layer (Weeks 3-4) - Risk: Medium +5. Complete Repository Pattern for all models +6. Add connection pooling +7. Fix N+1 queries with eager loading +8. Add database migrations framework + +### Phase 3: Service Layer (Weeks 5-8) - Risk: Medium +9. Decompose `AppContext` into focused services +10. Apply Strategy Pattern to validators +11. Implement event-driven notifications +12. Add caching layer with Redis + +### Phase 4: API & Testing (Weeks 9-10) - Risk: Low +13. Add API versioning +14. Write integration test suite +15. Add OpenAPI documentation +16. Implement health check endpoints + +--- + +## 8. Strategic Recommendations + +### Short-Term (This Quarter) +- Fix critical performance bottlenecks (N+1 queries, connection pooling) +- Add dependency injection to enable proper testing +- Establish coding standards and automated linting + +### Long-Term (Next 2 Quarters) +- Migrate to hexagonal architecture for better testability +- Evaluate microservices extraction for scaling-critical components +- Implement CI/CD pipeline with automated quality gates + +### Team Development +- Design patterns workshop (focus on SOLID) +- Code review culture improvement +- Architecture decision records (ADR) practice + +--- + +*Premium analysis - $30 | ~10 min* +*Prepared by ArchitectAI Labs - Confidential*""" + else: + return """## Premium Architecture Review Report +### Prepared by ArchitectAI Labs + +--- + +## 1. Executive Summary + +**Architecture Maturity: Level 2 of 5 (Developing)** + +The current system architecture is a tightly-coupled monolith with emerging microservice aspirations. Key risks include single points of failure in the data layer, lack of service boundaries, and insufficient resilience patterns. A phased migration strategy is recommended. + +--- + +## 2. 
System Architecture Evaluation + +### 2.1 Current Architecture Style +- **Primary**: Monolithic with layered architecture +- **Emerging**: Some service extraction attempted (auth, notifications) +- **Data**: Single relational database, no event sourcing +- **Communication**: Synchronous HTTP only, no message queues + +### 2.2 Component Boundaries + +| Component | Cohesion | Coupling | Boundary Quality | +|-----------|----------|----------|-----------------| +| Auth Service | High | Medium | **Good** - well-defined API | +| User Module | Medium | High | **Poor** - leaky abstractions | +| Order Processing | Low | High | **Critical** - god module | +| Notification | High | Low | **Good** - loosely coupled | +| Reporting | Low | High | **Poor** - queries everything | + +### 2.3 Data Flow Analysis +``` +Client -> API Gateway -> Monolith -> Single Database + |-> Auth Service (extracted) + |-> Notification Service (extracted) + |-> [Everything else still coupled] +``` + +**Issues identified:** +- No API gateway pattern (direct monolith access) +- Database as integration point (shared tables between modules) +- Synchronous chain: one slow service blocks everything + +--- + +## 3. Design Pattern Assessment + +### 3.1 Pattern Effectiveness + +| Pattern | Implementation | Effectiveness | Recommendation | +|---------|---------------|---------------|----------------| +| MVC | Full | 60% | Migrate to hexagonal | +| Repository | Partial (3/8) | 40% | Complete implementation | +| Singleton | Overused | 30% | Replace with DI container | +| Observer | None | N/A | Add for event handling | +| CQRS | None | N/A | Add for read-heavy paths | + +### 3.2 Recommended Architecture Patterns + +**1. Hexagonal Architecture (Ports & Adapters)** +- Decouple business logic from infrastructure +- Enable testing without database/external services +- Clear dependency direction (inward only) + +**2. 
Event-Driven Architecture** +- Decouple service communication +- Enable eventual consistency +- Support audit trail and replay + +**3. Strangler Fig Pattern (for migration)** +- Incrementally extract services from monolith +- Route traffic between old and new implementations +- Zero-downtime migration + +--- + +## 4. Scalability Analysis + +### 4.1 Current Capacity Limits + +| Resource | Current Max | Scaling Type | Bottleneck | +|----------|-------------|-------------|------------| +| Concurrent Users | ~1,000 | Vertical only | DB connections | +| Requests/sec | ~200 | N/A | CPU-bound processing | +| Data Volume | ~50GB | Single node | No sharding | +| File Storage | ~10GB | Local disk | No CDN/S3 | + +### 4.2 Scaling Readiness Assessment + +| Criteria | Ready? | Blocker | +|----------|--------|---------| +| Horizontal scaling | No | Stateful sessions, local file storage | +| Database scaling | No | No read replicas, no sharding strategy | +| Auto-scaling | No | No containerization, no metrics | +| Geographic distribution | No | Single region, no CDN | + +### 4.3 Recommended Scaling Strategy +1. **Immediate**: Add connection pooling + read replicas +2. **Short-term**: Containerize with Kubernetes, externalize state +3. **Medium-term**: Extract hot-path services, add message queue +4. **Long-term**: Multi-region deployment with data partitioning + +--- + +## 5. 
Reliability & Resilience + +### 5.1 Single Points of Failure + +| SPOF | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Primary database | **Total outage** | Medium | Add failover replica | +| Application server | **Total outage** | Medium | Add load balancer + 2nd instance | +| Auth service | **Login blocked** | Low | Add circuit breaker + cache | +| File storage | **Data loss** | Low | Migrate to S3 with versioning | + +### 5.2 Missing Resilience Patterns +- No circuit breakers on external calls +- No retry logic with exponential backoff +- No bulkhead isolation between components +- No graceful degradation strategy +- No health check endpoints + +--- + +## 6. Performance Architecture + +### 6.1 Critical Path Optimization + +| Path | Current P95 | Target P95 | Strategy | +|------|------------|------------|----------| +| User authentication | 450ms | 100ms | Cache + async token | +| Product listing | 800ms | 150ms | CQRS + materialized view | +| Order placement | 1.2s | 300ms | Async processing + queue | +| Report generation | 5s | 500ms | Pre-computation + cache | + +### 6.2 Caching Architecture Recommendation +``` +Client -> CDN (static assets) + -> API Gateway (response cache, 30s TTL) + -> Application (Redis session + entity cache) + -> Database (query cache + read replica) +``` + +--- + +## 7. 
Evolution Roadmap + +### Phase 1: Stabilize (Months 1-2) +- Add health checks and monitoring (Prometheus + Grafana) +- Implement circuit breakers (Resilience4j / Polly) +- Add database connection pooling and read replica +- Containerize application (Docker) +- **Investment: $40K | Risk: Low** + +### Phase 2: Decouple (Months 3-4) +- Introduce message queue (RabbitMQ / SQS) +- Extract order processing to async pipeline +- Implement CQRS for read-heavy endpoints +- Add API gateway (Kong / AWS API Gateway) +- **Investment: $80K | Risk: Medium** + +### Phase 3: Scale (Months 5-8) +- Deploy to Kubernetes with auto-scaling +- Extract 2-3 bounded contexts to microservices +- Implement event sourcing for audit trail +- Add CDN and edge caching +- **Investment: $120K | Risk: Medium-High** + +### Phase 4: Optimize (Months 9-12) +- Multi-region deployment +- Advanced observability (distributed tracing) +- Performance tuning and load testing +- Chaos engineering practices +- **Investment: $60K | Risk: Low** + +### Total Investment: $300K over 12 months +### Expected ROI: 4x through reduced incidents, faster delivery, scaling capability + +--- + +## 8. 
Technology Stack Review + +### 8.1 Current Stack Assessment + +| Technology | Version | Status | Recommendation | +|------------|---------|--------|----------------| +| Python | 3.9 | Aging | Upgrade to 3.12 | +| Flask | 2.0 | Adequate | Consider FastAPI for new services | +| PostgreSQL | 13 | Adequate | Upgrade to 16, add replicas | +| Redis | None | Missing | Add for caching + sessions | +| Docker | None | Missing | Add for containerization | +| CI/CD | Basic | Incomplete | Add quality gates | + +### 8.2 Vendor Lock-in Assessment +- **Low risk**: Open-source stack, portable +- **Watch**: If moving to cloud-managed services, prefer abstractions +- **Recommendation**: Use infrastructure-as-code (Terraform) from day 1 + +--- + +*Premium architecture analysis - $30 | ~10 min* +*Prepared by ArchitectAI Labs - Confidential*""" diff --git a/demo/code_review/agents/code-reviewer-c/config.yaml b/demo/code_review/agents/code-reviewer-c/config.yaml new file mode 100644 index 0000000..d031796 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-c/config.yaml @@ -0,0 +1,59 @@ +agent: + name: "ArchitectAI" + description: "Deep architectural review with design pattern analysis and refactoring suggestions" + version: "1.0.0" + provider: + organization: "ArchitectAI Labs" + url: "https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8102 + +llm: + provider: "anthropic" + model: "claude-sonnet-4-20250514" + temperature: 0.5 + max_tokens: 8192 + +# Agent characteristics for bidding +characteristics: + tier: "premium" + response_style: "exhaustive" + detail_level: "expert" + turnaround: "thorough" + +skills: + - id: "code_review" + name: "Code Review" + description: "Expert-level code review with architecture assessment and design pattern analysis" + tags: ["code", "review", "architecture"] + examples: + - "Provide a deep code review with architecture assessment" + - "Analyze code quality, patterns, and refactoring opportunities" + + - id: 
"architecture_review" + name: "Architecture Review" + description: "Comprehensive architecture evaluation with scalability analysis and evolution roadmap" + tags: ["architecture", "patterns", "refactoring"] + examples: + - "Review the system architecture and suggest improvements" + - "Analyze design patterns and provide refactoring roadmap" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: true + auto_bid: true + trust_tier: "PREFERRED" + trust_score: 0.95 + pricing: + base_rate: 25.00 + per_page_rate: 5.00 + max_pages_optimal: 30 + currency: "USD" + description: "Best for comprehensive architecture reviews and large codebases (10-30+ files)" + bidding: + confidence: 0.95 + estimated_time_minutes: 10 + max_document_pages: 50 diff --git a/demo/code_review/agents/code-reviewer-c/main.py b/demo/code_review/agents/code-reviewer-c/main.py new file mode 100644 index 0000000..ec9cb0a --- /dev/null +++ b/demo/code_review/agents/code-reviewer-c/main.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +"""Code Reviewer C - Main entry point.""" + +import asyncio +import logging +import os +import sys + +# Add parent directory to path for common imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.a2a_server import A2AServer +from common.config import load_config +from agent import CodeReviewerC + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +async def main(): + """Start the Code Reviewer C server.""" + # Load configuration + config_path = os.environ.get("CONFIG_PATH", "config.yaml") + config = load_config(config_path) + + logger.info(f"Starting {config.name}") + logger.info(f"Skills: {[s.id for s in config.skills]}") + + # Create agent + agent = CodeReviewerC(config=config) + + # Generate agent card + # Use AGENT_HOSTNAME env var for Docker, fallback to localhost + hostname = 
os.environ.get("AGENT_HOSTNAME", "localhost") + base_url = f"http://{hostname}:{config.server.port}" + + agent_card = config.get_agent_card(base_url) + + # Create A2A server + server = A2AServer( + agent_card=agent_card, + handler=agent, + require_auth=False, # For demo, don't require auth + ) + + # Register with AEX if enabled + if config.aex.enabled and config.aex.auto_register: + try: + await agent.register_with_aex(base_url) + except Exception as e: + logger.warning(f"Could not register with AEX: {e}") + + # Run server + logger.info(f"Agent Card: {base_url}/.well-known/agent-card.json") + logger.info(f"A2A Endpoint: {base_url}/a2a") + await server.run_async(host=config.server.host, port=config.server.port) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/demo/code_review/agents/code-reviewer-c/requirements.txt b/demo/code_review/agents/code-reviewer-c/requirements.txt new file mode 100644 index 0000000..260fbc5 --- /dev/null +++ b/demo/code_review/agents/code-reviewer-c/requirements.txt @@ -0,0 +1,5 @@ +# Code Reviewer C (Premium) dependencies +-r ../common/requirements.txt + +# Uses Claude via langchain-anthropic (included in common) +anthropic>=0.34.0 diff --git a/demo/code_review/agents/common b/demo/code_review/agents/common new file mode 120000 index 0000000..c6a4c9a --- /dev/null +++ b/demo/code_review/agents/common @@ -0,0 +1 @@ +../../aex/agents/common \ No newline at end of file diff --git a/demo/code_review/agents/orchestrator/agent.py b/demo/code_review/agents/orchestrator/agent.py new file mode 100644 index 0000000..1b56180 --- /dev/null +++ b/demo/code_review/agents/orchestrator/agent.py @@ -0,0 +1,476 @@ +"""Orchestrator Agent - Task decomposition and multi-agent coordination for code review.""" + +import asyncio +import json +import logging +import os +from dataclasses import dataclass, field +from typing import Any, Optional +import aiohttp + +from langchain_anthropic import ChatAnthropic +from langchain_core.messages import 
HumanMessage, SystemMessage +from langgraph.graph import StateGraph, END + +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.base_agent import BaseAgent, AgentState +from common.config import AgentConfig +from common.aex_client import AEXClient + +logger = logging.getLogger(__name__) + +ORCHESTRATOR_PROMPT = """You are an intelligent orchestrator that decomposes complex code review requests into subtasks. + +Given a user request, identify the required subtasks and the skills needed for each. +Output a JSON object with the following structure: + +{ + "understanding": "Brief summary of what the user wants", + "subtasks": [ + { + "id": "task_1", + "description": "What needs to be done", + "skill_tags": ["skill_tag_1", "skill_tag_2"], + "input": "Specific input for this subtask", + "depends_on": [] + } + ], + "execution_order": "parallel" or "sequential" +} + +Available skill tags: +- code_review: General code review (readability, correctness, best practices) +- linting: Code style, formatting, and lint rule enforcement +- security_audit: Security vulnerability scanning and threat analysis +- architecture_review: Architecture, design patterns, and refactoring recommendations +- bug_detection: Bug detection, error handling, and edge case analysis +- performance_review: Performance optimization and bottleneck analysis + +For comprehensive code review requests, consider decomposing into: +1. Basic code_review for quick overview and readability +2. security_audit for vulnerability scanning +3. architecture_review for design pattern and structural feedback +4. performance_review for optimization opportunities + +Be specific about what each subtask should accomplish. 
@dataclass
class SubTask:
    """A unit of work produced by decomposing a user request.

    ``result``, ``status``, ``provider_id`` and ``agent_url`` start out empty
    and are filled in by ``OrchestratorAgent`` during provider discovery and
    execution.
    """

    id: str
    description: str
    skill_tags: list[str]
    input: str
    depends_on: list[str]
    result: Optional[str] = None
    # "pending" -> "running" -> "completed" | "failed" (set in _execute_subtasks)
    status: str = "pending"
    provider_id: Optional[str] = None
    agent_url: Optional[str] = None


@dataclass
class OrchestratorAgent(BaseAgent):
    """Orchestrator that coordinates multiple code review agents via AEX + A2A.

    ``process`` runs four steps in order: decompose the request into
    ``SubTask`` objects, pick a provider for each one, call each provider over
    A2A JSON-RPC, and render a Markdown report.
    """

    llm: Optional[ChatAnthropic] = field(default=None, init=False)
    http_session: Optional[aiohttp.ClientSession] = field(default=None, init=False)

    def _setup_llm(self):
        """Initialize Claude LLM for task decomposition.

        Reads ``ANTHROPIC_API_KEY`` from the environment. When it is missing,
        ``self.llm`` is left as ``None`` and ``_decompose_task`` uses the
        keyword-based mock decomposition instead.
        """
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            logger.warning("ANTHROPIC_API_KEY not set, using mock decomposition")
            self.llm = None
            return

        self.llm = ChatAnthropic(
            model=self.config.llm.model,
            temperature=self.config.llm.temperature,
            max_tokens=self.config.llm.max_tokens,
            api_key=api_key,
        )
        logger.info(f"Initialized Claude LLM: {self.config.llm.model}")

    def _build_graph(self):
        """Build the orchestration workflow.

        Only an empty ``StateGraph`` is created; no nodes or edges are added
        here, and ``process`` below does not use the graph.
        """
        self._graph = StateGraph(AgentState)

    async def process(self, state: AgentState) -> AgentState:
        """Process request through orchestration pipeline.

        Args:
            state: Agent state; ``state["messages"]`` must exist and its last
                entry is read with ``.get("content", "")``.

        Returns:
            The same ``state`` object with ``result`` set, and ``artifacts``
            set when at least one subtask was produced.
        """
        messages = state["messages"]
        if not messages:
            state["result"] = "No message provided."
            return state

        user_content = messages[-1].get("content", "")

        # Step 1: Decompose task
        subtasks = await self._decompose_task(user_content)
        if not subtasks:
            state["result"] = "Could not decompose the request into subtasks."
            return state

        logger.info(f"Decomposed into {len(subtasks)} subtasks")

        # Step 2: Discover providers via AEX for each subtask
        await self._discover_providers(subtasks)

        # Step 3: Execute subtasks via A2A. The returned mapping is not needed
        # here: every outcome is also recorded on the SubTask objects.
        await self._execute_subtasks(subtasks)

        # Step 4: Aggregate results
        state["result"] = self._aggregate_results(user_content, subtasks)
        report = {
            "subtasks": [
                {
                    "id": st.id,
                    "description": st.description,
                    "status": st.status,
                    "provider": st.provider_id,
                }
                for st in subtasks
            ]
        }
        state["artifacts"] = [
            {
                "name": "orchestration_report.json",
                "parts": [{"type": "text", "text": json.dumps(report, indent=2)}],
            }
        ]

        return state

    async def _decompose_task(self, user_request: str) -> list[SubTask]:
        """Use LLM to decompose request into subtasks.

        Falls back to ``_mock_decompose`` when no LLM is configured or when
        anything goes wrong while calling the LLM or parsing its reply.
        """
        if self.llm is None:
            return self._mock_decompose(user_request)

        try:
            response = await self.llm.ainvoke([
                SystemMessage(content=ORCHESTRATOR_PROMPT),
                HumanMessage(content=user_request),
            ])

            content = response.content.strip()
            # The model sometimes wraps the JSON in a fenced block; keep only
            # the text between the first pair of fences and drop a leading
            # "json" language tag.
            if content.startswith("```"):
                content = content.split("```")[1]
                if content.startswith("json"):
                    content = content[4:]
                content = content.strip()

            data = json.loads(content)

            return [
                SubTask(
                    id=st["id"],
                    description=st["description"],
                    skill_tags=st.get("skill_tags", []),
                    input=st.get("input", ""),
                    depends_on=st.get("depends_on", []),
                )
                for st in data.get("subtasks", [])
            ]

        except Exception as e:
            # Deliberately broad: any LLM/parse failure degrades to the mock.
            logger.exception(f"Error decomposing task: {e}")
            return self._mock_decompose(user_request)

    def _mock_decompose(self, user_request: str) -> list[SubTask]:
        """Mock decomposition for testing.

        Matches keywords in the lower-cased request against a fixed rule
        table. Each matching rule yields one subtask, numbered ``task_1``,
        ``task_2``, ... in rule order. When nothing matches, a default set of
        three subtasks (code review, security, architecture) is returned.
        """
        request_lower = user_request.lower()

        # (trigger keywords, subtask description, primary skill tag)
        rules = (
            # Security audit (Standard agent - CodeGuard)
            (
                ("security", "vulnerability", "injection"),
                "Scan code for security vulnerabilities and threats",
                "security_audit",
            ),
            # Architecture review (Premium agent - ArchitectAI)
            (
                ("architecture", "design", "pattern", "refactor"),
                "Review architecture, design patterns, and structural quality",
                "architecture_review",
            ),
            # Linting (Budget agent - QuickReview)
            (
                ("lint", "style", "format"),
                "Check code style, formatting, and lint compliance",
                "linting",
            ),
            # Bug detection (Budget agent - QuickReview)
            (
                ("bug", "error", "fix"),
                "Detect bugs, error handling issues, and edge cases",
                "bug_detection",
            ),
            # Performance review (Premium agent - ArchitectAI)
            (
                ("performance", "optimize", "slow"),
                "Analyze performance bottlenecks and optimization opportunities",
                "performance_review",
            ),
        )

        subtasks = []
        for keywords, description, skill in rules:
            if any(keyword in request_lower for keyword in keywords):
                subtasks.append(SubTask(
                    id=f"task_{len(subtasks) + 1}",
                    description=description,
                    skill_tags=[skill, "code_review"],
                    input=user_request,
                    depends_on=[],
                ))

        if subtasks:
            return subtasks

        # Default: comprehensive review using all three agents
        return [
            SubTask(
                id="task_1",
                description="Quick code review for readability and correctness",
                skill_tags=["code_review"],
                input=user_request,
                depends_on=[],
            ),
            SubTask(
                id="task_2",
                description="Security vulnerability scan",
                skill_tags=["security_audit", "code_review"],
                input=user_request,
                depends_on=[],
            ),
            SubTask(
                id="task_3",
                description="Architecture and design pattern review",
                skill_tags=["architecture_review", "code_review"],
                input=user_request,
                depends_on=[],
            ),
        ]

    async def _discover_providers(self, subtasks: list[SubTask]):
        """Discover providers via AEX for each subtask.

        Sets ``provider_id`` and ``agent_url`` on every subtask in place.
        Resolution order per subtask: AEX search, then the hard-coded demo
        agent for the first recognised skill tag, then the budget agent.
        """
        # Demo agent URLs (Docker network hostnames)
        # Each agent has different specialties to demonstrate multi-agent coordination
        demo_agents = {
            # Budget agent - quick basic reviews and linting
            "code_review": ("http://code-reviewer-a:8100", "Budget QuickReview ($5+$2/file)"),
            "linting": ("http://code-reviewer-a:8100", "Budget QuickReview ($5+$2/file)"),
            "bug_detection": ("http://code-reviewer-a:8100", "Budget QuickReview ($5+$2/file)"),
            # Standard agent - security scanning
            "security_audit": ("http://code-reviewer-b:8101", "Standard CodeGuard ($15+$0.50/file)"),
            # Premium agent - architecture and performance
            "architecture_review": ("http://code-reviewer-c:8102", "Premium ArchitectAI ($30+$0.20/file)"),
            "performance_review": ("http://code-reviewer-c:8102", "Premium ArchitectAI ($30+$0.20/file)"),
        }

        for st in subtasks:
            # Try AEX discovery first
            # NOTE(review): aex_client is not defined in this class;
            # presumably BaseAgent provides it - confirm.
            if self.aex_client:
                try:
                    providers = await self.aex_client.search_providers(
                        skill_tags=st.skill_tags,
                    )
                    if providers:
                        # Select best provider based on skill requirements
                        selected = self._select_best_provider(providers, st.skill_tags)
                        endpoint = selected.get("endpoint")
                        if endpoint:
                            st.provider_id = selected.get("provider_id")
                            st.agent_url = endpoint
                            agent_name = selected.get("name", st.provider_id)
                            logger.info(f"[AEX] Found {len(providers)} providers, selected {agent_name} for {st.id} (skills: {st.skill_tags})")
                            continue
                        # A provider without an endpoint cannot be called, so
                        # fall through to the demo agents instead of leaving
                        # the subtask without a URL.
                        logger.warning(f"[AEX] Selected provider for {st.id} has no endpoint, falling back to demo agents")
                except Exception as e:
                    logger.warning(f"[AEX] Discovery failed for {st.id}: {e}")

            # Fall back to demo agents
            for tag in st.skill_tags:
                if tag in demo_agents:
                    st.agent_url, st.provider_id = demo_agents[tag]
                    logger.info(f"[A2A] Using {st.provider_id} for subtask: {st.description}")
                    break

            # Default to budget code review agent
            if not st.agent_url:
                st.agent_url = "http://code-reviewer-a:8100"
                st.provider_id = "Budget QuickReview ($5+$2/file)"
                logger.info(f"[A2A] Default to {st.provider_id} for subtask: {st.description}")

    def _select_best_provider(self, providers: list[dict], skill_tags: list[str]) -> dict:
        """Select the best provider based on skill requirements.

        Strategy:
        - For premium-only skills (architecture_review, performance_review), use Premium
        - For standard skills (security_audit), prefer Standard over Premium
        - For basic skills (code_review, linting, bug_detection), use Budget

        Tiers are recognised by substring match on each provider's ``name``.
        When the preferred tier is not present, the cheapest known tier wins.

        Raises:
            ValueError: if ``providers`` is empty.
        """
        if not providers:
            raise ValueError("providers must not be empty")

        premium_only_skills = {"architecture_review", "performance_review"}
        standard_skills = {"security_audit"}

        # Check if any skill requires premium
        needs_premium = any(tag in premium_only_skills for tag in skill_tags)
        needs_standard = any(tag in standard_skills for tag in skill_tags)

        def get_tier(p):
            """Get tier priority (lower = cheaper)."""
            name = p.get("name", "")
            if "QuickReview" in name:
                return 0
            elif "CodeGuard" in name:
                return 1
            elif "ArchitectAI" in name:
                return 2
            return 3  # Unknown

        sorted_providers = sorted(providers, key=get_tier)

        if needs_premium:
            # Must use Premium
            for p in reversed(sorted_providers):
                if "ArchitectAI" in p.get("name", ""):
                    return p
        elif needs_standard:
            # Use Standard if available, otherwise cheapest
            for p in sorted_providers:
                if "CodeGuard" in p.get("name", ""):
                    return p

        # Default: use cheapest (first in sorted list)
        return sorted_providers[0]

    async def _execute_subtasks(self, subtasks: list[SubTask]) -> dict[str, str]:
        """Execute subtasks via A2A protocol.

        Subtasks run one after another in list order over a single HTTP
        session; ``depends_on`` is not consulted. Each subtask's ``status``
        and ``result`` are updated in place.

        Returns:
            Mapping of subtask id to result text for the subtasks that
            completed; failed subtasks are absent.
        """
        results = {}

        async with aiohttp.ClientSession() as session:
            for st in subtasks:
                if not st.agent_url:
                    st.status = "failed"
                    st.result = "No provider available"
                    continue

                st.status = "running"
                try:
                    result = await self._call_a2a_agent(session, st)
                    st.result = result
                    st.status = "completed"
                    results[st.id] = result
                except Exception as e:
                    # One failing agent must not abort the remaining subtasks.
                    logger.exception(f"Error executing {st.id}: {e}")
                    st.status = "failed"
                    st.result = str(e)

        return results

    async def _call_a2a_agent(self, session: aiohttp.ClientSession, subtask: SubTask) -> str:
        """Call an agent via A2A JSON-RPC.

        Posts a ``message/send`` request to ``<agent_url>/a2a`` and returns the
        first text part of the most recent ``agent`` message in the returned
        history.

        Returns:
            The agent's text, ``"No response from agent"`` when the history
            has no agent text part, or a ``[Demo]`` mock string when the agent
            cannot be reached (``aiohttp.ClientError``).

        Raises:
            RuntimeError: on a non-200 HTTP status or a JSON-RPC ``error``.
        """
        a2a_url = f"{subtask.agent_url}/a2a"
        logger.info(f"[A2A] Calling {subtask.provider_id} at {a2a_url}")

        payload = {
            "jsonrpc": "2.0",
            "method": "message/send",
            "id": subtask.id,
            "params": {
                "message": {
                    "role": "user",
                    "parts": [{"type": "text", "text": subtask.input}],
                }
            },
        }

        try:
            async with session.post(a2a_url, json=payload) as resp:
                if resp.status != 200:
                    error = await resp.text()
                    raise RuntimeError(f"A2A call failed (HTTP {resp.status}): {error}")

                data = await resp.json()

                if "error" in data:
                    rpc_error = data["error"]
                    # JSON-RPC errors are objects, but tolerate a bare string.
                    if isinstance(rpc_error, dict):
                        raise RuntimeError(rpc_error.get("message", "Unknown error"))
                    raise RuntimeError(str(rpc_error))

                result = data.get("result", {})
                history = result.get("history", [])

                # Extract agent response (newest message first)
                for msg in reversed(history):
                    if msg.get("role") == "agent":
                        for part in msg.get("parts", []):
                            if part.get("type") == "text":
                                return part.get("text", "")

                return "No response from agent"

        except aiohttp.ClientError as e:
            # If agent not reachable, return mock response so the demo still
            # produces a report; the caller records this as "completed".
            logger.warning(f"Could not reach agent at {a2a_url}: {e}")
            return f"[Demo] Mock response for: {subtask.description}"

    def _aggregate_results(self, original_request: str, subtasks: list[SubTask]) -> str:
        """Aggregate results from all subtasks.

        Returns a Markdown document: a header, a table summarising which
        agent was chosen for each subtask, then one section per subtask with
        its status and result text.
        """
        lines = ["# Orchestration Results\n"]
        lines.append(f"**Original Request**: {original_request}\n")
        lines.append(f"**Subtasks Executed**: {len(subtasks)}\n")

        # Agent Selection Summary
        lines.append("\n## Agent Selection Summary\n")
        lines.append("| Subtask | Skill Tags | Selected Agent | Selection Reason |")
        lines.append("|---------|------------|----------------|------------------|")

        for st in subtasks:
            tags_str = ", ".join(st.skill_tags[:2]) if st.skill_tags else "N/A"
            reason = self._get_selection_reason(st.skill_tags)
            # Only mark the description as truncated when it actually was.
            if len(st.description) > 40:
                short_description = f"{st.description[:40]}..."
            else:
                short_description = st.description
            lines.append(f"| {short_description} | `{tags_str}` | {st.provider_id or 'Unknown'} | {reason} |")

        lines.append("\n---\n")

        for st in subtasks:
            status_icon = "PASS" if st.status == "completed" else "FAIL"
            lines.append(f"\n## [{status_icon}] {st.description}")
            lines.append(f"**Provider**: {st.provider_id or 'Unknown'}")
            lines.append(f"**Skill Tags**: {', '.join(st.skill_tags)}")
            lines.append(f"**Status**: {st.status}\n")
            if st.result:
                lines.append(st.result)
            lines.append("\n---")

        return "\n".join(lines)

    def _get_selection_reason(self, skill_tags: list[str]) -> str:
        """Get reason for agent selection based on skill tags.

        The reason is looked up from the first recognised tag only; it does
        not inspect which provider was actually selected.
        """
        reasons = {
            "code_review": "Basic review -> Budget tier (QuickReview)",
            "linting": "Lint/style check -> Budget tier (QuickReview)",
            "bug_detection": "Bug detection -> Budget tier (QuickReview)",
            "security_audit": "Security scan -> Standard tier (CodeGuard)",
            "architecture_review": "Architecture review -> Premium tier (ArchitectAI)",
            "performance_review": "Performance analysis -> Premium tier (ArchitectAI)",
        }
        for tag in skill_tags:
            if tag in reasons:
                return reasons[tag]
        return "Default routing"
organization: "AEX Demo" + url: "https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8103 + +llm: + provider: "anthropic" + model: "claude-sonnet-4-20250514" + temperature: 0.3 + max_tokens: 4096 + +skills: + - id: "code_review_orchestration" + name: "Code Review Orchestration" + description: "Decompose code review requests and coordinate multiple review agents" + tags: ["orchestration", "code_review"] + examples: + - "Review this code for security and architecture issues" + - "Do a comprehensive code review" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: false + auto_bid: false + pricing: + base_rate: 0.00 + currency: "USD" diff --git a/demo/code_review/agents/orchestrator/main.py b/demo/code_review/agents/orchestrator/main.py new file mode 100644 index 0000000..8ff4993 --- /dev/null +++ b/demo/code_review/agents/orchestrator/main.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +"""Code Review Orchestrator Agent - Main entry point.""" + +import asyncio +import logging +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.a2a_server import A2AServer +from common.config import load_config +from agent import OrchestratorAgent + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +async def main(): + """Start the Code Review Orchestrator Agent server.""" + config_path = os.environ.get("CONFIG_PATH", "config.yaml") + config = load_config(config_path) + + logger.info(f"Starting {config.name}") + logger.info(f"AEX Gateway: {config.aex.gateway_url}") + + agent = OrchestratorAgent(config=config) + + base_url = f"http://{config.server.host}:{config.server.port}" + if config.server.host == "0.0.0.0": + base_url = f"http://localhost:{config.server.port}" + + agent_card = config.get_agent_card(base_url) + + server = A2AServer( + 
@dataclass
class CodeAuditPayAgent(BasePaymentAgent):
    """
    CodeAuditPay - Code Audit Payment Specialist

    Characteristics:
    - Base fee: 2.5%
    - Rewards: UP TO 3.5% on security audits (NET CASHBACK!)
    - Net fee: -0.5% on code reviews (you earn money!)
    - Processing: Standard (5 seconds)
    - Fraud protection: Standard

    Best for:
    - Code reviews and audits
    - Security audits
    - Architecture reviews

    Fee breakdown (base fee minus category reward):
    - Code Review: 2.5% - 3.0% = -0.5% (CASHBACK!)
    - Security Audit: 2.5% - 3.5% = -1.0% (CASHBACK!)
    - Architecture Review: 2.5% - 2.5% = 0% (FREE!)
    - Development: 2.5% - 1.0% = 1.5%
    - Linting: 2.5% - 0.5% = 2.0%
    - Other: 2.5% - 1.0% = 1.5%
    """

    # Payment provider characteristics
    base_fee_percent: float = 2.5
    processing_time_seconds: int = 5
    supported_methods: list[str] = field(default_factory=lambda: ["card", "bank_transfer", "aex_balance"])
    fraud_protection: str = "standard"

    # Category rewards - specializes in code audits
    category_rewards: dict[str, float] = field(default_factory=lambda: {
        "code_review": 3.0,          # CASHBACK territory!
        "security_audit": 3.5,       # BIG CASHBACK!
        "architecture_review": 2.5,  # Free processing
        "development": 1.0,
        "linting": 0.5,
        "default": 1.0,
    })

    def __post_init__(self):
        """Initialize with config-based overrides.

        Values from the ``payment`` section of the raw config replace the
        class defaults. ``rewards`` entries are merged into
        ``category_rewards``, so categories not listed in the config keep
        their defaults.
        """
        super().__post_init__()

        # A YAML key written with no value (`payment:` / `rewards:`) loads as
        # None, so normalise missing and empty sections to an empty mapping
        # rather than crashing at startup.
        raw_config = getattr(self.config, "_raw_config", None) or {}
        payment_cfg = raw_config.get("payment") or {}

        self.base_fee_percent = payment_cfg.get("base_fee_percent", self.base_fee_percent)
        self.processing_time_seconds = payment_cfg.get("processing_time_seconds", self.processing_time_seconds)
        self.fraud_protection = payment_cfg.get("fraud_protection", self.fraud_protection)
        if "supported_methods" in payment_cfg:
            self.supported_methods = payment_cfg["supported_methods"]
        self.category_rewards.update(payment_cfg.get("rewards") or {})

        logger.info(f"CodeAuditPay initialized: {self.base_fee_percent}% base fee, UP TO 3.5% rewards on audits!")
"https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8201 + +# Payment provider characteristics +payment: + base_fee_percent: 2.5 + processing_time_seconds: 5 + fraud_protection: "standard" + supported_methods: + - "card" + - "bank_transfer" + - "aex_balance" + + # Category-specific rewards (cashback percentages) + # Specializes in code audits and reviews + rewards: + code_review: 3.0 # 3% back on code review → net -0.5% (CASHBACK!) + security_audit: 3.5 # 3.5% back on security audit → net -1.0% (CASHBACK!) + architecture_review: 2.5 # 2.5% back on architecture → net 0% (FREE!) + development: 1.0 # 1% back on development → net 1.5% + linting: 0.5 # 0.5% back on linting → net 2.0% + default: 1.0 # 1% back on other → net 1.5% + + # Marketing copy + tagline: "Get PAID to pay for code audits - up to 1.0% cashback!" + best_for: "Code reviews, security audits, architecture reviews" + +skills: + - id: "payment_processing" + name: "Payment Processing" + description: "Process payments with bonus rewards for code audit work" + tags: ["payment", "code_review", "security_audit"] + examples: + - "Process payment for code review" + - "Process payment for security audit" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: true + auto_bid: true + trust_tier: "TRUSTED" + trust_score: 0.88 + pricing: + base_rate: 0.0 + currency: "USD" diff --git a/demo/code_review/agents/payment-codeauditpay/main.py b/demo/code_review/agents/payment-codeauditpay/main.py new file mode 100644 index 0000000..5d4506a --- /dev/null +++ b/demo/code_review/agents/payment-codeauditpay/main.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""CodeAuditPay Payment Agent - Main entry point.""" + +import asyncio +import logging +import os +import sys + +# Add parent directory to path for common imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from common.a2a_server import A2AServer +from common.config import 
async def main():
    """Run the CodeAuditPay payment agent.

    Loads the config named by ``CONFIG_PATH`` (default ``config.yaml``),
    builds the agent and its A2A server, registers with AEX when the config
    enables it, and then serves until the server stops.
    """
    cfg = load_config(os.environ.get("CONFIG_PATH", "config.yaml"))
    logger.info(f"Starting {cfg.name}")

    payment_agent = CodeAuditPayAgent(config=cfg)

    # The advertised URL is built from AGENT_HOSTNAME, not from the bind host.
    public_host = os.environ.get("AGENT_HOSTNAME", "localhost")
    base_url = f"http://{public_host}:{cfg.server.port}"

    a2a_server = A2AServer(
        agent_card=cfg.get_agent_card(base_url),
        handler=payment_agent,
        require_auth=False,
    )

    wants_registration = cfg.aex.enabled and cfg.aex.auto_register
    if wants_registration:
        try:
            await payment_agent.register_with_aex(base_url)
        except Exception as exc:
            # Registration is best-effort; the agent still serves without it.
            logger.warning(f"Could not register with AEX: {exc}")

    startup_messages = (
        f"Agent Card: {base_url}/.well-known/agent-card.json",
        f"A2A Endpoint: {base_url}/a2a",
        "Payment provider ready - GET CASHBACK on code audits!",
    )
    for message in startup_messages:
        logger.info(message)

    await a2a_server.run_async(host=cfg.server.host, port=cfg.server.port)
@dataclass
class DevPayAgent(BasePaymentAgent):
    """
    DevPay - General Development Payment Processor

    Characteristics:
    - Base fee: 2.0%
    - Rewards: UP TO 1.5% on development categories
    - Net fee: 0.5% on development work
    - Processing: Fast (3 seconds)
    - Fraud protection: Basic

    Best for:
    - General code reviews
    - Development tasks
    - Small to medium transactions

    Fee breakdown (base fee minus category reward):
    - Code Review: 2.0% - 1.0% = 1.0%
    - Development: 2.0% - 1.5% = 0.5%
    - Linting: 2.0% - 0.5% = 1.5%
    - Security Audit: 2.0% - 0.5% = 1.5%
    - Architecture Review: 2.0% - 0.5% = 1.5%
    - Other: 2.0% - 0.5% = 1.5%
    """

    # Payment provider characteristics
    base_fee_percent: float = 2.0
    processing_time_seconds: int = 3
    supported_methods: list[str] = field(default_factory=lambda: ["card", "aex_balance"])
    fraud_protection: str = "basic"

    # Category rewards - general development focus
    category_rewards: dict[str, float] = field(default_factory=lambda: {
        "code_review": 1.0,
        "development": 1.5,
        "linting": 0.5,
        "security_audit": 0.5,
        "architecture_review": 0.5,
        "default": 0.5,
    })

    def __post_init__(self):
        """Initialize with config-based overrides.

        Values from the ``payment`` section of the raw config replace the
        class defaults. ``rewards`` entries are merged into
        ``category_rewards``, so categories not listed in the config keep
        their defaults.
        """
        super().__post_init__()

        # A YAML key written with no value (`payment:` / `rewards:`) loads as
        # None, so normalise missing and empty sections to an empty mapping
        # rather than crashing at startup.
        raw_config = getattr(self.config, "_raw_config", None) or {}
        payment_cfg = raw_config.get("payment") or {}

        self.base_fee_percent = payment_cfg.get("base_fee_percent", self.base_fee_percent)
        self.processing_time_seconds = payment_cfg.get("processing_time_seconds", self.processing_time_seconds)
        self.fraud_protection = payment_cfg.get("fraud_protection", self.fraud_protection)
        if "supported_methods" in payment_cfg:
            self.supported_methods = payment_cfg["supported_methods"]
        self.category_rewards.update(payment_cfg.get("rewards") or {})

        logger.info(f"DevPay initialized: {self.base_fee_percent}% base fee, general dev payment processor")
async def main():
    """Run the DevPay payment agent.

    Loads the config named by ``CONFIG_PATH`` (default ``config.yaml``),
    builds the agent and its A2A server, registers with AEX when the config
    enables it, and then serves until the server stops.
    """
    cfg = load_config(os.environ.get("CONFIG_PATH", "config.yaml"))
    logger.info(f"Starting {cfg.name}")

    payment_agent = DevPayAgent(config=cfg)

    # The advertised URL is built from AGENT_HOSTNAME, not from the bind host.
    public_host = os.environ.get("AGENT_HOSTNAME", "localhost")
    base_url = f"http://{public_host}:{cfg.server.port}"

    a2a_server = A2AServer(
        agent_card=cfg.get_agent_card(base_url),
        handler=payment_agent,
        require_auth=False,
    )

    wants_registration = cfg.aex.enabled and cfg.aex.auto_register
    if wants_registration:
        try:
            await payment_agent.register_with_aex(base_url)
        except Exception as exc:
            # Registration is best-effort; the agent still serves without it.
            logger.warning(f"Could not register with AEX: {exc}")

    startup_messages = (
        f"Agent Card: {base_url}/.well-known/agent-card.json",
        f"A2A Endpoint: {base_url}/a2a",
        "Payment processor ready - affordable dev payments!",
    )
    for message in startup_messages:
        logger.info(message)

    await a2a_server.run_async(host=cfg.server.host, port=cfg.server.port)
@dataclass
class SecurityPayAgent(BasePaymentAgent):
    """
    SecurityPay - Security-Focused Payment Processor

    Characteristics:
    - Base fee: 3.0%
    - Rewards: UP TO 4.0% on security audits (BIG CASHBACK!)
    - Net fee: -1.0% on security audits (you earn money!)
    - Processing: Thorough (7 seconds)
    - Fraud protection: Advanced

    Best for:
    - Security audits and penetration testing
    - Architecture reviews
    - High-value security-sensitive transactions

    Fee breakdown (base fee minus category reward):
    - Security Audit: 3.0% - 4.0% = -1.0% (CASHBACK!)
    - Architecture Review: 3.0% - 3.0% = 0% (FREE!)
    - Code Review: 3.0% - 2.0% = 1.0%
    - Development: 3.0% - 1.0% = 2.0%
    - Linting: 3.0% - 0.5% = 2.5%
    - Other: 3.0% - 1.0% = 2.0%
    """

    # Payment provider characteristics
    base_fee_percent: float = 3.0
    processing_time_seconds: int = 7
    supported_methods: list[str] = field(default_factory=lambda: ["card", "bank_transfer", "aex_balance", "crypto"])
    fraud_protection: str = "advanced"

    # Category rewards - specializes in security
    category_rewards: dict[str, float] = field(default_factory=lambda: {
        "security_audit": 4.0,       # BIG CASHBACK!
        "architecture_review": 3.0,  # Free processing
        "code_review": 2.0,
        "development": 1.0,
        "linting": 0.5,
        "default": 1.0,
    })

    def __post_init__(self):
        """Initialize with config-based overrides.

        Values from the ``payment`` section of the raw config replace the
        class defaults. ``rewards`` entries are merged into
        ``category_rewards``, so categories not listed in the config keep
        their defaults.
        """
        super().__post_init__()

        # A YAML key written with no value (`payment:` / `rewards:`) loads as
        # None, so normalise missing and empty sections to an empty mapping
        # rather than crashing at startup.
        raw_config = getattr(self.config, "_raw_config", None) or {}
        payment_cfg = raw_config.get("payment") or {}

        self.base_fee_percent = payment_cfg.get("base_fee_percent", self.base_fee_percent)
        self.processing_time_seconds = payment_cfg.get("processing_time_seconds", self.processing_time_seconds)
        self.fraud_protection = payment_cfg.get("fraud_protection", self.fraud_protection)
        if "supported_methods" in payment_cfg:
            self.supported_methods = payment_cfg["supported_methods"]
        self.category_rewards.update(payment_cfg.get("rewards") or {})

        logger.info(f"SecurityPay initialized: {self.base_fee_percent}% base fee, UP TO 4% rewards on security audits!")
"SecurityPay Global" + url: "https://github.com/open-experiments/agent-exchange" + +server: + host: "0.0.0.0" + port: 8202 + +# Payment provider characteristics +payment: + base_fee_percent: 3.0 + processing_time_seconds: 7 + fraud_protection: "advanced" + supported_methods: + - "card" + - "bank_transfer" + - "aex_balance" + - "crypto" + + # Category-specific rewards (cashback percentages) + # Specializes in security audits + rewards: + security_audit: 4.0 # 4% back on security audit → net -1.0% (BIG CASHBACK!) + architecture_review: 3.0 # 3% back on architecture → net 0% (FREE!) + code_review: 2.0 # 2% back on code review → net 1.0% + development: 1.0 # 1% back on development → net 2.0% + linting: 0.5 # 0.5% back on linting → net 2.5% + default: 1.0 # 1% back on other → net 2.0% + + # Marketing copy + tagline: "Premium security payment processing - up to 1.0% cashback on security audits!" + best_for: "Security audits, architecture reviews, high-value transactions" + +skills: + - id: "payment_processing" + name: "Payment Processing" + description: "Process payments with premium rewards for security audit work" + tags: ["payment", "security", "audit", "architecture"] + examples: + - "Process payment for security audit" + - "Process payment for architecture review" + +aex: + enabled: true + gateway_url: "http://aex-gateway:8080" + auto_register: true + auto_bid: true + trust_tier: "PREFERRED" + trust_score: 0.92 + pricing: + base_rate: 0.0 + currency: "USD" diff --git a/demo/code_review/agents/payment-securitypay/main.py b/demo/code_review/agents/payment-securitypay/main.py new file mode 100644 index 0000000..f17d9a7 --- /dev/null +++ b/demo/code_review/agents/payment-securitypay/main.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""SecurityPay Payment Agent - Main entry point.""" + +import asyncio +import logging +import os +import sys + +# Add parent directory to path for common imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 
async def main():
    """Boot the SecurityPay payment agent and serve it over A2A.

    Loads the config file named by the CONFIG_PATH environment variable
    (default "config.yaml"), builds the agent and its agent card, optionally
    registers with AEX, and then runs the A2A server. A failed AEX
    registration is only logged as a warning; the server still starts.
    """
    settings = load_config(os.environ.get("CONFIG_PATH", "config.yaml"))
    logger.info(f"Starting {settings.name}")

    payment_agent = SecurityPayAgent(config=settings)

    # URL placed in the agent card and handed to AEX on registration.
    # AGENT_HOSTNAME overrides the default "localhost".
    advertised_host = os.environ.get("AGENT_HOSTNAME", "localhost")
    public_url = f"http://{advertised_host}:{settings.server.port}"

    a2a_server = A2AServer(
        agent_card=settings.get_agent_card(public_url),
        handler=payment_agent,
        require_auth=False,
    )

    # Registration is best-effort and only attempted when both flags are set.
    should_register = settings.aex.enabled and settings.aex.auto_register
    if should_register:
        try:
            await payment_agent.register_with_aex(public_url)
        except Exception as exc:
            logger.warning(f"Could not register with AEX: {exc}")

    startup_banner = (
        f"Agent Card: {public_url}/.well-known/agent-card.json",
        f"A2A Endpoint: {public_url}/a2a",
        "Payment processor ready - PREMIUM rewards on security work!",
    )
    for banner_line in startup_banner:
        logger.info(banner_line)

    await a2a_server.run_async(
        host=settings.server.host,
        port=settings.server.port,
    )
'3.8' + +services: + # =========================================== + # AEX Core Services + # =========================================== + + mongo: + image: mongo:7 + container_name: cr-aex-mongo + environment: + MONGO_INITDB_ROOT_USERNAME: root + MONGO_INITDB_ROOT_PASSWORD: root + ports: + - "27017:27017" + volumes: + - cr_mongo_data:/data/db + networks: + - cr-network + healthcheck: + test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + + aex-gateway: + build: + context: ../../src + dockerfile: aex-gateway/Dockerfile + container_name: cr-aex-gateway + environment: + PORT: "8080" + IDENTITY_URL: "http://aex-identity:8080" + BID_GATEWAY_URL: "http://aex-bid-gateway:8080" + PROVIDER_REGISTRY_URL: "http://aex-provider-registry:8080" + CONTRACT_ENGINE_URL: "http://aex-contract-engine:8080" + TRUST_BROKER_URL: "http://aex-trust-broker:8080" + ports: + - "8080:8080" + depends_on: + - aex-identity + - aex-bid-gateway + - aex-provider-registry + - aex-contract-engine + - aex-trust-broker + networks: + - cr-network + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"] + interval: 10s + timeout: 5s + retries: 3 + + aex-work-publisher: + build: + context: ../../src + dockerfile: aex-work-publisher/Dockerfile + container_name: cr-aex-work-publisher + environment: + PORT: "8080" + STORE_TYPE: "mongo" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + MONGO_COLLECTION_WORK: "work_specs" + PROVIDER_REGISTRY_URL: "http://aex-provider-registry:8080" + ENVIRONMENT: "development" + ports: + - "8081:8080" + depends_on: + mongo: + condition: service_healthy + aex-provider-registry: + condition: service_started + networks: + - cr-network + + aex-bid-gateway: + build: + context: ../../src + dockerfile: aex-bid-gateway/Dockerfile + container_name: cr-aex-bid-gateway + environment: + PORT: "8080" + MONGO_URI: 
"mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + PROVIDER_REGISTRY_URL: "http://aex-provider-registry:8080" + ports: + - "8082:8080" + depends_on: + - mongo + - aex-provider-registry + networks: + - cr-network + + aex-bid-evaluator: + build: + context: ../../src + dockerfile: aex-bid-evaluator/Dockerfile + container_name: cr-aex-bid-evaluator + environment: + PORT: "8080" + BID_GATEWAY_URL: "http://aex-bid-gateway:8080" + TRUST_BROKER_URL: "http://aex-trust-broker:8080" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + ports: + - "8083:8080" + depends_on: + - mongo + - aex-bid-gateway + - aex-trust-broker + networks: + - cr-network + + aex-contract-engine: + build: + context: ../../src + dockerfile: aex-contract-engine/Dockerfile + container_name: cr-aex-contract-engine + environment: + PORT: "8080" + BID_GATEWAY_URL: "http://aex-bid-gateway:8080" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + ports: + - "8084:8080" + depends_on: + - mongo + - aex-bid-gateway + networks: + - cr-network + + aex-provider-registry: + build: + context: ../../src + dockerfile: aex-provider-registry/Dockerfile + container_name: cr-aex-provider-registry + environment: + PORT: "8080" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + ENVIRONMENT: "development" + ports: + - "8085:8080" + depends_on: + - mongo + networks: + - cr-network + + aex-trust-broker: + build: + context: ../../src + dockerfile: aex-trust-broker/Dockerfile + container_name: cr-aex-trust-broker + environment: + PORT: "8080" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + ports: + - "8086:8080" + depends_on: + - mongo + networks: + - cr-network + + aex-identity: + build: + context: ../../src + dockerfile: aex-identity/Dockerfile + container_name: cr-aex-identity + environment: + PORT: "8080" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + 
MONGO_DB: "aex" + ports: + - "8087:8080" + depends_on: + - mongo + networks: + - cr-network + + aex-settlement: + build: + context: ../../src + dockerfile: aex-settlement/Dockerfile + container_name: cr-aex-settlement + environment: + PORT: "8080" + ENVIRONMENT: "development" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + AP2_ENABLED: "true" + CREDENTIALS_PROVIDER_URL: "http://aex-credentials-provider:8080" + DEVPAY_URL: "http://payment-devpay:8200" + CODEAUDITPAY_URL: "http://payment-codeauditpay:8201" + SECURITYPAY_URL: "http://payment-securitypay:8202" + ports: + - "8088:8080" + depends_on: + mongo: + condition: service_healthy + aex-credentials-provider: + condition: service_started + networks: + - cr-network + + aex-credentials-provider: + build: + context: ../../src + dockerfile: aex-credentials-provider/Dockerfile + container_name: cr-aex-credentials-provider + environment: + PORT: "8080" + ENVIRONMENT: "development" + MONGO_URI: "mongodb://root:root@mongo:27017/?authSource=admin" + MONGO_DB: "aex" + ports: + - "8090:8080" + depends_on: + mongo: + condition: service_healthy + networks: + - cr-network + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"] + interval: 10s + timeout: 5s + retries: 3 + + aex-telemetry: + build: + context: ../../src + dockerfile: aex-telemetry/Dockerfile + container_name: cr-aex-telemetry + environment: + PORT: "8080" + ports: + - "8089:8080" + networks: + - cr-network + + # =========================================== + # Code Review Agents (Tiered Pricing) + # =========================================== + + # Budget Code Reviewer - $3 + $1/file, fast, concise review (Claude) + code-reviewer-a: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: code-reviewer-a + container_name: cr-code-reviewer-a + ports: + - "8100:8100" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - 
AEX_GATEWAY_URL=http://aex-gateway:8080 + - AEX_API_KEY=dev-api-key + - AGENT_HOSTNAME=code-reviewer-a + - CONFIG_PATH=config.yaml + depends_on: + - aex-gateway + networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8100/health"] + interval: 10s + timeout: 5s + retries: 3 + + # Standard Code Reviewer - $10 + $3/file, security-focused review (Claude) + code-reviewer-b: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: code-reviewer-b + container_name: cr-code-reviewer-b + ports: + - "8101:8101" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - AEX_GATEWAY_URL=http://aex-gateway:8080 + - AEX_API_KEY=dev-api-key + - AGENT_HOSTNAME=code-reviewer-b + - CONFIG_PATH=config.yaml + depends_on: + - aex-gateway + networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8101/health"] + interval: 10s + timeout: 5s + retries: 3 + + # Premium Code Reviewer - $25 + $5/file, deep architectural review (Claude) + code-reviewer-c: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: code-reviewer-c + container_name: cr-code-reviewer-c + ports: + - "8102:8102" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - AEX_GATEWAY_URL=http://aex-gateway:8080 + - AEX_API_KEY=dev-api-key + - AGENT_HOSTNAME=code-reviewer-c + - CONFIG_PATH=config.yaml + depends_on: + - aex-gateway + networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8102/health"] + interval: 10s + timeout: 5s + retries: 3 + + # Code Review Orchestrator + orchestrator: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: orchestrator + container_name: cr-orchestrator + ports: + - "8103:8103" + environment: + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - AEX_GATEWAY_URL=http://aex-gateway:8080 + - AEX_API_KEY=dev-api-key + - CONFIG_PATH=config.yaml + depends_on: + - aex-gateway + - code-reviewer-a + - code-reviewer-b + - code-reviewer-c + 
networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8103/health"] + interval: 10s + timeout: 5s + retries: 3 + + # =========================================== + # Payment Provider Agents (AP2 - Competing) + # =========================================== + + # DevPay - General dev payments (2% fee, 1% reward on code_review = 1% net) + payment-devpay: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: payment-devpay + container_name: cr-payment-devpay + ports: + - "8200:8200" + environment: + - CONFIG_PATH=config.yaml + networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8200/health"] + interval: 10s + timeout: 5s + retries: 3 + + # CodeAuditPay - Code audit specialist (2.5% fee, 3% reward on code_review = -0.5% CASHBACK!) + payment-codeauditpay: + build: + context: ./agents + dockerfile: Dockerfile + args: + AGENT_DIR: payment-codeauditpay + container_name: cr-payment-codeauditpay + ports: + - "8201:8201" + environment: + - CONFIG_PATH=config.yaml + networks: + - cr-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8201/health"] + interval: 10s + timeout: 5s + retries: 3 + + # SecurityPay - Security specialist (3% fee, 4% reward on security = -1% CASHBACK!) 
# Image for the NiceGUI demo dashboard (nicegui_app.py); exposes port 8502.
FROM python:3.11-slim

WORKDIR /app

# curl is the only extra OS package. The apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): requirements.txt is copied but the install below names its
# packages explicitly; confirm whether `pip install -r requirements.txt`
# was intended.
COPY requirements.txt .

# The version specifiers must be quoted. RUN uses the shell form, where an
# unquoted `>` is an output redirection: `nicegui>=2.0.0` would install an
# unconstrained nicegui and send pip's output to a file named "=2.0.0".
RUN pip install --no-cache-dir "nicegui>=2.0.0" "httpx>=0.27.0"

COPY nicegui_app.py .

EXPOSE 8502

CMD ["python", "nicegui_app.py"]
"PREFERRED": COLORS["accent_green"], + "UNVERIFIED": "#666", +} + +SAMPLE_CODE = { + "Python - Auth Handler": '''def login(username, password): + query = "SELECT * FROM users WHERE username='" + username + "' AND password='" + password + "'" + result = db.execute(query) + if result: + token = str(result[0]["id"]) + return {"token": token, "status": "ok"} + return {"status": "fail"} + +def get_user_data(user_id): + data = db.execute(f"SELECT * FROM users WHERE id={user_id}") + return data +''', + "JavaScript - API Server": '''const express = require('express'); +const app = express(); + +app.get('/api/users/:id', (req, res) => { + const userId = req.params.id; + const query = `SELECT * FROM users WHERE id = ${userId}`; + db.query(query, (err, results) => { + if (err) throw err; + res.json(results); + }); +}); + +app.post('/api/upload', (req, res) => { + const file = req.body.file; + fs.writeFileSync('/uploads/' + req.body.filename, file); + res.json({ status: 'uploaded' }); +}); + +app.listen(3000); +''', + "Go - HTTP Handler": '''package main + +import ( + "database/sql" + "fmt" + "net/http" + "os/exec" +) + +func handleRequest(w http.ResponseWriter, r *http.Request) { + userInput := r.URL.Query().Get("cmd") + out, _ := exec.Command("sh", "-c", userInput).Output() + fmt.Fprintf(w, string(out)) +} + +func getUser(w http.ResponseWriter, r *http.Request) { + id := r.URL.Query().Get("id") + query := fmt.Sprintf("SELECT * FROM users WHERE id = %s", id) + rows, _ := db.Query(query) + defer rows.Close() +} + +func main() { + http.HandleFunc("/run", handleRequest) + http.HandleFunc("/user", getUser) + http.ListenAndServe(":8080", nil) +} +''', + "Custom Code": "", +} + + +@dataclass +class TaskResult: + """Result of a code review task execution.""" + task_id: str = "" + description: str = "" + code_files: int = 3 + bid_strategy: str = "balanced" + bids: list = field(default_factory=list) + winner_name: str = "" + winner_tier: str = "" + winner_price: float = 0.0 + 
class AppState:
    """Mutable state for the dashboard: task history, log lines and counters."""

    def __init__(self):
        # History buffers; both start empty and are independent per instance.
        self.tasks: list[TaskResult] = list()
        self.logs: list[str] = list()

        # Nothing is running and no task is selected on a fresh state.
        self.is_running: bool = False
        self.current_task: Optional[TaskResult] = None

        # Aggregate counters, all starting at zero.
        self.stats = dict(
            total_reviews=0,
            platform_revenue=0.0,
            avg_response_time=0,
        )
async def fetch_real_bids(code_files: int) -> list[dict]:
    """Ask each code review agent for a bid over A2A and return the bids.

    Sends a JSON-RPC "message/send" request carrying a "get_bid" action to
    the three reviewer endpoints, one after another, and collects every
    "bid_response" found in the agent messages of the returned history.
    A failure for one agent is printed and does not stop the others.

    Args:
        code_files: Number of files in the review; forwarded to the agents.

    Returns:
        The collected bids, or three hard-coded simulated bids when no agent
        produced a usable bid.
    """
    reviewers = (
        ("QuickReview AI", CODE_REVIEWER_A_URL),
        ("CodeGuard AI", CODE_REVIEWER_B_URL),
        ("ArchitectAI", CODE_REVIEWER_C_URL),
    )

    # NOTE(review): the file count travels under the key "document_pages";
    # confirm that this is the key the reviewer agents read.
    request_text = json.dumps({"action": "get_bid", "document_pages": code_files})
    collected = []

    async with httpx.AsyncClient(timeout=10.0) as client:
        for reviewer_name, reviewer_url in reviewers:
            endpoint = f"{reviewer_url}/a2a"
            try:
                rpc_request = {
                    "jsonrpc": "2.0",
                    "method": "message/send",
                    "id": f"bid-{reviewer_name}-{int(time.time())}",
                    "params": {
                        "message": {
                            "role": "user",
                            "parts": [{"type": "text", "text": request_text}],
                        }
                    },
                }
                reply = await client.post(endpoint, json=rpc_request)
                if reply.status_code != 200:
                    continue

                history = reply.json().get("result", {}).get("history", [])
                for message in history:
                    if message.get("role") != "agent":
                        continue
                    for part in message.get("parts", []):
                        if part.get("type") != "text":
                            continue
                        try:
                            parsed = json.loads(part.get("text", "{}"))
                        except json.JSONDecodeError:
                            # Text parts that are not JSON are ignored.
                            continue
                        if parsed.get("action") != "bid_response":
                            continue
                        offer = parsed.get("bid", {})
                        collected.append({
                            "provider_id": offer.get("provider_id"),
                            "provider_name": offer.get("provider_name"),
                            "price": offer.get("price", 0),
                            "confidence": offer.get("confidence", 0.8),
                            "estimated_minutes": offer.get("estimated_minutes", 10),
                            "trust_score": offer.get("trust_score", 0.5),
                            "tier": offer.get("tier", "UNVERIFIED"),
                            "a2a_endpoint": endpoint,
                        })
            except Exception as err:
                print(f"Error fetching bid from {reviewer_name}: {err}")

    if collected:
        return collected

    # Fallback simulated bids if agents not reachable
    return [
        {
            "provider_id": "quickreview-ai",
            "provider_name": "QuickReview AI",
            "price": 6.0,
            "confidence": 0.75,
            "estimated_minutes": 2,
            "trust_score": 0.70,
            "tier": "VERIFIED",
            "a2a_endpoint": f"{CODE_REVIEWER_A_URL}/a2a",
        },
        {
            "provider_id": "codeguard-ai",
            "provider_name": "CodeGuard AI",
            "price": 19.0,
            "confidence": 0.85,
            "estimated_minutes": 5,
            "trust_score": 0.85,
            "tier": "TRUSTED",
            "a2a_endpoint": f"{CODE_REVIEWER_B_URL}/a2a",
        },
        {
            "provider_id": "architectai",
            "provider_name": "ArchitectAI",
            "price": 40.0,
            "confidence": 0.95,
            "estimated_minutes": 10,
            "trust_score": 0.95,
            "tier": "PREFERRED",
            "a2a_endpoint": f"{CODE_REVIEWER_C_URL}/a2a",
        },
    ]
def evaluate_bids(bids: list[dict], strategy: str) -> list[dict]:
    """Score every bid for the given strategy and rank them best-first.

    Each bid's score is a weighted sum of three terms: a price score
    (1 minus the bid price divided by the highest price among the bids),
    the bid's "trust_score" (0.5 when absent) and its "confidence"
    (0.8 when absent). The score is written into the bid dict itself under
    "score".

    Args:
        bids: Bid dicts; each must carry a "price" entry.
        strategy: "balanced", "lowest_price" or "best_quality". Any other
            value uses the "balanced" weights.

    Returns:
        A new list holding the same dict objects, highest score first.
        An empty list when there are no bids.
    """
    if not bids:
        return []

    profiles = {
        "balanced": {"price": 0.4, "trust": 0.35, "confidence": 0.25},
        "lowest_price": {"price": 0.7, "trust": 0.2, "confidence": 0.1},
        "best_quality": {"price": 0.2, "trust": 0.5, "confidence": 0.3},
    }
    profile = profiles.get(strategy, profiles["balanced"])

    # A falsy maximum (all prices zero) is replaced by 1 to avoid dividing by zero.
    ceiling = max(entry["price"] for entry in bids)
    if not ceiling:
        ceiling = 1

    for entry in bids:
        cheapness = 1 - (entry["price"] / ceiling)
        trust = entry.get("trust_score", 0.5)
        certainty = entry.get("confidence", 0.8)
        entry["score"] = (
            profile["price"] * cheapness
            + profile["trust"] * trust
            + profile["confidence"] * certainty
        )

    ranked = list(bids)
    ranked.sort(key=lambda entry: entry["score"], reverse=True)
    return ranked
"hourglass_empty"}, + "bidding": {"color": "cyan", "label": "Collecting Bids", "icon": "gavel"}, + "evaluating": {"color": "orange", "label": "Evaluating", "icon": "analytics"}, + "awarded": {"color": "blue", "label": "Contract Awarded", "icon": "assignment_turned_in"}, + "executing": {"color": "green", "label": "Executing Review", "icon": "play_circle"}, + "paying": {"color": "purple", "label": "Processing Payment", "icon": "payment"}, + "settling": {"color": "amber", "label": "Settling", "icon": "account_balance"}, + "completed": {"color": "green", "label": "Completed", "icon": "check_circle"}, + "failed": {"color": "red", "label": "Failed", "icon": "error"}, + } + + status = status_config.get(task.status, status_config["pending"]) + is_active = task.status not in ["completed", "failed"] + + with ui.card().classes(f'w-full bg-slate-800 border {"border-2 border-" + status["color"] + "-500" if is_active else "border-slate-700"}'): + with ui.row().classes('w-full justify-between items-center'): + with ui.row().classes('items-center gap-2'): + ui.label(f"Review: {task.task_id}").classes('font-bold text-white') + ui.badge(status["label"], color=status["color"]).props(f'icon={status["icon"]}') + ui.label(task.timestamp or "In progress...").classes('text-xs text-slate-400') + + # Progress bar (7 steps) + if task.current_step > 0: + steps = ["Bids", "Eval", "Award", "Execute", "AP2 Select", "AP2 Pay", "Settle"] + with ui.row().classes('w-full gap-1 my-2'): + for i, step in enumerate(steps): + completed = i < task.current_step + active = i == task.current_step - 1 + color = "green" if completed else (status["color"] if active else "slate-600") + with ui.column().classes('flex-1'): + ui.linear_progress(value=1 if completed else (0.5 if active else 0), color=color).classes('h-1') + ui.label(step).classes(f'text-xs {"text-white" if completed else "text-slate-500"}') + + # Code snippet preview + desc = task.description + if len(desc) > 120: + desc = desc[:120] + "..." 
+ ui.label(f'"{desc}"').classes('text-slate-400 italic my-2 text-xs font-mono') + + # Bids section + if task.bids: + ui.label("Bids Received:").classes('font-bold text-white mt-2') + for bid in task.bids: + is_winner = bid.get("provider_name") == task.winner_name + with ui.row().classes(f'w-full justify-between items-center p-2 rounded {"bg-slate-700" if is_winner else ""}'): + with ui.row().classes('items-center gap-2'): + if is_winner: + ui.icon("star", color="green").classes('text-sm') + ui.label(bid.get("provider_name", "Unknown")).classes(f'{"text-white" if is_winner else "text-slate-400"}') + tier = bid.get("tier", "UNVERIFIED") + ui.badge(tier, color={"VERIFIED": "orange", "TRUSTED": "blue", "PREFERRED": "green"}.get(tier, "gray")) + with ui.row().classes('gap-4'): + ui.label(f"${bid.get('price', 0):.2f}").classes(f'{"text-white" if is_winner else "text-slate-400"}') + if bid.get("score", 0) > 0: + ui.label(f"score: {bid.get('score', 0):.3f}").classes('text-xs text-slate-500') + + # Winner info + if task.winner_name: + with ui.card().classes('w-full bg-slate-700 mt-2'): + with ui.row().classes('w-full justify-between items-center'): + ui.label(f"Winner: {task.winner_name}").classes('font-bold text-green-400') + ui.label(f"${task.winner_price:.2f}").classes('text-xl font-bold text-green-400') + with ui.row().classes('gap-4 text-xs text-slate-400'): + if task.contract_id: + ui.label(f"Contract: {task.contract_id[:20]}...") + if task.execution_time_ms > 0: + ui.label(f"Execution: {task.execution_time_ms}ms") + if task.platform_fee > 0: + ui.label(f"Platform fee: ${task.platform_fee:.2f}") + + # AP2 Payment info + if task.ap2_payment_provider: + with ui.card().classes('w-full bg-gradient-to-r from-purple-900 to-indigo-900 border border-purple-500 mt-2'): + with ui.row().classes('w-full justify-between items-center'): + ui.label("AP2 Payment").classes('font-bold text-purple-300') + ui.badge("PAID", color="green") + with ui.column().classes('gap-1'): + 
ui.label(f"Provider: {task.ap2_payment_provider}").classes('text-white text-sm') + if task.ap2_cart_mandate_id: + ui.label(f"Cart Mandate: {task.ap2_cart_mandate_id}").classes('text-slate-400 text-xs') + if task.ap2_payment_receipt_id: + ui.label(f"Receipt: {task.ap2_payment_receipt_id}").classes('text-slate-400 text-xs') + if task.ap2_net_fee != 0: + if task.ap2_net_fee < 0: + ui.label(f"You earned ${abs(task.ap2_net_fee):.2f} CASHBACK!").classes('text-green-400 font-bold') + else: + ui.label(f"Net fee: ${task.ap2_net_fee:.2f}").classes('text-slate-300') + + # Agent response + if task.agent_response: + with ui.expansion("Code Review Results", icon="code").classes('w-full mt-2'): + ui.markdown(task.agent_response).classes('text-sm bg-slate-900 p-4 rounded max-h-96 overflow-y-auto') + + +def create_agent_card(agent: dict): + """Create a card for displaying an agent.""" + is_online = agent.get("status") == "ACTIVE" + agent_type = agent.get("type", "reviewer") + tier = agent.get("tier", "UNVERIFIED") + tier_color = {"VERIFIED": "orange", "TRUSTED": "blue", "PREFERRED": "green", "UNVERIFIED": "gray"}.get(tier, "gray") + + with ui.card().classes(f'w-full bg-slate-800 border {"border-green-500" if is_online else "border-red-500"} mb-2'): + with ui.row().classes('w-full justify-between items-center'): + with ui.row().classes('items-center gap-2'): + ui.icon("circle", color="green" if is_online else "red").classes('text-xs') + ui.label(agent.get("name", "Unknown")).classes('font-bold text-white text-sm') + type_color = "cyan" if agent_type == "reviewer" else "purple" + ui.badge(agent_type.upper(), color=type_color).props('dense') + + with ui.row().classes('items-center gap-2 mt-1'): + ui.badge(tier, color=tier_color).props('dense') + trust_score = agent.get("trust_score", 0) + ui.label(f"Trust: {trust_score:.0%}").classes('text-xs text-slate-400') + + desc = agent.get("description", "") + if desc: + ui.label(desc[:60] + "..." 
if len(desc) > 60 else desc).classes('text-xs text-slate-500 mt-1') + + capabilities = agent.get("capabilities", []) + if capabilities: + with ui.row().classes('gap-1 mt-1 flex-wrap'): + for cap in capabilities[:3]: + ui.badge(cap, color="gray").props('dense outline') + if len(capabilities) > 3: + ui.label(f"+{len(capabilities) - 3}").classes('text-xs text-slate-500') + + +@ui.page('/') +async def main_page(): + """Main dashboard page.""" + state = get_state() + + ui.dark_mode().enable() + ui.add_head_html(''' + + ''') + + agents_data = await fetch_registered_agents() + + with ui.column().classes('w-full p-6 gap-6'): + # Header + with ui.row().classes('w-full justify-between items-center'): + with ui.column(): + ui.label("Agent Exchange - Code Review").classes('text-3xl font-bold text-white') + ui.label("Claude-Powered Code Review Agents + AEX Marketplace + AP2 Payments").classes('text-slate-400') + with ui.row().classes('gap-2'): + reviewer_count = len([a for a in agents_data if a.get("type") == "reviewer" and a.get("status") == "ACTIVE"]) + payment_count = len([a for a in agents_data if a.get("type") == "payment" and a.get("status") == "ACTIVE"]) + ui.badge(f"Review Agents: {reviewer_count}", color="cyan") + ui.badge(f"Payment Agents: {payment_count}", color="purple") + ui.badge(f"Reviews: {len(state.tasks)}", color="blue") + ui.badge(f"Revenue: ${state.stats['platform_revenue']:.2f}", color="green") + + # Main content - three columns + with ui.row().classes('w-full gap-4'): + # Left column - Registered Agents + with ui.column().classes('w-72'): + with ui.card().classes('w-full bg-slate-800 border border-slate-700'): + with ui.row().classes('w-full justify-between items-center mb-3'): + ui.label("Registered Agents").classes('text-lg font-bold text-white') + async def refresh_agents(): + nonlocal agents_data + agents_data = await fetch_registered_agents() + agents_container.refresh() + ui.button(icon="refresh", on_click=refresh_agents).props('flat dense 
round').classes('text-slate-400') + + @ui.refreshable + def agents_container(): + reviewer_agents = [a for a in agents_data if a.get("type") == "reviewer"] + payment_agents = [a for a in agents_data if a.get("type") == "payment"] + + with ui.expansion(value=True).classes('w-full bg-slate-700 mb-2').props('dense header-class="bg-slate-700"') as rev_exp: + with rev_exp.add_slot('header'): + with ui.row().classes('items-center gap-2 w-full'): + ui.icon("code", color="cyan").classes('text-lg') + ui.label("Code Review Agents").classes('font-bold text-cyan-400') + ui.badge(f"{len(reviewer_agents)}", color="cyan").props('dense') + + if reviewer_agents: + for agent in reviewer_agents: + create_agent_card(agent) + else: + ui.label("No review agents registered").classes('text-slate-500 text-sm italic') + + with ui.expansion(value=True).classes('w-full bg-slate-700 mb-2').props('dense header-class="bg-slate-700"') as pay_exp: + with pay_exp.add_slot('header'): + with ui.row().classes('items-center gap-2 w-full'): + ui.icon("payment", color="purple").classes('text-lg') + ui.label("Payment Agents (AP2)").classes('font-bold text-purple-400') + ui.badge(f"{len(payment_agents)}", color="purple").props('dense') + + if payment_agents: + for agent in payment_agents: + create_agent_card(agent) + else: + ui.label("No payment agents registered").classes('text-slate-500 text-sm italic') + + online = len([a for a in agents_data if a.get("status") == "ACTIVE"]) + offline = len([a for a in agents_data if a.get("status") != "ACTIVE"]) + with ui.row().classes('w-full justify-center gap-4 mt-2 pt-2 border-t border-slate-700'): + ui.label(f"Online: {online}").classes('text-xs text-green-400') + ui.label(f"Offline: {offline}").classes('text-xs text-red-400') + + agents_container() + + async def auto_refresh_agents(): + nonlocal agents_data + while True: + await asyncio.sleep(5) + try: + agents_data = await fetch_registered_agents() + agents_container.refresh() + except Exception as e: + 
print(f"Auto-refresh error: {e}") + + ui.timer(0.1, lambda: asyncio.create_task(auto_refresh_agents()), once=True) + + # Middle column - Task submission + with ui.column().classes('w-96'): + with ui.card().classes('w-full bg-slate-800 border border-slate-700'): + ui.label("Submit Code Review").classes('text-xl font-bold text-white mb-4') + + # Sample code selector + sample_select = ui.select( + label="Load Sample Code", + options=list(SAMPLE_CODE.keys()), + value="Custom Code", + ).classes('w-full') + + code_input = ui.textarea( + label="Paste your code for review", + placeholder="def my_function():\n # paste code here...", + ).classes('w-full font-mono').props('rows=12') + + def load_sample(e): + selected = sample_select.value + if selected and selected in SAMPLE_CODE and SAMPLE_CODE[selected]: + code_input.value = SAMPLE_CODE[selected] + + sample_select.on_value_change(load_sample) + + with ui.row().classes('w-full gap-4'): + files_input = ui.number(label="Files", value=3, min=1, max=50).classes('w-24') + strategy_select = ui.select( + label="Bid Strategy", + options=["balanced", "lowest_price", "best_quality"], + value="balanced" + ).classes('flex-1') + + error_label = ui.label().classes('text-red-400 hidden') + + async def run_workflow(): + if not code_input.value or not code_input.value.strip(): + error_label.text = "Please paste code to review" + error_label.classes(remove='hidden') + return + + error_label.classes(add='hidden') + state.is_running = True + submit_btn.disable() + + task = TaskResult( + task_id=f"review-{int(time.time())}", + description=code_input.value, + code_files=int(files_input.value), + bid_strategy=strategy_select.value, + status="pending", + current_step=0, + ) + state.tasks.insert(0, task) + state.current_task = task + + state.logs = [] + log_container.clear() + tasks_container.refresh() + + add_log("=== AEX Code Review Workflow Started ===", log_container) + add_log(f"Review ID: {task.task_id}", log_container) + add_log(f"Code: 
{task.description[:60]}...", log_container) + add_log("", log_container) + + # STEP 1: Collect bids + task.status = "bidding" + task.current_step = 1 + tasks_container.refresh() + add_log("[STEP 1/7] COLLECTING BIDS from Code Review Agents...", log_container) + await asyncio.sleep(0.1) + + bids = await fetch_real_bids(task.code_files) + task.bids = bids + for b in bids: + add_log(f" - {b['provider_name']}: ${b['price']:.2f} | {b['tier']}", log_container) + tasks_container.refresh() + add_log("", log_container) + + # STEP 2: Evaluate bids + task.status = "evaluating" + task.current_step = 2 + tasks_container.refresh() + add_log(f"[STEP 2/7] EVALUATING BIDS using '{task.bid_strategy}' strategy...", log_container) + await asyncio.sleep(0.1) + + evaluated = evaluate_bids(bids, task.bid_strategy) + task.bids = evaluated + for i, b in enumerate(evaluated): + marker = " << WINNER" if i == 0 else "" + add_log(f" {i+1}. {b['provider_name']}: score={b['score']:.3f}{marker}", log_container) + tasks_container.refresh() + add_log("", log_container) + + # STEP 3: Award contract + if evaluated: + winner = evaluated[0] + task.status = "awarded" + task.current_step = 3 + task.winner_name = winner["provider_name"] + task.winner_tier = winner["tier"] + task.winner_price = winner["price"] + task.winner_score = winner["score"] + task.contract_id = f"contract-{int(time.time())}" + tasks_container.refresh() + + add_log(f"[STEP 3/7] CONTRACT AWARDED", log_container) + add_log(f" Winner: {task.winner_name}", log_container) + add_log(f" Price: ${task.winner_price:.2f}", log_container) + add_log(f" Contract ID: {task.contract_id}", log_container) + add_log("", log_container) + + # STEP 4: Execute code review + task.status = "executing" + task.current_step = 4 + tasks_container.refresh() + add_log(f"[STEP 4/7] EXECUTING CODE REVIEW via A2A Protocol...", log_container) + + url = PROVIDER_URL_MAP.get(winner.get("provider_id"), CODE_REVIEWER_A_URL) + add_log(f" Calling {task.winner_name} at 
{url}...", log_container) + await asyncio.sleep(0.1) + + review_prompt = f"Review the following code for issues, security vulnerabilities, and improvements:\n\n{task.description}" + response, elapsed_ms = await call_agent_a2a(url, review_prompt) + task.agent_response = response + task.execution_time_ms = elapsed_ms + tasks_container.refresh() + + add_log(f" Review received in {elapsed_ms}ms", log_container) + add_log(f" Response length: {len(response)} chars", log_container) + add_log("", log_container) + + # STEP 5: AP2 Payment Selection + task.status = "paying" + task.current_step = 5 + tasks_container.refresh() + add_log(f"[STEP 5/7] AP2 PAYMENT - Selecting Payment Provider...", log_container) + + category = "code_review" + code_lower = task.description.lower() + if any(kw in code_lower for kw in ["security", "injection", "xss", "vulnerability"]): + category = "security_audit" + elif any(kw in code_lower for kw in ["architecture", "design", "pattern"]): + category = "architecture_review" + + payment_bids = await fetch_payment_bids(task.winner_price, category) + for pb in payment_bids: + net = pb.get("net_fee_percent", 0) + net_str = f"{net:.1f}% fee" if net >= 0 else f"{abs(net):.1f}% CASHBACK" + add_log(f" - {pb['provider_name']}: {pb.get('base_fee_percent', 0):.1f}% base, {pb.get('reward_percent', 0):.1f}% reward = {net_str}", log_container) + + best = min(payment_bids, key=lambda x: x.get("net_fee_percent", 99)) + task.ap2_payment_provider = best["provider_name"] + task.ap2_cart_mandate_id = f"cart-{int(time.time())}" + task.ap2_base_fee = best.get("base_fee_percent", 2.0) + task.ap2_reward = best.get("reward_percent", 1.0) + task.ap2_net_fee = round(task.winner_price * best.get("net_fee_percent", 1.0) / 100, 2) + tasks_container.refresh() + + add_log(f" Selected: {task.ap2_payment_provider}", log_container) + if best.get("net_fee_percent", 0) < 0: + add_log(f" YOU EARN {abs(best.get('net_fee_percent', 0)):.1f}% CASHBACK!", log_container) + add_log("", 
log_container) + + # STEP 6: Process Payment + task.current_step = 6 + tasks_container.refresh() + add_log(f"[STEP 6/7] AP2 PAYMENT - Processing...", log_container) + add_log(f" Amount: ${task.winner_price:.2f}", log_container) + add_log(f" Base fee ({task.ap2_base_fee}%): ${task.winner_price * task.ap2_base_fee / 100:.2f}", log_container) + add_log(f" Reward ({task.ap2_reward}%): -${task.winner_price * task.ap2_reward / 100:.2f}", log_container) + + task.ap2_payment_receipt_id = f"receipt-{int(time.time())}" + task.ap2_payment_method = "card" + tasks_container.refresh() + + add_log(f" Receipt ID: {task.ap2_payment_receipt_id}", log_container) + add_log(f" Status: COMPLETED", log_container) + add_log("", log_container) + + # STEP 7: Settlement + task.status = "settling" + task.current_step = 7 + tasks_container.refresh() + add_log(f"[STEP 7/7] SETTLEMENT", log_container) + + task.platform_fee = round(task.winner_price * 0.15, 2) + task.provider_payout = round(task.winner_price - task.platform_fee, 2) + task.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + add_log(f" Platform Fee (15%): ${task.platform_fee:.2f}", log_container) + add_log(f" Provider Payout: ${task.provider_payout:.2f}", log_container) + add_log("", log_container) + + task.status = "completed" + state.stats["total_reviews"] += 1 + state.stats["platform_revenue"] += task.platform_fee + tasks_container.refresh() + + add_log(f"=== CODE REVIEW COMPLETE ===", log_container) + add_log(f"Total reviews: {state.stats['total_reviews']}", log_container) + add_log(f"Platform revenue: ${state.stats['platform_revenue']:.2f}", log_container) + + state.is_running = False + submit_btn.enable() + + submit_btn = ui.button( + "Run Code Review", + on_click=run_workflow, + color="green" + ).classes('w-full mt-4').props('icon=play_arrow') + + # Live log + with ui.card().classes('w-full bg-slate-900 border border-slate-700 mt-4'): + ui.label("Live Output").classes('font-bold text-cyan-400 mb-2') + log_container 
= ui.column().classes('w-full max-h-64 overflow-y-auto') + + # Right column - Task results + with ui.column().classes('flex-1'): + with ui.row().classes('w-full justify-between items-center mb-4'): + ui.label(f"Review Results ({len(state.tasks)})").classes('text-xl font-bold text-white') + if state.tasks: + ui.button( + "Clear History", + on_click=lambda: (state.tasks.clear(), tasks_container.refresh()), + color="gray" + ).props('flat size=sm') + + @ui.refreshable + def tasks_container(): + if not state.tasks: + with ui.card().classes('w-full bg-slate-800 border border-slate-700 p-8 text-center'): + ui.icon("code", color="slate").classes('text-4xl mb-2') + ui.label("No code reviews yet").classes('text-slate-400') + ui.label("Paste code and click 'Run Code Review' to start").classes('text-slate-500 text-sm') + else: + for task in state.tasks: + create_task_card(task) + + tasks_container() + + +if __name__ in {"__main__", "__mp_main__"}: + ui.run( + title="AEX Code Review - Agent Exchange", + host="0.0.0.0", + port=8502, + dark=True, + storage_secret="aex-code-review-secret", + ) diff --git a/demo/code_review/ui/requirements.txt b/demo/code_review/ui/requirements.txt new file mode 100644 index 0000000..c8fbcfd --- /dev/null +++ b/demo/code_review/ui/requirements.txt @@ -0,0 +1,2 @@ +nicegui>=2.0.0 +httpx>=0.27.0 diff --git a/deploy/aws/README.md b/deploy/aws/README.md index 1eb5dce..d867960 100644 --- a/deploy/aws/README.md +++ b/deploy/aws/README.md @@ -1,16 +1,29 @@ # AWS Deployment Guide -This guide explains how to deploy Agent Exchange (AEX) and demo agents to AWS using ECS Fargate and CloudFormation. +This guide explains how to deploy Agent Exchange (AEX) and demo agents to AWS. Two deployment options are supported: + +- **ECS Fargate** -- Serverless containers, simpler operational model +- **EKS (Kubernetes)** -- Full Kubernetes, richer ecosystem, advanced scheduling + +Both options share the same infrastructure foundation (VPC, ECR, Secrets Manager). 
## Prerequisites +### Common (both ECS and EKS) + 1. **AWS Account** with appropriate permissions 2. **AWS CLI** installed and configured (`aws configure`) 3. **Docker** installed locally for building images 4. **API Keys** for LLM providers: - Anthropic API Key (for Claude - used by all agents) -## Architecture8 +### Additional for EKS + +5. **kubectl** -- Kubernetes CLI ([install](https://kubernetes.io/docs/tasks/tools/)) +6. **helm** -- Kubernetes package manager ([install](https://helm.sh/docs/intro/install/)) +7. **eksctl** (optional) -- EKS management CLI ([install](https://eksctl.io/installation/)) + +## ECS Fargate Architecture ``` ┌─────────────────────────────────────────────────────────────────────────────┐ @@ -388,3 +401,282 @@ aws servicediscovery list-instances \ --service-id \ --region us-east-1 ``` + +--- + +## EKS Deployment (Kubernetes) + +### EKS Architecture + +``` ++-----------------------------------------------------------------------------+ +| AWS Cloud | +| | +| +-----------------------------------------------------------------------+ | +| | Nginx Ingress / AWS ALB | | +| | (Public LB) | | +| +-----------------------------------------------------------------------+ | +| | | | +| | /api/* | /* | +| v v | +| +-------------+ +---------------+ | +| | AEX Gateway | | Demo UI | | +| | (8080) | | NiceGUI(8502) | | +| +-------------+ +---------------+ | +| | | | +| +--------+----------------------------------------------+ | +| | EKS Cluster (K8s) | | +| | Namespace: aex | | +| | | | +| | +--- AEX Core (Deployments) -----------------------+ | | +| | | provider-registry work-publisher bid-gateway | | | +| | | bid-evaluator contract-engine settlement | | | +| | | trust-broker identity telemetry | | | +| | | credentials-provider | | | +| | +---------------------------------------------------+ | | +| | | | +| | +--- Code Review Demo (Deployments) ---------------+ | | +| | | code-reviewer-a code-reviewer-b code-reviewer-c | | | +| | | orchestrator | | | 
+| | +---------------------------------------------------+ | | +| | | | +| | +--- Payment Agents (Deployments) -----------------+ | | +| | | payment-devpay payment-codeauditpay | | | +| | | payment-securitypay | | | +| | +---------------------------------------------------+ | | +| | | | +| | +--- Data (StatefulSet) ---------------------------+ | | +| | | MongoDB:27017 | | | +| | +---------------------------------------------------+ | | +| | | | +| | Add-ons: CoreDNS, kube-proxy, vpc-cni, ebs-csi | | +| | Helm: AWS LB Controller, Nginx Ingress, | | +| | External Secrets, Metrics Server | | +| +--------------------------------------------------------+ | +| | +| +--- Shared Infrastructure (CloudFormation) ----------------------------+ | +| | VPC (public+private subnets) | ECR | Secrets Manager | IAM | ALB | | +| +-----------------------------------------------------------------------+ | ++------------------------------------------------------------------------------+ +``` + +### EKS Quick Start + +```bash +# 1. Set up prerequisites and create cluster +hack/deploy/setup-eks.sh + +# 2. Deploy all services to EKS +deploy/aws/deploy-eks.sh --region us-east-1 --env dev + +# 3. Verify +kubectl get pods -n aex + +# 4. Access services locally +kubectl port-forward svc/aex-gateway 8080:8080 -n aex +kubectl port-forward svc/demo-ui-nicegui 8502:8502 -n aex +``` + +### EKS Quick Start (Step by Step) + +```bash +# 1. Install prerequisites +hack/deploy/setup-eks.sh prerequisites + +# 2. Validate configuration +hack/deploy/setup-eks.sh validate + +# 3. Create the cluster +hack/deploy/setup-eks.sh cluster + +# 4. Install add-ons (LB controller, ingress, metrics) +hack/deploy/setup-eks.sh addons + +# 5. 
Deploy services +deploy/aws/deploy-eks.sh --region us-east-1 --env dev +``` + +### EKS CloudFormation Stack + +The EKS deployment adds one CloudFormation stack on top of the shared infrastructure: + +#### eks-cluster.yaml + +Creates EKS-specific resources: +- EKS cluster (Kubernetes 1.29) +- Managed node group (auto-scaling) +- IAM roles: cluster role, node group role +- OIDC provider for IRSA (IAM Roles for Service Accounts) +- IRSA roles: pod role, AWS LB controller role, EBS CSI driver role +- Security groups: cluster SG, node SG +- EKS add-ons: CoreDNS, kube-proxy, vpc-cni, ebs-csi-driver +- CloudWatch log groups + +Parameters: + +| Parameter | Default | Description | +|-----------|---------|-------------| +| EnvironmentName | aex | Prefix for resource names | +| Environment | dev | dev, staging, or production | +| ClusterName | aex-eks | EKS cluster name | +| KubernetesVersion | 1.29 | K8s version | +| NodeInstanceType | t3.medium | EC2 instance type | +| MinSize | 2 | Min nodes (3 for prod) | +| MaxSize | 5 | Max nodes (10 for prod) | +| DesiredSize | 2 | Desired nodes (3 for prod) | + +### ECS vs EKS Comparison + +| Feature | ECS Fargate | EKS | +|---------|-------------|-----| +| **Compute** | Serverless (no nodes) | Managed EC2 node groups | +| **Complexity** | Lower | Higher (full K8s) | +| **Scaling** | Per-task auto-scaling | HPA + Cluster Autoscaler | +| **Service Discovery** | AWS Cloud Map | K8s DNS (CoreDNS) | +| **Secrets** | ECS Secrets integration | External Secrets Operator / IRSA | +| **Load Balancing** | ALB target groups | Ingress controllers | +| **Cost (dev)** | ~$150/month | ~$183/month (+EKS fee) | +| **Cost (prod)** | Higher per-task cost | Better at scale with Spot | +| **Ecosystem** | AWS-native | K8s ecosystem (Helm, Kustomize) | +| **Portability** | AWS only | Multi-cloud capable | +| **CI/CD** | cd-aws.yml | cd-aws-eks.yml | +| **Deploy script** | deploy.sh | deploy-eks.sh | +| **Setup script** | hack/deploy/setup-aws.sh | 
hack/deploy/setup-eks.sh | +| **Teardown** | hack/deploy/teardown-aws.sh | hack/deploy/teardown-eks.sh | + +### EKS Monitoring + +#### CloudWatch Container Insights + +EKS cluster logging is enabled for all control plane log types (api, audit, authenticator, controllerManager, scheduler). Logs are stored in: + +``` +/aws/eks/aex-eks/cluster -- Control plane logs +/aws/containerinsights/aex-eks/ -- Container Insights +``` + +#### kubectl Monitoring + +```bash +# View pod status +kubectl get pods -n aex -o wide + +# View pod resource usage (requires metrics-server) +kubectl top pods -n aex + +# View node resource usage +kubectl top nodes + +# View pod logs +kubectl logs -f deployment/aex-gateway -n aex + +# Describe a problematic pod +kubectl describe pod <pod-name> -n aex + +# View events +kubectl get events -n aex --sort-by='.lastTimestamp' +``` + +### EKS Scaling + +#### Horizontal Pod Autoscaler (HPA) + +```bash +# Create HPA for gateway (2-10 replicas, target 70% CPU) +kubectl autoscale deployment aex-gateway \ + --min=2 --max=10 --cpu-percent=70 -n aex + +# View HPA status +kubectl get hpa -n aex +``` + +#### Manual Scaling + +```bash +# Scale a specific deployment +kubectl scale deployment/aex-gateway --replicas=3 -n aex + +# Scale all deployments to 0 (cost saving) +kubectl get deployments -n aex -o name | \ + xargs -I {} kubectl scale {} --replicas=0 -n aex + +# Scale all back to 1 +kubectl get deployments -n aex -o name | \ + xargs -I {} kubectl scale {} --replicas=1 -n aex +``` + +#### Cluster Autoscaler + +The cluster autoscaler is installed by `setup-eks.sh` and automatically adjusts the number of nodes based on pod scheduling demands.
Configuration: + +- Scale-down delay after add: 5 minutes +- Scale-down unneeded time: 5 minutes +- Expander strategy: least-waste + +### EKS Cleanup + +```bash +# Delete EKS resources only (keep shared infrastructure) +hack/deploy/teardown-eks.sh + +# Delete everything including shared VPC, ECR, secrets +hack/deploy/teardown-eks.sh --include-infra + +# Quick cleanup via deploy script +deploy/aws/deploy-eks.sh --clean +``` + +### EKS Troubleshooting + +#### Pods Not Starting + +```bash +# Check pod status and events +kubectl describe pod <pod-name> -n aex + +# Check if image pull is failing +kubectl get events -n aex --field-selector reason=Failed + +# Verify ECR images exist +aws ecr describe-images --repository-name aex/aex-gateway --region us-east-1 +``` + +#### Node Issues + +```bash +# Check node status +kubectl get nodes -o wide + +# Check node conditions +kubectl describe node <node-name> + +# View cluster autoscaler logs +kubectl logs -f deployment/cluster-autoscaler -n kube-system +``` + +#### IRSA / Secret Issues + +```bash +# Verify service account annotation +kubectl get sa aex-service-account -n aex -o yaml + +# Test secret access from a pod +kubectl exec -it deployment/aex-gateway -n aex -- \ + env | grep -i secret + +# Check External Secrets Operator +kubectl get externalsecrets -n aex +``` + +#### Ingress Not Working + +```bash +# Check ingress status +kubectl get ingress -n aex + +# Check nginx-ingress controller logs +kubectl logs -f deployment/ingress-nginx-controller -n ingress-nginx + +# Check AWS LB controller logs +kubectl logs -f deployment/aws-load-balancer-controller -n kube-system +``` diff --git a/deploy/aws/deploy-eks.sh b/deploy/aws/deploy-eks.sh new file mode 100644 index 0000000..ef48d4d --- /dev/null +++ b/deploy/aws/deploy-eks.sh @@ -0,0 +1,589 @@ +#!/bin/bash +# Deploy AEX to AWS EKS using CloudFormation and Kubernetes manifests +# Usage: ./deploy-eks.sh [--region <region>] [--env <env>] [--cluster <name>] [--name <prefix>] [--clean] + +set -euo pipefail + +# 
============================================================ +# Configuration +# ============================================================ + +REGION="${AWS_REGION:-us-east-1}" +ENVIRONMENT_NAME="${ENVIRONMENT_NAME:-aex}" +ENVIRONMENT="${ENVIRONMENT:-dev}" +CLUSTER_NAME="${CLUSTER_NAME:-aex-eks}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +K8S_DIR="$PROJECT_ROOT/deploy/k8s" +CLEAN=false + +# ============================================================ +# Argument parsing +# ============================================================ + +while [[ $# -gt 0 ]]; do + case $1 in + --region) + REGION="$2" + shift 2 + ;; + --env) + ENVIRONMENT="$2" + shift 2 + ;; + --cluster) + CLUSTER_NAME="$2" + shift 2 + ;; + --name) + ENVIRONMENT_NAME="$2" + shift 2 + ;; + --clean) + CLEAN=true + shift + ;; + -h|--help) + # Option summary; every value-taking flag lists its argument placeholder. + echo "Usage: $0 [options]" + echo "" + echo "Options:" + echo " --region <region> AWS region (default: us-east-1)" + echo " --env <env> Environment: dev, staging, production (default: dev)" + echo " --cluster <name> EKS cluster name (default: aex-eks)" + echo " --name <prefix> Environment name prefix (default: aex)" + echo " --clean Teardown EKS resources" + echo " -h, --help Show this help" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +echo "=========================================" +echo " AEX EKS Deployment" +echo "=========================================" +echo "Region: $REGION" +echo "Environment: $ENVIRONMENT" +echo "Cluster: $CLUSTER_NAME" +echo "Env Name: $ENVIRONMENT_NAME" +echo "" + +# ============================================================ +# Teardown mode +# ============================================================ + +if [ "$CLEAN" = true ]; then + echo "Tearing down EKS resources..." + echo "" + + # Check if cluster exists and kubeconfig can be configured + if aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" &>/dev/null; then + echo "Configuring kubectl..." 
+ aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$REGION" 2>/dev/null || true + + echo "Deleting Kubernetes namespace 'aex'..." + kubectl delete namespace aex --ignore-not-found --timeout=120s 2>/dev/null || true + + echo "Uninstalling Helm charts..." + helm uninstall aws-load-balancer-controller -n kube-system 2>/dev/null || true + helm uninstall ingress-nginx -n ingress-nginx 2>/dev/null || true + helm uninstall external-secrets -n external-secrets 2>/dev/null || true + helm uninstall metrics-server -n kube-system 2>/dev/null || true + kubectl delete namespace ingress-nginx --ignore-not-found 2>/dev/null || true + kubectl delete namespace external-secrets --ignore-not-found 2>/dev/null || true + fi + + echo "Deleting EKS CloudFormation stack..." + aws cloudformation delete-stack \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --region "$REGION" 2>/dev/null || true + + echo "Waiting for EKS stack deletion..." + aws cloudformation wait stack-delete-complete \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --region "$REGION" 2>/dev/null || true + + echo "" + echo "EKS teardown complete." + echo "Note: Infrastructure stack (VPC, ECR, secrets) was NOT deleted." + # The README documents --include-infra as the flag that also removes the shared VPC, ECR and secrets. + echo "To delete everything: hack/deploy/teardown-eks.sh --include-infra" + exit 0 +fi + +# ============================================================ +# Step 0: Validate prerequisites +# ============================================================ + +echo "Step 0: Validating prerequisites..." + +if ! command -v aws &>/dev/null; then + echo "Error: AWS CLI is not installed." + exit 1 +fi + +if ! command -v kubectl &>/dev/null; then + echo "Error: kubectl is not installed." + echo "Install: https://kubernetes.io/docs/tasks/tools/" + exit 1 +fi + +if ! command -v helm &>/dev/null; then + echo "Error: Helm is not installed." 
+ echo "Install: https://helm.sh/docs/intro/install/" + exit 1 +fi + +AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null) +if [ -z "$AWS_ACCOUNT_ID" ]; then + echo "Error: Unable to get AWS account ID. Please configure AWS credentials." + exit 1 +fi +echo "AWS Account ID: $AWS_ACCOUNT_ID" + +# ============================================================ +# Step 1: Deploy infrastructure stack (VPC, ECR, Secrets) +# ============================================================ + +echo "" +echo "Step 1: Deploying infrastructure stack (VPC, ECR, Secrets)..." +aws cloudformation deploy \ + --template-file "$SCRIPT_DIR/infrastructure.yaml" \ + --stack-name "${ENVIRONMENT_NAME}-infrastructure" \ + --parameter-overrides EnvironmentName="$ENVIRONMENT_NAME" \ + --capabilities CAPABILITY_NAMED_IAM \ + --region "$REGION" \ + --no-fail-on-empty-changeset + +echo "Infrastructure stack deployed." + +# ============================================================ +# Step 2: Deploy EKS cluster stack +# ============================================================ + +echo "" +echo "Step 2: Deploying EKS cluster stack..." 
+ +# Determine node sizing based on environment +case "$ENVIRONMENT" in + production) + NODE_INSTANCE_TYPE="m5.large" + MIN_SIZE=3 + MAX_SIZE=10 + DESIRED_SIZE=3 + ;; + staging) + NODE_INSTANCE_TYPE="t3.large" + MIN_SIZE=2 + MAX_SIZE=5 + DESIRED_SIZE=2 + ;; + *) + NODE_INSTANCE_TYPE="t3.medium" + MIN_SIZE=2 + MAX_SIZE=5 + DESIRED_SIZE=2 + ;; +esac + +aws cloudformation deploy \ + --template-file "$SCRIPT_DIR/eks-cluster.yaml" \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --parameter-overrides \ + EnvironmentName="$ENVIRONMENT_NAME" \ + Environment="$ENVIRONMENT" \ + ClusterName="$CLUSTER_NAME" \ + NodeInstanceType="$NODE_INSTANCE_TYPE" \ + MinSize="$MIN_SIZE" \ + MaxSize="$MAX_SIZE" \ + DesiredSize="$DESIRED_SIZE" \ + --capabilities CAPABILITY_NAMED_IAM \ + --region "$REGION" \ + --no-fail-on-empty-changeset + +echo "EKS cluster stack deployed." + +# ============================================================ +# Step 3: Configure kubectl +# ============================================================ + +echo "" +echo "Step 3: Configuring kubectl context..." +aws eks update-kubeconfig \ + --name "$CLUSTER_NAME" \ + --region "$REGION" \ + --alias "$CLUSTER_NAME" + +echo "kubectl context configured: $CLUSTER_NAME" +kubectl cluster-info + +# ============================================================ +# Step 4: Install Helm add-ons +# ============================================================ + +echo "" +echo "Step 4: Installing Helm add-ons..." + +# Get the LB controller role ARN from CloudFormation +LB_CONTROLLER_ROLE_ARN=$(aws cloudformation describe-stacks \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --query "Stacks[0].Outputs[?OutputKey=='AWSLoadBalancerControllerRoleArn'].OutputValue" \ + --output text \ + --region "$REGION") + +# AWS Load Balancer Controller +echo "Installing AWS Load Balancer Controller..." 
+helm repo add eks https://aws.github.io/eks-charts 2>/dev/null || true +helm repo update eks +helm upgrade --install aws-load-balancer-controller eks/aws-load-balancer-controller \ + --namespace kube-system \ + --set clusterName="$CLUSTER_NAME" \ + --set serviceAccount.create=true \ + --set serviceAccount.name=aws-load-balancer-controller \ + --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"="$LB_CONTROLLER_ROLE_ARN" \ + --set region="$REGION" \ + --set vpcId="$(aws cloudformation describe-stacks \ + --stack-name "${ENVIRONMENT_NAME}-infrastructure" \ + --query "Stacks[0].Outputs[?OutputKey=='VPCId'].OutputValue" \ + --output text --region "$REGION")" \ + --wait + +# Nginx Ingress Controller +echo "Installing Nginx Ingress Controller..." +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true +helm repo update ingress-nginx +kubectl create namespace ingress-nginx 2>/dev/null || true +helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx \ + --set controller.service.type=LoadBalancer \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-type"=nlb \ + --set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-scheme"=internet-facing \ + --wait + +# External Secrets Operator +echo "Installing External Secrets Operator..." +helm repo add external-secrets https://charts.external-secrets.io 2>/dev/null || true +helm repo update external-secrets +kubectl create namespace external-secrets 2>/dev/null || true +helm upgrade --install external-secrets external-secrets/external-secrets \ + --namespace external-secrets \ + --set installCRDs=true \ + --wait + +# Metrics Server (for HPA) +echo "Installing Metrics Server..." 
helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ --force-update
helm repo update metrics-server
# The --set expression is quoted so the shell never treats "[0]" as a glob.
helm upgrade --install metrics-server metrics-server/metrics-server \
    --namespace kube-system \
    --set "args[0]=--kubelet-preferred-address-types=InternalIP" \
    --wait

echo "Helm add-ons installed."

# ============================================================
# Step 5: Create Kubernetes namespace and secrets
# ============================================================

echo ""
echo "Step 5: Creating Kubernetes namespace and secrets..."

# Idempotent namespace creation (same create|apply pattern used for the
# secret below); unlike `create ... || true` it still surfaces real errors.
kubectl create namespace aex --dry-run=client -o yaml | kubectl apply -f -

# Get Pod role ARN for service account annotation
POD_ROLE_ARN=$(aws cloudformation describe-stacks \
    --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \
    --query "Stacks[0].Outputs[?OutputKey=='EKSPodRoleArn'].OutputValue" \
    --output text \
    --region "$REGION") || POD_ROLE_ARN=""
if [ -z "$POD_ROLE_ARN" ] || [ "$POD_ROLE_ARN" = "None" ]; then
    echo "Error: could not resolve the EKSPodRoleArn output of stack ${ENVIRONMENT_NAME}-eks-cluster" >&2
    exit 1
fi

# Create annotated service account for IRSA
# NOTE(review): the manifest body was missing from the reviewed text; it is
# rebuilt from the account name patched onto deployments in Step 7 and the
# role ARN resolved above. Confirm against the original script.
kubectl apply -f - <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: aex-service-account
  namespace: aex
  annotations:
    eks.amazonaws.com/role-arn: ${POD_ROLE_ARN}
EOF

# Print one field of a JSON secret held in AWS Secrets Manager.
#   $1  secret name below "${ENVIRONMENT_NAME}/"
#   $2  JSON key to extract from the SecretString
# Best-effort by design: prints "placeholder-update-me" when the secret
# cannot be read, and "placeholder" when the payload is not JSON or lacks
# the key, so a first-time deploy can proceed before real secrets exist.
fetch_secret_field() {
    local secret_name="$1" json_key="$2" payload
    payload=$(aws secretsmanager get-secret-value \
        --secret-id "${ENVIRONMENT_NAME}/${secret_name}" \
        --query 'SecretString' --output text \
        --region "$REGION" 2>/dev/null) \
        || payload="{\"${json_key}\":\"placeholder-update-me\"}"
    printf '%s' "$payload" \
        | python3 -c "import sys,json; print(json.load(sys.stdin).get(sys.argv[1],'placeholder'))" "$json_key" 2>/dev/null \
        || echo "placeholder"
}

# NOTE(review): the "anthropic-api-key" secret id is taken from the
# "Next Steps" hint printed at the end of this script; confirm.
ANTHROPIC_API_KEY_VAL=$(fetch_secret_field "anthropic-api-key" "api_key")
MONGO_URI_VAL=$(fetch_secret_field "mongo-uri" "uri")
JWT_KEY_VAL=$(fetch_secret_field "jwt-signing-key" "key")

# Make it visible when the cluster secret is being filled with placeholders.
for secret_value in "$ANTHROPIC_API_KEY_VAL" "$MONGO_URI_VAL" "$JWT_KEY_VAL"; do
    case "$secret_value" in
        placeholder*)
            echo "Warning: at least one secret is still a placeholder; update it in Secrets Manager and re-run." >&2
            break
            ;;
    esac
done

kubectl create secret generic aex-secrets \
    --namespace aex \
    --from-literal=ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY_VAL" \
    --from-literal=MONGO_URI="$MONGO_URI_VAL" \
    --from-literal=JWT_SIGNING_KEY="$JWT_KEY_VAL" \
    --dry-run=client -o yaml | kubectl apply -f -

echo "Namespace and secrets configured."

# ============================================================
# Step 6: Build and push Docker images to ECR
# ============================================================

echo ""
echo "Step 6: Building and pushing Docker images to ECR..."

ECR_REGISTRY="$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com"
aws ecr get-login-password --region "$REGION" | docker login --username AWS --password-stdin "$ECR_REGISTRY"

# Falls back to the tag "latest" when PROJECT_ROOT is not a git checkout.
COMMIT_HASH=$(git -C "$PROJECT_ROOT" rev-parse --short HEAD 2>/dev/null || echo "latest")
IMAGE_TAG="$COMMIT_HASH"
echo "Image tag: $IMAGE_TAG"

# Setup buildx (reuse the builder when it already exists)
docker buildx create --name aex-eks-builder --use 2>/dev/null || docker buildx use aex-eks-builder 2>/dev/null || true

# AEX core services
SERVICES=(
    "aex-gateway"
    "aex-provider-registry"
    "aex-work-publisher"
    "aex-bid-gateway"
    "aex-bid-evaluator"
    "aex-contract-engine"
    "aex-settlement"
    "aex-trust-broker"
    "aex-identity"
    "aex-telemetry"
    "aex-credentials-provider"
)

# Code review demo agents
DEMO_AGENTS=("code-reviewer-a" "code-reviewer-b" "code-reviewer-c" "orchestrator")

# Payment agents
PAYMENT_AGENTS=("payment-devpay" "payment-codeauditpay" "payment-securitypay")

# Build one linux/amd64 image and push it to ECR under both the commit tag
# and "latest".
#   $1    image (repository) name below "$ECR_REGISTRY/$ENVIRONMENT_NAME/"
#   $2..  remaining `docker buildx build` arguments (-f, --build-arg, context)
build_and_push() {
    local image_name="$1"
    shift
    echo "Building $image_name..."
    docker buildx build --platform linux/amd64 \
        -t "$ECR_REGISTRY/$ENVIRONMENT_NAME/$image_name:$IMAGE_TAG" \
        -t "$ECR_REGISTRY/$ENVIRONMENT_NAME/$image_name:latest" \
        "$@" \
        --push
}

cd "$PROJECT_ROOT" || { echo "Error: cannot cd to $PROJECT_ROOT" >&2; exit 1; }

for service in "${SERVICES[@]}"; do
    build_and_push "$service" -f "src/$service/Dockerfile" src/
done

# Demo and payment agents share one Dockerfile and are built only when the
# agent directory is present in the checkout.
for agent in "${DEMO_AGENTS[@]}" "${PAYMENT_AGENTS[@]}"; do
    AGENT_DIR="demo/code_review/agents/$agent"
    if [ -d "$AGENT_DIR" ]; then
        build_and_push "$agent" \
            --build-arg AGENT_DIR="$agent" \
            -f demo/code_review/agents/Dockerfile demo/code_review/agents/
    fi
done

# Demo UI (NiceGUI)
if [ -d "demo/code_review/ui" ]; then
    build_and_push "demo-ui-nicegui" \
        -f demo/code_review/ui/Dockerfile demo/code_review/ui/
fi

echo "All images built and pushed."

# ============================================================
# Step 7: Apply Kubernetes manifests with Kustomize
# ============================================================

echo ""
echo "Step 7: Applying Kubernetes manifests..."

# Check if kustomize overlay exists for this environment
OVERLAY_DIR="$K8S_DIR/overlays/$ENVIRONMENT"
if [ -d "$OVERLAY_DIR" ]; then
    echo "Using kustomize overlay: $ENVIRONMENT"
    kubectl apply -k "$OVERLAY_DIR"
elif [ -d "$K8S_DIR/base" ]; then
    echo "Using kustomize base (no overlay for $ENVIRONMENT)"
    kubectl apply -k "$K8S_DIR/base"
else
    echo "Applying individual manifests from $K8S_DIR..."
    kubectl apply -f "$K8S_DIR/namespace.yaml"
    for f in "$K8S_DIR"/*.yaml; do
        # An unmatched glob leaves the literal pattern behind; skip it.
        [ -e "$f" ] || continue
        # Still best-effort, but a rejected manifest is now reported.
        kubectl apply -f "$f" || echo "  Warning: failed to apply $f" >&2
    done
fi

# Update image references to ECR
echo "Patching deployments with ECR images..."

# Derived from the build lists so that every image pushed above (including
# aex-credentials-provider, which the hand-written list omitted) is also
# patched, bound to the service account, and waited on.
ALL_DEPLOYMENTS=(
    "${SERVICES[@]}"
    "${DEMO_AGENTS[@]}"
    "${PAYMENT_AGENTS[@]}"
    "demo-ui-nicegui"
)

for deploy in "${ALL_DEPLOYMENTS[@]}"; do
    # Deployments that the manifests did not create are skipped silently.
    if ! kubectl get deployment "$deploy" -n aex &>/dev/null; then
        continue
    fi

    echo "  Patching $deploy with ECR image..."
    # The container is expected to carry the same name as its deployment.
    if ! kubectl set image deployment/"$deploy" \
        "$deploy=$ECR_REGISTRY/$ENVIRONMENT_NAME/$deploy:$IMAGE_TAG" \
        -n aex; then
        echo "  Warning: could not set the image on $deploy" >&2
    fi

    # Patch service account so the pods assume the IRSA role
    if ! kubectl patch deployment "$deploy" -n aex \
        --type=json \
        -p='[{"op": "add", "path": "/spec/template/spec/serviceAccountName", "value": "aex-service-account"}]'; then
        echo "  Warning: could not set the service account on $deploy" >&2
    fi
done

echo "Kubernetes manifests applied."

# ============================================================
# Step 8: Wait for deployments
# ============================================================

echo ""
echo "Step 8: Waiting for deployments to become ready..."

TIMEOUT=300
FAILED_ROLLOUTS=()
for deploy in "${ALL_DEPLOYMENTS[@]}"; do
    if kubectl get deployment "$deploy" -n aex &>/dev/null; then
        echo "  Waiting for $deploy..."
        if ! kubectl rollout status deployment/"$deploy" -n aex --timeout="${TIMEOUT}s"; then
            echo "  Warning: $deploy did not become ready within ${TIMEOUT}s"
            FAILED_ROLLOUTS+=("$deploy")
        fi
    fi
done

# Stay best-effort (no exit), but do not claim success when rollouts failed.
if [ "${#FAILED_ROLLOUTS[@]}" -gt 0 ]; then
    echo "Deployment rollout finished with warnings; not ready: ${FAILED_ROLLOUTS[*]}"
else
    echo "Deployment rollout complete."
fi

# ============================================================
# Step 9: Output endpoints
# ============================================================

echo ""
echo "========================================="
echo " EKS Deployment Complete!"
echo "========================================="
echo ""

# Get ingress/LB endpoints
echo "Service Endpoints:"
echo ""

# Check for ingress (empty when no Ingress exists or it has no hostname yet)
INGRESS_HOST=$(kubectl get ingress -n aex -o jsonpath='{.items[0].status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "")
if [ -n "$INGRESS_HOST" ]; then
    echo " Ingress URL: http://$INGRESS_HOST"
fi

# Check for nginx-ingress LB
NGINX_LB=$(kubectl get svc -n ingress-nginx ingress-nginx-controller -o jsonpath='{.status.loadBalancer.ingress[0].hostname}' 2>/dev/null || echo "")
if [ -n "$NGINX_LB" ]; then
    echo " Nginx Ingress LB: http://$NGINX_LB"
fi

echo ""
echo "Cluster: $CLUSTER_NAME"
echo "Region: $REGION"
echo "Namespace: aex"
echo "Image Tag: $IMAGE_TAG"
echo ""

echo "Useful commands:"
echo ""
echo " # View all pods"
echo " kubectl get pods -n aex"
echo ""
echo " # View services"
echo " kubectl get svc -n aex"
echo ""
echo " # View logs for a service"
echo " kubectl logs -f deployment/aex-gateway -n aex"
echo ""
echo " # Port forward gateway locally"
echo " kubectl port-forward svc/aex-gateway 8080:8080 -n aex"
echo ""
echo " # Port forward demo UI locally"
echo " kubectl port-forward svc/demo-ui-nicegui 8502:8502 -n aex"
echo ""
echo " # Scale a deployment"
echo " kubectl scale deployment/aex-gateway --replicas=3 -n aex"
echo ""

echo "Next Steps:"
echo ""
echo "1. Update secrets with actual values:"
echo " aws secretsmanager update-secret \\"
echo " --secret-id ${ENVIRONMENT_NAME}/anthropic-api-key \\"
echo " --secret-string '{\"api_key\":\"sk-ant-...\"}' \\"
echo " --region $REGION"
echo ""
echo "2. Re-run secret sync:"
echo " $0 --region $REGION --env $ENVIRONMENT --cluster $CLUSTER_NAME"
echo ""
echo "3. Clean up:"
echo " $0 --clean --region $REGION --cluster $CLUSTER_NAME"
echo ""
diff --git a/deploy/aws/eks-cluster.yaml b/deploy/aws/eks-cluster.yaml new file mode 100644 index 0000000..0ade363 --- /dev/null +++ b/deploy/aws/eks-cluster.yaml @@ -0,0 +1,653 @@
AWSTemplateFormatVersion: '2010-09-09'
Description: 'AEX EKS Cluster - Amazon Elastic Kubernetes Service for Agent Exchange'

# Builds the EKS control plane, one managed node group, the IAM OIDC
# provider with its IRSA roles, the core add-ons and the log groups.
# Networking (VPC, subnets, ALB security group) is imported from exports
# prefixed with ${EnvironmentName}.

Parameters:
  EnvironmentName:
    Type: String
    Default: aex
    Description: Environment name prefix for resources

  Environment:
    Type: String
    Default: dev
    AllowedValues:
      - dev
      - staging
      - production
    Description: Deployment environment (controls node group sizing)

  ClusterName:
    Type: String
    Default: aex-eks
    Description: Name of the EKS cluster

  KubernetesVersion:
    Type: String
    Default: '1.29'
    Description: Kubernetes version for the EKS cluster

  NodeInstanceType:
    Type: String
    Default: t3.medium
    AllowedValues:
      - t3.medium
      - t3.large
      - t3.xlarge
      - m5.large
      - m5.xlarge
    Description: EC2 instance type for EKS managed node group

  MinSize:
    Type: Number
    Default: 2
    MinValue: 1
    MaxValue: 10
    Description: Minimum number of nodes in the managed node group

  MaxSize:
    Type: Number
    Default: 5
    MinValue: 2
    MaxValue: 20
    Description: Maximum number of nodes in the managed node group

  DesiredSize:
    Type: Number
    Default: 2
    MinValue: 1
    MaxValue: 10
    Description: Desired number of nodes in the managed node group

  # The default keeps the previous behaviour (public endpoint open to any
  # address); set it to an office/VPN range to lock the API server down.
  PublicAccessCidr:
    Type: String
    Default: 0.0.0.0/0
    Description: CIDR block allowed to reach the public Kubernetes API endpoint

Conditions:
  IsProduction: !Equals [!Ref Environment, 'production']

Resources:
  # ============================================================
  # IAM Roles
  # ============================================================

  # EKS Cluster IAM Role
  EKSClusterRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${EnvironmentName}-eks-cluster-role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: eks.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonEKSClusterPolicy
        - arn:aws:iam::aws:policy/AmazonEKSVPCResourceController
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-cluster-role

  # EKS Node Group IAM Role
  EKSNodeGroupRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub ${EnvironmentName}-eks-nodegroup-role
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy
        - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
        - arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
      Policies:
        - PolicyName: EKSNodeSecretsManagerAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - secretsmanager:GetSecretValue
                  - secretsmanager:DescribeSecret
                Resource:
                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${EnvironmentName}/*
        - PolicyName: EKSNodeCloudWatchAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                  - logs:DescribeLogStreams
                Resource: '*'
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-nodegroup-role

  # ============================================================
  # EKS Cluster Security Group
  # ============================================================

  EKSClusterSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupName: !Sub ${EnvironmentName}-eks-cluster-sg
      GroupDescription: Security group for EKS cluster control plane
      VpcId: !ImportValue
        Fn::Sub: ${EnvironmentName}-VPCId
      SecurityGroupIngress:
        # NOTE(review): this rule admits any source that can route to the
        # control-plane interfaces; worker nodes already have their own rule
        # (EKSNodeToClusterIngress). Consider narrowing it to the VPC CIDR.
        - IpProtocol: tcp
          FromPort: 443
          ToPort: 443
          CidrIp: 0.0.0.0/0
          Description: Allow HTTPS access to the Kubernetes API server
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-cluster-sg

  EKSNodeSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupName: !Sub ${EnvironmentName}-eks-node-sg
      GroupDescription: Security group for EKS worker nodes
      VpcId: !ImportValue
        Fn::Sub: ${EnvironmentName}-VPCId
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-node-sg

  # Allow nodes to communicate with each other
  EKSNodeSecurityGroupSelfIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSNodeSecurityGroup
      IpProtocol: '-1'
      SourceSecurityGroupId: !Ref EKSNodeSecurityGroup
      Description: Allow nodes to communicate with each other

  # Allow nodes to communicate with the cluster control plane
  EKSNodeToClusterIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSClusterSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443
      SourceSecurityGroupId: !Ref EKSNodeSecurityGroup
      Description: Allow worker nodes to communicate with cluster API

  # Allow cluster control plane to communicate with nodes
  EKSClusterToNodeIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSNodeSecurityGroup
      IpProtocol: tcp
      FromPort: 1025
      ToPort: 65535
      SourceSecurityGroupId: !Ref EKSClusterSecurityGroup
      Description: Allow cluster control plane to communicate with nodes

  # Allow cluster to reach nodes on 443 (for webhooks)
  EKSClusterToNodeWebhookIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSNodeSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443
      SourceSecurityGroupId: !Ref EKSClusterSecurityGroup
      Description: Allow cluster to reach node webhooks

  # Allow ALB traffic to reach nodes on application ports
  EKSNodeALBIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSNodeSecurityGroup
      IpProtocol: tcp
      FromPort: 8080
      ToPort: 8502
      SourceSecurityGroupId: !ImportValue
        Fn::Sub: ${EnvironmentName}-ALBSecurityGroup
      Description: Allow ALB to reach application ports on nodes

  # Allow NodePort range for ALB ingress controller
  EKSNodeNodePortIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !Ref EKSNodeSecurityGroup
      IpProtocol: tcp
      FromPort: 30000
      ToPort: 32767
      SourceSecurityGroupId: !ImportValue
        Fn::Sub: ${EnvironmentName}-ALBSecurityGroup
      Description: Allow ALB to reach NodePort services

  # ============================================================
  # EKS Cluster
  # ============================================================

  EKSCluster:
    Type: AWS::EKS::Cluster
    # The control-plane log group must exist before logging is switched on.
    # Otherwise EKS creates /aws/eks/<cluster>/cluster itself, and
    # EKSLogGroup then fails with "already exists" (or the 30-day retention
    # is never applied).
    DependsOn: EKSLogGroup
    Properties:
      Name: !Ref ClusterName
      Version: !Ref KubernetesVersion
      RoleArn: !GetAtt EKSClusterRole.Arn
      ResourcesVpcConfig:
        SubnetIds:
          - !ImportValue
            Fn::Sub: ${EnvironmentName}-PrivateSubnet1
          - !ImportValue
            Fn::Sub: ${EnvironmentName}-PrivateSubnet2
          - !ImportValue
            Fn::Sub: ${EnvironmentName}-PublicSubnet1
          - !ImportValue
            Fn::Sub: ${EnvironmentName}-PublicSubnet2
        SecurityGroupIds:
          - !Ref EKSClusterSecurityGroup
        EndpointPublicAccess: true
        EndpointPrivateAccess: true
        # Source ranges admitted to the public endpoint; nodes use the
        # private endpoint and are unaffected by this list.
        PublicAccessCidrs:
          - !Ref PublicAccessCidr
      Logging:
        ClusterLogging:
          EnabledTypes:
            - Type: api
            - Type: audit
            - Type: authenticator
            - Type: controllerManager
            - Type: scheduler
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-cluster
        - Key: Environment
          Value: !Ref Environment

  # ============================================================
  # OIDC Provider for IRSA (IAM Roles for Service Accounts)
  # ============================================================

  EKSOIDCProvider:
    Type: AWS::IAM::OIDCProvider
    DependsOn: EKSCluster
    Properties:
      Url: !GetAtt EKSCluster.OpenIdConnectIssuerUrl
      ClientIdList:
        - sts.amazonaws.com
      ThumbprintList:
        - 9e99a48a9960b14926bb7f3b02e22da2b0ab7280
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-oidc

  # ============================================================
  # IRSA Role for Pod-Level AWS Permissions
  # ============================================================

  # Note: OIDC issuer host is derived by stripping https:// from the
  # EKS cluster's OpenIdConnectIssuerUrl using Fn::Select + Fn::Split.
  # This value is used in IAM trust policy conditions.

  EKSPodRole:
    Type: AWS::IAM::Role
    # No property references the provider, so the ordering must be explicit.
    DependsOn: EKSOIDCProvider
    Properties:
      RoleName: !Sub ${EnvironmentName}-eks-pod-role
      AssumeRolePolicyDocument:
        Fn::Sub:
          - |
            {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Principal": {
                    "Federated": "arn:aws:iam::${AWS::AccountId}:oidc-provider/${OIDCHost}"
                  },
                  "Action": "sts:AssumeRoleWithWebIdentity",
                  "Condition": {
                    "StringLike": {
                      "${OIDCHost}:sub": "system:serviceaccount:aex:*"
                    },
                    "StringEquals": {
                      "${OIDCHost}:aud": "sts.amazonaws.com"
                    }
                  }
                }
              ]
            }
          - OIDCHost: !Select
              - 1
              - !Split
                - '//'
                - !GetAtt EKSCluster.OpenIdConnectIssuerUrl
      Policies:
        - PolicyName: PodSecretsManagerAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - secretsmanager:GetSecretValue
                  - secretsmanager:DescribeSecret
                Resource:
                  - !Sub arn:aws:secretsmanager:${AWS::Region}:${AWS::AccountId}:secret:${EnvironmentName}/*
        - PolicyName: PodECRAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - ecr:GetAuthorizationToken
                  - ecr:BatchCheckLayerAvailability
                  - ecr:GetDownloadUrlForLayer
                  - ecr:BatchGetImage
                Resource: '*'
        - PolicyName: PodCloudWatchAccess
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource: '*'
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-eks-pod-role

  # ============================================================
  # IRSA Role for AWS Load Balancer Controller
  # ============================================================

  AWSLoadBalancerControllerRole:
    Type: AWS::IAM::Role
    DependsOn: EKSOIDCProvider
    Properties:
      RoleName: !Sub ${EnvironmentName}-aws-lb-controller-role
      AssumeRolePolicyDocument:
        Fn::Sub:
          - |
            {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Principal": {
                    "Federated": "arn:aws:iam::${AWS::AccountId}:oidc-provider/${OIDCHost}"
                  },
                  "Action": "sts:AssumeRoleWithWebIdentity",
                  "Condition": {
                    "StringEquals": {
                      "${OIDCHost}:sub": "system:serviceaccount:kube-system:aws-load-balancer-controller",
                      "${OIDCHost}:aud": "sts.amazonaws.com"
                    }
                  }
                }
              ]
            }
          - OIDCHost: !Select
              - 1
              - !Split
                - '//'
                - !GetAtt EKSCluster.OpenIdConnectIssuerUrl
      # NOTE(review): the managed policy below and the wildcard actions in
      # the inline policy (elasticloadbalancing:*, wafv2:*, shield:*) grant
      # far more than a single controller should need; consider replacing
      # both with a scoped policy.
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/ElasticLoadBalancingFullAccess
      Policies:
        - PolicyName: AWSLoadBalancerControllerPolicy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - ec2:DescribeAccountAttributes
                  - ec2:DescribeAddresses
                  - ec2:DescribeAvailabilityZones
                  - ec2:DescribeInternetGateways
                  - ec2:DescribeVpcs
                  - ec2:DescribeVpcPeeringConnections
                  - ec2:DescribeSubnets
                  - ec2:DescribeSecurityGroups
                  - ec2:DescribeInstances
                  - ec2:DescribeNetworkInterfaces
                  - ec2:DescribeTags
                  - ec2:CreateSecurityGroup
                  - ec2:CreateTags
                  - ec2:DeleteTags
                  - ec2:AuthorizeSecurityGroupIngress
                  - ec2:RevokeSecurityGroupIngress
                  - ec2:DeleteSecurityGroup
                  - ec2:GetCoipPoolUsage
                  - ec2:DescribeCoipPools
                  - ec2:GetSecurityGroupsForVpc
                  - elasticloadbalancing:*
                  - cognito-idp:DescribeUserPoolClient
                  - acm:ListCertificates
                  - acm:DescribeCertificate
                  - iam:ListServerCertificates
                  - iam:GetServerCertificate
                  - waf-regional:*
                  - wafv2:*
                  - shield:*
                  - tag:GetResources
                  - tag:TagResources
                Resource: '*'
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-aws-lb-controller-role

  # ============================================================
  # EKS Add-ons
  # ============================================================

  # Installed after the node group exists (DependsOn below).
  CoreDNSAddon:
    Type: AWS::EKS::Addon
    DependsOn: EKSNodeGroup
    Properties:
      AddonName: coredns
      ClusterName: !Ref EKSCluster
      ResolveConflicts: OVERWRITE

  KubeProxyAddon:
    Type: AWS::EKS::Addon
    DependsOn: EKSCluster
    Properties:
      AddonName: kube-proxy
      ClusterName: !Ref EKSCluster
      ResolveConflicts: OVERWRITE

  VpcCniAddon:
    Type: AWS::EKS::Addon
    DependsOn: EKSCluster
    Properties:
      AddonName: vpc-cni
      ClusterName: !Ref EKSCluster
      ResolveConflicts: OVERWRITE
      ConfigurationValues: |
        {
          "env": {
            "ENABLE_PREFIX_DELEGATION": "true"
          }
        }

  EbsCsiDriverAddon:
    Type: AWS::EKS::Addon
    DependsOn: EKSNodeGroup
    Properties:
      AddonName: aws-ebs-csi-driver
      ClusterName: !Ref EKSCluster
      ResolveConflicts: OVERWRITE
      ServiceAccountRoleArn: !GetAtt EBSCSIDriverRole.Arn

  # IAM Role for EBS CSI Driver
  EBSCSIDriverRole:
    Type: AWS::IAM::Role
    DependsOn: EKSOIDCProvider
    Properties:
      RoleName: !Sub ${EnvironmentName}-ebs-csi-driver-role
      AssumeRolePolicyDocument:
        Fn::Sub:
          - |
            {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Effect": "Allow",
                  "Principal": {
                    "Federated": "arn:aws:iam::${AWS::AccountId}:oidc-provider/${OIDCHost}"
                  },
                  "Action": "sts:AssumeRoleWithWebIdentity",
                  "Condition": {
                    "StringEquals": {
                      "${OIDCHost}:sub": "system:serviceaccount:kube-system:ebs-csi-controller-sa",
                      "${OIDCHost}:aud": "sts.amazonaws.com"
                    }
                  }
                }
              ]
            }
          - OIDCHost: !Select
              - 1
              - !Split
                - '//'
                - !GetAtt EKSCluster.OpenIdConnectIssuerUrl
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy
      Tags:
        - Key: Name
          Value: !Sub ${EnvironmentName}-ebs-csi-driver-role

  # ============================================================
  # EKS Managed Node Group
  # ============================================================

  EKSNodeGroup:
    Type: AWS::EKS::Nodegroup
    DependsOn: EKSCluster
    Properties:
      NodegroupName: !Sub ${EnvironmentName}-nodegroup
      ClusterName: !Ref EKSCluster
      NodeRole: !GetAtt EKSNodeGroupRole.Arn
      Subnets:
        - !ImportValue
          Fn::Sub: ${EnvironmentName}-PrivateSubnet1
        - !ImportValue
          Fn::Sub: ${EnvironmentName}-PrivateSubnet2
      InstanceTypes:
        - !Ref NodeInstanceType
      # Production ignores the size parameters and pins 3 / 10 / 3.
      ScalingConfig:
        MinSize: !If [IsProduction, 3, !Ref MinSize]
        MaxSize: !If [IsProduction, 10, !Ref MaxSize]
        DesiredSize: !If [IsProduction, 3, !Ref DesiredSize]
      DiskSize: 50
      AmiType: AL2_x86_64
      Labels:
        role: workload
        environment: !Ref Environment
        app.kubernetes.io/part-of: agent-exchange
      Tags:
        Name: !Sub ${EnvironmentName}-eks-node
        Environment: !Ref Environment
      UpdateConfig:
        MaxUnavailable: 1

  # ============================================================
  # CloudWatch Log Group for EKS
  # ============================================================

  EKSLogGroup:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /aws/eks/${ClusterName}/cluster
      RetentionInDays: 30

  EKSContainerInsightsLogGroup:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /aws/containerinsights/${ClusterName}/application
      RetentionInDays: 30

Outputs:
  EKSClusterName:
    Description: EKS Cluster Name
    Value: !Ref EKSCluster
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterName

  EKSClusterArn:
    Description: EKS Cluster ARN
    Value: !GetAtt EKSCluster.Arn
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterArn

  EKSClusterEndpoint:
    Description: EKS Cluster API Endpoint
    Value: !GetAtt EKSCluster.Endpoint
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterEndpoint

  EKSClusterOIDCIssuer:
    Description: EKS Cluster OIDC Issuer URL
    Value: !GetAtt EKSCluster.OpenIdConnectIssuerUrl
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterOIDCIssuer

  EKSClusterOIDCIssuerHost:
    Description: EKS OIDC Issuer (without https://) for IAM conditions
    Value: !Select
      - 1
      - !Split
        - '//'
        - !GetAtt EKSCluster.OpenIdConnectIssuerUrl
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterOIDCIssuerHost

  EKSClusterSecurityGroup:
    Description: EKS Cluster Security Group ID
    Value: !Ref EKSClusterSecurityGroup
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterSecurityGroup

  EKSNodeSecurityGroup:
    Description: EKS Node Security Group ID
    Value: !Ref EKSNodeSecurityGroup
    Export:
      Name: !Sub ${EnvironmentName}-EKSNodeSecurityGroup

  EKSNodeGroupName:
    Description: EKS Node Group Name
    Value: !Ref EKSNodeGroup
    Export:
      Name: !Sub ${EnvironmentName}-EKSNodeGroupName

  EKSNodeGroupRoleArn:
    Description: EKS Node Group IAM Role ARN
    Value: !GetAtt EKSNodeGroupRole.Arn
    Export:
      Name: !Sub ${EnvironmentName}-EKSNodeGroupRoleArn

  EKSPodRoleArn:
    Description: EKS Pod IRSA Role ARN
    Value: !GetAtt EKSPodRole.Arn
    Export:
      Name: !Sub ${EnvironmentName}-EKSPodRoleArn

  AWSLoadBalancerControllerRoleArn:
    Description: AWS Load Balancer Controller IRSA Role ARN
    Value: !GetAtt AWSLoadBalancerControllerRole.Arn
    Export:
      Name: !Sub ${EnvironmentName}-AWSLBControllerRoleArn

  EBSCSIDriverRoleArn:
    Description: EBS CSI Driver IRSA Role ARN
    Value: !GetAtt EBSCSIDriverRole.Arn
    Export:
      Name: !Sub ${EnvironmentName}-EBSCSIDriverRoleArn

  EKSClusterRoleArn:
    Description: EKS Cluster IAM Role ARN
    Value: !GetAtt EKSClusterRole.Arn
    Export:
      Name: !Sub ${EnvironmentName}-EKSClusterRoleArn
diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md index e1fab41..d976e5f 100644 --- a/deploy/gcp/README.md +++ b/deploy/gcp/README.md @@ -1,6 +1,20 @@ # GCP Deployment Guide -This guide explains how to deploy Agent Exchange (AEX) and demo agents to Google Cloud Platform using Cloud Run. +This guide explains how to deploy Agent Exchange (AEX) and demo agents to Google Cloud Platform using **Cloud Run** or **GKE (Google Kubernetes Engine)**.
+ +## Deployment Options + +| Feature | Cloud Run | GKE | +|---------|-----------|-----| +| Infrastructure management | Fully managed (serverless) | Managed K8s (Autopilot) or self-managed (Standard) | +| Scale to zero | Yes | No (minimum nodes required in Standard) | +| Startup latency | Cold start possible | Always warm | +| Networking | Per-service URLs | Single Ingress IP, K8s DNS | +| Service discovery | HTTP URLs | K8s DNS (`svc.cluster.local`) | +| Persistent storage | No (use external DB) | PersistentVolumeClaims | +| Cost model | Pay per request | Pay per node-hour (Standard) or per-pod resource requests (Autopilot) | +| Best for | Low/variable traffic, demos | Production, high traffic, complex networking | +| MongoDB | External (e.g. MongoDB Atlas) | In-cluster StatefulSet | ## Prerequisites @@ -9,46 +23,96 @@ This guide explains how to deploy Agent Exchange (AEX) and demo agents to Google 3. **API Keys** for LLM providers: - Anthropic API Key (for Claude - used by all agents) +### Additional prerequisites for GKE + +4. **kubectl** (`gcloud components install kubectl`) +5.
**helm** (https://helm.sh/docs/intro/install/) + ## Architecture +### Cloud Run Architecture + +``` ++-----------------------------------------------------------------------------+ +| GCP Cloud Run | +| | +| +-----------+ +--------------------------------------------+ | +| | Demo UI |--->| AEX Gateway | | +| | (NiceGUI) | | (8080) | | +| +-----------+ +--------------------------------------------+ | +| | | | +| | +---------------+----------------+ | +| | | | | | +| v v v v | +| +-----------+ +---------+ +-----------+ +-----------------+ | +| |Orchestrator| |Work Pub | |Bid Gateway| |Provider Registry| | +| | (8103) | | (8081) | | (8082) | | (8085) | | +| +-----------+ +---------+ +-----------+ +-----------------+ | +| | | | +| | A2A Skill Search| | +| v | | +| +----------------------------------------------+-+ | +| | Provider Agents (A2A) | | +| | +----------+ +----------+ +----------+ | | +| | |Reviewer A| |Reviewer B| |Reviewer C| | | +| | |Budget | |Standard | |Premium | | | +| | |(Claude) | |(Claude) | |(Claude) | | | +| | +----------+ +----------+ +----------+ | | +| +-------------------------------------------------+ | +| | +| +-----------------------------------------------------------------------+ | +| | Secret Manager | | +| | ANTHROPIC_API_KEY | | +| +-----------------------------------------------------------------------+ | ++-----------------------------------------------------------------------------+ ``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ GCP Cloud Run │ -│ │ -│ ┌─────────────┐ ┌────────────────────────────────────────────────┐ │ -│ │ Demo UI │───▶│ AEX Gateway │ │ -│ │ (Streamlit)│ │ (8080) │ │ -│ └─────────────┘ └────────────────────────────────────────────────┘ │ -│ │ │ │ -│ │ ┌──────────────────┼──────────────────┐ │ -│ │ │ │ │ │ -│ ▼ ▼ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────┐ ┌─────────────┐ ┌─────────────────┐ │ -│ │ Orchestrator│ │Work Pub │ │Bid Gateway │ │Provider Registry│ │ -│ │ (8103) │ │ (8081) 
│ │ (8082) │ │ (8085) │ │ -│ └─────────────┘ └─────────┘ └─────────────┘ └─────────────────┘ │ -│ │ │ │ -│ │ A2A Skill Search │ │ -│ ▼ │ │ -│ ┌──────────────────────────────────────────────────┐ │ │ -│ │ Provider Agents (A2A) │◀─┘ │ -│ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ │ │ -│ │ │Legal A │ │Legal B │ │Legal C │ │ │ -│ │ │Budget 8100 │ │Standard8101│ │Premium 8102│ │ │ -│ │ │(Claude) │ │(Claude) │ │(Claude) │ │ │ -│ │ └────────────┘ └────────────┘ └────────────┘ │ │ -│ └──────────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ Secret Manager │ │ -│ │ ANTHROPIC_API_KEY │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -## Quick Start - -### 1. Set up your project + +### GKE Architecture + +``` ++-----------------------------------------------------------------------------+ +| GKE Cluster (Autopilot/Standard) | +| | +| +-- Namespace: aex -------------------------------------------------------+| +| | || +| | +--- Ingress (nginx) -----------------------------------------------+ || +| | | External IP -> /api -> aex-gateway:8080 | || +| | | -> /demo -> demo-ui-nicegui:8502 | || +| | | -> /agents -> code-reviewer-{a,b,c}, orchestrator | || +| | +-------------------------------------------------------------------+ || +| | || +| | +--- AEX Core (Deployments) ----------------------------------------+ || +| | | gateway | work-publisher | bid-gateway | bid-evaluator | || +| | | contract-engine | provider-registry | trust-broker | || +| | | identity | settlement | telemetry | credentials-provider | || +| | +-------------------------------------------------------------------+ || +| | || +| | +--- Code Review Agents (Deployments) ------------------------------+ || +| | | code-reviewer-a:8100 | code-reviewer-b:8101 | code-reviewer-c:8102|| +| | | orchestrator:8103 | || +| 
| +-------------------------------------------------------------------+ || +| | || +| | +--- Payment Agents (Deployments) ----------------------------------+ || +| | | payment-devpay:8200 | payment-codeauditpay:8201 | || +| | | payment-securitypay:8202 | || +| | +-------------------------------------------------------------------+ || +| | || +| | +--- MongoDB (StatefulSet) -----------------------------------------+ || +| | | mongodb:27017 with PersistentVolumeClaim | || +| | +-------------------------------------------------------------------+ || +| | || +| +-------------------------------------------------------------------------+| +| | +| +--- Cluster-level -------------------------------------------------------+| +| | ingress-nginx (LoadBalancer) | cert-manager | external-secrets || +| | Workload Identity -> GCP Secret Manager || +| +-------------------------------------------------------------------------+| ++-----------------------------------------------------------------------------+ +``` + +## Cloud Run Deployment + +### Quick Start ```bash # Set your project ID @@ -58,16 +122,13 @@ export REGION="us-central1" # Authenticate gcloud auth login gcloud config set project $PROJECT_ID -``` - -### 2. Deploy everything -```bash +# Deploy everything cd deploy/gcp ./deploy.sh $PROJECT_ID $REGION ``` -### 3. Configure API keys +### Configure API keys After deployment, update the secrets with your actual API keys: @@ -76,24 +137,19 @@ After deployment, update the secrets with your actual API keys: echo "sk-ant-..." | gcloud secrets versions add ANTHROPIC_API_KEY --data-file=- ``` -### 4. Access the demo +### Access the demo The deploy script will output the Demo UI URL. Open it in your browser to try the demo. -## Manual Deployment +### Manual Deployment If you prefer to deploy services individually: -### Build images - ```bash # Build with Cloud Build gcloud builds submit --config=deploy/gcp/cloudbuild.yaml . 
-``` -### Deploy a single service - -```bash +# Deploy a single service gcloud run deploy aex-gateway \ --image gcr.io/$PROJECT_ID/aex-gateway:latest \ --region $REGION \ @@ -101,6 +157,100 @@ gcloud run deploy aex-gateway \ --allow-unauthenticated ``` +## GKE Deployment + +### Quick Start + +```bash +# Set your project ID +export GCP_PROJECT_ID="your-project-id" +export GCP_REGION="us-central1" + +# Option A: Full setup (cluster + deploy) +./hack/deploy/setup-gke.sh +./deploy/gcp/deploy-gke.sh --project-id $GCP_PROJECT_ID + +# Option B: Step by step +./deploy/gcp/gke-cluster.sh --project-id $GCP_PROJECT_ID --mode autopilot +./deploy/gcp/deploy-gke.sh --project-id $GCP_PROJECT_ID --environment staging +``` + +### GKE Autopilot vs Standard + +| Feature | Autopilot | Standard | +|---------|-----------|----------| +| Node management | Google-managed | User-managed | +| Node pools | Automatic | Configurable | +| Pricing | Per-pod resource requests | Per-node (VM) | +| HPA/VPA | Automatic | Manual setup | +| GPU/TPU | Supported | Supported | +| Min cost | ~$70/month (base) | ~$150/month (3 e2-standard-4 nodes) | +| Best for | Most workloads | Custom requirements, cost control | + +**Recommendation:** Use Autopilot for most cases. Use Standard mode if you need specific node configurations, DaemonSets, or tighter cost control. 
+ +### GKE Cluster Setup + +```bash +# Autopilot (recommended) +./deploy/gcp/gke-cluster.sh \ + --project-id your-project \ + --region us-central1 + +# Standard mode (more control) +./deploy/gcp/gke-cluster.sh \ + --project-id your-project \ + --mode standard \ + --min-nodes 2 \ + --max-nodes 5 + +# Delete cluster +./deploy/gcp/gke-cluster.sh \ + --project-id your-project \ + --delete +``` + +### GKE Application Deployment + +```bash +# Full deploy (build + push + apply manifests) +./deploy/gcp/deploy-gke.sh --project-id your-project + +# Deploy to staging +./deploy/gcp/deploy-gke.sh --project-id your-project --environment staging + +# Skip build (use existing images) +./deploy/gcp/deploy-gke.sh --project-id your-project --skip-build + +# Build images only (no deploy) +./deploy/gcp/deploy-gke.sh --project-id your-project --build-only + +# Clean up deployed resources +./deploy/gcp/deploy-gke.sh --project-id your-project --clean +``` + +### GKE with Cloud Build + +Use the GKE-specific Cloud Build configuration: + +```bash +# Build and deploy to GKE via Cloud Build (COMMIT_SHA is the image tag; it is empty for manual submits unless passed explicitly) +gcloud builds submit --config=deploy/gcp/cloudbuild-gke.yaml \ + --substitutions=_GKE_CLUSTER=aex-cluster,_GKE_REGION=us-central1,COMMIT_SHA=$(git rev-parse HEAD) . 
+``` + +### GKE with Kustomize (Direct) + +If you prefer using `kubectl` directly: + +```bash +# Apply base manifests +kubectl apply -k deploy/k8s/base/ + +# Apply with environment overlay (if available) +kubectl apply -k deploy/k8s/overlays/staging/ +``` + ## Environment Variables ### AEX Services @@ -119,41 +269,86 @@ gcloud run deploy aex-gateway \ | Agent | Tier | LLM | Port | Required Secrets | |-------|------|-----|------|-----------------| -| legal-agent-a | Budget | Claude | 8100 | ANTHROPIC_API_KEY | -| legal-agent-b | Standard | Claude | 8101 | ANTHROPIC_API_KEY | -| legal-agent-c | Premium | Claude | 8102 | ANTHROPIC_API_KEY | +| code-reviewer-a | Budget | Claude | 8100 | ANTHROPIC_API_KEY | +| code-reviewer-b | Standard | Claude | 8101 | ANTHROPIC_API_KEY | +| code-reviewer-c | Premium | Claude | 8102 | ANTHROPIC_API_KEY | | orchestrator | - | Claude | 8103 | ANTHROPIC_API_KEY | +| payment-devpay | - | - | 8200 | - | +| payment-codeauditpay | - | - | 8201 | - | +| payment-securitypay | - | - | 8202 | - | ## Cost Optimization +### Cloud Run + - All services use **min-instances: 0** to scale to zero when idle - Services auto-scale based on traffic - Memory is set conservatively (512Mi-1Gi) -To reduce costs further: ```bash # Set all services to min-instances 0 -for service in aex-gateway aex-provider-registry legal-agent-a legal-agent-b legal-agent-c; do +for service in aex-gateway aex-provider-registry code-reviewer-a code-reviewer-b code-reviewer-c; do gcloud run services update $service --min-instances 0 --region $REGION done ``` +### GKE + +- **Autopilot:** Pay only for pod resource requests. Set resource requests/limits carefully. +- **Standard:** Use cluster autoscaler with min-nodes=1 for dev/staging. +- Use `kubectl top pods -n aex` to right-size resource requests. +- Consider Spot/Preemptible nodes for non-production workloads. 
+ +```bash +# Scale down non-critical services in dev +kubectl scale deployment code-reviewer-b code-reviewer-c --replicas=0 -n aex + +# Check resource usage +kubectl top pods -n aex +kubectl top nodes +``` + ## Monitoring -View logs in Cloud Console: +### Cloud Run + ```bash gcloud logging read "resource.type=cloud_run_revision" --limit 50 ``` Or use the Cloud Console: https://console.cloud.google.com/run +### GKE + +```bash +# Pod logs +kubectl logs -n aex deployment/aex-gateway -f + +# All pod status +kubectl get pods -n aex -o wide + +# Resource usage +kubectl top pods -n aex +kubectl top nodes + +# Events (troubleshooting) +kubectl get events -n aex --sort-by='.lastTimestamp' + +# Port-forward for local access +kubectl port-forward -n aex svc/aex-gateway 8080:8080 +kubectl port-forward -n aex svc/demo-ui-nicegui 8502:8502 +``` + +GKE Dashboard: https://console.cloud.google.com/kubernetes +Cloud Monitoring: https://console.cloud.google.com/monitoring + ## Cleanup -To delete all deployed services: +### Cloud Run ```bash # Delete Cloud Run services -for service in demo-ui orchestrator legal-agent-c legal-agent-b legal-agent-a \ +for service in demo-ui orchestrator code-reviewer-c code-reviewer-b code-reviewer-a \ aex-gateway aex-telemetry aex-identity aex-settlement aex-contract-engine \ aex-bid-evaluator aex-trust-broker aex-bid-gateway aex-work-publisher \ aex-provider-registry; do @@ -165,28 +360,109 @@ gcloud container images list --repository gcr.io/$PROJECT_ID | \ xargs -I {} gcloud container images delete {} --force-delete-tags --quiet ``` +### GKE + +```bash +# Delete namespace only (keep cluster) +./hack/deploy/teardown-gke.sh namespace + +# Delete everything (namespace + Helm charts + cluster + IAM) +./hack/deploy/teardown-gke.sh all + +# Or use the cluster script directly +./deploy/gcp/gke-cluster.sh --project-id $PROJECT_ID --delete +``` + ## Troubleshooting -### Service not starting +### Cloud Run + +#### Service not starting -Check logs: ```bash 
gcloud run services logs read aex-gateway --region $REGION ``` -### Secret not found +#### Secret not found -Ensure secrets are created: ```bash gcloud secrets list ``` -### Connection refused between services +#### Connection refused between services -Ensure services allow unauthenticated access: ```bash gcloud run services add-iam-policy-binding SERVICE_NAME \ --member="allUsers" \ --role="roles/run.invoker" \ --region $REGION ``` + +### GKE + +#### Pods stuck in Pending + +```bash +# Check events +kubectl describe pod <pod-name> -n aex + +# Check node capacity (Standard mode) +kubectl describe nodes | grep -A 5 "Allocated resources" + +# Autopilot: pods may take 1-2 minutes to schedule (node provisioning) +``` + +#### Pods in CrashLoopBackOff + +```bash +# Check logs +kubectl logs <pod-name> -n aex --previous + +# Check environment variables +kubectl describe pod <pod-name> -n aex | grep -A 20 "Environment" +``` + +#### Ingress not getting external IP + +```bash +# Check ingress-nginx controller +kubectl get svc -n ingress-nginx +kubectl logs -n ingress-nginx deployment/ingress-nginx-controller + +# Check ingress resource +kubectl describe ingress -n aex +``` + +#### Workload Identity issues + +```bash +# Verify K8s SA annotation +kubectl describe sa aex-workload -n aex + +# Test from a pod +kubectl run test --rm -it --image=google/cloud-sdk:slim \ + --overrides='{"spec":{"serviceAccountName":"aex-workload"}}' -n aex -- \ + gcloud secrets list --project=$GCP_PROJECT_ID +``` + +## CI/CD + +### Cloud Run CI/CD + +The existing `.github/workflows/cd.yml` handles Cloud Run deployments on tag pushes and manual dispatch. 
+ +### GKE CI/CD + +The `.github/workflows/cd-gcp-gke.yml` workflow provides: +- Build and push images to Artifact Registry +- Deploy to GKE staging (automatic on tags) +- Run smoke tests against staging +- Deploy to GKE production (manual approval) +- Uses Workload Identity Federation for authentication + +Required GitHub secrets for GKE: +- `GCP_PROJECT_ID` - GCP project ID +- `GCP_WORKLOAD_IDENTITY_PROVIDER` - Workload Identity provider +- `GCP_SERVICE_ACCOUNT` - GitHub Actions service account +- `GKE_CLUSTER_NAME` - GKE cluster name (default: aex-cluster) +- `GKE_CLUSTER_REGION` - GKE cluster region (default: us-central1) diff --git a/deploy/gcp/cloudbuild-gke.yaml b/deploy/gcp/cloudbuild-gke.yaml new file mode 100644 index 0000000..71944c7 --- /dev/null +++ b/deploy/gcp/cloudbuild-gke.yaml @@ -0,0 +1,268 @@ +# Cloud Build configuration for AEX services - GKE deployment +# Builds images, pushes to Artifact Registry, and deploys to GKE +# +# Usage (COMMIT_SHA is the image tag and is empty for manual submits, so pass it explicitly): +# gcloud builds submit --config=deploy/gcp/cloudbuild-gke.yaml \ +# --substitutions=_GKE_CLUSTER=aex-cluster,_GKE_REGION=us-central1,COMMIT_SHA=$(git rev-parse HEAD) . 
+ +substitutions: + _GKE_CLUSTER: aex-cluster + _GKE_REGION: us-central1 + _GKE_NAMESPACE: aex + _REGISTRY: ${_GKE_REGION}-docker.pkg.dev/${PROJECT_ID}/aex + +steps: + # ============================================================ + # Build AEX Core Services + # ============================================================ + + - name: 'gcr.io/cloud-builders/docker' + id: build-gateway + args: ['build', '-t', '${_REGISTRY}/aex-gateway:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-gateway:latest', '-f', 'aex-gateway/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-work-publisher + args: ['build', '-t', '${_REGISTRY}/aex-work-publisher:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-work-publisher:latest', '-f', 'aex-work-publisher/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-bid-gateway + args: ['build', '-t', '${_REGISTRY}/aex-bid-gateway:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-bid-gateway:latest', '-f', 'aex-bid-gateway/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-bid-evaluator + args: ['build', '-t', '${_REGISTRY}/aex-bid-evaluator:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-bid-evaluator:latest', '-f', 'aex-bid-evaluator/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-contract-engine + args: ['build', '-t', '${_REGISTRY}/aex-contract-engine:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-contract-engine:latest', '-f', 'aex-contract-engine/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-provider-registry + args: ['build', '-t', '${_REGISTRY}/aex-provider-registry:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-provider-registry:latest', '-f', 'aex-provider-registry/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-trust-broker + args: ['build', '-t', 
'${_REGISTRY}/aex-trust-broker:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-trust-broker:latest', '-f', 'aex-trust-broker/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-identity + args: ['build', '-t', '${_REGISTRY}/aex-identity:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-identity:latest', '-f', 'aex-identity/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-settlement + args: ['build', '-t', '${_REGISTRY}/aex-settlement:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-settlement:latest', '-f', 'aex-settlement/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-telemetry + args: ['build', '-t', '${_REGISTRY}/aex-telemetry:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-telemetry:latest', '-f', 'aex-telemetry/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-credentials-provider + args: ['build', '-t', '${_REGISTRY}/aex-credentials-provider:${COMMIT_SHA}', '-t', '${_REGISTRY}/aex-credentials-provider:latest', '-f', 'aex-credentials-provider/Dockerfile', '.'] + dir: 'src' + waitFor: ['-'] + + # ============================================================ + # Build Code Review Demo Agents + # ============================================================ + + - name: 'gcr.io/cloud-builders/docker' + id: build-code-reviewer-a + args: ['build', '-t', '${_REGISTRY}/code-reviewer-a:${COMMIT_SHA}', '-t', '${_REGISTRY}/code-reviewer-a:latest', '--build-arg', 'AGENT_DIR=code-reviewer-a', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-code-reviewer-b + args: ['build', '-t', '${_REGISTRY}/code-reviewer-b:${COMMIT_SHA}', '-t', '${_REGISTRY}/code-reviewer-b:latest', '--build-arg', 'AGENT_DIR=code-reviewer-b', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + - 
name: 'gcr.io/cloud-builders/docker' + id: build-code-reviewer-c + args: ['build', '-t', '${_REGISTRY}/code-reviewer-c:${COMMIT_SHA}', '-t', '${_REGISTRY}/code-reviewer-c:latest', '--build-arg', 'AGENT_DIR=code-reviewer-c', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-orchestrator + args: ['build', '-t', '${_REGISTRY}/orchestrator:${COMMIT_SHA}', '-t', '${_REGISTRY}/orchestrator:latest', '--build-arg', 'AGENT_DIR=orchestrator', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + # ============================================================ + # Build Payment Agents + # ============================================================ + + - name: 'gcr.io/cloud-builders/docker' + id: build-payment-devpay + args: ['build', '-t', '${_REGISTRY}/payment-devpay:${COMMIT_SHA}', '-t', '${_REGISTRY}/payment-devpay:latest', '--build-arg', 'AGENT_DIR=payment-devpay', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-payment-codeauditpay + args: ['build', '-t', '${_REGISTRY}/payment-codeauditpay:${COMMIT_SHA}', '-t', '${_REGISTRY}/payment-codeauditpay:latest', '--build-arg', 'AGENT_DIR=payment-codeauditpay', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + - name: 'gcr.io/cloud-builders/docker' + id: build-payment-securitypay + args: ['build', '-t', '${_REGISTRY}/payment-securitypay:${COMMIT_SHA}', '-t', '${_REGISTRY}/payment-securitypay:latest', '--build-arg', 'AGENT_DIR=payment-securitypay', '-f', 'demo/code_review/agents/Dockerfile', 'demo/code_review/agents'] + waitFor: ['-'] + + # ============================================================ + # Build Demo UI + # ============================================================ + + - name: 'gcr.io/cloud-builders/docker' + id: build-demo-ui-nicegui + args: ['build', 
'-t', '${_REGISTRY}/demo-ui-nicegui:${COMMIT_SHA}', '-t', '${_REGISTRY}/demo-ui-nicegui:latest', '-f', 'demo/code_review/ui/Dockerfile', 'demo/code_review/ui'] + waitFor: ['-'] + + # ============================================================ + # Push All Images + # ============================================================ + + # AEX Core + - name: 'gcr.io/cloud-builders/docker' + id: push-core + entrypoint: 'bash' + args: + - '-c' + - | + services="aex-gateway aex-work-publisher aex-bid-gateway aex-bid-evaluator aex-contract-engine aex-provider-registry aex-trust-broker aex-identity aex-settlement aex-telemetry aex-credentials-provider" + for svc in $services; do + docker push "${_REGISTRY}/$svc:${COMMIT_SHA}" + docker push "${_REGISTRY}/$svc:latest" + done + waitFor: + - build-gateway + - build-work-publisher + - build-bid-gateway + - build-bid-evaluator + - build-contract-engine + - build-provider-registry + - build-trust-broker + - build-identity + - build-settlement + - build-telemetry + - build-credentials-provider + + # Demo agents and UI + - name: 'gcr.io/cloud-builders/docker' + id: push-demo + entrypoint: 'bash' + args: + - '-c' + - | + agents="code-reviewer-a code-reviewer-b code-reviewer-c orchestrator payment-devpay payment-codeauditpay payment-securitypay demo-ui-nicegui" + for agent in $agents; do + docker push "${_REGISTRY}/$agent:${COMMIT_SHA}" + docker push "${_REGISTRY}/$agent:latest" + done + waitFor: + - build-code-reviewer-a + - build-code-reviewer-b + - build-code-reviewer-c + - build-orchestrator + - build-payment-devpay + - build-payment-codeauditpay + - build-payment-securitypay + - build-demo-ui-nicegui + + # ============================================================ + # Deploy to GKE + # ============================================================ + + # Get GKE credentials + - name: 'gcr.io/cloud-builders/gcloud' + id: get-credentials + args: + - 'container' + - 'clusters' + - 'get-credentials' + - '${_GKE_CLUSTER}' + - 
'--region=${_GKE_REGION}' + - '--project=${PROJECT_ID}' + waitFor: + - push-core + - push-demo + + # Apply K8s manifests with image substitution + - name: 'gcr.io/cloud-builders/kubectl' + id: deploy-manifests + entrypoint: 'bash' + args: + - '-c' + - | + # Ensure namespace exists + kubectl get namespace ${_GKE_NAMESPACE} || kubectl create namespace ${_GKE_NAMESPACE} + + # Apply Kustomize manifests with image overrides + kubectl kustomize deploy/k8s/base/ | \ + sed "s|\$${REGISTRY}|${_REGISTRY}|g" | \ + sed "s|\$${TAG}|${COMMIT_SHA}|g" | \ + kubectl apply -n ${_GKE_NAMESPACE} -f - + + echo "Waiting for rollout..." + # Wait for critical deployments + for deploy in aex-gateway aex-work-publisher aex-provider-registry; do + kubectl rollout status deployment/$deploy -n ${_GKE_NAMESPACE} --timeout=300s || true + done + + echo "" + echo "=== Deployment Status ===" + kubectl get pods -n ${_GKE_NAMESPACE} + echo "" + kubectl get svc -n ${_GKE_NAMESPACE} + env: + - 'CLOUDSDK_COMPUTE_REGION=${_GKE_REGION}' + - 'CLOUDSDK_CONTAINER_CLUSTER=${_GKE_CLUSTER}' + waitFor: + - get-credentials + +images: + # AEX Core + - '${_REGISTRY}/aex-gateway:${COMMIT_SHA}' + - '${_REGISTRY}/aex-work-publisher:${COMMIT_SHA}' + - '${_REGISTRY}/aex-bid-gateway:${COMMIT_SHA}' + - '${_REGISTRY}/aex-bid-evaluator:${COMMIT_SHA}' + - '${_REGISTRY}/aex-contract-engine:${COMMIT_SHA}' + - '${_REGISTRY}/aex-provider-registry:${COMMIT_SHA}' + - '${_REGISTRY}/aex-trust-broker:${COMMIT_SHA}' + - '${_REGISTRY}/aex-identity:${COMMIT_SHA}' + - '${_REGISTRY}/aex-settlement:${COMMIT_SHA}' + - '${_REGISTRY}/aex-telemetry:${COMMIT_SHA}' + - '${_REGISTRY}/aex-credentials-provider:${COMMIT_SHA}' + # Code Review Demo + - '${_REGISTRY}/code-reviewer-a:${COMMIT_SHA}' + - '${_REGISTRY}/code-reviewer-b:${COMMIT_SHA}' + - '${_REGISTRY}/code-reviewer-c:${COMMIT_SHA}' + - '${_REGISTRY}/orchestrator:${COMMIT_SHA}' + # Payment Agents + - '${_REGISTRY}/payment-devpay:${COMMIT_SHA}' + - 
'${_REGISTRY}/payment-codeauditpay:${COMMIT_SHA}'
+  - '${_REGISTRY}/payment-securitypay:${COMMIT_SHA}'
+  # Demo UI
+  - '${_REGISTRY}/demo-ui-nicegui:${COMMIT_SHA}'
+
+options:
+  # _REGISTRY is defined in terms of ${_GKE_REGION} and ${PROJECT_ID}.
+  # For manually submitted builds, references inside a user-defined
+  # substitution are only expanded when dynamic substitutions are enabled.
+  dynamicSubstitutions: true
+  logging: CLOUD_LOGGING_ONLY
+  machineType: 'E2_HIGHCPU_8'
+
+timeout: '2400s'
diff --git a/deploy/gcp/deploy-gke.sh b/deploy/gcp/deploy-gke.sh
new file mode 100644
index 0000000..8e277cb
--- /dev/null
+++ b/deploy/gcp/deploy-gke.sh
@@ -0,0 +1,430 @@
+#!/bin/bash
+set -euo pipefail
+
+# Agent Exchange - GKE Deployment Script
+# Builds images, pushes to Artifact Registry, and deploys to GKE
+#
+# Usage:
+#   ./deploy-gke.sh --project-id my-project
+#   ./deploy-gke.sh --project-id my-project --environment production
+#   ./deploy-gke.sh --project-id my-project --skip-build
+#   ./deploy-gke.sh --project-id my-project --build-only
+#   ./deploy-gke.sh --project-id my-project --clean
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+# Defaults
+PROJECT_ID=""
+REGION="us-central1"
+CLUSTER_NAME="aex-cluster"
+ENVIRONMENT="dev"
+NAMESPACE="aex"
+BUILD_ONLY=false
+SKIP_BUILD=false
+CLEAN=false
+VERSION=""
+
+usage() {
+    cat </dev/null || echo "latest")
+fi
+
+# ============================================================
+# Validation
+# ============================================================
+
+validate_prerequisites() {
+    echo "Validating prerequisites..."
+
+    if ! command -v gcloud &> /dev/null; then
+        echo "Error: gcloud CLI is not installed"
+        exit 1
+    fi
+
+    if ! command -v kubectl &> /dev/null; then
+        echo "Error: kubectl is not installed"
+        exit 1
+    fi
+
+    echo "Prerequisites OK"
+}
+
+get_cluster_credentials() {
+    echo "Getting GKE cluster credentials..."
+
+    gcloud container clusters get-credentials "$CLUSTER_NAME" \
+        --region="$REGION" \
+        --project="$PROJECT_ID"
+
+    echo "Connected to cluster: $(kubectl config current-context)"
+
+    # Verify namespace exists
+    if ! 
kubectl get namespace "$NAMESPACE" &> /dev/null; then + echo "Creating namespace '$NAMESPACE'..." + kubectl create namespace "$NAMESPACE" + fi +} + +# ============================================================ +# Image Building +# ============================================================ + +build_and_push_images() { + echo "" + echo "Building and pushing images to Artifact Registry..." + echo " Registry: $REGISTRY" + echo " Version: $VERSION" + echo "" + + # Configure Docker for Artifact Registry + gcloud auth configure-docker "$REGION-docker.pkg.dev" --quiet + + # Ensure Artifact Registry exists + if ! gcloud artifacts repositories describe aex --location="$REGION" --project="$PROJECT_ID" &> /dev/null; then + echo "Creating Artifact Registry repository 'aex'..." + gcloud artifacts repositories create aex \ + --repository-format=docker \ + --location="$REGION" \ + --project="$PROJECT_ID" \ + --description="Agent Exchange Docker images" + fi + + # AEX Core services + local core_services=( + "aex-gateway" + "aex-work-publisher" + "aex-bid-gateway" + "aex-bid-evaluator" + "aex-contract-engine" + "aex-provider-registry" + "aex-trust-broker" + "aex-identity" + "aex-settlement" + "aex-telemetry" + "aex-credentials-provider" + ) + + for service in "${core_services[@]}"; do + echo "Building $service..." + docker build \ + -f "$PROJECT_ROOT/src/$service/Dockerfile" \ + -t "$REGISTRY/$service:$VERSION" \ + -t "$REGISTRY/$service:latest" \ + "$PROJECT_ROOT/src/" + + echo "Pushing $service..." + docker push "$REGISTRY/$service:$VERSION" + docker push "$REGISTRY/$service:latest" + done + + # Code Review demo agents + local demo_agents=( + "code-reviewer-a" + "code-reviewer-b" + "code-reviewer-c" + "orchestrator" + "payment-devpay" + "payment-codeauditpay" + "payment-securitypay" + ) + + for agent in "${demo_agents[@]}"; do + echo "Building $agent..." 
+ docker build \ + -f "$PROJECT_ROOT/demo/code_review/agents/Dockerfile" \ + --build-arg "AGENT_DIR=$agent" \ + -t "$REGISTRY/$agent:$VERSION" \ + -t "$REGISTRY/$agent:latest" \ + "$PROJECT_ROOT/demo/code_review/agents/" + + echo "Pushing $agent..." + docker push "$REGISTRY/$agent:$VERSION" + docker push "$REGISTRY/$agent:latest" + done + + # Demo UI + echo "Building demo-ui-nicegui..." + docker build \ + -f "$PROJECT_ROOT/demo/code_review/ui/Dockerfile" \ + -t "$REGISTRY/demo-ui-nicegui:$VERSION" \ + -t "$REGISTRY/demo-ui-nicegui:latest" \ + "$PROJECT_ROOT/demo/code_review/ui/" + + echo "Pushing demo-ui-nicegui..." + docker push "$REGISTRY/demo-ui-nicegui:$VERSION" + docker push "$REGISTRY/demo-ui-nicegui:latest" + + echo "" + echo "All images built and pushed successfully" +} + +# ============================================================ +# K8s Deployment +# ============================================================ + +deploy_manifests() { + echo "" + echo "Deploying K8s manifests..." + echo " Environment: $ENVIRONMENT" + echo " Namespace: $NAMESPACE" + echo " Version: $VERSION" + echo "" + + local kustomize_dir="$PROJECT_ROOT/deploy/k8s/base" + + # Check for environment-specific overlay + local overlay_dir="$PROJECT_ROOT/deploy/k8s/overlays/$ENVIRONMENT" + if [[ -d "$overlay_dir" ]] && [[ -f "$overlay_dir/kustomization.yaml" ]]; then + echo "Using overlay: $overlay_dir" + kustomize_dir="$overlay_dir" + else + echo "No overlay found for '$ENVIRONMENT', using base manifests" + fi + + # Apply manifests with Kustomize, setting the image registry + echo "Applying manifests with image overrides..." 
+ + # Build the kustomize command with all image overrides + local kustomize_cmd="kubectl kustomize $kustomize_dir" + + # Apply with image substitution using sed for the registry/tag replacement + $kustomize_cmd | \ + sed "s|\${REGISTRY}|$REGISTRY|g" | \ + sed "s|\${TAG}|$VERSION|g" | \ + kubectl apply -n "$NAMESPACE" -f - + + echo "Manifests applied" +} + +wait_for_pods() { + echo "" + echo "Waiting for pods to be ready..." + + local timeout=300 + local start_time=$(date +%s) + + while true; do + local not_ready + not_ready=$(kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null | \ + grep -v "Running\|Completed" | wc -l | tr -d ' ') + + if [[ "$not_ready" -eq 0 ]]; then + local total + total=$(kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null | wc -l | tr -d ' ') + if [[ "$total" -gt 0 ]]; then + echo "All $total pods are ready!" + break + fi + fi + + local elapsed=$(( $(date +%s) - start_time )) + if [[ "$elapsed" -ge "$timeout" ]]; then + echo "Warning: Timeout waiting for pods after ${timeout}s" + echo "Current pod status:" + kubectl get pods -n "$NAMESPACE" + break + fi + + echo " Waiting... ($not_ready pods not ready, ${elapsed}s elapsed)" + sleep 10 + done +} + +get_ingress_ip() { + echo "" + echo "Getting Ingress external IP..." + + local ingress_ip="" + local attempts=0 + local max_attempts=30 + + while [[ -z "$ingress_ip" || "$ingress_ip" == "" ]] && [[ $attempts -lt $max_attempts ]]; do + ingress_ip=$(kubectl get svc ingress-nginx-controller \ + -n ingress-nginx \ + -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || echo "") + + if [[ -z "$ingress_ip" ]]; then + ((attempts++)) + echo " Waiting for external IP... (attempt $attempts/$max_attempts)" + sleep 10 + fi + done + + if [[ -n "$ingress_ip" ]]; then + echo "Ingress IP: $ingress_ip" + else + echo "Warning: Could not get Ingress IP. 
Check ingress-nginx status:" + echo " kubectl get svc -n ingress-nginx" + fi + + echo "$ingress_ip" +} + +print_service_urls() { + local ingress_ip="$1" + + echo "" + echo "================================================================" + echo " Deployment Complete" + echo "================================================================" + echo "" + echo "Environment: $ENVIRONMENT" + echo "Version: $VERSION" + echo "Namespace: $NAMESPACE" + echo "" + + if [[ -n "$ingress_ip" ]]; then + echo "Service URLs (via Ingress at $ingress_ip):" + echo "" + echo " AEX Core:" + echo " Gateway: http://$ingress_ip/api" + echo "" + echo " Code Review Demo:" + echo " Demo UI: http://$ingress_ip/demo" + echo " Code Reviewer A: http://$ingress_ip/agents/code-reviewer-a" + echo " Code Reviewer B: http://$ingress_ip/agents/code-reviewer-b" + echo " Code Reviewer C: http://$ingress_ip/agents/code-reviewer-c" + echo " Orchestrator: http://$ingress_ip/agents/orchestrator" + echo "" + echo " Payment Agents:" + echo " DevPay: http://$ingress_ip/payments/devpay" + echo " CodeAuditPay: http://$ingress_ip/payments/codeauditpay" + echo " SecurityPay: http://$ingress_ip/payments/securitypay" + else + echo "Internal Service URLs (ClusterIP):" + kubectl get svc -n "$NAMESPACE" -o wide + fi + + echo "" + echo "Pod status:" + kubectl get pods -n "$NAMESPACE" -o wide + echo "" + echo "Useful commands:" + echo " kubectl get pods -n $NAMESPACE" + echo " kubectl logs -n $NAMESPACE deployment/aex-gateway" + echo " kubectl port-forward -n $NAMESPACE svc/aex-gateway 8080:8080" + echo " kubectl port-forward -n $NAMESPACE svc/demo-ui-nicegui 8502:8502" + echo "" +} + +# ============================================================ +# Cleanup +# ============================================================ + +clean_resources() { + echo "" + echo "Cleaning K8s resources in namespace '$NAMESPACE'..." + + read -p "This will delete all AEX resources. Continue? 
(y/N): " confirm + if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then + echo "Aborted." + exit 0 + fi + + # Delete namespace (cascades all resources) + kubectl delete namespace "$NAMESPACE" --wait=true 2>/dev/null || true + + echo "K8s resources deleted" + echo "" + echo "Note: Ingress controller and other cluster-level resources are preserved." + echo "To fully remove the cluster, use: ./gke-cluster.sh --project-id $PROJECT_ID --delete" +} + +# ============================================================ +# Main +# ============================================================ + +echo "" +echo "================================================================" +echo " Agent Exchange - GKE Deployment" +echo "================================================================" +echo "" +echo " Project: $PROJECT_ID" +echo " Cluster: $CLUSTER_NAME" +echo " Region: $REGION" +echo " Environment: $ENVIRONMENT" +echo " Version: $VERSION" +echo "" + +validate_prerequisites + +if [[ "$CLEAN" == "true" ]]; then + get_cluster_credentials + clean_resources + exit 0 +fi + +get_cluster_credentials + +if [[ "$SKIP_BUILD" != "true" ]]; then + build_and_push_images +fi + +if [[ "$BUILD_ONLY" == "true" ]]; then + echo "" + echo "Build complete. Skipping deployment (--build-only)." 
+ exit 0 +fi + +deploy_manifests +wait_for_pods + +INGRESS_IP=$(get_ingress_ip) +print_service_urls "$INGRESS_IP" diff --git a/deploy/gcp/gke-cluster.sh b/deploy/gcp/gke-cluster.sh new file mode 100644 index 0000000..9cb75e9 --- /dev/null +++ b/deploy/gcp/gke-cluster.sh @@ -0,0 +1,515 @@ +#!/bin/bash +set -euo pipefail + +# Agent Exchange - GKE Cluster Setup Script +# Creates and configures a GKE cluster for AEX deployment +# +# Usage: +# ./gke-cluster.sh --project-id my-project --region us-central1 +# ./gke-cluster.sh --project-id my-project --mode standard --region us-central1 +# ./gke-cluster.sh --project-id my-project --delete + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +# Defaults +CLUSTER_NAME="aex-cluster" +REGION="us-central1" +PROJECT_ID="" +MODE="autopilot" # autopilot or standard +DELETE=false +NAMESPACE="aex" + +# Standard mode defaults +MIN_NODES=2 +MAX_NODES=5 +MACHINE_TYPE="e2-standard-4" + +usage() { + cat < /dev/null; then + echo "Error: gcloud CLI is not installed" + echo " Install: https://cloud.google.com/sdk/docs/install" + exit 1 + fi + + # Check authentication + if ! gcloud auth print-identity-token &> /dev/null; then + echo "Error: Not authenticated with gcloud. Run 'gcloud auth login'" + exit 1 + fi + + # Check project access + if ! gcloud projects describe "$PROJECT_ID" &> /dev/null; then + echo "Error: Cannot access project '$PROJECT_ID'" + exit 1 + fi + + # Check kubectl + if ! command -v kubectl &> /dev/null; then + echo "Warning: kubectl not found. Installing via gcloud..." + gcloud components install kubectl --quiet + fi + + # Check helm + if ! command -v helm &> /dev/null; then + echo "Warning: helm not found." 
+        echo " Install: https://helm.sh/docs/intro/install/"
+        echo " Or: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash"
+        exit 1
+    fi
+
+    echo "Prerequisites OK"
+}
+
+# ============================================================
+# API Enablement
+# ============================================================
+
+# Enable, one at a time, every GCP API listed below for $PROJECT_ID.
+# The script runs under `set -euo pipefail`, so the first API that fails
+# to enable aborts the whole setup.
+enable_apis() {
+    echo "Enabling required GCP APIs..."
+
+    local apis=(
+        "container.googleapis.com"
+        "compute.googleapis.com"
+        "artifactregistry.googleapis.com"
+        "secretmanager.googleapis.com"
+        "iam.googleapis.com"
+        "iamcredentials.googleapis.com"
+        "cloudresourcemanager.googleapis.com"
+    )
+
+    for api in "${apis[@]}"; do
+        echo " Enabling $api..."
+        gcloud services enable "$api" --project="$PROJECT_ID" --quiet
+    done
+
+    echo "APIs enabled"
+}
+
+# ============================================================
+# Cluster Creation
+# ============================================================
+
+# Create the GKE cluster $CLUSTER_NAME in $REGION / $PROJECT_ID.
+# Returns 0 without doing anything when `gcloud container clusters describe`
+# already succeeds for that name. MODE=autopilot uses `create-auto`; any
+# other MODE value creates a Standard cluster with node autoscaling,
+# auto-repair, auto-upgrade and the project's Workload Identity pool.
+create_cluster() {
+    echo ""
+    echo "Creating GKE cluster..."
+    echo " Name: $CLUSTER_NAME"
+    echo " Region: $REGION"
+    echo " Mode: $MODE"
+    echo ""
+
+    # Check if cluster already exists
+    if gcloud container clusters describe "$CLUSTER_NAME" \
+        --region="$REGION" \
+        --project="$PROJECT_ID" &> /dev/null; then
+        echo "Cluster '$CLUSTER_NAME' already exists. Skipping creation."
+        return 0
+    fi
+
+    if [[ "$MODE" == "autopilot" ]]; then
+        echo "Creating Autopilot cluster (Google manages node infrastructure)..."
+        gcloud container clusters create-auto "$CLUSTER_NAME" \
+            --region="$REGION" \
+            --project="$PROJECT_ID" \
+            --release-channel=regular \
+            --network=default \
+            --subnetwork=default \
+            --quiet
+    else
+        echo "Creating Standard cluster with node autoscaling..."
+        # The initial size is the autoscaler floor. A hard-coded
+        # --num-nodes=1 started the node pool below --min-nodes (MIN_NODES
+        # defaults to 2). For a --region cluster gcloud applies
+        # --num-nodes/--min-nodes/--max-nodes per zone.
+        gcloud container clusters create "$CLUSTER_NAME" \
+            --region="$REGION" \
+            --project="$PROJECT_ID" \
+            --machine-type="$MACHINE_TYPE" \
+            --num-nodes="$MIN_NODES" \
+            --min-nodes="$MIN_NODES" \
+            --max-nodes="$MAX_NODES" \
+            --enable-autoscaling \
+            --enable-autorepair \
+            --enable-autoupgrade \
+            --release-channel=regular \
+            --workload-pool="$PROJECT_ID.svc.id.goog" \
+            --network=default \
+            --subnetwork=default \
+            --quiet
+    fi
+
+    echo "Cluster created successfully"
+}
+
+# ============================================================
+# kubectl Configuration
+# ============================================================
+
+# Fetch credentials for $CLUSTER_NAME into the local kubeconfig, print the
+# resulting current context, and create the $NAMESPACE namespace when
+# `kubectl get namespace` does not already find it.
+configure_kubectl() {
+    echo "Configuring kubectl context..."
+
+    gcloud container clusters get-credentials "$CLUSTER_NAME" \
+        --region="$REGION" \
+        --project="$PROJECT_ID"
+
+    echo "kubectl context set to: $(kubectl config current-context)"
+
+    # Create namespace
+    if kubectl get namespace "$NAMESPACE" &> /dev/null; then
+        echo "Namespace '$NAMESPACE' already exists"
+    else
+        echo "Creating namespace '$NAMESPACE'..."
+        kubectl create namespace "$NAMESPACE"
+    fi
+
+    echo "kubectl configured"
+}
+
+# ============================================================
+# Helm Chart Installations
+# ============================================================
+
+install_nginx_ingress() {
+    echo "Installing Nginx Ingress Controller..."
+
+    helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true
+    helm repo update
+
+    if helm status ingress-nginx -n ingress-nginx &> /dev/null; then
+        echo " Nginx Ingress already installed. Upgrading..."
+    fi
+
+    # `helm upgrade --install` covers both the first install and later
+    # upgrades. The pod annotations use --set-string: plain --set would
+    # turn `true` into a bool and `10254` into an integer, and Kubernetes
+    # rejects annotation values that are not strings.
+    helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
+        --namespace ingress-nginx \
+        --create-namespace \
+        --set controller.service.type=LoadBalancer \
+        --set controller.metrics.enabled=true \
+        --set-string controller.podAnnotations."prometheus\.io/scrape"=true \
+        --set-string controller.podAnnotations."prometheus\.io/port"=10254 \
+        --wait \
+        --timeout 300s
+
+    echo "Nginx Ingress Controller installed"
+}
+
+# Install (or upgrade, if a release already exists) the External Secrets
+# Operator chart into the external-secrets namespace, including its CRDs,
+# and wait up to 300s for the release to become ready.
+install_external_secrets() {
+    echo "Installing External Secrets Operator..."
+
+    # `repo add` fails when the repo is already registered; that is fine.
+    helm repo add external-secrets https://charts.external-secrets.io 2>/dev/null || true
+    helm repo update
+
+    if helm status external-secrets -n external-secrets &> /dev/null; then
+        echo " External Secrets already installed. Upgrading..."
+    fi
+
+    helm upgrade --install external-secrets external-secrets/external-secrets \
+        --namespace external-secrets \
+        --create-namespace \
+        --set installCRDs=true \
+        --wait \
+        --timeout 300s
+
+    echo "External Secrets Operator installed"
+}
+
+# ============================================================
+# Workload Identity
+# ============================================================
+
+setup_workload_identity() {
+    echo "Configuring Workload Identity..."
+ + local sa_name="aex-gke" + local sa_email="$sa_name@$PROJECT_ID.iam.gserviceaccount.com" + local k8s_sa="aex-workload" + + # Create GCP service account if it does not exist + if gcloud iam service-accounts describe "$sa_email" --project="$PROJECT_ID" &> /dev/null; then + echo " GCP service account '$sa_name' already exists" + else + gcloud iam service-accounts create "$sa_name" \ + --display-name="Agent Exchange GKE Workload" \ + --project="$PROJECT_ID" + echo " Created GCP service account: $sa_email" + fi + + # Grant roles to GCP service account + local roles=( + "roles/secretmanager.secretAccessor" + "roles/datastore.user" + "roles/logging.logWriter" + "roles/cloudtrace.agent" + "roles/monitoring.metricWriter" + ) + + for role in "${roles[@]}"; do + gcloud projects add-iam-policy-binding "$PROJECT_ID" \ + --member="serviceAccount:$sa_email" \ + --role="$role" \ + --quiet 2>/dev/null || true + done + + # Create K8s service account + if kubectl get serviceaccount "$k8s_sa" -n "$NAMESPACE" &> /dev/null; then + echo " K8s service account '$k8s_sa' already exists" + else + kubectl create serviceaccount "$k8s_sa" -n "$NAMESPACE" + fi + + # Annotate K8s SA with GCP SA + kubectl annotate serviceaccount "$k8s_sa" \ + --namespace="$NAMESPACE" \ + "iam.gke.io/gcp-service-account=$sa_email" \ + --overwrite + + # Allow K8s SA to impersonate GCP SA + gcloud iam service-accounts add-iam-policy-binding "$sa_email" \ + --project="$PROJECT_ID" \ + --role="roles/iam.workloadIdentityUser" \ + --member="serviceAccount:$PROJECT_ID.svc.id.goog[$NAMESPACE/$k8s_sa]" \ + --quiet 2>/dev/null || true + + echo "Workload Identity configured" + echo " GCP SA: $sa_email" + echo " K8s SA: $k8s_sa (namespace: $NAMESPACE)" +} + +# ============================================================ +# Secrets from GCP Secret Manager +# ============================================================ + +create_k8s_secrets() { + echo "Creating K8s secrets from GCP Secret Manager..." 
+ + local secrets_to_sync=( + "aex-jwt-secret:JWT_SIGNING_KEY" + "aex-api-key-salt:API_KEY_SALT" + ) + + # Build the secret data arguments + local secret_args=() + local has_secrets=false + + for secret_mapping in "${secrets_to_sync[@]}"; do + local gcp_secret="${secret_mapping%%:*}" + local k8s_key="${secret_mapping##*:}" + + # Try to get the secret value from GCP Secret Manager + local value="" + value=$(gcloud secrets versions access latest --secret="$gcp_secret" --project="$PROJECT_ID" 2>/dev/null) || true + + if [[ -n "$value" ]]; then + secret_args+=("--from-literal=$k8s_key=$value") + has_secrets=true + echo " Synced: $gcp_secret -> $k8s_key" + else + echo " Warning: Secret '$gcp_secret' not found in Secret Manager (skipped)" + fi + done + + # Also check for ANTHROPIC_API_KEY + local anthropic_key="" + anthropic_key=$(gcloud secrets versions access latest --secret="ANTHROPIC_API_KEY" --project="$PROJECT_ID" 2>/dev/null) || true + if [[ -n "$anthropic_key" ]]; then + secret_args+=("--from-literal=ANTHROPIC_API_KEY=$anthropic_key") + has_secrets=true + echo " Synced: ANTHROPIC_API_KEY" + else + echo " Warning: ANTHROPIC_API_KEY not found in Secret Manager" + echo " Create it with: echo 'your-key' | gcloud secrets create ANTHROPIC_API_KEY --data-file=- --project=$PROJECT_ID" + fi + + if [[ "$has_secrets" == "true" ]]; then + # Delete existing secret if present + kubectl delete secret aex-secrets -n "$NAMESPACE" 2>/dev/null || true + + # Create new secret + kubectl create secret generic aex-secrets \ + -n "$NAMESPACE" \ + "${secret_args[@]}" + + echo "K8s secrets created in namespace '$NAMESPACE'" + else + echo "No secrets found in Secret Manager. Using placeholder secret." 
+ kubectl apply -f "$PROJECT_ROOT/deploy/k8s/base/secrets.yaml" 2>/dev/null || true + fi +} + +# ============================================================ +# Cluster Deletion +# ============================================================ + +delete_cluster() { + echo "" + echo "================================================================" + echo " WARNING: Deleting GKE cluster and associated resources" + echo " Cluster: $CLUSTER_NAME" + echo " Project: $PROJECT_ID" + echo " Region: $REGION" + echo "================================================================" + echo "" + + read -p "Type 'DELETE' to confirm: " confirmation + if [[ "$confirmation" != "DELETE" ]]; then + echo "Aborted." + exit 1 + fi + + # Get credentials first (may fail if cluster is already gone) + gcloud container clusters get-credentials "$CLUSTER_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" 2>/dev/null || true + + # Delete namespace (cascades to all resources within) + echo "Deleting namespace '$NAMESPACE'..." + kubectl delete namespace "$NAMESPACE" --wait=false 2>/dev/null || true + + # Uninstall Helm charts + echo "Uninstalling Helm charts..." + helm uninstall ingress-nginx -n ingress-nginx 2>/dev/null || true + helm uninstall external-secrets -n external-secrets 2>/dev/null || true + kubectl delete namespace ingress-nginx --wait=false 2>/dev/null || true + kubectl delete namespace external-secrets --wait=false 2>/dev/null || true + + # Delete the cluster + echo "Deleting GKE cluster '$CLUSTER_NAME'..." + gcloud container clusters delete "$CLUSTER_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --quiet + + # Clean up IAM bindings + echo "Cleaning up IAM bindings..." 
+ local sa_email="aex-gke@$PROJECT_ID.iam.gserviceaccount.com" + if gcloud iam service-accounts describe "$sa_email" --project="$PROJECT_ID" &> /dev/null; then + gcloud iam service-accounts delete "$sa_email" \ + --project="$PROJECT_ID" \ + --quiet 2>/dev/null || true + fi + + echo "" + echo "GKE cluster and resources deleted successfully" +} + +# ============================================================ +# Main +# ============================================================ + +echo "" +echo "================================================================" +echo " Agent Exchange - GKE Cluster Setup" +echo "================================================================" +echo "" +echo " Project: $PROJECT_ID" +echo " Cluster: $CLUSTER_NAME" +echo " Region: $REGION" +echo " Mode: $MODE" +echo "" + +if [[ "$DELETE" == "true" ]]; then + validate_prerequisites + delete_cluster + exit 0 +fi + +validate_prerequisites +echo "" + +enable_apis +echo "" + +create_cluster +echo "" + +configure_kubectl +echo "" + +install_nginx_ingress +echo "" + +install_external_secrets +echo "" + +setup_workload_identity +echo "" + +create_k8s_secrets +echo "" + +echo "" +echo "================================================================" +echo " GKE Cluster Setup Complete" +echo "================================================================" +echo "" +echo "Cluster: $CLUSTER_NAME ($MODE mode)" +echo "Region: $REGION" +echo "Context: $(kubectl config current-context)" +echo "" +echo "Next steps:" +echo " 1. Deploy AEX services:" +echo " ./deploy/gcp/deploy-gke.sh --project-id $PROJECT_ID --region $REGION" +echo "" +echo " 2. Or use Kustomize directly:" +echo " kubectl apply -k deploy/k8s/base/" +echo "" +echo " 3. 
Check cluster status:" +echo " kubectl get nodes" +echo " kubectl get pods -n $NAMESPACE" +echo "" diff --git a/deploy/k8s/README.md b/deploy/k8s/README.md new file mode 100644 index 0000000..f4a189d --- /dev/null +++ b/deploy/k8s/README.md @@ -0,0 +1,379 @@ +# Agent Exchange (AEX) - Kubernetes Deployment + +Production-ready Kubernetes manifests for deploying the Agent Exchange code review demo, including all AEX core microservices, code review agents, payment agents, and the NiceGUI dashboard. + +## Architecture Overview + +``` + Ingress (nginx) + / \ + / \ + /api/* --> / --> + aex-gateway:8080 demo-ui-nicegui:8502 + | | + +----------+----------+ | + | AEX Core Services | | + | (11 Go microservices)| | + +----------+----------+ | + | | + +-----+-----+ +------+------+ + | MongoDB | | Code Review | + | StatefulSet| | Agents | + +------------+ | + Payment | + | Agents | + +-------------+ +``` + +### Services + +| Category | Services | Count | +|----------|----------|-------| +| Database | MongoDB 7 (StatefulSet) | 1 | +| AEX Core | gateway, work-publisher, bid-gateway, bid-evaluator, contract-engine, provider-registry, trust-broker, identity, settlement, telemetry, credentials-provider | 11 | +| Code Review Agents | code-reviewer-a (QuickReview), code-reviewer-b (CodeGuard), code-reviewer-c (ArchitectAI), orchestrator | 4 | +| Payment Agents | payment-devpay, payment-codeauditpay, payment-securitypay | 3 | +| UI | demo-ui-nicegui (NiceGUI WebSocket dashboard) | 1 | +| **Total** | | **20** | + +## Prerequisites + +- Kubernetes cluster (v1.25+) +- `kubectl` (v1.25+) +- `kustomize` (v5.0+) or `kubectl` with kustomize support +- Nginx Ingress Controller (for ingress routing) +- Container images built and pushed to a registry + +### For Local Development + +- [minikube](https://minikube.sigs.k8s.io/) or [kind](https://kind.sigs.k8s.io/) +- Docker (for building images locally) + +## Quick Start (Local Development) + +### 1. 
Start a Local Cluster + +**Using minikube:** + +```bash +minikube start --memory 8192 --cpus 4 +minikube addons enable ingress +``` + +**Using kind:** + +```bash +kind create cluster --name aex +kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/main/deploy/static/provider/kind/deploy.yaml +``` + +### 2. Set Up Secrets + +Before deploying, create the secrets with your actual values: + +```bash +# Create namespace first +kubectl apply -f deploy/k8s/namespace.yaml + +# Create the secret (replace placeholder values) +kubectl create secret generic aex-secrets \ + --namespace aex \ + --from-literal=ANTHROPIC_API_KEY="sk-ant-your-key-here" \ + --from-literal=AEX_API_KEY="your-aex-api-key" \ + --from-literal=JWT_SIGNING_KEY="your-jwt-signing-key" \ + --from-literal=MONGO_URI="mongodb://root:root@mongodb.aex.svc.cluster.local:27017/?authSource=admin" +``` + +### 3. Deploy with Kustomize + +**Development (local images, 1 replica, small resources):** + +```bash +kubectl apply -k deploy/k8s/overlays/dev/ +``` + +**Staging (2 replicas, moderate resources):** + +```bash +kubectl apply -k deploy/k8s/overlays/staging/ +``` + +**Production (HPA, PDB, NetworkPolicy, larger resources):** + +```bash +kubectl apply -k deploy/k8s/overlays/production/ +``` + +### 4. Verify Deployment + +```bash +# Check all pods are running +kubectl get pods -n aex + +# Check all services +kubectl get svc -n aex + +# Watch pod status +kubectl get pods -n aex -w + +# Check deployment rollout status +kubectl rollout status deployment/aex-gateway -n aex +``` + +### 5. 
Access the Application + +**With minikube:** + +```bash +# Get the UI URL +minikube service demo-ui-nicegui -n aex --url + +# Or use port-forward +kubectl port-forward svc/demo-ui-nicegui 8502:8502 -n aex +``` + +**With Ingress:** + +```bash +# Get the ingress address +kubectl get ingress -n aex + +# Access (replace YOUR_INGRESS_ADDRESS with the ADDRESS column from the command above): +# UI: http://YOUR_INGRESS_ADDRESS/ +# API: http://YOUR_INGRESS_ADDRESS/api/ +``` + +## Directory Structure + +``` +deploy/k8s/ +├── README.md # This file +├── namespace.yaml # aex namespace +├── base/ # Base Kustomize configuration +│ ├── kustomization.yaml # Assembles all resources +│ ├── namespace.yaml # Namespace definition +│ ├── configmap.yaml # Shared env vars and service URLs +│ └── secrets.yaml # Secret template (DO NOT commit real values) +├── services/ # AEX Core Services (Go microservices) +│ ├── mongodb/ +│ │ ├── statefulset.yaml # MongoDB StatefulSet with PVC +│ │ └── service.yaml # ClusterIP service +│ ├── aex-gateway/ +│ │ ├── deployment.yaml # API gateway deployment +│ │ └── service.yaml +│ ├── aex-work-publisher/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-bid-gateway/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-bid-evaluator/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-contract-engine/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-provider-registry/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-trust-broker/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-identity/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-settlement/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ ├── aex-telemetry/ +│ │ ├── deployment.yaml +│ │ └── service.yaml +│ └── aex-credentials-provider/ +│ ├── deployment.yaml +│ └── service.yaml +├── agents/ # Code Review Demo Agents (Python) +│ ├── code-reviewer-a/ +│ │ ├── deployment.yaml # QuickReview - Budget reviews +│ │ └── service.yaml +│ ├── code-reviewer-b/ +│ │ ├── deployment.yaml # CodeGuard - Security-focused +│ │ └── service.yaml +│ ├── code-reviewer-c/ +│ │ ├── 
deployment.yaml # ArchitectAI - Architecture review +│ │ └── service.yaml +│ ├── orchestrator/ +│ │ ├── deployment.yaml # Workflow coordinator +│ │ └── service.yaml +│ ├── payment-devpay/ +│ │ ├── deployment.yaml # General dev payments +│ │ └── service.yaml +│ ├── payment-codeauditpay/ +│ │ ├── deployment.yaml # Code audit payments +│ │ └── service.yaml +│ └── payment-securitypay/ +│ ├── deployment.yaml # Security payments +│ └── service.yaml +├── ui/ +│ ├── deployment.yaml # NiceGUI real-time dashboard +│ └── service.yaml # LoadBalancer service +├── ingress/ +│ └── ingress.yaml # Nginx Ingress routing +└── overlays/ + ├── dev/ + │ └── kustomization.yaml # Local dev (1 replica, NodePort, small resources) + ├── staging/ + │ └── kustomization.yaml # Staging (2 replicas, moderate resources) + └── production/ + ├── kustomization.yaml # Production config + ├── hpa.yaml # HorizontalPodAutoscalers (2-10 replicas) + ├── pdb.yaml # PodDisruptionBudgets + └── networkpolicy.yaml # Network policies (default deny + allow rules) +``` + +## Configuration + +### Environment Variables + +All shared configuration is managed through the `aex-config` ConfigMap. Service URLs use Kubernetes DNS, where `SERVICE_NAME` is the Service's name and `PORT` is its port (for example `http://aex-gateway.aex.svc.cluster.local:8080`): + +``` +http://SERVICE_NAME.aex.svc.cluster.local:PORT +``` + +### Secrets Management + +The `base/secrets.yaml` is a template. **Never commit real secret values.** + +For production, use one of: +- **Sealed Secrets**: Encrypt secrets in the repository +- **External Secrets Operator**: Sync from AWS Secrets Manager, HashiCorp Vault, etc. +- **SOPS**: Mozilla SOPS for encrypted secrets in git + +### Image Configuration + +Base manifests use placeholder image names (`${REGISTRY}/image-name:${TAG}`). Each overlay sets the actual registry and tag via the Kustomize `images` transformer. 
+ +To update image tags for a deployment: + +```bash +# Update a specific image tag +cd deploy/k8s/overlays/production/ +kustomize edit set image YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-gateway=YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-gateway:v1.2.3 +``` + +## Overlay Details + +### Dev Overlay + +- 1 replica for all services +- NodePort for UI (port 30502) +- Local image names (`agent-exchange/*:local`) +- 1Gi MongoDB PVC +- Small resource requests/limits + +### Staging Overlay + +- 2 replicas for core services +- Moderate resources (256Mi-512Mi request, 512Mi-1Gi limit) +- 5Gi MongoDB PVC +- ECR image placeholders + +### Production Overlay + +- 2-3 base replicas + HPA (scales to 6-10) +- HorizontalPodAutoscalers on key services +- PodDisruptionBudgets (minAvailable: 1) +- NetworkPolicies (default deny + allow rules) +- Node affinity for workload isolation +- Pod anti-affinity for spread across nodes +- TLS on Ingress (cert-manager integration) +- 10Gi MongoDB PVC +- Large resource limits (512Mi-2Gi) + +## Monitoring and Troubleshooting + +### Health Checks + +All services expose `/health` endpoints. Kubernetes uses these for liveness and readiness probes. 
+ +```bash +# Check health of a specific service +kubectl exec -n aex deploy/aex-gateway -- wget -qO- http://localhost:8080/health + +# Check all pod health +kubectl get pods -n aex -o wide +``` + +### Logs + +```bash +# View logs for a specific service +kubectl logs -n aex deploy/aex-gateway -f + +# View logs for all pods with a label +kubectl logs -n aex -l app.kubernetes.io/component=code-review-agent -f + +# View previous container logs (if crashed) +kubectl logs -n aex deploy/code-reviewer-a --previous +``` + +### Common Issues + +**Pods stuck in Pending:** +```bash +kubectl describe pod POD_NAME -n aex +# Check for resource constraints or PVC binding issues +``` + +**Pods in CrashLoopBackOff:** +```bash +kubectl logs POD_NAME -n aex --previous +# Check for missing env vars, connection issues, or startup failures +``` + +**MongoDB connection failures:** +```bash +# Verify MongoDB is running and ready +kubectl get pods -n aex -l app.kubernetes.io/name=mongodb +kubectl exec -n aex mongodb-0 -- mongosh --eval "db.adminCommand('ping')" +``` + +**Services not discovering each other:** +```bash +# Test DNS resolution +kubectl exec -n aex deploy/aex-gateway -- nslookup aex-bid-gateway.aex.svc.cluster.local + +# Verify ConfigMap values +kubectl get configmap aex-config -n aex -o yaml +``` + +### Scaling + +```bash +# Manual scaling +kubectl scale deployment aex-gateway -n aex --replicas=5 + +# Check HPA status (production) +kubectl get hpa -n aex + +# View HPA details +kubectl describe hpa aex-gateway-hpa -n aex +``` + +### Resource Usage + +```bash +# Pod resource usage (requires metrics-server) +kubectl top pods -n aex + +# Node resource usage +kubectl top nodes +``` + +## Cleanup + +```bash +# Delete all resources in the namespace +kubectl delete namespace aex + +# Or delete specific overlay +kubectl delete -k deploy/k8s/overlays/dev/ +``` diff --git a/deploy/k8s/agents/code-reviewer-a/deployment.yaml b/deploy/k8s/agents/code-reviewer-a/deployment.yaml new file mode 100644 index
0000000..d7a126a --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-a/deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: code-reviewer-a + namespace: aex + labels: + app.kubernetes.io/name: code-reviewer-a + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "QuickReview - Budget code review agent ($3 + $1/file)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: code-reviewer-a + template: + metadata: + labels: + app.kubernetes.io/name: code-reviewer-a + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: code-reviewer-a + image: ${REGISTRY}/code-review-code-reviewer-a:${TAG} + ports: + - containerPort: 8100 + protocol: TCP + env: + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: ANTHROPIC_API_KEY + - name: AEX_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: AEX_GATEWAY_URL + - name: AEX_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: AEX_API_KEY + - name: AGENT_HOSTNAME + value: "code-reviewer-a" + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8100 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8100 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/code-reviewer-a/service.yaml b/deploy/k8s/agents/code-reviewer-a/service.yaml new file mode 100644 index 0000000..986c004 --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-a/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: code-reviewer-a + namespace: aex + labels: + 
app.kubernetes.io/name: code-reviewer-a + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: code-reviewer-a + ports: + - port: 8100 + targetPort: 8100 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/code-reviewer-b/deployment.yaml b/deploy/k8s/agents/code-reviewer-b/deployment.yaml new file mode 100644 index 0000000..4b5150c --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-b/deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: code-reviewer-b + namespace: aex + labels: + app.kubernetes.io/name: code-reviewer-b + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "CodeGuard - Security-focused code review agent ($10 + $3/file)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: code-reviewer-b + template: + metadata: + labels: + app.kubernetes.io/name: code-reviewer-b + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: code-reviewer-b + image: ${REGISTRY}/code-review-code-reviewer-b:${TAG} + ports: + - containerPort: 8101 + protocol: TCP + env: + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: ANTHROPIC_API_KEY + - name: AEX_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: AEX_GATEWAY_URL + - name: AEX_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: AEX_API_KEY + - name: AGENT_HOSTNAME + value: "code-reviewer-b" + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8101 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + 
port: 8101 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/code-reviewer-b/service.yaml b/deploy/k8s/agents/code-reviewer-b/service.yaml new file mode 100644 index 0000000..b571e29 --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-b/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: code-reviewer-b + namespace: aex + labels: + app.kubernetes.io/name: code-reviewer-b + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: code-reviewer-b + ports: + - port: 8101 + targetPort: 8101 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/code-reviewer-c/deployment.yaml b/deploy/k8s/agents/code-reviewer-c/deployment.yaml new file mode 100644 index 0000000..de43300 --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-c/deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: code-reviewer-c + namespace: aex + labels: + app.kubernetes.io/name: code-reviewer-c + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "ArchitectAI - Premium architecture review agent ($25 + $5/file)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: code-reviewer-c + template: + metadata: + labels: + app.kubernetes.io/name: code-reviewer-c + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: code-reviewer-c + image: ${REGISTRY}/code-review-code-reviewer-c:${TAG} + ports: + - containerPort: 8102 + protocol: TCP + env: + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: ANTHROPIC_API_KEY + - name: AEX_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: AEX_GATEWAY_URL + - name: AEX_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: 
AEX_API_KEY + - name: AGENT_HOSTNAME + value: "code-reviewer-c" + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8102 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8102 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/code-reviewer-c/service.yaml b/deploy/k8s/agents/code-reviewer-c/service.yaml new file mode 100644 index 0000000..01db1d0 --- /dev/null +++ b/deploy/k8s/agents/code-reviewer-c/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: code-reviewer-c + namespace: aex + labels: + app.kubernetes.io/name: code-reviewer-c + app.kubernetes.io/component: code-review-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: code-reviewer-c + ports: + - port: 8102 + targetPort: 8102 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/orchestrator/deployment.yaml b/deploy/k8s/agents/orchestrator/deployment.yaml new file mode 100644 index 0000000..730edbd --- /dev/null +++ b/deploy/k8s/agents/orchestrator/deployment.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: orchestrator + namespace: aex + labels: + app.kubernetes.io/name: orchestrator + app.kubernetes.io/component: orchestrator + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "Code Review Orchestrator - Workflow coordinator for review agents" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: orchestrator + template: + metadata: + labels: + app.kubernetes.io/name: orchestrator + app.kubernetes.io/component: orchestrator + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: 
orchestrator + image: ${REGISTRY}/code-review-orchestrator:${TAG} + ports: + - containerPort: 8103 + protocol: TCP + env: + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: ANTHROPIC_API_KEY + - name: AEX_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: AEX_GATEWAY_URL + - name: AEX_API_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: AEX_API_KEY + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8103 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8103 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/orchestrator/service.yaml b/deploy/k8s/agents/orchestrator/service.yaml new file mode 100644 index 0000000..ee47119 --- /dev/null +++ b/deploy/k8s/agents/orchestrator/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: orchestrator + namespace: aex + labels: + app.kubernetes.io/name: orchestrator + app.kubernetes.io/component: orchestrator + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: orchestrator + ports: + - port: 8103 + targetPort: 8103 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/payment-codeauditpay/deployment.yaml b/deploy/k8s/agents/payment-codeauditpay/deployment.yaml new file mode 100644 index 0000000..e3a9b17 --- /dev/null +++ b/deploy/k8s/agents/payment-codeauditpay/deployment.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: payment-codeauditpay + namespace: aex + labels: + app.kubernetes.io/name: payment-codeauditpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + annotations: 
+ description: "CodeAuditPay - Code audit specialist (2.5% fee, 3% reward = cashback)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: payment-codeauditpay + template: + metadata: + labels: + app.kubernetes.io/name: payment-codeauditpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: payment-codeauditpay + image: ${REGISTRY}/code-review-payment-codeauditpay:${TAG} + ports: + - containerPort: 8201 + protocol: TCP + env: + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8201 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8201 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/payment-codeauditpay/service.yaml b/deploy/k8s/agents/payment-codeauditpay/service.yaml new file mode 100644 index 0000000..8582f70 --- /dev/null +++ b/deploy/k8s/agents/payment-codeauditpay/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: payment-codeauditpay + namespace: aex + labels: + app.kubernetes.io/name: payment-codeauditpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: payment-codeauditpay + ports: + - port: 8201 + targetPort: 8201 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/payment-devpay/deployment.yaml b/deploy/k8s/agents/payment-devpay/deployment.yaml new file mode 100644 index 0000000..2475e1a --- /dev/null +++ b/deploy/k8s/agents/payment-devpay/deployment.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: payment-devpay + namespace: aex + labels: + 
app.kubernetes.io/name: payment-devpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "DevPay - General dev payments (2% fee, 1% reward on code_review)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: payment-devpay + template: + metadata: + labels: + app.kubernetes.io/name: payment-devpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: payment-devpay + image: ${REGISTRY}/code-review-payment-devpay:${TAG} + ports: + - containerPort: 8200 + protocol: TCP + env: + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8200 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8200 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/payment-devpay/service.yaml b/deploy/k8s/agents/payment-devpay/service.yaml new file mode 100644 index 0000000..ea3f27d --- /dev/null +++ b/deploy/k8s/agents/payment-devpay/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: payment-devpay + namespace: aex + labels: + app.kubernetes.io/name: payment-devpay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: payment-devpay + ports: + - port: 8200 + targetPort: 8200 + protocol: TCP + name: http diff --git a/deploy/k8s/agents/payment-securitypay/deployment.yaml b/deploy/k8s/agents/payment-securitypay/deployment.yaml new file mode 100644 index 0000000..8338e95 --- /dev/null +++ b/deploy/k8s/agents/payment-securitypay/deployment.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 
+kind: Deployment +metadata: + name: payment-securitypay + namespace: aex + labels: + app.kubernetes.io/name: payment-securitypay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "SecurityPay - Security specialist (3% fee, 4% reward = cashback)" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: payment-securitypay + template: + metadata: + labels: + app.kubernetes.io/name: payment-securitypay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: payment-securitypay + image: ${REGISTRY}/code-review-payment-securitypay:${TAG} + ports: + - containerPort: 8202 + protocol: TCP + env: + - name: CONFIG_PATH + valueFrom: + configMapKeyRef: + name: aex-config + key: CONFIG_PATH + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 8202 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8202 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/agents/payment-securitypay/service.yaml b/deploy/k8s/agents/payment-securitypay/service.yaml new file mode 100644 index 0000000..e0e3599 --- /dev/null +++ b/deploy/k8s/agents/payment-securitypay/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: payment-securitypay + namespace: aex + labels: + app.kubernetes.io/name: payment-securitypay + app.kubernetes.io/component: payment-agent + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: payment-securitypay + ports: + - port: 8202 + targetPort: 8202 + protocol: TCP + name: http diff --git a/deploy/k8s/base/configmap.yaml b/deploy/k8s/base/configmap.yaml new file mode 100644 index 0000000..8ac3a91 --- /dev/null +++ 
b/deploy/k8s/base/configmap.yaml @@ -0,0 +1,45 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: aex-config + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange + app.kubernetes.io/component: config +data: + # --- Environment --- + ENVIRONMENT: "development" + MONGO_DB: "aex" + STORE_TYPE: "mongo" + MONGO_COLLECTION_WORK: "work_specs" + AP2_ENABLED: "true" + + # --- AEX Core Service URLs (K8s DNS) --- + # All Go microservices listen on port 8080 internally + GATEWAY_URL: "http://aex-gateway.aex.svc.cluster.local:8080" + IDENTITY_URL: "http://aex-identity.aex.svc.cluster.local:8080" + BID_GATEWAY_URL: "http://aex-bid-gateway.aex.svc.cluster.local:8080" + PROVIDER_REGISTRY_URL: "http://aex-provider-registry.aex.svc.cluster.local:8080" + CONTRACT_ENGINE_URL: "http://aex-contract-engine.aex.svc.cluster.local:8080" + TRUST_BROKER_URL: "http://aex-trust-broker.aex.svc.cluster.local:8080" + SETTLEMENT_URL: "http://aex-settlement.aex.svc.cluster.local:8080" + TELEMETRY_URL: "http://aex-telemetry.aex.svc.cluster.local:8080" + CREDENTIALS_PROVIDER_URL: "http://aex-credentials-provider.aex.svc.cluster.local:8080" + WORK_PUBLISHER_URL: "http://aex-work-publisher.aex.svc.cluster.local:8080" + + # --- Agent / Demo URLs --- + AEX_GATEWAY_URL: "http://aex-gateway.aex.svc.cluster.local:8080" + AEX_SETTLEMENT_URL: "http://aex-settlement.aex.svc.cluster.local:8080" + AEX_PROVIDER_REGISTRY_URL: "http://aex-provider-registry.aex.svc.cluster.local:8080" + CONFIG_PATH: "config.yaml" + + # --- Code Review Agent URLs --- + CODE_REVIEWER_A_URL: "http://code-reviewer-a.aex.svc.cluster.local:8100" + CODE_REVIEWER_B_URL: "http://code-reviewer-b.aex.svc.cluster.local:8101" + CODE_REVIEWER_C_URL: "http://code-reviewer-c.aex.svc.cluster.local:8102" + ORCHESTRATOR_URL: "http://orchestrator.aex.svc.cluster.local:8103" + + # --- Payment Agent URLs --- + DEVPAY_URL: "http://payment-devpay.aex.svc.cluster.local:8200" + CODEAUDITPAY_URL: 
"http://payment-codeauditpay.aex.svc.cluster.local:8201" + SECURITYPAY_URL: "http://payment-securitypay.aex.svc.cluster.local:8202" diff --git a/deploy/k8s/base/kustomization.yaml b/deploy/k8s/base/kustomization.yaml new file mode 100644 index 0000000..4461d1c --- /dev/null +++ b/deploy/k8s/base/kustomization.yaml @@ -0,0 +1,63 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: aex + +resources: + - namespace.yaml + - configmap.yaml + - secrets.yaml + + # --- MongoDB --- + - ../services/mongodb/statefulset.yaml + - ../services/mongodb/service.yaml + + # --- AEX Core Services --- + - ../services/aex-gateway/deployment.yaml + - ../services/aex-gateway/service.yaml + - ../services/aex-work-publisher/deployment.yaml + - ../services/aex-work-publisher/service.yaml + - ../services/aex-bid-gateway/deployment.yaml + - ../services/aex-bid-gateway/service.yaml + - ../services/aex-bid-evaluator/deployment.yaml + - ../services/aex-bid-evaluator/service.yaml + - ../services/aex-contract-engine/deployment.yaml + - ../services/aex-contract-engine/service.yaml + - ../services/aex-provider-registry/deployment.yaml + - ../services/aex-provider-registry/service.yaml + - ../services/aex-trust-broker/deployment.yaml + - ../services/aex-trust-broker/service.yaml + - ../services/aex-identity/deployment.yaml + - ../services/aex-identity/service.yaml + - ../services/aex-settlement/deployment.yaml + - ../services/aex-settlement/service.yaml + - ../services/aex-telemetry/deployment.yaml + - ../services/aex-telemetry/service.yaml + - ../services/aex-credentials-provider/deployment.yaml + - ../services/aex-credentials-provider/service.yaml + + # --- Code Review Demo Agents --- + - ../agents/code-reviewer-a/deployment.yaml + - ../agents/code-reviewer-a/service.yaml + - ../agents/code-reviewer-b/deployment.yaml + - ../agents/code-reviewer-b/service.yaml + - ../agents/code-reviewer-c/deployment.yaml + - ../agents/code-reviewer-c/service.yaml + - 
../agents/orchestrator/deployment.yaml + - ../agents/orchestrator/service.yaml + - ../agents/payment-devpay/deployment.yaml + - ../agents/payment-devpay/service.yaml + - ../agents/payment-codeauditpay/deployment.yaml + - ../agents/payment-codeauditpay/service.yaml + - ../agents/payment-securitypay/deployment.yaml + - ../agents/payment-securitypay/service.yaml + + # --- Demo UI --- + - ../ui/deployment.yaml + - ../ui/service.yaml + + # --- Ingress --- + - ../ingress/ingress.yaml + +commonLabels: + app.kubernetes.io/part-of: agent-exchange diff --git a/deploy/k8s/base/namespace.yaml b/deploy/k8s/base/namespace.yaml new file mode 100644 index 0000000..f4996a5 --- /dev/null +++ b/deploy/k8s/base/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: aex + labels: + app.kubernetes.io/part-of: agent-exchange + app.kubernetes.io/managed-by: kustomize diff --git a/deploy/k8s/base/secrets.yaml b/deploy/k8s/base/secrets.yaml new file mode 100644 index 0000000..06f535c --- /dev/null +++ b/deploy/k8s/base/secrets.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Secret +metadata: + name: aex-secrets + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange + app.kubernetes.io/component: secrets +type: Opaque +stringData: + # IMPORTANT: Replace these placeholder values before deploying. + # For production, use an external secrets manager (e.g., AWS Secrets Manager, + # HashiCorp Vault, or Sealed Secrets) instead of storing secrets in manifests. 
+ + # Anthropic API key for Claude-powered code review agents + ANTHROPIC_API_KEY: "REPLACE_ME" + + # JWT signing key for aex-identity service + JWT_SIGNING_KEY: "REPLACE_ME" + + # MongoDB credentials + MONGO_USERNAME: "root" + MONGO_PASSWORD: "root" + + # MongoDB connection URI (includes credentials) + MONGO_URI: "mongodb://root:root@mongodb.aex.svc.cluster.local:27017/?authSource=admin" + + # AEX API key for agent authentication + AEX_API_KEY: "dev-api-key" diff --git a/deploy/k8s/ingress/ingress.yaml b/deploy/k8s/ingress/ingress.yaml new file mode 100644 index 0000000..8872575 --- /dev/null +++ b/deploy/k8s/ingress/ingress.yaml @@ -0,0 +1,40 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: aex-ingress + namespace: aex + labels: + app.kubernetes.io/name: aex-ingress + app.kubernetes.io/component: ingress + app.kubernetes.io/part-of: agent-exchange + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$1 + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "120" + nginx.ingress.kubernetes.io/proxy-send-timeout: "120" + # WebSocket support for NiceGUI real-time UI + nginx.ingress.kubernetes.io/proxy-http-version: "1.1" + nginx.ingress.kubernetes.io/configuration-snippet: | + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; +spec: + ingressClassName: nginx + rules: + - http: + paths: + # AEX Gateway API - all API traffic + - path: /api/(.*) + pathType: ImplementationSpecific + backend: + service: + name: aex-gateway + port: + number: 8080 + # Demo UI - NiceGUI dashboard (default route) + - path: /(.*) + pathType: ImplementationSpecific + backend: + service: + name: demo-ui-nicegui + port: + number: 8502 diff --git a/deploy/k8s/kind-config.yaml b/deploy/k8s/kind-config.yaml new file mode 100644 index 0000000..caffc22 --- /dev/null +++ b/deploy/k8s/kind-config.yaml @@ -0,0 +1,30 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: aex +nodes: + - 
role: control-plane + kubeadmConfigPatches: + - | + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: + node-labels: "ingress-ready=true" + extraPortMappings: + # Ingress HTTP + - containerPort: 80 + hostPort: 80 + protocol: TCP + # Ingress HTTPS + - containerPort: 443 + hostPort: 443 + protocol: TCP + # NiceGUI UI direct access + - containerPort: 30502 + hostPort: 8502 + protocol: TCP + # Gateway direct access + - containerPort: 30080 + hostPort: 8080 + protocol: TCP + - role: worker + - role: worker diff --git a/deploy/k8s/namespace.yaml b/deploy/k8s/namespace.yaml new file mode 100644 index 0000000..f4996a5 --- /dev/null +++ b/deploy/k8s/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: aex + labels: + app.kubernetes.io/part-of: agent-exchange + app.kubernetes.io/managed-by: kustomize diff --git a/deploy/k8s/overlays/dev/kustomization.yaml b/deploy/k8s/overlays/dev/kustomization.yaml new file mode 100644 index 0000000..ce681dc --- /dev/null +++ b/deploy/k8s/overlays/dev/kustomization.yaml @@ -0,0 +1,146 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Dev overlay: 1 replica, small resources, local images (Kind) +# Usage: kubectl apply -k deploy/k8s/overlays/dev/ + +namespace: aex + +resources: + - ../../base + +# Override images for local development (Kind cluster) +images: + # AEX Core Services + - name: ${REGISTRY}/aex-gateway + newName: aex/aex-gateway + newTag: local + - name: ${REGISTRY}/aex-work-publisher + newName: aex/aex-work-publisher + newTag: local + - name: ${REGISTRY}/aex-bid-gateway + newName: aex/aex-bid-gateway + newTag: local + - name: ${REGISTRY}/aex-bid-evaluator + newName: aex/aex-bid-evaluator + newTag: local + - name: ${REGISTRY}/aex-contract-engine + newName: aex/aex-contract-engine + newTag: local + - name: ${REGISTRY}/aex-provider-registry + newName: aex/aex-provider-registry + newTag: local + - name: ${REGISTRY}/aex-trust-broker + newName: aex/aex-trust-broker + 
newTag: local + - name: ${REGISTRY}/aex-identity + newName: aex/aex-identity + newTag: local + - name: ${REGISTRY}/aex-settlement + newName: aex/aex-settlement + newTag: local + - name: ${REGISTRY}/aex-telemetry + newName: aex/aex-telemetry + newTag: local + - name: ${REGISTRY}/aex-credentials-provider + newName: aex/aex-credentials-provider + newTag: local + # Demo Agents + - name: ${REGISTRY}/code-review-code-reviewer-a + newName: aex/code-reviewer-a + newTag: local + - name: ${REGISTRY}/code-review-code-reviewer-b + newName: aex/code-reviewer-b + newTag: local + - name: ${REGISTRY}/code-review-code-reviewer-c + newName: aex/code-reviewer-c + newTag: local + - name: ${REGISTRY}/code-review-orchestrator + newName: aex/orchestrator + newTag: local + - name: ${REGISTRY}/code-review-payment-devpay + newName: aex/payment-devpay + newTag: local + - name: ${REGISTRY}/code-review-payment-codeauditpay + newName: aex/payment-codeauditpay + newTag: local + - name: ${REGISTRY}/code-review-payment-securitypay + newName: aex/payment-securitypay + newTag: local + # UI + - name: ${REGISTRY}/code-review-demo-ui-nicegui + newName: aex/demo-ui-nicegui + newTag: local + +patches: + # --- Dev ConfigMap overrides --- + - target: + kind: ConfigMap + name: aex-config + patch: | + - op: replace + path: /data/ENVIRONMENT + value: "development" + + # --- Dev: Use NodePort for UI instead of LoadBalancer --- + - target: + kind: Service + name: demo-ui-nicegui + patch: | + - op: replace + path: /spec/type + value: NodePort + - op: add + path: /spec/ports/0/nodePort + value: 30502 + + # --- Dev: NodePort for gateway --- + - target: + kind: Service + name: aex-gateway + patch: | + - op: replace + path: /spec/type + value: NodePort + - op: add + path: /spec/ports/0/nodePort + value: 30080 + + # --- Dev: Smaller MongoDB PVC --- + - target: + kind: StatefulSet + name: mongodb + patch: | + - op: replace + path: /spec/volumeClaimTemplates/0/spec/resources/requests/storage + value: "1Gi" + + # --- 
Dev: imagePullPolicy Never for Kind --- + - target: + kind: Deployment + patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: unused + spec: + template: + spec: + containers: + - name: "*" + imagePullPolicy: Never + + - target: + kind: StatefulSet + name: mongodb + patch: | + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: mongodb + spec: + template: + spec: + containers: + - name: mongodb + imagePullPolicy: IfNotPresent diff --git a/deploy/k8s/overlays/production/hpa.yaml b/deploy/k8s/overlays/production/hpa.yaml new file mode 100644 index 0000000..6bbc811 --- /dev/null +++ b/deploy/k8s/overlays/production/hpa.yaml @@ -0,0 +1,211 @@ +--- +# HPA for aex-gateway (primary entry point - scale aggressively) +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-gateway-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-gateway + minReplicas: 3 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + behavior: + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Pods + value: 1 + periodSeconds: 60 + scaleUp: + stabilizationWindowSeconds: 30 + policies: + - type: Pods + value: 2 + periodSeconds: 60 +--- +# HPA for aex-work-publisher +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-work-publisher-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-work-publisher + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-work-publisher + minReplicas: 2 + maxReplicas: 8 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for 
aex-bid-gateway +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-bid-gateway-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-bid-gateway + minReplicas: 2 + maxReplicas: 8 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for aex-bid-evaluator +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-bid-evaluator-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-evaluator + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-bid-evaluator + minReplicas: 2 + maxReplicas: 6 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for aex-contract-engine +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-contract-engine-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-contract-engine + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-contract-engine + minReplicas: 2 + maxReplicas: 6 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for aex-provider-registry +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: aex-provider-registry-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-provider-registry + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-provider-registry + minReplicas: 2 + maxReplicas: 6 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for aex-settlement +apiVersion: autoscaling/v2 
+kind: HorizontalPodAutoscaler +metadata: + name: aex-settlement-hpa + namespace: aex + labels: + app.kubernetes.io/name: aex-settlement + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: aex-settlement + minReplicas: 2 + maxReplicas: 8 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 +--- +# HPA for demo-ui-nicegui +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: demo-ui-nicegui-hpa + namespace: aex + labels: + app.kubernetes.io/name: demo-ui-nicegui + app.kubernetes.io/part-of: agent-exchange +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: demo-ui-nicegui + minReplicas: 2 + maxReplicas: 6 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 75 diff --git a/deploy/k8s/overlays/production/kustomization.yaml b/deploy/k8s/overlays/production/kustomization.yaml new file mode 100644 index 0000000..91e9035 --- /dev/null +++ b/deploy/k8s/overlays/production/kustomization.yaml @@ -0,0 +1,266 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Production overlay: HPA (2-10), larger resources, node affinity, ECR images +# Usage: kubectl apply -k deploy/k8s/overlays/production/ + +namespace: aex + +resources: + - ../../base + # Production-specific resources + - hpa.yaml + - pdb.yaml + - networkpolicy.yaml + +# Override images for production (ECR or your container registry) +images: + # AEX Core Services + - name: ${REGISTRY}/aex-gateway + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-gateway + newTag: latest + - name: ${REGISTRY}/aex-work-publisher + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-work-publisher + newTag: latest + - name: ${REGISTRY}/aex-bid-gateway + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-bid-gateway + newTag: latest + - name: ${REGISTRY}/aex-bid-evaluator + 
newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-bid-evaluator + newTag: latest + - name: ${REGISTRY}/aex-contract-engine + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-contract-engine + newTag: latest + - name: ${REGISTRY}/aex-provider-registry + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-provider-registry + newTag: latest + - name: ${REGISTRY}/aex-trust-broker + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-trust-broker + newTag: latest + - name: ${REGISTRY}/aex-identity + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-identity + newTag: latest + - name: ${REGISTRY}/aex-settlement + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-settlement + newTag: latest + - name: ${REGISTRY}/aex-telemetry + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-telemetry + newTag: latest + - name: ${REGISTRY}/aex-credentials-provider + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-credentials-provider + newTag: latest + # Demo Agents + - name: ${REGISTRY}/code-review-code-reviewer-a + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-a + newTag: latest + - name: ${REGISTRY}/code-review-code-reviewer-b + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-b + newTag: latest + - name: ${REGISTRY}/code-review-code-reviewer-c + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-c + newTag: latest + - name: ${REGISTRY}/code-review-orchestrator + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-orchestrator + newTag: latest + - name: ${REGISTRY}/code-review-payment-devpay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-devpay + newTag: latest + - name: ${REGISTRY}/code-review-payment-codeauditpay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-codeauditpay + newTag: latest + - name: 
${REGISTRY}/code-review-payment-securitypay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-securitypay + newTag: latest + # UI + - name: ${REGISTRY}/code-review-demo-ui-nicegui + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-demo-ui-nicegui + newTag: latest + +patches: + # --- Production ConfigMap overrides --- + - target: + kind: ConfigMap + name: aex-config + patch: | + - op: replace + path: /data/ENVIRONMENT + value: "production" + + # --- Production: 3 replicas for core services (HPA will manage scaling) --- + - target: + kind: Deployment + name: aex-gateway + patch: | + - op: replace + path: /spec/replicas + value: 3 + - target: + kind: Deployment + name: aex-work-publisher + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-bid-gateway + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-bid-evaluator + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-contract-engine + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-provider-registry + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-trust-broker + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-identity + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-settlement + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-credentials-provider + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-telemetry + patch: | + - op: replace + path: /spec/replicas + value: 2 + + # --- Production: Larger resources for AEX services --- + - target: + kind: Deployment + labelSelector: app.kubernetes.io/component 
in (gateway,work-publisher,bid-gateway,bid-evaluator,contract-engine,provider-registry,trust-broker,identity,settlement,telemetry,credentials-provider) + patch: | + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: "500m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "512Mi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "1000m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "1Gi" + + # --- Production: Larger resources for agents --- + - target: + kind: Deployment + labelSelector: app.kubernetes.io/component in (code-review-agent,orchestrator,payment-agent,ui) + patch: | + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: "500m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "1Gi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "1000m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "2Gi" + + # --- Production: Larger MongoDB PVC and resources --- + - target: + kind: StatefulSet + name: mongodb + patch: | + - op: replace + path: /spec/volumeClaimTemplates/0/spec/resources/requests/storage + value: "10Gi" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: "1000m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "1Gi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "2000m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "2Gi" + + # --- Production: Add node affinity to all pods --- + - target: + kind: Deployment + patch: | + - op: add + path: /spec/template/spec/affinity + value: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + 
matchExpressions: + - key: node-role.kubernetes.io/workload + operator: In + values: + - agent-exchange + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: agent-exchange + topologyKey: kubernetes.io/hostname + + # --- Production: TLS on Ingress --- + - target: + kind: Ingress + name: aex-ingress + patch: | + - op: add + path: /spec/tls + value: + - hosts: + - aex.example.com + secretName: aex-tls-cert + - op: add + path: /spec/rules/0/host + value: "aex.example.com" + - op: add + path: /metadata/annotations/cert-manager.io~1cluster-issuer + value: "letsencrypt-prod" diff --git a/deploy/k8s/overlays/production/networkpolicy.yaml b/deploy/k8s/overlays/production/networkpolicy.yaml new file mode 100644 index 0000000..2659588 --- /dev/null +++ b/deploy/k8s/overlays/production/networkpolicy.yaml @@ -0,0 +1,102 @@ +--- +# Default deny all ingress in the aex namespace +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-deny-ingress + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange +spec: + podSelector: {} + policyTypes: + - Ingress +--- +# Allow ingress to aex-gateway from the ingress controller +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-ingress-to-gateway + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: aex-gateway + policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: ingress-nginx + ports: + - protocol: TCP + port: 8080 +--- +# Allow ingress to demo-ui-nicegui from the ingress controller +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-ingress-to-ui + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: demo-ui-nicegui 
+ policyTypes: + - Ingress + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: ingress-nginx + ports: + - protocol: TCP + port: 8502 +--- +# Allow all intra-namespace communication within aex +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-intra-namespace + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange +spec: + podSelector: + matchLabels: + app.kubernetes.io/part-of: agent-exchange + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/part-of: agent-exchange +--- +# Allow MongoDB access only from AEX services +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-mongodb-from-aex + namespace: aex + labels: + app.kubernetes.io/part-of: agent-exchange +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: mongodb + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app.kubernetes.io/part-of: agent-exchange + ports: + - protocol: TCP + port: 27017 diff --git a/deploy/k8s/overlays/production/pdb.yaml b/deploy/k8s/overlays/production/pdb.yaml new file mode 100644 index 0000000..a9fc367 --- /dev/null +++ b/deploy/k8s/overlays/production/pdb.yaml @@ -0,0 +1,105 @@ +--- +# PodDisruptionBudget for aex-gateway +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-gateway-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-gateway +--- +# PodDisruptionBudget for aex-bid-gateway +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-bid-gateway-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-bid-gateway +--- +# PodDisruptionBudget for aex-contract-engine 
+apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-contract-engine-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-contract-engine + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-contract-engine +--- +# PodDisruptionBudget for aex-settlement +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-settlement-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-settlement + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-settlement +--- +# PodDisruptionBudget for aex-provider-registry +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-provider-registry-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-provider-registry + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-provider-registry +--- +# PodDisruptionBudget for aex-identity +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: aex-identity-pdb + namespace: aex + labels: + app.kubernetes.io/name: aex-identity + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-identity +--- +# PodDisruptionBudget for mongodb +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: mongodb-pdb + namespace: aex + labels: + app.kubernetes.io/name: mongodb + app.kubernetes.io/part-of: agent-exchange +spec: + minAvailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: mongodb diff --git a/deploy/k8s/overlays/staging/kustomization.yaml b/deploy/k8s/overlays/staging/kustomization.yaml new file mode 100644 index 0000000..66202b3 --- /dev/null +++ b/deploy/k8s/overlays/staging/kustomization.yaml @@ -0,0 +1,218 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Staging overlay: 2 replicas, 
moderate resources, ECR images +# Usage: kubectl apply -k deploy/k8s/overlays/staging/ + +namespace: aex + +resources: + - ../../base + +# Override images for staging (ECR or your container registry). Each name matches the ${REGISTRY}/... image reference written in the service manifests; YOUR_ACCOUNT and YOUR_REGION are placeholders that must be replaced before this overlay is applied +images: + # AEX Core Services + - name: ${REGISTRY}/aex-gateway + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-gateway + newTag: staging + - name: ${REGISTRY}/aex-work-publisher + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-work-publisher + newTag: staging + - name: ${REGISTRY}/aex-bid-gateway + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-bid-gateway + newTag: staging + - name: ${REGISTRY}/aex-bid-evaluator + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-bid-evaluator + newTag: staging + - name: ${REGISTRY}/aex-contract-engine + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-contract-engine + newTag: staging + - name: ${REGISTRY}/aex-provider-registry + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-provider-registry + newTag: staging + - name: ${REGISTRY}/aex-trust-broker + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-trust-broker + newTag: staging + - name: ${REGISTRY}/aex-identity + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-identity + newTag: staging + - name: ${REGISTRY}/aex-settlement + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-settlement + newTag: staging + - name: ${REGISTRY}/aex-telemetry + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-telemetry + newTag: staging + - name: ${REGISTRY}/aex-credentials-provider + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/aex-credentials-provider + newTag: staging + # Demo Agents (code-review images; same placeholder registry and staging tag) + - name: ${REGISTRY}/code-review-code-reviewer-a + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-a + newTag: staging + - name: ${REGISTRY}/code-review-code-reviewer-b + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-b + 
newTag: staging + - name: ${REGISTRY}/code-review-code-reviewer-c + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-code-reviewer-c + newTag: staging + - name: ${REGISTRY}/code-review-orchestrator + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-orchestrator + newTag: staging + - name: ${REGISTRY}/code-review-payment-devpay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-devpay + newTag: staging + - name: ${REGISTRY}/code-review-payment-codeauditpay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-codeauditpay + newTag: staging + - name: ${REGISTRY}/code-review-payment-securitypay + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-payment-securitypay + newTag: staging + # UI + - name: ${REGISTRY}/code-review-demo-ui-nicegui + newName: YOUR_ACCOUNT.dkr.ecr.YOUR_REGION.amazonaws.com/code-review-demo-ui-nicegui + newTag: staging + +patches: + # --- Staging ConfigMap overrides (JSON Patch replace: /data/ENVIRONMENT must already exist in aex-config) --- + - target: + kind: ConfigMap + name: aex-config + patch: | + - op: replace + path: /data/ENVIRONMENT + value: "staging" + + # --- Staging: 2 replicas for core services (NOTE(review): no replicas patch in this list targets aex-telemetry, so it keeps the base replica count; confirm that is intended) --- + - target: + kind: Deployment + name: aex-gateway + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-work-publisher + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-bid-gateway + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-bid-evaluator + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-contract-engine + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-provider-registry + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-trust-broker + patch: | + - op: replace + path: /spec/replicas + 
value: 2 + - target: + kind: Deployment + name: aex-identity + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-settlement + patch: | + - op: replace + path: /spec/replicas + value: 2 + - target: + kind: Deployment + name: aex-credentials-provider + patch: | + - op: replace + path: /spec/replicas + value: 2 + + # --- Staging: Moderate resources for AEX services (applies to containers/0 of every Deployment whose app.kubernetes.io/component label is in the list; replace requires the fields to exist already) --- + - target: + kind: Deployment + labelSelector: app.kubernetes.io/component in (gateway,work-publisher,bid-gateway,bid-evaluator,contract-engine,provider-registry,trust-broker,identity,settlement,telemetry,credentials-provider) + patch: | + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: "200m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "256Mi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "500m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "512Mi" + + # --- Staging: Moderate resources for agents (same containers/0 paths, selected by the code-review-agent, orchestrator, payment-agent and ui component labels) --- + - target: + kind: Deployment + labelSelector: app.kubernetes.io/component in (code-review-agent,orchestrator,payment-agent,ui) + patch: | + - op: replace + path: /spec/template/spec/containers/0/resources/requests/cpu + value: "300m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "512Mi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "750m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "1Gi" + + # --- Staging: Larger MongoDB PVC (NOTE(review): volumeClaimTemplates cannot be edited on an existing StatefulSet, so this presumably only takes effect on first creation; confirm) --- + - target: + kind: StatefulSet + name: mongodb + patch: | + - op: replace + path: /spec/volumeClaimTemplates/0/spec/resources/requests/storage + value: "5Gi" + + # --- Staging: Larger MongoDB resources (second patch against the same mongodb StatefulSet target) --- + - target: + kind: StatefulSet + name: mongodb + patch: | + - op: replace + path: 
/spec/template/spec/containers/0/resources/requests/cpu + value: "500m" + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "512Mi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/cpu + value: "1000m" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "1Gi" diff --git a/deploy/k8s/services/aex-bid-evaluator/deployment.yaml b/deploy/k8s/services/aex-bid-evaluator/deployment.yaml new file mode 100644 index 0000000..ff5a5c1 --- /dev/null +++ b/deploy/k8s/services/aex-bid-evaluator/deployment.yaml @@ -0,0 +1,73 @@ +# Deployment aex-bid-evaluator: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-bid-evaluator + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-evaluator + app.kubernetes.io/component: bid-evaluator + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-bid-evaluator + template: + metadata: + labels: + app.kubernetes.io/name: aex-bid-evaluator + app.kubernetes.io/component: bid-evaluator + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-bid-evaluator + image: ${REGISTRY}/aex-bid-evaluator:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # BID_GATEWAY_URL, TRUST_BROKER_URL and MONGO_DB come from the aex-config ConfigMap; MONGO_URI comes from the aex-secrets Secret + env: + - name: PORT + value: "8080" + - name: BID_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: BID_GATEWAY_URL + - name: TRUST_BROKER_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: TRUST_BROKER_URL + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness: GET /health on port 8080, first check after 10s, then every 30s with a 5s timeout; 3 consecutive failures mark the container unhealthy + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + 
periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-bid-evaluator/service.yaml b/deploy/k8s/services/aex-bid-evaluator/service.yaml new file mode 100644 index 0000000..0052e64 --- /dev/null +++ b/deploy/k8s/services/aex-bid-evaluator/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-bid-evaluator: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-bid-evaluator +apiVersion: v1 +kind: Service +metadata: + name: aex-bid-evaluator + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-evaluator + app.kubernetes.io/component: bid-evaluator + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-bid-evaluator + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-bid-gateway/deployment.yaml b/deploy/k8s/services/aex-bid-gateway/deployment.yaml new file mode 100644 index 0000000..a363e34 --- /dev/null +++ b/deploy/k8s/services/aex-bid-gateway/deployment.yaml @@ -0,0 +1,68 @@ +# Deployment aex-bid-gateway: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-bid-gateway + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-gateway + app.kubernetes.io/component: bid-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-bid-gateway + template: + metadata: + labels: + app.kubernetes.io/name: aex-bid-gateway + app.kubernetes.io/component: bid-gateway + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-bid-gateway + image: ${REGISTRY}/aex-bid-gateway:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # MONGO_URI comes from the aex-secrets Secret; MONGO_DB and PROVIDER_REGISTRY_URL come from the aex-config ConfigMap + env: + - name: PORT + value: "8080" + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + - name: PROVIDER_REGISTRY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: PROVIDER_REGISTRY_URL + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + livenessProbe: + 
httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-bid-gateway/service.yaml b/deploy/k8s/services/aex-bid-gateway/service.yaml new file mode 100644 index 0000000..edf199d --- /dev/null +++ b/deploy/k8s/services/aex-bid-gateway/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-bid-gateway: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-bid-gateway +apiVersion: v1 +kind: Service +metadata: + name: aex-bid-gateway + namespace: aex + labels: + app.kubernetes.io/name: aex-bid-gateway + app.kubernetes.io/component: bid-gateway + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-bid-gateway + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-contract-engine/deployment.yaml b/deploy/k8s/services/aex-contract-engine/deployment.yaml new file mode 100644 index 0000000..b2ad357 --- /dev/null +++ b/deploy/k8s/services/aex-contract-engine/deployment.yaml @@ -0,0 +1,68 @@ +# Deployment aex-contract-engine: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-contract-engine + namespace: aex + labels: + app.kubernetes.io/name: aex-contract-engine + app.kubernetes.io/component: contract-engine + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-contract-engine + template: + metadata: + labels: + app.kubernetes.io/name: aex-contract-engine + app.kubernetes.io/component: contract-engine + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-contract-engine + image: ${REGISTRY}/aex-contract-engine:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + env: + - name: PORT + value: "8080" + - name: BID_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: BID_GATEWAY_URL + - name: MONGO_URI + valueFrom: + secretKeyRef: + 
name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-contract-engine/service.yaml b/deploy/k8s/services/aex-contract-engine/service.yaml new file mode 100644 index 0000000..b65c2f6 --- /dev/null +++ b/deploy/k8s/services/aex-contract-engine/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-contract-engine: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-contract-engine +apiVersion: v1 +kind: Service +metadata: + name: aex-contract-engine + namespace: aex + labels: + app.kubernetes.io/name: aex-contract-engine + app.kubernetes.io/component: contract-engine + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-contract-engine + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-credentials-provider/deployment.yaml b/deploy/k8s/services/aex-credentials-provider/deployment.yaml new file mode 100644 index 0000000..48611a0 --- /dev/null +++ b/deploy/k8s/services/aex-credentials-provider/deployment.yaml @@ -0,0 +1,68 @@ +# Deployment aex-credentials-provider: replicas: 1 in this manifest +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-credentials-provider + namespace: aex + labels: + app.kubernetes.io/name: aex-credentials-provider + app.kubernetes.io/component: credentials-provider + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-credentials-provider + template: + metadata: + labels: + app.kubernetes.io/name: aex-credentials-provider + app.kubernetes.io/component: credentials-provider + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: 
aex-credentials-provider + image: ${REGISTRY}/aex-credentials-provider:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # ENVIRONMENT and MONGO_DB come from the aex-config ConfigMap; MONGO_URI comes from the aex-secrets Secret + env: + - name: PORT + value: "8080" + - name: ENVIRONMENT + valueFrom: + configMapKeyRef: + name: aex-config + key: ENVIRONMENT + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-credentials-provider/service.yaml b/deploy/k8s/services/aex-credentials-provider/service.yaml new file mode 100644 index 0000000..fd5c0dc --- /dev/null +++ b/deploy/k8s/services/aex-credentials-provider/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-credentials-provider: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-credentials-provider +apiVersion: v1 +kind: Service +metadata: + name: aex-credentials-provider + namespace: aex + labels: + app.kubernetes.io/name: aex-credentials-provider + app.kubernetes.io/component: credentials-provider + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-credentials-provider + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-gateway/deployment.yaml b/deploy/k8s/services/aex-gateway/deployment.yaml new file mode 100644 index 0000000..9e8b853 --- /dev/null +++ b/deploy/k8s/services/aex-gateway/deployment.yaml @@ -0,0 +1,78 @@ +# Deployment aex-gateway: replicas: 1 in this manifest +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-gateway + namespace: aex + labels: + app.kubernetes.io/name: aex-gateway + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + 
app.kubernetes.io/name: aex-gateway + template: + metadata: + labels: + app.kubernetes.io/name: aex-gateway + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-gateway + image: ${REGISTRY}/aex-gateway:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # Every upstream URL below is read from the aex-config ConfigMap; this env block references no Secret + env: + - name: PORT + value: "8080" + - name: IDENTITY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: IDENTITY_URL + - name: BID_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: BID_GATEWAY_URL + - name: PROVIDER_REGISTRY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: PROVIDER_REGISTRY_URL + - name: CONTRACT_ENGINE_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CONTRACT_ENGINE_URL + - name: TRUST_BROKER_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: TRUST_BROKER_URL + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-gateway/service.yaml b/deploy/k8s/services/aex-gateway/service.yaml new file mode 100644 index 0000000..f51ee32 --- /dev/null +++ b/deploy/k8s/services/aex-gateway/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-gateway: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-gateway +apiVersion: v1 +kind: Service +metadata: + name: aex-gateway + namespace: aex + labels: + app.kubernetes.io/name: aex-gateway + app.kubernetes.io/component: gateway + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-gateway + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-identity/deployment.yaml b/deploy/k8s/services/aex-identity/deployment.yaml new file mode 100644 index 
0000000..047427a --- /dev/null +++ b/deploy/k8s/services/aex-identity/deployment.yaml @@ -0,0 +1,68 @@ +# Deployment aex-identity: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-identity + namespace: aex + labels: + app.kubernetes.io/name: aex-identity + app.kubernetes.io/component: identity + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-identity + template: + metadata: + labels: + app.kubernetes.io/name: aex-identity + app.kubernetes.io/component: identity + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-identity + image: ${REGISTRY}/aex-identity:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # MONGO_URI and JWT_SIGNING_KEY come from the aex-secrets Secret; MONGO_DB comes from the aex-config ConfigMap + env: + - name: PORT + value: "8080" + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + - name: JWT_SIGNING_KEY + valueFrom: + secretKeyRef: + name: aex-secrets + key: JWT_SIGNING_KEY + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-identity/service.yaml b/deploy/k8s/services/aex-identity/service.yaml new file mode 100644 index 0000000..90f79b9 --- /dev/null +++ b/deploy/k8s/services/aex-identity/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-identity: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-identity +apiVersion: v1 +kind: Service +metadata: + name: aex-identity + namespace: aex + labels: + app.kubernetes.io/name: aex-identity + app.kubernetes.io/component: identity + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-identity + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git 
a/deploy/k8s/services/aex-provider-registry/deployment.yaml b/deploy/k8s/services/aex-provider-registry/deployment.yaml new file mode 100644 index 0000000..adc4706 --- /dev/null +++ b/deploy/k8s/services/aex-provider-registry/deployment.yaml @@ -0,0 +1,68 @@ +# Deployment aex-provider-registry: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-provider-registry + namespace: aex + labels: + app.kubernetes.io/name: aex-provider-registry + app.kubernetes.io/component: provider-registry + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-provider-registry + template: + metadata: + labels: + app.kubernetes.io/name: aex-provider-registry + app.kubernetes.io/component: provider-registry + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-provider-registry + image: ${REGISTRY}/aex-provider-registry:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # MONGO_URI comes from the aex-secrets Secret; MONGO_DB and ENVIRONMENT come from the aex-config ConfigMap + env: + - name: PORT + value: "8080" + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + - name: ENVIRONMENT + valueFrom: + configMapKeyRef: + name: aex-config + key: ENVIRONMENT + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-provider-registry/service.yaml b/deploy/k8s/services/aex-provider-registry/service.yaml new file mode 100644 index 0000000..1091dab --- /dev/null +++ b/deploy/k8s/services/aex-provider-registry/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: aex-provider-registry + namespace: aex + labels: + app.kubernetes.io/name: 
aex-provider-registry + app.kubernetes.io/component: provider-registry + app.kubernetes.io/part-of: agent-exchange +# Port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-provider-registry +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-provider-registry + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-settlement/deployment.yaml b/deploy/k8s/services/aex-settlement/deployment.yaml new file mode 100644 index 0000000..6f283e5 --- /dev/null +++ b/deploy/k8s/services/aex-settlement/deployment.yaml @@ -0,0 +1,93 @@ +# Deployment aex-settlement: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-settlement + namespace: aex + labels: + app.kubernetes.io/name: aex-settlement + app.kubernetes.io/component: settlement + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-settlement + template: + metadata: + labels: + app.kubernetes.io/name: aex-settlement + app.kubernetes.io/component: settlement + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-settlement + image: ${REGISTRY}/aex-settlement:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # MONGO_URI is the only Secret-backed variable (aex-secrets); ENVIRONMENT, MONGO_DB, AP2_ENABLED and the *_URL values come from the aex-config ConfigMap + env: + - name: PORT + value: "8080" + - name: ENVIRONMENT + valueFrom: + configMapKeyRef: + name: aex-config + key: ENVIRONMENT + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + - name: AP2_ENABLED + valueFrom: + configMapKeyRef: + name: aex-config + key: AP2_ENABLED + - name: CREDENTIALS_PROVIDER_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CREDENTIALS_PROVIDER_URL + - name: DEVPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: DEVPAY_URL + - name: CODEAUDITPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CODEAUDITPAY_URL + - name: SECURITYPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: SECURITYPAY_URL + resources: + requests: + cpu: 100m + memory: 128Mi + 
limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-settlement/service.yaml b/deploy/k8s/services/aex-settlement/service.yaml new file mode 100644 index 0000000..aa123b2 --- /dev/null +++ b/deploy/k8s/services/aex-settlement/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-settlement: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-settlement +apiVersion: v1 +kind: Service +metadata: + name: aex-settlement + namespace: aex + labels: + app.kubernetes.io/name: aex-settlement + app.kubernetes.io/component: settlement + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-settlement + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-telemetry/deployment.yaml b/deploy/k8s/services/aex-telemetry/deployment.yaml new file mode 100644 index 0000000..cd8231e --- /dev/null +++ b/deploy/k8s/services/aex-telemetry/deployment.yaml @@ -0,0 +1,63 @@ +# Deployment aex-telemetry: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-telemetry + namespace: aex + labels: + app.kubernetes.io/name: aex-telemetry + app.kubernetes.io/component: telemetry + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-telemetry + template: + metadata: + labels: + app.kubernetes.io/name: aex-telemetry + app.kubernetes.io/component: telemetry + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-telemetry + image: ${REGISTRY}/aex-telemetry:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + # MONGO_URI comes from the aex-secrets Secret; MONGO_DB comes from the aex-config ConfigMap + env: + - name: PORT + value: "8080" + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB 
+ resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-telemetry/service.yaml b/deploy/k8s/services/aex-telemetry/service.yaml new file mode 100644 index 0000000..9d1fa2e --- /dev/null +++ b/deploy/k8s/services/aex-telemetry/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-telemetry: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-telemetry +apiVersion: v1 +kind: Service +metadata: + name: aex-telemetry + namespace: aex + labels: + app.kubernetes.io/name: aex-telemetry + app.kubernetes.io/component: telemetry + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-telemetry + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-trust-broker/deployment.yaml b/deploy/k8s/services/aex-trust-broker/deployment.yaml new file mode 100644 index 0000000..4afe751 --- /dev/null +++ b/deploy/k8s/services/aex-trust-broker/deployment.yaml @@ -0,0 +1,63 @@ +# Deployment aex-trust-broker: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-trust-broker + namespace: aex + labels: + app.kubernetes.io/name: aex-trust-broker + app.kubernetes.io/component: trust-broker + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-trust-broker + template: + metadata: + labels: + app.kubernetes.io/name: aex-trust-broker + app.kubernetes.io/component: trust-broker + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-trust-broker + image: ${REGISTRY}/aex-trust-broker:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + env: + - name: PORT + value: "8080" + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - 
name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-trust-broker/service.yaml b/deploy/k8s/services/aex-trust-broker/service.yaml new file mode 100644 index 0000000..c8669cd --- /dev/null +++ b/deploy/k8s/services/aex-trust-broker/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-trust-broker: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-trust-broker +apiVersion: v1 +kind: Service +metadata: + name: aex-trust-broker + namespace: aex + labels: + app.kubernetes.io/name: aex-trust-broker + app.kubernetes.io/component: trust-broker + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-trust-broker + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/aex-work-publisher/deployment.yaml b/deploy/k8s/services/aex-work-publisher/deployment.yaml new file mode 100644 index 0000000..e0f55ff --- /dev/null +++ b/deploy/k8s/services/aex-work-publisher/deployment.yaml @@ -0,0 +1,83 @@ +# Deployment aex-work-publisher: replicas: 1 in this manifest; the container listens on TCP 8080 +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aex-work-publisher + namespace: aex + labels: + app.kubernetes.io/name: aex-work-publisher + app.kubernetes.io/component: work-publisher + app.kubernetes.io/part-of: agent-exchange +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: aex-work-publisher + template: + metadata: + labels: + app.kubernetes.io/name: aex-work-publisher + app.kubernetes.io/component: work-publisher + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: aex-work-publisher + image: ${REGISTRY}/aex-work-publisher:${TAG} + ports: + - containerPort: 8080 + protocol: TCP + 
env: + # MONGO_URI comes from the aex-secrets Secret; every other valueFrom entry below reads the aex-config ConfigMap + - name: PORT + value: "8080" + - name: STORE_TYPE + valueFrom: + configMapKeyRef: + name: aex-config + key: STORE_TYPE + - name: MONGO_URI + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_URI + - name: MONGO_DB + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_DB + - name: MONGO_COLLECTION_WORK + valueFrom: + configMapKeyRef: + name: aex-config + key: MONGO_COLLECTION_WORK + - name: PROVIDER_REGISTRY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: PROVIDER_REGISTRY_URL + - name: ENVIRONMENT + valueFrom: + configMapKeyRef: + name: aex-config + key: ENVIRONMENT + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + # Liveness and readiness both GET /health on port 8080 (liveness: 10s initial delay, 30s period; readiness: 5s initial delay, 10s period) + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 diff --git a/deploy/k8s/services/aex-work-publisher/service.yaml b/deploy/k8s/services/aex-work-publisher/service.yaml new file mode 100644 index 0000000..384e07d --- /dev/null +++ b/deploy/k8s/services/aex-work-publisher/service.yaml @@ -0,0 +1,18 @@ +# ClusterIP Service aex-work-publisher: port 8080 (named http) forwards to targetPort 8080 on pods labelled app.kubernetes.io/name=aex-work-publisher +apiVersion: v1 +kind: Service +metadata: + name: aex-work-publisher + namespace: aex + labels: + app.kubernetes.io/name: aex-work-publisher + app.kubernetes.io/component: work-publisher + app.kubernetes.io/part-of: agent-exchange +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: aex-work-publisher + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http diff --git a/deploy/k8s/services/mongodb/service.yaml b/deploy/k8s/services/mongodb/service.yaml new file mode 100644 index 0000000..4313864 --- /dev/null +++ b/deploy/k8s/services/mongodb/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: mongodb + namespace: aex + labels: + app.kubernetes.io/name: mongodb + 
app.kubernetes.io/component: database + app.kubernetes.io/part-of: agent-exchange +# Port 27017 (named mongo) forwards to targetPort 27017 on pods labelled app.kubernetes.io/name=mongodb +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: mongodb + ports: + - port: 27017 + targetPort: 27017 + protocol: TCP + name: mongo diff --git a/deploy/k8s/services/mongodb/statefulset.yaml b/deploy/k8s/services/mongodb/statefulset.yaml new file mode 100644 index 0000000..8731cd8 --- /dev/null +++ b/deploy/k8s/services/mongodb/statefulset.yaml @@ -0,0 +1,83 @@ +# StatefulSet mongodb: single replica running mongo:7 with one mongo-data volume claim mounted at /data/db +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: mongodb + namespace: aex + labels: + app.kubernetes.io/name: mongodb + app.kubernetes.io/component: database + app.kubernetes.io/part-of: agent-exchange +spec: + # NOTE(review): serviceName points at the mongodb Service, which is declared as a regular ClusterIP Service rather than a headless one (clusterIP: None); confirm stable per-pod DNS names are not needed + serviceName: mongodb + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: mongodb + template: + metadata: + labels: + app.kubernetes.io/name: mongodb + app.kubernetes.io/component: database + app.kubernetes.io/part-of: agent-exchange + spec: + terminationGracePeriodSeconds: 30 + containers: + - name: mongodb + image: mongo:7 + ports: + - containerPort: 27017 + name: mongo + protocol: TCP + # Root credentials are read from the aex-secrets Secret keys MONGO_USERNAME and MONGO_PASSWORD + env: + - name: MONGO_INITDB_ROOT_USERNAME + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_USERNAME + - name: MONGO_INITDB_ROOT_PASSWORD + valueFrom: + secretKeyRef: + name: aex-secrets + key: MONGO_PASSWORD + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + # Both probes exec mongosh --eval with an admin ping inside the container, each with a 5s timeout; liveness tolerates 5 consecutive failures, readiness 3 + livenessProbe: + exec: + command: + - mongosh + - --eval + - "db.adminCommand('ping')" + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 5 + readinessProbe: + exec: + command: + - mongosh + - --eval + - "db.adminCommand('ping')" + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + volumeMounts: + - name: mongo-data + mountPath: /data/db + volumeClaimTemplates: + - metadata: + name: mongo-data + labels: + app.kubernetes.io/name: mongodb + app.kubernetes.io/part-of: agent-exchange + spec: + accessModes: + - ReadWriteOnce 
+ resources: + requests: + storage: 1Gi diff --git a/deploy/k8s/ui/deployment.yaml b/deploy/k8s/ui/deployment.yaml new file mode 100644 index 0000000..79af5b2 --- /dev/null +++ b/deploy/k8s/ui/deployment.yaml @@ -0,0 +1,98 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: demo-ui-nicegui + namespace: aex + labels: + app.kubernetes.io/name: demo-ui-nicegui + app.kubernetes.io/component: ui + app.kubernetes.io/part-of: agent-exchange + annotations: + description: "NiceGUI real-time WebSocket dashboard for code review demo" +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: demo-ui-nicegui + template: + metadata: + labels: + app.kubernetes.io/name: demo-ui-nicegui + app.kubernetes.io/component: ui + app.kubernetes.io/part-of: agent-exchange + spec: + containers: + - name: demo-ui-nicegui + image: ${REGISTRY}/code-review-demo-ui-nicegui:${TAG} + ports: + - containerPort: 8502 + protocol: TCP + env: + - name: PORT + value: "8502" + - name: AEX_GATEWAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: AEX_GATEWAY_URL + - name: AEX_SETTLEMENT_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: SETTLEMENT_URL + - name: AEX_PROVIDER_REGISTRY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: PROVIDER_REGISTRY_URL + - name: CODE_REVIEWER_A_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CODE_REVIEWER_A_URL + - name: CODE_REVIEWER_B_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CODE_REVIEWER_B_URL + - name: CODE_REVIEWER_C_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CODE_REVIEWER_C_URL + - name: DEVPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: DEVPAY_URL + - name: CODEAUDITPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: CODEAUDITPAY_URL + - name: SECURITYPAY_URL + valueFrom: + configMapKeyRef: + name: aex-config + key: SECURITYPAY_URL + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + 
---
# deploy/k8s/ui/service.yaml
# LoadBalancer Service that forwards TCP 8502 to pods labelled
# app.kubernetes.io/name=demo-ui-nicegui in the "aex" namespace.
apiVersion: v1
kind: Service
metadata:
  namespace: aex
  name: demo-ui-nicegui
  labels:
    app.kubernetes.io/part-of: agent-exchange
    app.kubernetes.io/component: ui
    app.kubernetes.io/name: demo-ui-nicegui
spec:
  type: LoadBalancer
  ports:
    # Service port and container port are the same (8502).
    - name: http
      protocol: TCP
      port: 8502
      targetPort: 8502
  selector:
    app.kubernetes.io/name: demo-ui-nicegui
&& pwd)" + +# Configuration +AWS_REGION="${AWS_REGION:-us-east-1}" +AWS_ACCOUNT_ID="${AWS_ACCOUNT_ID:-}" +CLUSTER_NAME="${CLUSTER_NAME:-aex-eks}" +ENVIRONMENT_NAME="${ENVIRONMENT_NAME:-aex}" +ENVIRONMENT="${ENVIRONMENT:-dev}" +DRY_RUN=false + +usage() { + echo "Agent Exchange - EKS Setup" + echo "" + echo "Usage: $0 [command] [options]" + echo "" + echo "Commands:" + echo " all Run all setup steps (default)" + echo " prerequisites Install/verify eksctl, kubectl, helm" + echo " cluster Create EKS cluster via CloudFormation" + echo " addons Install Helm add-ons (LB controller, ingress, etc.)" + echo " irsa Set up IRSA for pod-level AWS permissions" + echo " autoscaler Configure cluster autoscaler" + echo " validate Validate configuration without creating resources" + echo "" + echo "Options:" + echo " --dry-run Show what would be done without making changes" + echo "" + echo "Environment variables:" + echo " AWS_REGION AWS region (default: us-east-1)" + echo " AWS_ACCOUNT_ID AWS account ID (auto-detected)" + echo " CLUSTER_NAME EKS cluster name (default: aex-eks)" + echo " ENVIRONMENT_NAME Environment name prefix (default: aex)" + echo " ENVIRONMENT Environment: dev, staging, production (default: dev)" + echo "" + echo "This script will:" + echo " 1. Install/verify eksctl, kubectl, helm" + echo " 2. Deploy infrastructure stack (VPC, ECR, secrets)" + echo " 3. Deploy EKS cluster via CloudFormation" + echo " 4. Install AWS Load Balancer Controller (Helm)" + echo " 5. Install Nginx Ingress Controller (Helm)" + echo " 6. Install External Secrets Operator (Helm)" + echo " 7. Install metrics-server for HPA" + echo " 8. Configure cluster autoscaler" + echo " 9. 
# Parse options
#
# Scans every argument for global flags.
# Arguments:
#   "$@" - the script's full argument list (commands and flags, any order)
# Sets:
#   DRY_RUN=true when --dry-run appears anywhere in the list.
parse_options() {
    local arg
    for arg in "$@"; do
        case "$arg" in
            --dry-run)
                # No `shift` here: the loop iterates over an already-expanded
                # copy of "$@", so shifting would only alter this function's
                # own positional parameters and has no useful effect.
                DRY_RUN=true
                ;;
        esac
    done
}

# ============================================================
# Prerequisites
# ============================================================

# Verify that the required CLIs are installed and that AWS credentials work.
#
# Required: aws, kubectl, helm, docker, working AWS authentication.
# Optional: eksctl (reported, never counted as an error).
# Sets:
#   AWS_ACCOUNT_ID - filled from `aws sts get-caller-identity` when empty.
# Returns:
#   0 when every required check passes, 1 otherwise.
check_prerequisites() {
    echo "============================================="
    echo " Checking Prerequisites"
    echo "============================================="
    echo ""

    local errors=0
    local aws_version kubectl_version helm_version eksctl_version identity

    # NOTE: errors are counted with errors=$((errors + 1)) rather than
    # ((errors++)). The latter returns exit status 1 when the old value is 0,
    # which aborts the script under `set -e` if this function is ever called
    # outside an `if`/`||` context.

    # Check AWS CLI
    echo -n "Checking AWS CLI... "
    if command -v aws &>/dev/null; then
        aws_version=$(aws --version 2>&1 | head -1)
        echo "OK ($aws_version)"
    else
        echo "MISSING"
        echo " Install: https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html"
        errors=$((errors + 1))
    fi

    # Check kubectl
    echo -n "Checking kubectl... "
    if command -v kubectl &>/dev/null; then
        # Try the legacy --short output first, then fall back to JSON parsing.
        kubectl_version=$(kubectl version --client --short 2>/dev/null || kubectl version --client -o json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['clientVersion']['gitVersion'])" 2>/dev/null || echo "unknown")
        echo "OK ($kubectl_version)"
    else
        echo "MISSING"
        echo " Install: https://kubernetes.io/docs/tasks/tools/"
        errors=$((errors + 1))
    fi

    # Check helm
    echo -n "Checking helm... "
    if command -v helm &>/dev/null; then
        helm_version=$(helm version --short 2>/dev/null || echo "unknown")
        echo "OK ($helm_version)"
    else
        echo "MISSING"
        echo " Install: https://helm.sh/docs/intro/install/"
        errors=$((errors + 1))
    fi

    # Check eksctl (optional but recommended)
    echo -n "Checking eksctl... "
    if command -v eksctl &>/dev/null; then
        eksctl_version=$(eksctl version 2>/dev/null || echo "unknown")
        echo "OK ($eksctl_version)"
    else
        echo "NOT INSTALLED (optional)"
        echo " Install: https://eksctl.io/installation/"
    fi

    # Check docker
    echo -n "Checking docker... "
    if command -v docker &>/dev/null; then
        echo "OK"
    else
        echo "MISSING"
        echo " Install: https://docs.docker.com/get-docker/"
        errors=$((errors + 1))
    fi

    # Check AWS authentication
    echo ""
    echo -n "Checking AWS authentication... "
    if aws sts get-caller-identity &>/dev/null; then
        identity=$(aws sts get-caller-identity --query 'Arn' --output text)
        echo "OK"
        echo " Identity: $identity"

        if [ -z "$AWS_ACCOUNT_ID" ]; then
            AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || echo "")
        fi
    else
        echo "FAILED"
        echo " Not authenticated. Run 'aws configure' or set AWS credentials"
        errors=$((errors + 1))
    fi

    echo ""
    if [ "$errors" -gt 0 ]; then
        echo "PREREQUISITE CHECK FAILED: $errors error(s)"
        echo "Please install the missing tools and try again."
        return 1
    fi

    echo "All prerequisites satisfied."
    return 0
}

# Install kubectl, helm and eksctl when they are not already on PATH.
#
# Uses Homebrew on macOS when available; otherwise downloads release
# binaries for the detected OS and CPU architecture. The AWS CLI and docker
# are not installed by this function.
install_prerequisites() {
    echo "============================================="
    echo " Installing Prerequisites"
    echo "============================================="
    echo ""

    local os_type arch
    os_type=$(uname -s | tr '[:upper:]' '[:lower:]')

    # Map `uname -m` to the amd64/arm64 names used in the kubectl and eksctl
    # download URLs. Unknown values keep the previous amd64 default.
    case "$(uname -m)" in
        x86_64 | amd64) arch="amd64" ;;
        arm64 | aarch64) arch="arm64" ;;
        *)
            echo "Warning: unrecognized CPU architecture '$(uname -m)'; defaulting to amd64 downloads." >&2
            arch="amd64"
            ;;
    esac

    # Install kubectl if missing
    if ! command -v kubectl &>/dev/null; then
        echo "Installing kubectl..."
        if [ "$os_type" = "darwin" ] && command -v brew &>/dev/null; then
            brew install kubectl
        elif [ "$os_type" = "darwin" ] || [ "$os_type" = "linux" ]; then
            local kubectl_release
            # -f makes curl fail on HTTP errors instead of saving an error page.
            kubectl_release=$(curl -fsSL https://dl.k8s.io/release/stable.txt)
            curl -fLO "https://dl.k8s.io/release/${kubectl_release}/bin/${os_type}/${arch}/kubectl"
            chmod +x kubectl && sudo mv kubectl /usr/local/bin/
        fi
        echo "kubectl installed."
    fi

    # Install helm if missing
    if ! command -v helm &>/dev/null; then
        echo "Installing helm..."
        if [ "$os_type" = "darwin" ] && command -v brew &>/dev/null; then
            brew install helm
        elif [ "$os_type" = "darwin" ] || [ "$os_type" = "linux" ]; then
            curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
        fi
        echo "helm installed."
    fi

    # Install eksctl if missing
    if ! command -v eksctl &>/dev/null; then
        echo "Installing eksctl..."
        if [ "$os_type" = "darwin" ] && command -v brew &>/dev/null; then
            brew tap weaveworks/tap
            brew install weaveworks/tap/eksctl
        elif [ "$os_type" = "darwin" ] || [ "$os_type" = "linux" ]; then
            local eksctl_platform eksctl_tarball
            if [ "$os_type" = "darwin" ]; then
                eksctl_platform="Darwin_${arch}"
            else
                eksctl_platform="Linux_${arch}"
            fi
            eksctl_tarball="eksctl_${eksctl_platform}.tar.gz"
            curl -fsLO "https://github.com/eksctl-io/eksctl/releases/latest/download/${eksctl_tarball}"
            tar -xzf "$eksctl_tarball" -C /tmp && sudo mv /tmp/eksctl /usr/local/bin
            rm -f "$eksctl_tarball"
        fi
        echo "eksctl installed."
    fi

    echo ""
    echo "Prerequisites installation complete."
}
+ aws cloudformation deploy \ + --template-file "$PROJECT_ROOT/deploy/aws/infrastructure.yaml" \ + --stack-name "${ENVIRONMENT_NAME}-infrastructure" \ + --parameter-overrides EnvironmentName="$ENVIRONMENT_NAME" \ + --capabilities CAPABILITY_NAMED_IAM \ + --region "$AWS_REGION" \ + --no-fail-on-empty-changeset + + echo "Infrastructure stack deployed." + echo "" + + # Deploy EKS cluster stack + echo "Deploying EKS cluster stack..." + + case "$ENVIRONMENT" in + production) + NODE_TYPE="m5.large"; MIN=3; MAX=10; DESIRED=3 ;; + staging) + NODE_TYPE="t3.large"; MIN=2; MAX=5; DESIRED=2 ;; + *) + NODE_TYPE="t3.medium"; MIN=2; MAX=5; DESIRED=2 ;; + esac + + aws cloudformation deploy \ + --template-file "$PROJECT_ROOT/deploy/aws/eks-cluster.yaml" \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --parameter-overrides \ + EnvironmentName="$ENVIRONMENT_NAME" \ + Environment="$ENVIRONMENT" \ + ClusterName="$CLUSTER_NAME" \ + NodeInstanceType="$NODE_TYPE" \ + MinSize="$MIN" \ + MaxSize="$MAX" \ + DesiredSize="$DESIRED" \ + --capabilities CAPABILITY_NAMED_IAM \ + --region "$AWS_REGION" \ + --no-fail-on-empty-changeset + + echo "EKS cluster stack deployed." + echo "" + + # Configure kubeconfig + echo "Configuring kubeconfig..." + aws eks update-kubeconfig \ + --name "$CLUSTER_NAME" \ + --region "$AWS_REGION" \ + --alias "$CLUSTER_NAME" + + echo "" + echo "EKS cluster is ready." 
# ============================================================
# Helm add-ons
# ============================================================

# Print one output value of a CloudFormation stack.
# Arguments:
#   $1 - stack name
#   $2 - output key
# Returns:
#   0 and prints the value; 1 (message on stderr) when the value is empty
#   or the CLI printed "None".
_stack_output() {
    local stack="$1" key="$2" value
    value=$(aws cloudformation describe-stacks \
        --stack-name "$stack" \
        --query "Stacks[0].Outputs[?OutputKey=='${key}'].OutputValue" \
        --output text --region "$AWS_REGION") || value=""
    if [ -z "$value" ] || [ "$value" = "None" ]; then
        echo "ERROR: output '$key' not found on CloudFormation stack '$stack'" >&2
        return 1
    fi
    printf '%s\n' "$value"
}

# Install the cluster add-ons with Helm: AWS Load Balancer Controller,
# Nginx Ingress Controller, External Secrets Operator and Metrics Server.
#
# Reads: CLUSTER_NAME, AWS_REGION, ENVIRONMENT_NAME.
# Sets:  VPC_ID, LB_ROLE_ARN (from the infrastructure / EKS stack outputs).
# Returns non-zero before touching the cluster when either stack output is
# missing, instead of installing a controller with an empty VPC or role ARN.
install_addons() {
    echo "============================================="
    echo " Installing Helm Add-ons"
    echo "============================================="
    echo ""

    # Ensure kubeconfig is set. Failure is tolerated (the context may already
    # be configured) but reported, because Helm will use the current context.
    if ! aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION" 2>/dev/null; then
        echo "Warning: could not refresh kubeconfig for '$CLUSTER_NAME'; using the current kubectl context." >&2
    fi

    # Get VPC ID and LB controller role ARN
    VPC_ID=$(_stack_output "${ENVIRONMENT_NAME}-infrastructure" "VPCId") || return 1
    LB_ROLE_ARN=$(_stack_output "${ENVIRONMENT_NAME}-eks-cluster" "AWSLoadBalancerControllerRoleArn") || return 1

    # --- AWS Load Balancer Controller ---
    echo "1. Installing AWS Load Balancer Controller..."
    helm repo add eks https://aws.github.io/eks-charts 2>/dev/null || true
    helm repo update eks
    helm upgrade --install aws-load-balancer-controller eks/aws-load-balancer-controller \
        --namespace kube-system \
        --set clusterName="$CLUSTER_NAME" \
        --set serviceAccount.create=true \
        --set serviceAccount.name=aws-load-balancer-controller \
        --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"="$LB_ROLE_ARN" \
        --set region="$AWS_REGION" \
        --set vpcId="$VPC_ID" \
        --wait
    echo " AWS Load Balancer Controller installed."
    echo ""

    # --- Nginx Ingress Controller ---
    echo "2. Installing Nginx Ingress Controller..."
    helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true
    helm repo update ingress-nginx
    # --create-namespace replaces the separate best-effort
    # `kubectl create namespace` call.
    helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
        --namespace ingress-nginx \
        --create-namespace \
        --set controller.service.type=LoadBalancer \
        --set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-type"=nlb \
        --set controller.service.annotations."service\.beta\.kubernetes\.io/aws-load-balancer-scheme"=internet-facing \
        --set controller.metrics.enabled=true \
        --wait
    echo " Nginx Ingress Controller installed."
    echo ""

    # --- External Secrets Operator ---
    echo "3. Installing External Secrets Operator..."
    helm repo add external-secrets https://charts.external-secrets.io 2>/dev/null || true
    helm repo update external-secrets
    helm upgrade --install external-secrets external-secrets/external-secrets \
        --namespace external-secrets \
        --create-namespace \
        --set installCRDs=true \
        --wait
    echo " External Secrets Operator installed."
    echo ""

    # --- Metrics Server ---
    echo "4. Installing Metrics Server (for HPA)..."
    helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ 2>/dev/null || true
    helm repo update metrics-server
    # The --set value is quoted so the shell never treats `args[0]` as a glob.
    helm upgrade --install metrics-server metrics-server/metrics-server \
        --namespace kube-system \
        --set 'args[0]=--kubelet-preferred-address-types=InternalIP' \
        --wait
    echo " Metrics Server installed."
    echo ""

    echo "All Helm add-ons installed successfully."
}
+} + +# ============================================================ +# IRSA Setup +# ============================================================ + +setup_irsa() { + echo "=============================================" + echo " Setting up IRSA (IAM Roles for Service Accounts)" + echo "=============================================" + echo "" + + # Ensure kubeconfig is set + aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION" 2>/dev/null || true + + POD_ROLE_ARN=$(aws cloudformation describe-stacks \ + --stack-name "${ENVIRONMENT_NAME}-eks-cluster" \ + --query "Stacks[0].Outputs[?OutputKey=='EKSPodRoleArn'].OutputValue" \ + --output text --region "$AWS_REGION") + + echo "Pod Role ARN: $POD_ROLE_ARN" + echo "" + + # Create namespace + kubectl create namespace aex 2>/dev/null || true + + # Create annotated service account + kubectl apply -f - </dev/null; then + echo "EXISTS" + else + echo "WILL CREATE" + fi + + echo -n " EKS cluster stack... " + if aws cloudformation describe-stacks --stack-name "${ENVIRONMENT_NAME}-eks-cluster" --region "$AWS_REGION" &>/dev/null; then + echo "EXISTS" + ((warnings++)) + else + echo "WILL CREATE" + fi + + echo -n " EKS cluster '$CLUSTER_NAME'... " + if aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" &>/dev/null; then + STATUS=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" --query 'cluster.status' --output text) + echo "EXISTS (status: $STATUS)" + ((warnings++)) + else + echo "WILL CREATE" + fi + + echo "" + echo "=============================================" + if [ $warnings -gt 0 ]; then + echo "VALIDATION PASSED with $warnings warning(s)" + echo "Some resources already exist and may be updated." + else + echo "VALIDATION PASSED" + echo "Ready to create EKS resources." 
+ fi + echo "" + + # Estimate costs + echo "Estimated costs (approximate):" + echo " EKS cluster: \$0.10/hour" + echo " t3.medium nodes x2: \$0.084/hour" + echo " NAT Gateway: \$0.045/hour" + echo " Load Balancer: \$0.025/hour" + echo " Total (dev): ~\$0.254/hour (~\$183/month)" + echo "" +} + +# ============================================================ +# Print summary +# ============================================================ + +print_summary() { + echo "" + echo "========================================" + echo " EKS Setup Complete!" + echo "========================================" + echo "" + echo "Resources created:" + echo " - Infrastructure stack (VPC, ECR, Secrets Manager)" + echo " - EKS cluster: $CLUSTER_NAME" + echo " - Managed node group with auto-scaling" + echo " - AWS Load Balancer Controller" + echo " - Nginx Ingress Controller" + echo " - External Secrets Operator" + echo " - Metrics Server" + echo " - Cluster Autoscaler" + echo " - IRSA-annotated service account" + echo "" + echo "Kubeconfig:" + echo " aws eks update-kubeconfig --name $CLUSTER_NAME --region $AWS_REGION" + echo "" + echo "Next steps:" + echo " 1. Deploy services: deploy/aws/deploy-eks.sh --region $AWS_REGION --env $ENVIRONMENT" + echo " 2. Update secrets in AWS Secrets Manager" + echo " 3. 
# ============================================================
# Main
# ============================================================

# Run the prerequisite check and, when it fails, install the missing tools.
# In dry-run mode nothing is installed: the script only reports what it
# would do, honouring the "--dry-run ... without making changes" contract.
_ensure_prerequisites() {
    if check_prerequisites; then
        return 0
    fi
    if [ "$DRY_RUN" = true ]; then
        echo "[DRY-RUN] Would install missing prerequisites (kubectl, helm, eksctl)"
        return 0
    fi
    install_prerequisites
}

parse_options "$@"

# The command is the first argument, unless that argument is --dry-run,
# in which case the command is the second one. Defaults to "all".
CMD="${1:-all}"
if [ "$CMD" = "--dry-run" ]; then
    CMD="${2:-all}"
fi

case "$CMD" in
    -h|--help|help)
        usage
        exit 0
        ;;
    validate|--validate)
        validate
        ;;
    prerequisites|prereqs)
        _ensure_prerequisites
        ;;
    cluster)
        check_prerequisites || { echo "Fix prerequisites first."; exit 1; }
        if [ "$DRY_RUN" = true ]; then
            echo "[DRY-RUN] Would create EKS cluster: $CLUSTER_NAME"
        else
            create_cluster
        fi
        ;;
    addons)
        if [ "$DRY_RUN" = true ]; then
            echo "[DRY-RUN] Would install Helm add-ons"
        else
            install_addons
        fi
        ;;
    autoscaler)
        if [ "$DRY_RUN" = true ]; then
            echo "[DRY-RUN] Would configure cluster autoscaler"
        else
            configure_autoscaler
        fi
        ;;
    irsa)
        if [ "$DRY_RUN" = true ]; then
            echo "[DRY-RUN] Would set up IRSA"
        else
            setup_irsa
        fi
        ;;
    all)
        _ensure_prerequisites

        if [ "$DRY_RUN" = true ]; then
            echo ""
            echo "============================================="
            echo " DRY-RUN MODE - No changes will be made"
            echo "============================================="
            echo ""
            echo "Region: $AWS_REGION"
            echo "Account: $AWS_ACCOUNT_ID"
            echo "Cluster: $CLUSTER_NAME"
            echo "Environment: $ENVIRONMENT"
            echo ""
            echo "The following resources would be created:"
            echo " - Infrastructure stack (VPC, ECR, Secrets)"
            echo " - EKS cluster with Kubernetes 1.29"
            echo " - Managed node group (t3.medium, 2-5 nodes)"
            echo " - OIDC provider for IRSA"
            echo " - AWS Load Balancer Controller (Helm)"
            echo " - Nginx Ingress Controller (Helm)"
            echo " - External Secrets Operator (Helm)"
            echo " - Metrics Server (Helm)"
            echo " - Cluster Autoscaler (Helm)"
            echo " - IRSA service account in 'aex' namespace"
            echo ""
            echo "Run without --dry-run to create these resources."
        else
            # Full provisioning, in dependency order.
            echo ""
            create_cluster
            echo ""
            install_addons
            echo ""
            configure_autoscaler
            echo ""
            setup_irsa
            echo ""
            print_summary
        fi
        ;;
    *)
        echo "Unknown command: $CMD"
        usage
        exit 1
        ;;
esac
" + if command -v helm &> /dev/null; then + local helm_ver + helm_ver=$(helm version --short 2>/dev/null | head -1) + echo "OK ($helm_ver)" + else + echo "NOT FOUND" + echo " Install: https://helm.sh/docs/intro/install/" + echo " Or: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash" + ((errors++)) + fi + + # docker (for building images) + echo -n " docker... " + if command -v docker &> /dev/null; then + local docker_ver + docker_ver=$(docker --version 2>/dev/null | head -1) + echo "OK ($docker_ver)" + else + echo "NOT FOUND (optional, needed for local builds)" + fi + + # gcloud auth + echo -n " gcloud authentication... " + if gcloud auth print-identity-token &> /dev/null; then + local account + account=$(gcloud config get-value account 2>/dev/null) + echo "OK ($account)" + else + echo "NOT AUTHENTICATED" + echo " Run: gcloud auth login" + ((errors++)) + fi + + # Project access + echo -n " GCP project ($PROJECT)... " + if gcloud projects describe "$PROJECT" &> /dev/null; then + echo "OK" + else + echo "NOT ACCESSIBLE" + ((errors++)) + fi + + # Billing + echo -n " Billing enabled... " + local billing + billing=$(gcloud billing projects describe "$PROJECT" --format='value(billingEnabled)' 2>/dev/null || echo "false") + if [[ "$billing" == "True" ]]; then + echo "OK" + else + echo "WARNING (billing may not be enabled)" + fi + + echo "" + if [[ $errors -gt 0 ]]; then + echo "FAILED: $errors prerequisite(s) missing. Please fix and retry." + exit 1 + fi + echo "All prerequisites satisfied." 
+} + +# ============================================================ +# Step 2: Enable GCP APIs +# ============================================================ + +enable_apis() { + echo "" + echo "================================================================" + echo " Step 2: Enabling GCP APIs" + echo "================================================================" + echo "" + + local apis=( + "container.googleapis.com:Kubernetes Engine" + "compute.googleapis.com:Compute Engine" + "artifactregistry.googleapis.com:Artifact Registry" + "secretmanager.googleapis.com:Secret Manager" + "iam.googleapis.com:IAM" + "iamcredentials.googleapis.com:IAM Credentials" + "cloudresourcemanager.googleapis.com:Resource Manager" + "certificatemanager.googleapis.com:Certificate Manager" + ) + + for api_info in "${apis[@]}"; do + local api="${api_info%%:*}" + local name="${api_info##*:}" + echo -n " $name ($api)... " + if [[ "$DRY_RUN" == "true" ]]; then + echo "WOULD ENABLE" + else + gcloud services enable "$api" --project="$PROJECT" --quiet + echo "ENABLED" + fi + done +} + +# ============================================================ +# Step 3: Create Artifact Registry (reuse existing) +# ============================================================ + +ensure_artifact_registry() { + echo "" + echo "================================================================" + echo " Step 3: Artifact Registry" + echo "================================================================" + echo "" + + echo -n " Repository 'aex' in $REGION... 
" + if gcloud artifacts repositories describe aex --location="$REGION" --project="$PROJECT" &> /dev/null; then + echo "EXISTS (reusing)" + elif [[ "$DRY_RUN" == "true" ]]; then + echo "WOULD CREATE" + else + gcloud artifacts repositories create aex \ + --repository-format=docker \ + --location="$REGION" \ + --project="$PROJECT" \ + --description="Agent Exchange Docker images" + echo "CREATED" + fi +} + +# ============================================================ +# Step 4: Create GKE Cluster +# ============================================================ + +create_gke_cluster() { + echo "" + echo "================================================================" + echo " Step 4: Creating GKE Cluster ($MODE mode)" + echo "================================================================" + echo "" + + echo " Name: $CLUSTER_NAME" + echo " Region: $REGION" + echo " Mode: $MODE" + echo "" + + if gcloud container clusters describe "$CLUSTER_NAME" \ + --region="$REGION" --project="$PROJECT" &> /dev/null; then + echo " Cluster already exists. Skipping creation." + return 0 + fi + + if [[ "$DRY_RUN" == "true" ]]; then + echo " WOULD CREATE cluster" + return 0 + fi + + if [[ "$MODE" == "autopilot" ]]; then + echo " Creating Autopilot cluster..." + gcloud container clusters create-auto "$CLUSTER_NAME" \ + --region="$REGION" \ + --project="$PROJECT" \ + --release-channel=regular \ + --network=default \ + --subnetwork=default \ + --quiet + else + echo " Creating Standard cluster with node autoscaling..." 
+ gcloud container clusters create "$CLUSTER_NAME" \ + --region="$REGION" \ + --project="$PROJECT" \ + --machine-type=e2-standard-4 \ + --num-nodes=1 \ + --min-nodes=2 \ + --max-nodes=5 \ + --enable-autoscaling \ + --enable-autorepair \ + --enable-autoupgrade \ + --release-channel=regular \ + --workload-pool="$PROJECT.svc.id.goog" \ + --network=default \ + --subnetwork=default \ + --quiet + fi + + echo " Cluster created" +} + +# ============================================================ +# Step 5: Configure kubectl +# ============================================================ + +configure_kubectl() { + echo "" + echo "================================================================" + echo " Step 5: Configuring kubectl" + echo "================================================================" + echo "" + + if [[ "$DRY_RUN" == "true" ]]; then + echo " WOULD configure kubectl context" + return 0 + fi + + gcloud container clusters get-credentials "$CLUSTER_NAME" \ + --region="$REGION" \ + --project="$PROJECT" + + echo " Context: $(kubectl config current-context)" + + # Create namespace + if kubectl get namespace "$NAMESPACE" &> /dev/null; then + echo " Namespace '$NAMESPACE' already exists" + else + kubectl create namespace "$NAMESPACE" + echo " Namespace '$NAMESPACE' created" + fi +} + +# ============================================================ +# Step 6: Install Helm Charts +# ============================================================ + +install_helm_charts() { + echo "" + echo "================================================================" + echo " Step 6: Installing Helm Charts" + echo "================================================================" + echo "" + + if [[ "$DRY_RUN" == "true" ]]; then + echo " WOULD install: ingress-nginx, cert-manager, external-secrets" + if [[ "$MODE" == "standard" ]]; then + echo " WOULD install: metrics-server (standard mode)" + fi + return 0 + fi + + # Add Helm repos + echo " Adding Helm repositories..." 
+ helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx 2>/dev/null || true + helm repo add jetstack https://charts.jetstack.io 2>/dev/null || true + helm repo add external-secrets https://charts.external-secrets.io 2>/dev/null || true + if [[ "$MODE" == "standard" ]]; then + helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ 2>/dev/null || true + fi + helm repo update + + # Nginx Ingress Controller + echo "" + echo " Installing Nginx Ingress Controller..." + helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx \ + --create-namespace \ + --set controller.service.type=LoadBalancer \ + --set controller.metrics.enabled=true \ + --set controller.podAnnotations."prometheus\.io/scrape"=true \ + --set controller.podAnnotations."prometheus\.io/port"=10254 \ + --wait \ + --timeout 300s + echo " Nginx Ingress Controller installed" + + # cert-manager + echo "" + echo " Installing cert-manager..." + helm upgrade --install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --set crds.enabled=true \ + --wait \ + --timeout 300s + echo " cert-manager installed" + + # External Secrets Operator + echo "" + echo " Installing External Secrets Operator..." + helm upgrade --install external-secrets external-secrets/external-secrets \ + --namespace external-secrets \ + --create-namespace \ + --set installCRDs=true \ + --wait \ + --timeout 300s + echo " External Secrets Operator installed" + + # metrics-server (Standard mode only, Autopilot has it built in) + if [[ "$MODE" == "standard" ]]; then + echo "" + echo " Installing metrics-server (Standard mode)..." 
# ============================================================
# Step 7: Set up Workload Identity
# ============================================================

# Create the GCP service account "aex-gke", grant it project roles, create
# and annotate the Kubernetes service account "aex-workload" in $NAMESPACE,
# and bind the two through Workload Identity. If the "aex-github-actions"
# service account exists, it is also granted GKE roles.
#
# IAM bindings stay best-effort (a failure does not abort the script), but
# every failed binding is reported on stderr and the success messages are
# printed only when the corresponding bindings actually succeeded.
# With DRY_RUN=true nothing is changed.
setup_workload_identity() {
    echo ""
    echo "================================================================"
    echo " Step 7: Setting Up Workload Identity"
    echo "================================================================"
    echo ""

    local sa_name="aex-gke"
    local sa_email="$sa_name@$PROJECT.iam.gserviceaccount.com"
    local k8s_sa="aex-workload"
    local role failed

    if [[ "$DRY_RUN" == "true" ]]; then
        echo " WOULD create GCP SA: $sa_email"
        echo " WOULD create K8s SA: $k8s_sa"
        echo " WOULD bind Workload Identity"
        return 0
    fi

    # Create GCP service account
    if gcloud iam service-accounts describe "$sa_email" --project="$PROJECT" &> /dev/null; then
        echo " GCP SA '$sa_name' already exists"
    else
        gcloud iam service-accounts create "$sa_name" \
            --display-name="Agent Exchange GKE Workload" \
            --project="$PROJECT"
        echo " Created GCP SA: $sa_email"
    fi

    # Grant roles
    local roles=(
        "roles/secretmanager.secretAccessor"
        "roles/datastore.user"
        "roles/logging.logWriter"
        "roles/cloudtrace.agent"
        "roles/monitoring.metricWriter"
    )

    failed=0
    for role in "${roles[@]}"; do
        if ! gcloud projects add-iam-policy-binding "$PROJECT" \
            --member="serviceAccount:$sa_email" \
            --role="$role" \
            --quiet 2>/dev/null; then
            echo " Warning: failed to grant $role to $sa_email" >&2
            failed=$((failed + 1))
        fi
    done
    if [[ "$failed" -eq 0 ]]; then
        echo " IAM roles granted"
    else
        echo " IAM role grants incomplete: $failed of ${#roles[@]} failed"
    fi

    # Create K8s service account
    if kubectl get serviceaccount "$k8s_sa" -n "$NAMESPACE" &> /dev/null; then
        echo " K8s SA '$k8s_sa' already exists"
    else
        kubectl create serviceaccount "$k8s_sa" -n "$NAMESPACE"
        echo " Created K8s SA: $k8s_sa"
    fi

    # Annotate K8s SA
    kubectl annotate serviceaccount "$k8s_sa" \
        --namespace="$NAMESPACE" \
        "iam.gke.io/gcp-service-account=$sa_email" \
        --overwrite

    # Bind Workload Identity
    if gcloud iam service-accounts add-iam-policy-binding "$sa_email" \
        --project="$PROJECT" \
        --role="roles/iam.workloadIdentityUser" \
        --member="serviceAccount:$PROJECT.svc.id.goog[$NAMESPACE/$k8s_sa]" \
        --quiet 2>/dev/null; then
        echo " Workload Identity configured"
    else
        echo " Warning: failed to bind roles/iam.workloadIdentityUser for $NAMESPACE/$k8s_sa" >&2
        echo " Workload Identity binding incomplete"
    fi

    # Also set up GitHub Actions service account for GKE access
    local gh_sa_name="aex-github-actions"
    local gh_sa_email="$gh_sa_name@$PROJECT.iam.gserviceaccount.com"

    if gcloud iam service-accounts describe "$gh_sa_email" --project="$PROJECT" &> /dev/null; then
        echo " GitHub Actions SA already exists"

        # Grant additional GKE-specific roles
        local gke_roles=(
            "roles/container.developer"
            "roles/container.clusterViewer"
        )
        failed=0
        for role in "${gke_roles[@]}"; do
            if ! gcloud projects add-iam-policy-binding "$PROJECT" \
                --member="serviceAccount:$gh_sa_email" \
                --role="$role" \
                --quiet 2>/dev/null; then
                echo " Warning: failed to grant $role to $gh_sa_email" >&2
                failed=$((failed + 1))
            fi
        done
        if [[ "$failed" -eq 0 ]]; then
            echo " GKE roles granted to GitHub Actions SA"
        else
            echo " GKE role grants to GitHub Actions SA incomplete: $failed of ${#gke_roles[@]} failed"
        fi
    else
        echo " Warning: GitHub Actions SA not found. Run setup-gcp.sh first."
    fi
}
Apply placeholder:" + echo " kubectl apply -f deploy/k8s/base/secrets.yaml" + fi +} + +# ============================================================ +# Step 9: Output Summary +# ============================================================ + +print_summary() { + echo "" + echo "================================================================" + echo " GKE Setup Complete" + echo "================================================================" + echo "" + echo " Project: $PROJECT" + echo " Cluster: $CLUSTER_NAME ($MODE mode)" + echo " Region: $REGION" + echo " Namespace: $NAMESPACE" + + if [[ "$DRY_RUN" != "true" ]]; then + echo " Context: $(kubectl config current-context)" + echo "" + echo " Cluster nodes:" + kubectl get nodes -o wide 2>/dev/null || echo " (Autopilot scales on demand)" + echo "" + echo " Installed Helm charts:" + helm list -A 2>/dev/null | head -20 + fi + + echo "" + echo "Next steps:" + echo "" + echo " 1. Deploy AEX to GKE:" + echo " ./deploy/gcp/deploy-gke.sh --project-id $PROJECT --region $REGION" + echo "" + echo " 2. Or apply K8s manifests directly:" + echo " kubectl apply -k deploy/k8s/base/" + echo "" + echo " 3. Check cluster status:" + echo " kubectl get pods -n $NAMESPACE" + echo " kubectl get svc -n $NAMESPACE" + echo "" + echo " 4. Tear down when done:" + echo " ./hack/deploy/teardown-gke.sh" + echo "" + echo " 5. 
Add these GitHub secrets for CI/CD:" + echo " GKE_CLUSTER_NAME: $CLUSTER_NAME" + echo " GKE_CLUSTER_REGION: $REGION" + echo "" +} + +# ============================================================ +# Main +# ============================================================ + +echo "" +echo "================================================================" +echo " Agent Exchange - GKE Setup" +echo "================================================================" +echo "" +echo " Project: $PROJECT" +echo " Region: $REGION" +echo " Cluster: $CLUSTER_NAME" +echo " Mode: $MODE" +echo "" + +if [[ "$DRY_RUN" == "true" ]]; then + echo " *** DRY-RUN MODE - No changes will be made ***" + echo "" +fi + +check_prerequisites +enable_apis +ensure_artifact_registry +create_gke_cluster +configure_kubectl +install_helm_charts +setup_workload_identity +create_namespace_and_secrets +print_summary diff --git a/hack/deploy/teardown-eks.sh b/hack/deploy/teardown-eks.sh new file mode 100644 index 0000000..cef00de --- /dev/null +++ b/hack/deploy/teardown-eks.sh @@ -0,0 +1,419 @@ +#!/bin/bash +set -e + +# Agent Exchange - EKS Teardown Script +# This script removes all EKS resources created for Agent Exchange +# WARNING: This is DESTRUCTIVE and will delete all data! 
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Configuration (every value except DELETE_INFRA can be overridden from the environment)
+AWS_REGION="${AWS_REGION:-us-east-1}"
+AWS_ACCOUNT_ID="${AWS_ACCOUNT_ID:-}"
+CLUSTER_NAME="${CLUSTER_NAME:-aex-eks}"
+ENVIRONMENT_NAME="${ENVIRONMENT_NAME:-aex}"
+DELETE_INFRA=false
+
+# OIDC issuer of $CLUSTER_NAME with the https:// scheme stripped. It is captured
+# before the cluster is deleted so cleanup_oidc can match exactly one IAM
+# provider instead of every EKS provider in the region.
+CLUSTER_OIDC_ISSUER=""
+
+# Print command-line help.
+usage() {
+    echo "Agent Exchange - EKS Teardown"
+    echo ""
+    echo "Usage: $0 [command] [options]"
+    echo ""
+    echo "Commands:"
+    echo " all Delete all EKS resources (default)"
+    echo " k8s Delete Kubernetes resources only"
+    echo " helm Uninstall Helm charts only"
+    echo " cluster Delete EKS cluster and node groups only"
+    echo " stacks Delete CloudFormation stacks only"
+    echo ""
+    echo "Options:"
+    echo " --include-infra Also delete infrastructure stack (VPC, ECR, secrets)"
+    echo " -h, --help Show this help and exit"
+    echo ""
+    echo "Environment variables:"
+    echo " AWS_REGION AWS region (default: us-east-1)"
+    echo " AWS_ACCOUNT_ID AWS account ID (auto-detected)"
+    echo " CLUSTER_NAME EKS cluster name (default: aex-eks)"
+    echo " ENVIRONMENT_NAME Environment name prefix (default: aex)"
+    echo ""
+    echo "WARNING: This will permanently delete all EKS resources and data!"
+}
+
+# Verify the AWS CLI exists and is authenticated, then resolve AWS_ACCOUNT_ID.
+# Exits with status 1 on the first failed check.
+check_prerequisites() {
+    echo "Checking prerequisites..."
+
+    # The CLI check must come first: every later check shells out to `aws`,
+    # and without it a missing CLI is misreported as an account-ID failure.
+    if ! command -v aws &>/dev/null; then
+        echo "Error: AWS CLI is not installed"
+        exit 1
+    fi
+
+    if ! aws sts get-caller-identity &>/dev/null; then
+        echo "Error: Not authenticated with AWS"
+        exit 1
+    fi
+
+    if [ -z "$AWS_ACCOUNT_ID" ]; then
+        AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text 2>/dev/null || true)
+        if [ -z "$AWS_ACCOUNT_ID" ]; then
+            echo "Error: Could not determine AWS Account ID"
+            exit 1
+        fi
+    fi
+
+    echo "Prerequisites OK"
+}
+
+# Show what is about to be destroyed and require the literal word DELETE.
+# Exits with status 1 on any other answer.
+confirm_deletion() {
+    local confirmation
+
+    echo ""
+    echo "================================================================"
+    echo " WARNING"
+    echo " This will PERMANENTLY DELETE EKS resources"
+    echo " in AWS region: $AWS_REGION"
+    echo " Account: $AWS_ACCOUNT_ID"
+    echo " Cluster: $CLUSTER_NAME"
+    if [ "$DELETE_INFRA" = true ]; then
+        echo ""
+        echo " ** ALSO DELETING INFRASTRUCTURE (VPC, ECR, Secrets) **"
+    fi
+    echo ""
+    echo " This action CANNOT be undone!"
+    echo "================================================================"
+    echo ""
+    read -r -p "Type 'DELETE' to confirm: " confirmation
+
+    if [ "$confirmation" != "DELETE" ]; then
+        echo "Aborted."
+        exit 1
+    fi
+}
+
+# ============================================================
+# Delete Kubernetes resources
+# ============================================================
+
+# Best-effort removal of the 'aex' namespace and its ingresses.
+# Individual kubectl failures are ignored so teardown can continue.
+delete_k8s_resources() {
+    echo "Deleting Kubernetes resources..."
+
+    # Try to configure kubectl
+    if aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" &>/dev/null; then
+        aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION" 2>/dev/null || true
+
+        # Delete ingresses BEFORE the namespace: once the namespace is gone
+        # there is nothing left for `delete ingress -n aex` to act on, and the
+        # load balancers behind them are only released by this explicit delete.
+        echo " Deleting ingress resources..."
+        kubectl delete ingress --all -n aex 2>/dev/null || true
+
+        # Wait for load balancers to be released
+        echo " Waiting for load balancers to be released..."
+        sleep 15
+
+        # Delete the aex namespace (cascading delete of all resources)
+        echo " Deleting namespace 'aex'..."
+        kubectl delete namespace aex --ignore-not-found --timeout=120s 2>/dev/null || true
+
+        echo " Kubernetes resources deleted."
+    else
+        echo " Cluster not found or not accessible, skipping K8s cleanup."
+    fi
+}
+
+# ============================================================
+# Uninstall Helm charts
+# ============================================================
+
+# Best-effort uninstall of the add-on charts and the namespaces they created.
+uninstall_helm_charts() {
+    echo "Uninstalling Helm charts..."
+
+    if ! aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" &>/dev/null; then
+        echo " Cluster not found, skipping Helm cleanup."
+        return
+    fi
+
+    aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION" 2>/dev/null || true
+
+    # Entries are "release:namespace".
+    local charts=(
+        "cluster-autoscaler:kube-system"
+        "metrics-server:kube-system"
+        "external-secrets:external-secrets"
+        "ingress-nginx:ingress-nginx"
+        "aws-load-balancer-controller:kube-system"
+    )
+
+    local chart_ns chart namespace
+    for chart_ns in "${charts[@]}"; do
+        IFS=':' read -r chart namespace <<< "$chart_ns"
+        echo " Uninstalling $chart from $namespace..."
+        helm uninstall "$chart" -n "$namespace" 2>/dev/null || true
+    done
+
+    # Clean up namespaces created by add-ons
+    echo " Deleting add-on namespaces..."
+    kubectl delete namespace ingress-nginx --ignore-not-found 2>/dev/null || true
+    kubectl delete namespace external-secrets --ignore-not-found 2>/dev/null || true
+
+    # Wait for any load balancers to be released
+    echo " Waiting for resources to be released..."
+    sleep 20
+
+    echo " Helm charts uninstalled."
+}
+
+# ============================================================
+# Delete EKS cluster
+# ============================================================
+
+# Delete node groups, Fargate profiles, then the cluster itself, waiting for
+# each stage. Individual AWS CLI failures are ignored (best effort).
+delete_eks_cluster() {
+    echo "Deleting EKS cluster..."
+
+    # Check if cluster exists
+    if ! aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" &>/dev/null; then
+        echo " Cluster '$CLUSTER_NAME' not found, skipping."
+        return
+    fi
+
+    local node_groups fargate_profiles ng fp
+
+    # Delete node groups first
+    echo " Listing node groups..."
+    node_groups=$(aws eks list-nodegroups --cluster-name "$CLUSTER_NAME" --region "$AWS_REGION" \
+        --query 'nodegroups[*]' --output text 2>/dev/null || echo "")
+
+    # Unquoted on purpose: --output text yields whitespace-separated names.
+    for ng in $node_groups; do
+        echo " Deleting node group: $ng..."
+        aws eks delete-nodegroup \
+            --cluster-name "$CLUSTER_NAME" \
+            --nodegroup-name "$ng" \
+            --region "$AWS_REGION" 2>/dev/null || true
+    done
+
+    # Wait for node groups to be deleted
+    if [ -n "$node_groups" ]; then
+        echo " Waiting for node groups to be deleted..."
+        for ng in $node_groups; do
+            aws eks wait nodegroup-deleted \
+                --cluster-name "$CLUSTER_NAME" \
+                --nodegroup-name "$ng" \
+                --region "$AWS_REGION" 2>/dev/null || true
+        done
+    fi
+
+    # Delete Fargate profiles if any
+    fargate_profiles=$(aws eks list-fargate-profiles --cluster-name "$CLUSTER_NAME" --region "$AWS_REGION" \
+        --query 'fargateProfileNames[*]' --output text 2>/dev/null || echo "")
+
+    for fp in $fargate_profiles; do
+        echo " Deleting Fargate profile: $fp..."
+        aws eks delete-fargate-profile \
+            --cluster-name "$CLUSTER_NAME" \
+            --fargate-profile-name "$fp" \
+            --region "$AWS_REGION" 2>/dev/null || true
+        aws eks wait fargate-profile-deleted \
+            --cluster-name "$CLUSTER_NAME" \
+            --fargate-profile-name "$fp" \
+            --region "$AWS_REGION" 2>/dev/null || true
+    done
+
+    # Delete the cluster
+    echo " Deleting EKS cluster: $CLUSTER_NAME..."
+    aws eks delete-cluster \
+        --name "$CLUSTER_NAME" \
+        --region "$AWS_REGION" 2>/dev/null || true
+
+    echo " Waiting for cluster deletion..."
+    aws eks wait cluster-deleted \
+        --name "$CLUSTER_NAME" \
+        --region "$AWS_REGION" 2>/dev/null || true
+
+    echo " EKS cluster deleted."
+}
+
+# ============================================================
+# Delete CloudFormation stacks
+# ============================================================
+
+# Delete the EKS cluster stack and, when --include-infra was given, the
+# services and infrastructure stacks as well.
+delete_cloudformation_stacks() {
+    echo "Deleting CloudFormation stacks..."
+
+    local stack_name services_stack infra_stack
+
+    # Delete EKS cluster stack
+    stack_name="${ENVIRONMENT_NAME}-eks-cluster"
+    echo " Checking stack: $stack_name..."
+    if aws cloudformation describe-stacks --stack-name "$stack_name" --region "$AWS_REGION" &>/dev/null; then
+        echo " Deleting $stack_name..."
+        aws cloudformation delete-stack \
+            --stack-name "$stack_name" \
+            --region "$AWS_REGION"
+
+        echo " Waiting for stack deletion..."
+        aws cloudformation wait stack-delete-complete \
+            --stack-name "$stack_name" \
+            --region "$AWS_REGION" 2>/dev/null || {
+            echo " Warning: Stack deletion may not be complete. Check AWS console."
+        }
+    else
+        echo " Stack $stack_name not found, skipping."
+    fi
+
+    # Optionally delete infrastructure stack
+    if [ "$DELETE_INFRA" = true ]; then
+        # Delete services stack first (ECS)
+        services_stack="${ENVIRONMENT_NAME}-services"
+        if aws cloudformation describe-stacks --stack-name "$services_stack" --region "$AWS_REGION" &>/dev/null; then
+            echo " Deleting $services_stack..."
+            aws cloudformation delete-stack --stack-name "$services_stack" --region "$AWS_REGION"
+            aws cloudformation wait stack-delete-complete --stack-name "$services_stack" --region "$AWS_REGION" 2>/dev/null || true
+        fi
+
+        infra_stack="${ENVIRONMENT_NAME}-infrastructure"
+        echo " Checking stack: $infra_stack..."
+        if aws cloudformation describe-stacks --stack-name "$infra_stack" --region "$AWS_REGION" &>/dev/null; then
+            echo " Deleting $infra_stack..."
+            aws cloudformation delete-stack \
+                --stack-name "$infra_stack" \
+                --region "$AWS_REGION"
+
+            echo " Waiting for stack deletion..."
+            aws cloudformation wait stack-delete-complete \
+                --stack-name "$infra_stack" \
+                --region "$AWS_REGION" 2>/dev/null || {
+                echo " Warning: Infrastructure stack deletion may not be complete."
+                echo " ECR repositories with images may need manual deletion."
+            }
+        else
+            echo " Stack $infra_stack not found, skipping."
+        fi
+    fi
+
+    echo " CloudFormation stacks deleted."
+}
+
+# ============================================================
+# Clean up OIDC provider
+# ============================================================
+
+# Record the cluster's OIDC issuer in CLUSTER_OIDC_ISSUER while the cluster
+# still exists. Leaves the variable unchanged when the lookup fails.
+capture_oidc_issuer() {
+    local issuer
+    issuer=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$AWS_REGION" \
+        --query 'cluster.identity.oidc.issuer' --output text 2>/dev/null || echo "")
+
+    # `--output text` prints the literal "None" for a missing field.
+    if [ -n "$issuer" ] && [ "$issuer" != "None" ]; then
+        CLUSTER_OIDC_ISSUER="${issuer#https://}"
+    fi
+}
+
+# Delete the IAM OIDC provider whose URL equals this cluster's issuer.
+# Providers belonging to other clusters are never touched; when the issuer is
+# unknown the function skips cleanup rather than matching by region.
+cleanup_oidc() {
+    echo "Cleaning up OIDC provider..."
+
+    # Fall back to a live lookup in case the caller did not capture it earlier.
+    if [ -z "$CLUSTER_OIDC_ISSUER" ]; then
+        capture_oidc_issuer
+    fi
+
+    if [ -z "$CLUSTER_OIDC_ISSUER" ]; then
+        echo " Could not determine the OIDC issuer for '$CLUSTER_NAME', skipping."
+        echo " Other EKS clusters may share this region, so nothing is deleted by pattern."
+        return
+    fi
+
+    local providers arn provider_url
+    providers=$(aws iam list-open-id-connect-providers \
+        --query 'OpenIDConnectProviderList[*].Arn' \
+        --output text 2>/dev/null || echo "")
+
+    for arn in $providers; do
+        provider_url=$(aws iam get-open-id-connect-provider --open-id-connect-provider-arn "$arn" \
+            --query 'Url' --output text 2>/dev/null || echo "")
+        # Compare scheme-less on both sides so either form of the URL matches.
+        if [ "${provider_url#https://}" = "$CLUSTER_OIDC_ISSUER" ]; then
+            echo " Deleting OIDC provider: $arn"
+            aws iam delete-open-id-connect-provider \
+                --open-id-connect-provider-arn "$arn" 2>/dev/null || true
+        fi
+    done
+
+    echo " OIDC cleanup complete."
+}
+
+# ============================================================
+# Print summary
+# ============================================================
+
+# Print what the `all` command removed and what it left in place.
+print_summary() {
+    echo ""
+    echo "========================================"
+    echo " EKS Teardown Complete!"
+    echo "========================================"
+    echo ""
+    echo "Deleted resources:"
+    echo " - Kubernetes namespace 'aex' and all resources"
+    echo " - Helm charts (LB controller, ingress, external-secrets, metrics-server)"
+    echo " - EKS node groups"
+    echo " - EKS cluster: $CLUSTER_NAME"
+    echo " - CloudFormation stack: ${ENVIRONMENT_NAME}-eks-cluster"
+    echo " - OIDC provider"
+    if [ "$DELETE_INFRA" = true ]; then
+        echo " - Infrastructure stack (VPC, ECR, Secrets)"
+    else
+        echo ""
+        echo "NOT deleted (shared with ECS):"
+        echo " - Infrastructure stack (VPC, ECR, Secrets)"
+        echo " To also delete infrastructure: $0 --include-infra"
+    fi
+    echo ""
+    echo "Note: Some resources may take a few minutes to fully delete."
+    echo "Remove kubeconfig context: kubectl config delete-context $CLUSTER_NAME"
+}
+
+# ============================================================
+# Main
+# ============================================================
+
+# Parse arguments in one pass: the first non-flag word is the command.
+# --help is handled here so it can never fall through to the default `all`,
+# and unknown --flags abort instead of being silently ignored.
+CMD=""
+for arg in "$@"; do
+    case "$arg" in
+        -h|--help)
+            usage
+            exit 0
+            ;;
+        --include-infra)
+            DELETE_INFRA=true
+            ;;
+        --*)
+            echo "Unknown option: $arg"
+            usage
+            exit 1
+            ;;
+        *)
+            if [ -z "$CMD" ]; then
+                CMD="$arg"
+            fi
+            ;;
+    esac
+done
+CMD="${CMD:-all}"
+
+case "$CMD" in
+    -h|--help|help)
+        usage
+        exit 0
+        ;;
+    k8s)
+        check_prerequisites
+        confirm_deletion
+        delete_k8s_resources
+        ;;
+    helm)
+        check_prerequisites
+        confirm_deletion
+        uninstall_helm_charts
+        ;;
+    cluster)
+        check_prerequisites
+        confirm_deletion
+        delete_eks_cluster
+        ;;
+    stacks)
+        check_prerequisites
+        confirm_deletion
+        delete_cloudformation_stacks
+        ;;
+    all)
+        check_prerequisites
+        echo ""
+        echo "Region: $AWS_REGION"
+        echo "Account: $AWS_ACCOUNT_ID"
+        echo "Cluster: $CLUSTER_NAME"
+        echo ""
+        confirm_deletion
+        echo ""
+
+        # Must happen before delete_eks_cluster: the issuer cannot be looked
+        # up once the cluster is gone.
+        capture_oidc_issuer
+
+        delete_k8s_resources
+        echo ""
+        uninstall_helm_charts
+        echo ""
+        delete_eks_cluster
+        echo ""
+        cleanup_oidc
+        echo ""
+        delete_cloudformation_stacks
+        echo ""
+        print_summary
+        ;;
+    *)
+        echo "Unknown command: $CMD"
+        usage
+        exit 1
+        ;;
+esac
diff --git a/hack/deploy/teardown-gke.sh b/hack/deploy/teardown-gke.sh
new file mode 100644
index 0000000..0102057
--- /dev/null
+++ b/hack/deploy/teardown-gke.sh
@@ -0,0 +1,401 @@
+#!/bin/bash
+set -e
+
+# Agent Exchange - GKE Teardown Script
+# Removes all GKE resources created for Agent Exchange
+# WARNING: This is DESTRUCTIVE and will delete all data!
+#
+# Usage:
+#   GCP_PROJECT_ID=my-project ./teardown-gke.sh
+#   GCP_PROJECT_ID=my-project ./teardown-gke.sh namespace
+#   GCP_PROJECT_ID=my-project ./teardown-gke.sh all
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+# Configuration
+PROJECT="${GCP_PROJECT_ID:-}"
+REGION="${GCP_REGION:-us-central1}"
+CLUSTER_NAME="${GKE_CLUSTER_NAME:-aex-cluster}"
+NAMESPACE="aex"
+
+# Print command-line help.
+# NOTE(review): the original heredoc text was lost; this wording is rebuilt
+# from the header comment, the configuration defaults above and the command
+# dispatch table at the bottom of the script. Confirm against the real file.
+usage() {
+    cat <<EOF
+Agent Exchange - GKE Teardown
+
+Usage: GCP_PROJECT_ID=<project> $0 [command]
+
+Commands:
+  all         Delete all GKE resources (default)
+  namespace   Delete the '$NAMESPACE' namespace and everything in it
+  helm        Uninstall Helm charts only
+  cluster     Delete the GKE cluster only
+  iam         Remove GKE IAM service account and role bindings only
+  images      Delete Artifact Registry images only
+
+Environment variables:
+  GCP_PROJECT_ID     GCP project ID (required)
+  GCP_REGION         GCP region (default: us-central1)
+  GKE_CLUSTER_NAME   GKE cluster name (default: aex-cluster)
+
+WARNING: This will permanently delete GKE resources and data!
+EOF
+}
+
+# Verify the project is set and gcloud is installed and authenticated.
+# Exits with status 1 on the first failed check.
+# NOTE(review): the opening of this function was lost with the heredoc above.
+# The gcloud checks below are original; the PROJECT guard and the
+# "Checking prerequisites..." line are reconstructed and should be confirmed.
+check_prerequisites() {
+    echo "Checking prerequisites..."
+
+    # Every gcloud call below passes --project="$PROJECT"; an empty value
+    # would only fail later with a less helpful message.
+    if [[ -z "$PROJECT" ]]; then
+        echo "Error: GCP_PROJECT_ID is not set"
+        exit 1
+    fi
+
+    if ! command -v gcloud &> /dev/null; then
+        echo "Error: gcloud CLI is not installed"
+        exit 1
+    fi
+
+    if ! gcloud auth print-identity-token &> /dev/null; then
+        echo "Error: Not authenticated with gcloud. Run 'gcloud auth login'"
+        exit 1
+    fi
+
+    echo "Prerequisites OK"
+}
+
+# Show what is about to be destroyed and require the literal word DELETE.
+#   $1 - human-readable description of the deletion scope
+# Exits with status 1 on any other answer.
+confirm_deletion() {
+    local scope="$1"
+    local confirmation
+
+    echo ""
+    echo "================================================================"
+    echo " WARNING: DESTRUCTIVE ACTION"
+    echo "================================================================"
+    echo ""
+    echo " Project: $PROJECT"
+    echo " Region: $REGION"
+    echo " Cluster: $CLUSTER_NAME"
+    echo " Scope: $scope"
+    echo ""
+    echo " This action CANNOT be undone!"
+    echo ""
+    echo "================================================================"
+    echo ""
+
+    read -r -p "Type 'DELETE' to confirm: " confirmation
+    if [[ "$confirmation" != "DELETE" ]]; then
+        echo "Aborted."
+        exit 1
+    fi
+}
+
+# ============================================================
+# Get cluster credentials (best effort)
+# ============================================================
+
+# Point kubectl at the cluster; failure is ignored so later steps can report
+# "not found" on their own.
+get_credentials() {
+    gcloud container clusters get-credentials "$CLUSTER_NAME" \
+        --region="$REGION" \
+        --project="$PROJECT" 2>/dev/null || true
+}
+
+# ============================================================
+# Delete K8s namespace and resources
+# ============================================================
+
+# Delete $NAMESPACE and everything in it, reporting honestly whether the
+# delete finished. A failure or timeout does not abort the teardown.
+delete_namespace() {
+    echo ""
+    echo "Deleting K8s namespace '$NAMESPACE' and all resources..."
+
+    if kubectl get namespace "$NAMESPACE" &> /dev/null; then
+        # List what will be deleted
+        echo " Resources in namespace '$NAMESPACE':"
+        kubectl get all -n "$NAMESPACE" 2>/dev/null || true
+        echo ""
+
+        # Delete the namespace (cascades to all resources)
+        if kubectl delete namespace "$NAMESPACE" --wait=true --timeout=120s 2>/dev/null; then
+            echo " Namespace '$NAMESPACE' deleted"
+        else
+            echo " Warning: namespace '$NAMESPACE' deletion failed or timed out (it may still be terminating)"
+        fi
+    else
+        echo " Namespace '$NAMESPACE' not found (already deleted or cluster unreachable)"
+    fi
+}
+
+# ============================================================
+# Uninstall Helm charts
+# ============================================================
+
+# Uninstall the add-on charts that are present, then delete their namespaces.
+delete_helm_charts() {
+    echo ""
+    echo "Uninstalling Helm charts..."
+
+    # Entries are "release:namespace".
+    local charts=(
+        "ingress-nginx:ingress-nginx"
+        "cert-manager:cert-manager"
+        "external-secrets:external-secrets"
+        "metrics-server:kube-system"
+    )
+
+    local chart_info chart ns
+    for chart_info in "${charts[@]}"; do
+        chart="${chart_info%%:*}"
+        ns="${chart_info##*:}"
+
+        if helm status "$chart" -n "$ns" &> /dev/null; then
+            echo " Uninstalling $chart from namespace $ns..."
+            helm uninstall "$chart" -n "$ns" 2>/dev/null || true
+        else
+            echo " $chart not found in namespace $ns (skipping)"
+        fi
+    done
+
+    # Clean up chart namespaces (kube-system is deliberately not in this list)
+    local chart_namespaces=(
+        "ingress-nginx"
+        "cert-manager"
+        "external-secrets"
+    )
+
+    for ns in "${chart_namespaces[@]}"; do
+        if kubectl get namespace "$ns" &> /dev/null; then
+            echo " Deleting namespace $ns..."
+            kubectl delete namespace "$ns" --wait=false 2>/dev/null || true
+        fi
+    done
+
+    echo " Helm charts uninstalled"
+}
+
+# ============================================================
+# Delete GKE cluster
+# ============================================================
+
+# Delete the cluster if it exists, then drop its kubectl context and cluster
+# entries. A failed cluster delete aborts the script (set -e).
+delete_cluster() {
+    echo ""
+    echo "Deleting GKE cluster '$CLUSTER_NAME'..."
+
+    if gcloud container clusters describe "$CLUSTER_NAME" \
+        --region="$REGION" --project="$PROJECT" &> /dev/null; then
+
+        gcloud container clusters delete "$CLUSTER_NAME" \
+            --region="$REGION" \
+            --project="$PROJECT" \
+            --quiet
+
+        echo " Cluster deleted"
+    else
+        echo " Cluster '$CLUSTER_NAME' not found (already deleted)"
+    fi
+
+    # Clean up kubectl context
+    local context="gke_${PROJECT}_${REGION}_${CLUSTER_NAME}"
+    kubectl config delete-context "$context" 2>/dev/null || true
+    kubectl config delete-cluster "$context" 2>/dev/null || true
+    echo " kubectl context cleaned up"
+}
+
+# ============================================================
+# Clean up IAM bindings
+# ============================================================
+
+# Remove the aex-gke service account with its project role bindings, and strip
+# the GKE roles from the GitHub Actions service account (which is kept).
+delete_iam_resources() {
+    echo ""
+    echo "Cleaning up IAM resources..."
+
+    local role
+
+    # GKE Workload service account
+    local sa_email="aex-gke@$PROJECT.iam.gserviceaccount.com"
+
+    if gcloud iam service-accounts describe "$sa_email" --project="$PROJECT" &> /dev/null; then
+        echo " Deleting service account: $sa_email"
+
+        # Remove IAM bindings first
+        local roles=(
+            "roles/secretmanager.secretAccessor"
+            "roles/datastore.user"
+            "roles/logging.logWriter"
+            "roles/cloudtrace.agent"
+            "roles/monitoring.metricWriter"
+        )
+
+        for role in "${roles[@]}"; do
+            gcloud projects remove-iam-policy-binding "$PROJECT" \
+                --member="serviceAccount:$sa_email" \
+                --role="$role" \
+                --quiet 2>/dev/null || true
+        done
+
+        # Delete the service account
+        gcloud iam service-accounts delete "$sa_email" \
+            --project="$PROJECT" \
+            --quiet 2>/dev/null || true
+
+        echo " Service account deleted"
+    else
+        echo " Service account '$sa_email' not found (already deleted)"
+    fi
+
+    # Remove GKE roles from GitHub Actions SA (but keep the SA itself)
+    local gh_sa_email="aex-github-actions@$PROJECT.iam.gserviceaccount.com"
+    if gcloud iam service-accounts describe "$gh_sa_email" --project="$PROJECT" &> /dev/null; then
+        echo " Removing GKE roles from GitHub Actions SA..."
+        local gke_roles=(
+            "roles/container.developer"
+            "roles/container.clusterViewer"
+        )
+        for role in "${gke_roles[@]}"; do
+            gcloud projects remove-iam-policy-binding "$PROJECT" \
+                --member="serviceAccount:$gh_sa_email" \
+                --role="$role" \
+                --quiet 2>/dev/null || true
+        done
+        echo " GKE roles removed (SA preserved for Cloud Run)"
+    fi
+
+    echo " IAM cleanup complete"
+}
+
+# ============================================================
+# Delete Artifact Registry images
+# ============================================================
+
+# List the images in the 'aex' repository and, after a y/N prompt, delete
+# them together with their tags. Returns 0 when there is nothing to do.
+delete_images() {
+    echo ""
+    echo "Deleting Artifact Registry images..."
+
+    if ! gcloud artifacts repositories describe aex \
+        --location="$REGION" --project="$PROJECT" &> /dev/null; then
+        echo " Repository 'aex' not found (skipping)"
+        return 0
+    fi
+
+    echo " Listing images in Artifact Registry..."
+    local images
+    images=$(gcloud artifacts docker images list \
+        "$REGION-docker.pkg.dev/$PROJECT/aex" \
+        --format="value(PACKAGE)" \
+        --project="$PROJECT" 2>/dev/null | sort -u || echo "")
+
+    if [[ -z "$images" ]]; then
+        echo " No images found"
+        return 0
+    fi
+
+    local img
+    echo " Found images:"
+    while read -r img; do
+        echo " - $img"
+    done <<< "$images"
+    echo ""
+
+    local confirm_images
+    read -r -p " Delete all images? (y/N): " confirm_images
+    if [[ "$confirm_images" == "y" || "$confirm_images" == "Y" ]]; then
+        while read -r img; do
+            if [[ -n "$img" ]]; then
+                echo " Deleting $img..."
+                # stdin is redirected so gcloud cannot consume the rest of
+                # the image list that feeds this loop.
+                gcloud artifacts docker images delete "$img" \
+                    --project="$PROJECT" \
+                    --delete-tags \
+                    --quiet < /dev/null 2>/dev/null || true
+            fi
+        done <<< "$images"
+        echo " Images deleted"
+    else
+        echo " Image deletion skipped"
+    fi
+}
+
+# ============================================================
+# Summary
+# ============================================================
+
+# Print what the `all` command removed and what it left in place.
+print_summary() {
+    echo ""
+    echo "================================================================"
+    echo " GKE Teardown Complete"
+    echo "================================================================"
+    echo ""
+    echo " Deleted resources:"
+    echo " - K8s namespace '$NAMESPACE' and all resources within"
+    echo " - Helm charts (ingress-nginx, cert-manager, external-secrets)"
+    echo " - GKE cluster '$CLUSTER_NAME'"
+    echo " - IAM service account 'aex-gke'"
+    echo ""
+    echo " Preserved resources:"
+    echo " - Artifact Registry (shared with Cloud Run)"
+    echo " - GitHub Actions service account (shared with Cloud Run)"
+    echo " - Secret Manager secrets (shared with Cloud Run)"
+    echo " - Workload Identity Pool (shared with Cloud Run)"
+    echo ""
+    echo " To also delete shared resources, run:"
+    echo " ./hack/deploy/teardown-gcp.sh"
+    echo ""
+}
+
+# ============================================================
+# Main
+# ============================================================
+
+case "${1:-all}" in
+    -h|--help|help)
+        usage
+        exit 0
+        ;;
+    namespace)
+        check_prerequisites
+        confirm_deletion "namespace only"
+        get_credentials
+        delete_namespace
+        ;;
+    helm)
+        check_prerequisites
+        confirm_deletion "Helm charts only"
+        get_credentials
+        delete_helm_charts
+        ;;
+    cluster)
+        check_prerequisites
+        confirm_deletion "GKE cluster only"
+        delete_cluster
+        ;;
+    iam)
+        check_prerequisites
+        confirm_deletion "IAM resources only"
+        delete_iam_resources
+        ;;
+    images)
+        check_prerequisites
+        confirm_deletion "Artifact Registry images"
+        delete_images
+        ;;
+    all)
+        check_prerequisites
+        echo ""
+        echo "Project: $PROJECT"
+        echo "Region: $REGION"
+        echo "Cluster: $CLUSTER_NAME"
+        echo ""
+        confirm_deletion "ALL GKE resources"
+        echo ""
+
+        get_credentials
+
+        delete_namespace
+        echo ""
+        delete_helm_charts
+        echo ""
+        delete_cluster
+        echo ""
+        delete_iam_resources
+        echo ""
+
+        read -r -p "Also delete Artifact Registry images? (y/N): " del_images
+        if [[ "$del_images" == "y" || "$del_images" == "Y" ]]; then
+            delete_images
+            echo ""
+        fi
+
+        print_summary
+        ;;
+    *)
+        echo "Unknown command: $1"
+        usage
+        exit 1
+        ;;
+esac