Skip to content

Commit 7389444

Browse files
markturanskyAmbient Code Botclaude
authored
fix(security,api-server,control-plane): CodeRabbit fixes, deletecollection fallback, idempotent agent start (#1290)
## Summary - **Security fixes** (CodeRabbit PR #1289 review): IDOR protection on credential Get/Patch/Delete/GetToken, projectID format validation in List, MCP sidecar guard on token exchange config, nil-guard for mention resolver TokenFunc - **deletecollection RBAC fallback**: When the tenant SA lacks `deletecollection`, session cleanup now falls back to list+delete individually — prevents orphaned pods accumulating - **Idempotent agent start**: `POST /projects/{id}/agents/{agent_id}/start` returns 200 with existing active session instead of creating duplicates. New sessions still return 201. ## Test plan - [x] `go build ./...` passes for api-server, control-plane, ambient-mcp - [x] `go vet ./...` passes for all three - [x] Credential integration tests pass (9/9) - [x] MCP mention resolver tests pass (10/10 including new nil-guard test) - [x] Control-plane unit tests pass - [x] Runner tests pass (647/647) 🤖 Generated with [Claude Code](https://claude.ai/code) <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Reuse active sessions: existing active sessions are returned instead of creating duplicates. * Control plane now forwards HTTP/HTTPS/NO_PROXY to spawned pods and sidecars. * **Bug Fixes** * Improved credential project validation and enforcement. * Kubernetes deletion now falls back when permissions restrict bulk deletes. * Resolver and session initialization now handle configuration/errors robustly. * **Documentation** * Added troubleshooting note about proxy env propagation to runner pods. * **Tests** * Updated tests to cover resolver and session behavior. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Ambient Code Bot <bot@ambient-code.local> Co-authored-by: Claude <noreply@anthropic.com>
1 parent 063953f commit 7389444

File tree

14 files changed

+203
-30
lines changed

14 files changed

+203
-30
lines changed

.claude/skills/ambient-pr-test/SKILL.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,14 @@ docker login quay.io
244244

245245
Either `build.sh` was not run or the CI build workflow failed. Check Actions → `Build and Push Component Docker Images` for the PR.
246246

247+
### Runner pods can't reach external hosts (Squid proxy)
248+
249+
The MPP cluster routes outbound traffic through a Squid proxy (`proxy.squi-001.prod.iad2.dc.redhat.com:3128`). The `runtime-int` deployments have `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` set in their pod specs, but runner pods spawned by the control plane did not inherit these.
250+
251+
**Fix (merged):** The control plane reads `HTTP_PROXY`, `HTTPS_PROXY`, `NO_PROXY` from its own environment and injects them into both the runner container (`buildEnv()`) and the MCP sidecar container (`buildMCPSidecar()`). No manifest change needed — the CP's deployment already has the proxy vars; they now propagate automatically.
252+
253+
**Pattern:** When the CP needs to forward platform-level env vars to spawned pods, add the field to `ControlPlaneConfig``KubeReconcilerConfig``buildEnv()`/`buildMCPSidecar()`.
254+
247255
### JWT / UNAUTHENTICATED errors in api-server
248256

249257
The production overlay configures JWT against Red Hat SSO. For ephemeral test instances without SSO integration:

.claude/skills/devflow/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Then re-read this file before continuing.
3131

3232
## Workflow Overview
3333

34-
```
34+
```text
3535
1. Ticket (optional)
3636
2. Branch (from ticket name if available)
3737
3. Spec change (load + modify the component's .spec.md)

components/ambient-api-server/plugins/agents/start_handler.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,27 @@ func NewStartHandler(agent AgentService, inboxSvc inbox.InboxMessageService, ses
4747
}
4848

4949
func (h *startHandler) Start(w http.ResponseWriter, r *http.Request) {
50-
cfg := &handlers.HandlerConfig{
51-
Action: func() (interface{}, *pkgerrors.ServiceError) {
52-
ctx := r.Context()
53-
agentID := mux.Vars(r)["agent_id"]
50+
ctx := r.Context()
51+
agentID := mux.Vars(r)["agent_id"]
5452

55-
agent, err := h.agent.Get(ctx, agentID)
56-
if err != nil {
57-
return nil, err
58-
}
53+
agent, err := h.agent.Get(ctx, agentID)
54+
if err != nil {
55+
handlers.HandleError(ctx, w, err)
56+
return
57+
}
5958

59+
if existing, _ := h.session.ActiveByAgentID(ctx, agentID); existing != nil {
60+
resp := &StartResponse{
61+
Session: sessions.PresentSession(existing),
62+
}
63+
w.Header().Set("Content-Type", "application/json")
64+
w.WriteHeader(http.StatusOK)
65+
json.NewEncoder(w).Encode(resp)
66+
return
67+
}
68+
69+
cfg := &handlers.HandlerConfig{
70+
Action: func() (interface{}, *pkgerrors.ServiceError) {
6071
unread, inboxErr := h.inbox.UnreadByAgentID(ctx, agentID)
6172
if inboxErr != nil {
6273
return nil, inboxErr

components/ambient-api-server/plugins/credentials/handler.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package credentials
33
import (
44
"fmt"
55
"net/http"
6+
"regexp"
67

78
"github.com/gorilla/mux"
89

@@ -13,6 +14,8 @@ import (
1314
"github.com/openshift-online/rh-trex-ai/pkg/services"
1415
)
1516

17+
var safeProjectIDPattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
18+
1619
var _ handlers.RestHandler = credentialHandler{}
1720

1821
type credentialHandler struct {
@@ -58,11 +61,15 @@ func (h credentialHandler) Patch(w http.ResponseWriter, r *http.Request) {
5861
Validators: []handlers.Validate{},
5962
Action: func() (interface{}, *errors.ServiceError) {
6063
ctx := r.Context()
64+
projectID := mux.Vars(r)["id"]
6165
id := mux.Vars(r)["cred_id"]
6266
found, err := h.credential.Get(ctx, id)
6367
if err != nil {
6468
return nil, err
6569
}
70+
if found.ProjectID != projectID {
71+
return nil, errors.NotFound("credential with id='%s' not found", id)
72+
}
6673

6774
if patch.Name != nil {
6875
found.Name = *patch.Name
@@ -106,6 +113,9 @@ func (h credentialHandler) List(w http.ResponseWriter, r *http.Request) {
106113
Action: func() (interface{}, *errors.ServiceError) {
107114
ctx := r.Context()
108115
projectID := mux.Vars(r)["id"]
116+
if !safeProjectIDPattern.MatchString(projectID) {
117+
return nil, errors.Validation("invalid project ID format")
118+
}
109119

110120
listArgs := services.NewListArguments(r.URL.Query())
111121
projectFilter := fmt.Sprintf("project_id = '%s'", projectID)
@@ -148,12 +158,16 @@ func (h credentialHandler) List(w http.ResponseWriter, r *http.Request) {
148158
func (h credentialHandler) Get(w http.ResponseWriter, r *http.Request) {
149159
cfg := &handlers.HandlerConfig{
150160
Action: func() (interface{}, *errors.ServiceError) {
161+
projectID := mux.Vars(r)["id"]
151162
id := mux.Vars(r)["cred_id"]
152163
ctx := r.Context()
153164
credential, err := h.credential.Get(ctx, id)
154165
if err != nil {
155166
return nil, err
156167
}
168+
if credential.ProjectID != projectID {
169+
return nil, errors.NotFound("credential with id='%s' not found", id)
170+
}
157171

158172
return PresentCredential(credential), nil
159173
},
@@ -165,9 +179,17 @@ func (h credentialHandler) Get(w http.ResponseWriter, r *http.Request) {
165179
func (h credentialHandler) Delete(w http.ResponseWriter, r *http.Request) {
166180
cfg := &handlers.HandlerConfig{
167181
Action: func() (interface{}, *errors.ServiceError) {
182+
projectID := mux.Vars(r)["id"]
168183
id := mux.Vars(r)["cred_id"]
169184
ctx := r.Context()
170-
err := h.credential.Delete(ctx, id)
185+
found, err := h.credential.Get(ctx, id)
186+
if err != nil {
187+
return nil, err
188+
}
189+
if found.ProjectID != projectID {
190+
return nil, errors.NotFound("credential with id='%s' not found", id)
191+
}
192+
err = h.credential.Delete(ctx, id)
171193
if err != nil {
172194
return nil, err
173195
}
@@ -180,12 +202,16 @@ func (h credentialHandler) Delete(w http.ResponseWriter, r *http.Request) {
180202
func (h credentialHandler) GetToken(w http.ResponseWriter, r *http.Request) {
181203
cfg := &handlers.HandlerConfig{
182204
Action: func() (interface{}, *errors.ServiceError) {
205+
projectID := mux.Vars(r)["id"]
183206
id := mux.Vars(r)["cred_id"]
184207
ctx := r.Context()
185208
credential, err := h.credential.Get(ctx, id)
186209
if err != nil {
187210
return nil, err
188211
}
212+
if credential.ProjectID != projectID {
213+
return nil, errors.NotFound("credential with id='%s' not found", id)
214+
}
189215

190216
return PresentCredentialToken(credential), nil
191217
},

components/ambient-api-server/plugins/sessions/dao.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ type SessionDao interface {
1717
FindByIDs(ctx context.Context, ids []string) (SessionList, error)
1818
All(ctx context.Context) (SessionList, error)
1919
AllByProjectId(ctx context.Context, projectId string) (SessionList, error)
20+
ActiveByAgentID(ctx context.Context, agentID string) (*Session, error)
2021
}
2122

2223
var _ SessionDao = &sqlSessionDao{}
@@ -91,3 +92,15 @@ func (d *sqlSessionDao) AllByProjectId(ctx context.Context, projectId string) (S
9192
}
9293
return sessions, nil
9394
}
95+
96+
func (d *sqlSessionDao) ActiveByAgentID(ctx context.Context, agentID string) (*Session, error) {
97+
g2 := (*d.sessionFactory).New(ctx)
98+
var session Session
99+
err := g2.Where("agent_id = ? AND phase IN (?)", agentID, []string{"Pending", "Creating", "Running"}).
100+
Order("created_at DESC").
101+
Take(&session).Error
102+
if err != nil {
103+
return nil, err
104+
}
105+
return &session, nil
106+
}

components/ambient-api-server/plugins/sessions/mock_dao.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,13 @@ func (d *sessionDaoMock) AllByProjectId(ctx context.Context, projectId string) (
7777
}
7878
return filtered, nil
7979
}
80+
81+
func (d *sessionDaoMock) ActiveByAgentID(ctx context.Context, agentID string) (*Session, error) {
82+
activePhases := map[string]bool{"Pending": true, "Creating": true, "Running": true}
83+
for _, s := range d.sessions {
84+
if s.AgentId != nil && *s.AgentId == agentID && s.Phase != nil && activePhases[*s.Phase] {
85+
return s, nil
86+
}
87+
}
88+
return nil, gorm.ErrRecordNotFound
89+
}

components/ambient-api-server/plugins/sessions/service.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ type SessionService interface {
2222
UpdateStatus(ctx context.Context, id string, patch *SessionStatusPatchRequest) (*Session, *errors.ServiceError)
2323
Start(ctx context.Context, id string) (*Session, *errors.ServiceError)
2424
Stop(ctx context.Context, id string) (*Session, *errors.ServiceError)
25+
ActiveByAgentID(ctx context.Context, agentID string) (*Session, *errors.ServiceError)
2526

2627
FindByIDs(ctx context.Context, ids []string) (SessionList, *errors.ServiceError)
2728

@@ -265,6 +266,14 @@ func (s *sqlSessionService) Start(ctx context.Context, id string) (*Session, *er
265266
return session, nil
266267
}
267268

269+
func (s *sqlSessionService) ActiveByAgentID(ctx context.Context, agentID string) (*Session, *errors.ServiceError) {
270+
session, err := s.sessionDao.ActiveByAgentID(ctx, agentID)
271+
if err != nil {
272+
return nil, nil
273+
}
274+
return session, nil
275+
}
276+
268277
func (s *sqlSessionService) Stop(ctx context.Context, id string) (*Session, *errors.ServiceError) {
269278
session, err := s.sessionDao.Get(ctx, id)
270279
if err != nil {

components/ambient-control-plane/cmd/ambient-control-plane/main.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ func runKubeMode(ctx context.Context, cfg *config.ControlPlaneConfig) error {
148148
CPRuntimeNamespace: cfg.CPRuntimeNamespace,
149149
CPTokenURL: cfg.CPTokenURL,
150150
CPTokenPublicKey: string(kp.PublicKeyPEM),
151+
HTTPProxy: cfg.HTTPProxy,
152+
HTTPSProxy: cfg.HTTPSProxy,
153+
NoProxy: cfg.NoProxy,
151154
}
152155

153156
conn, err := grpc.NewClient(cfg.GRPCServerAddr, grpc.WithTransportCredentials(grpcCredentials(cfg.GRPCUseTLS)))

components/ambient-control-plane/internal/config/config.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ type ControlPlaneConfig struct {
3939
ProjectKubeTokenFile string
4040
CPTokenListenAddr string
4141
CPTokenURL string
42+
HTTPProxy string
43+
HTTPSProxy string
44+
NoProxy string
4245
}
4346

4447
func Load() (*ControlPlaneConfig, error) {
@@ -75,6 +78,9 @@ func Load() (*ControlPlaneConfig, error) {
7578
ProjectKubeTokenFile: os.Getenv("PROJECT_KUBE_TOKEN_FILE"),
7679
CPTokenListenAddr: envOrDefault("CP_TOKEN_LISTEN_ADDR", ":8080"),
7780
CPTokenURL: os.Getenv("CP_TOKEN_URL"),
81+
HTTPProxy: os.Getenv("HTTP_PROXY"),
82+
HTTPSProxy: os.Getenv("HTTPS_PROXY"),
83+
NoProxy: os.Getenv("NO_PROXY"),
7884
}
7985

8086
if cfg.MCPAPIServerURL == "" {

components/ambient-control-plane/internal/kubeclient/kubeclient.go

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"strings"
88

99
"github.com/rs/zerolog"
10+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
1011
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1112
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
1213
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -201,7 +202,7 @@ func (kc *KubeClient) DeletePod(ctx context.Context, namespace, name string, opt
201202
}
202203

203204
func (kc *KubeClient) DeletePodsByLabel(ctx context.Context, namespace, labelSelector string) error {
204-
return kc.dynamic.Resource(PodGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
205+
return kc.deleteCollectionWithFallback(ctx, PodGVR, namespace, labelSelector)
205206
}
206207

207208
// Service operations
@@ -214,7 +215,7 @@ func (kc *KubeClient) CreateService(ctx context.Context, obj *unstructured.Unstr
214215
}
215216

216217
func (kc *KubeClient) DeleteServicesByLabel(ctx context.Context, namespace, labelSelector string) error {
217-
return kc.dynamic.Resource(ServiceGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
218+
return kc.deleteCollectionWithFallback(ctx, ServiceGVR, namespace, labelSelector)
218219
}
219220

220221
// Secret operations
@@ -231,7 +232,7 @@ func (kc *KubeClient) UpdateSecret(ctx context.Context, obj *unstructured.Unstru
231232
}
232233

233234
func (kc *KubeClient) DeleteSecretsByLabel(ctx context.Context, namespace, labelSelector string) error {
234-
return kc.dynamic.Resource(SecretGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
235+
return kc.deleteCollectionWithFallback(ctx, SecretGVR, namespace, labelSelector)
235236
}
236237

237238
// ServiceAccount operations
@@ -244,7 +245,7 @@ func (kc *KubeClient) CreateServiceAccount(ctx context.Context, obj *unstructure
244245
}
245246

246247
func (kc *KubeClient) DeleteServiceAccountsByLabel(ctx context.Context, namespace, labelSelector string) error {
247-
return kc.dynamic.Resource(ServiceAccountGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
248+
return kc.deleteCollectionWithFallback(ctx, ServiceAccountGVR, namespace, labelSelector)
248249
}
249250

250251
// Role operations
@@ -257,11 +258,38 @@ func (kc *KubeClient) CreateRole(ctx context.Context, obj *unstructured.Unstruct
257258
}
258259

259260
func (kc *KubeClient) DeleteRolesByLabel(ctx context.Context, namespace, labelSelector string) error {
260-
return kc.dynamic.Resource(RoleGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
261+
return kc.deleteCollectionWithFallback(ctx, RoleGVR, namespace, labelSelector)
261262
}
262263

263264
func (kc *KubeClient) DeleteRoleBindingsByLabel(ctx context.Context, namespace, labelSelector string) error {
264-
return kc.dynamic.Resource(RoleBindingGVR).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
265+
return kc.deleteCollectionWithFallback(ctx, RoleBindingGVR, namespace, labelSelector)
266+
}
267+
268+
func (kc *KubeClient) deleteCollectionWithFallback(ctx context.Context, gvr schema.GroupVersionResource, namespace, labelSelector string) error {
269+
err := kc.dynamic.Resource(gvr).Namespace(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: labelSelector})
270+
if err == nil {
271+
return nil
272+
}
273+
if !k8serrors.IsForbidden(err) {
274+
return err
275+
}
276+
277+
kc.logger.Warn().Str("resource", gvr.Resource).Str("namespace", namespace).Msg("deletecollection forbidden, falling back to list+delete")
278+
279+
list, listErr := kc.dynamic.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
280+
if listErr != nil {
281+
return fmt.Errorf("fallback list %s: %w", gvr.Resource, listErr)
282+
}
283+
284+
var lastErr error
285+
for i := range list.Items {
286+
name := list.Items[i].GetName()
287+
if delErr := kc.dynamic.Resource(gvr).Namespace(namespace).Delete(ctx, name, metav1.DeleteOptions{}); delErr != nil && !k8serrors.IsNotFound(delErr) {
288+
kc.logger.Warn().Err(delErr).Str("resource", gvr.Resource).Str("name", name).Msg("fallback delete failed")
289+
lastErr = delErr
290+
}
291+
}
292+
return lastErr
265293
}
266294

267295
func (kc *KubeClient) GetNetworkPolicy(ctx context.Context, namespace, name string) (*unstructured.Unstructured, error) {

0 commit comments

Comments
 (0)