Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions .github/workflows/amber-issue-handler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ jobs:
[{"url": "https://github.com/${{ github.repository }}", "branch": "main"}]
model: claude-opus-4-6
wait: 'true'
timeout: '60'
timeout: '0'
stop-on-run-finished: 'true'

- name: Post-session labels and comment
if: steps.existing.outputs.skip != 'true'
Expand Down Expand Up @@ -248,7 +249,8 @@ jobs:
[{"url": "https://github.com/${{ github.repository }}", "branch": "main"}]
model: claude-opus-4-6
wait: 'true'
timeout: '60'
timeout: '0'
stop-on-run-finished: 'true'

# Custom prompt: @amber <instruction> — pass user's text
- name: Run custom prompt
Expand All @@ -271,14 +273,15 @@ jobs:
[{"url": "https://github.com/${{ github.repository }}", "branch": "main"}]
model: claude-opus-4-6
wait: 'true'
timeout: '60'
timeout: '0'
stop-on-run-finished: 'true'

- name: Session summary
if: always() && steps.context.outputs.is_fork != 'true'
run: |
# Get session name from whichever step ran
SESSION_NAME="${{ steps.fix-session.outputs.session-name }}${{ steps.fix-issue-session.outputs.session-name }}${{ steps.custom-session.outputs.session-name }}"
SESSION_PHASE="${{ steps.fix-session.outputs.session-phase }}${{ steps.fix-issue-session.outputs.session-phase }}${{ steps.custom-session.outputs.session-phase }}"
# Get session name from whichever step ran (only one will have output)
SESSION_NAME="${{ steps.fix-session.outputs.session-name || steps.fix-issue-session.outputs.session-name || steps.custom-session.outputs.session-name }}"
SESSION_PHASE="${{ steps.fix-session.outputs.session-phase || steps.fix-issue-session.outputs.session-phase || steps.custom-session.outputs.session-phase }}"

echo "### Amber — ${{ steps.context.outputs.type }} #${{ steps.context.outputs.number }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
Expand Down Expand Up @@ -313,6 +316,8 @@ jobs:
import os
import re
import subprocess
import time
import uuid
import requests
from datetime import datetime, timezone

Expand Down Expand Up @@ -368,7 +373,6 @@ jobs:

def create_session_api(prompt, session_name="", model="claude-opus-4-6"):
"""Create a new session or send message to existing one."""
import time, uuid

if session_name:
# Ensure session is running
Expand Down Expand Up @@ -401,7 +405,7 @@ jobs:

# Create new session
url = f"{API_URL.rstrip('/')}/projects/{PROJECT}/agentic-sessions"
body = {"initialPrompt": prompt, "inactivityTimeout": 60,
body = {"initialPrompt": prompt, "stopOnRunFinished": True,
"llmSettings": {"model": model},
"repos": [{"url": f"https://github.com/{REPO}", "branch": "main"}]}
try:
Expand Down
3 changes: 3 additions & 0 deletions components/backend/handlers/sessions.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,9 @@ func CreateSession(c *gin.Context) {
}
spec["inactivityTimeout"] = *req.InactivityTimeout
}
if req.StopOnRunFinished != nil && *req.StopOnRunFinished {
spec["stopOnRunFinished"] = true
}
Comment on lines +744 to +746
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

stopOnRunFinished is write-only right now

Line 744 adds the create-time write, but parseSpec() still never reads spec.stopOnRunFinished. GetSession, ListSessions, and the other handlers that return parseSpec(spec) will still surface this field as false/omitted even when the CR stores true, so the new API does not round-trip.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/handlers/sessions.go` around lines 744 - 746, parseSpec()
is not reading spec["stopOnRunFinished"], so a create-time write via
req.StopOnRunFinished only persists in the CR but never surfaces through
GetSession/ListSessions; update parseSpec(spec) to detect the
"stopOnRunFinished" key (handle both bool and pointer semantics as used
elsewhere), set the corresponding field in the returned session spec struct
(matching the type used by GetSession/ListSessions), and ensure parseSpec
returns the true value when the CR stores true so the API round-trips correctly.


session := map[string]interface{}{
"apiVersion": "vteam.ambient-code/v1alpha1",
Expand Down
2 changes: 2 additions & 0 deletions components/backend/types/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type AgenticSessionSpec struct {
LLMSettings LLMSettings `json:"llmSettings"`
Timeout int `json:"timeout"`
InactivityTimeout *int `json:"inactivityTimeout,omitempty"`
StopOnRunFinished bool `json:"stopOnRunFinished,omitempty"`
UserContext *UserContext `json:"userContext,omitempty"`
BotAccount *BotAccountRef `json:"botAccount,omitempty"`
ResourceOverrides *ResourceOverrides `json:"resourceOverrides,omitempty"`
Expand Down Expand Up @@ -58,6 +59,7 @@ type CreateAgenticSessionRequest struct {
LLMSettings *LLMSettings `json:"llmSettings,omitempty"`
Timeout *int `json:"timeout,omitempty"`
InactivityTimeout *int `json:"inactivityTimeout,omitempty"`
StopOnRunFinished *bool `json:"stopOnRunFinished,omitempty"`
ParentSessionID string `json:"parent_session_id,omitempty"`
Repos []SimpleRepo `json:"repos,omitempty"`
ActiveWorkflow *WorkflowSelection `json:"activeWorkflow,omitempty"`
Expand Down
76 changes: 62 additions & 14 deletions components/backend/websocket/agui_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ import (

const (
// activityDebounceInterval is the minimum interval between CR status updates for lastActivityTime.
// Inactivity timeout is measured in hours, so minute-level granularity is sufficient.
activityDebounceInterval = 60 * time.Second
// Must be significantly shorter than the smallest inactivity timeout to prevent
// false inactivity detection while the session is actively processing.
activityDebounceInterval = 10 * time.Second

// activityUpdateTimeout bounds how long a single activity status update can take.
activityUpdateTimeout = 10 * time.Second
Expand All @@ -56,6 +57,11 @@ var activityUpdateSem = make(chan struct{}, 50)
// for each session to avoid excessive API calls. Key: "namespace/sessionName"
var lastActivityUpdateTimes sync.Map

// stopOnRunFinishedCache tracks which sessions have stopOnRunFinished set.
// Populated lazily on first RUN_FINISHED event, avoids repeated k8s API calls.
// Key: sessionName, Value: bool
var stopOnRunFinishedCache sync.Map
Comment on lines +60 to +63
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Cache this by project/session, not sessionName alone

AgenticSessions are namespaced. With the current keying, a cached false for one project can make Line 1010 return early for a same-named session in another project, and that RUN_FINISHED will never even read its CR.

Also applies to: 1008-1013, 1025-1026

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/websocket/agui_proxy.go` around lines 60 - 63, The cache
stopOnRunFinishedCache is keyed by sessionName only which collides across
namespaces; change it to use a composite key that includes project/namespace
plus session name (e.g. fmt.Sprintf("%s/%s", session.GetNamespace() or
session.Project, sessionName) or a small struct key) wherever the map is set or
read (the lazy population on first RUN_FINISHED, the early-return check, and
subsequent lookups around the RUN_FINISHED handling). Update the comment to
reflect "Key: project/sessionName" and ensure all references that read or write
stopOnRunFinishedCache (including the code paths currently using sessionName
alone) are updated to compute and use the composite key.


// sessionProjectMap maps sessionName → projectName so that persistStreamedEvent
// (which only receives sessionID) can look up the project for activity tracking.
// Populated by HandleAGUIRunProxy on each run request.
Expand Down Expand Up @@ -533,13 +539,20 @@ func persistStreamedEvent(sessionID, runID, threadID, jsonData string) {
// sessionID-to-project mapping populated by HandleAGUIRunProxy.
eventType, _ := event["type"].(string)

// Update lastActivityTime on CR for activity events (debounced).
if isActivityEvent(eventType) {
// Update lastActivityTime on CR for any event (debounced).
if eventType != "" {
if projectName, ok := sessionProjectMap.Load(sessionID); ok {
updateLastActivityTime(projectName.(string), sessionID, eventType == types.EventTypeRunStarted)
}
}

// Stop session on RUN_FINISHED if stopOnRunFinished is set.
if eventType == types.EventTypeRunFinished {
if projectName, ok := sessionProjectMap.Load(sessionID); ok {
go checkAndStopOnRunFinished(projectName.(string), sessionID)
}
Comment on lines +542 to +553
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail
sed -n '538,556p' components/backend/websocket/agui_proxy.go
sed -n '1000,1105p' components/backend/websocket/agui_proxy.go
rg -n 'RetryOnConflict|Patch\(' components/backend/websocket/agui_proxy.go || true

Repository: ambient-code/platform

Length of output: 4576


checkAndStopOnRunFinished needs conflict retry and namespaced cache key

The stop-on-RUN_FINISHED path has two functional bugs:

  1. Unretried failure on resource conflict: checkAndStopOnRunFinished() fetches the object, modifies it, then calls Update() with no RetryOnConflict wrapper. Concurrent writes (e.g., from updateLastActivityTime() calling UpdateStatus()) can advance the resource version, causing Update() to fail with a conflict that is only logged and never retried, leaving the session running.

  2. Cache key collision across namespaces: stopOnRunFinishedCache uses bare sessionName as the key, but AgenticSession resources are namespace-scoped by projectName. Sessions with identical names in different projects will collide in cache, causing incorrect stop decisions. For comparison, updateLastActivityTime correctly uses projectName + "/" + sessionName as its cache key.

Use RetryOnConflict (or fetch-and-patch) for the stop operation and key the cache as projectName + "/" + sessionName.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/websocket/agui_proxy.go` around lines 542 - 553, The
stop-on-RUN_FINISHED flow in checkAndStopOnRunFinished has two issues: it must
retry on resource version conflicts and must key the cache by namespace; change
stopOnRunFinishedCache to use projectName + "/" + sessionName (same form as
updateLastActivityTime) and wrap the fetch-modify-update sequence inside a
RetryOnConflict (or equivalent fetch-and-patch loop) in
checkAndStopOnRunFinished so concurrent UpdateStatus/Update races are retried
instead of failing silently.

}

// agentStatus is derived at query time from the event log (DeriveAgentStatus).
// No CR updates needed here — the persisted events ARE the source of truth.
}
Expand Down Expand Up @@ -984,17 +997,52 @@ func triggerDisplayNameGenerationIfNeeded(projectName, sessionName string, messa
handlers.GenerateDisplayNameAsync(projectName, sessionName, userMessage, sessionCtx)
}

// isActivityEvent returns true for AG-UI event types that indicate session activity.
func isActivityEvent(eventType string) bool {
switch eventType {
case types.EventTypeRunStarted,
types.EventTypeTextMessageStart,
types.EventTypeTextMessageContent,
types.EventTypeToolCallStart:
return true
default:
return false
// checkAndStopOnRunFinished requests a stop for the given session when its
// spec has stopOnRunFinished set to true. It is a no-op when the dynamic
// client is not initialised or when the flag is absent/false.
//
// The stop is requested by setting the "ambient-code.io/desired-phase" and
// "ambient-code.io/stop-reason" annotations on the AgenticSession CR.
//
// An in-memory cache (stopOnRunFinishedCache) remembers the flag per session
// so that sessions without the flag do not trigger a k8s API call on every
// RUN_FINISHED event. Entries are keyed by "projectName/sessionName" because
// AgenticSessions are namespaced: keying by session name alone would let a
// cached false from one project suppress the stop for a same-named session in
// another project.
//
// All failures are logged and otherwise swallowed; the function is intended to
// be run in its own goroutine and returns nothing.
func checkAndStopOnRunFinished(projectName, sessionName string) {
	if handlers.DynamicClient == nil {
		return
	}

	cacheKey := projectName + "/" + sessionName

	// A cached false means we already read the CR and the flag is off. A cached
	// true still requires a fresh Get below, because Update needs the current
	// object and resourceVersion.
	if cached, found := stopOnRunFinishedCache.Load(cacheKey); found {
		if enabled, isBool := cached.(bool); isBool && !enabled {
			return
		}
	}

	gvr := types.GetAgenticSessionResource()
	ctx, cancel := context.WithTimeout(context.Background(), activityUpdateTimeout)
	defer cancel()

	sessions := handlers.DynamicClient.Resource(gvr).Namespace(projectName)

	obj, err := sessions.Get(ctx, sessionName, metav1.GetOptions{})
	if err != nil {
		log.Printf("stopOnRunFinished: failed to get session %s/%s: %v", projectName, sessionName, err)
		return
	}

	stopOnFinish, _, err := unstructured.NestedBool(obj.Object, "spec", "stopOnRunFinished")
	if err != nil {
		// Field exists but is not a bool; do not cache so a corrected CR is re-read.
		log.Printf("stopOnRunFinished: invalid spec.stopOnRunFinished on session %s/%s: %v", projectName, sessionName, err)
		return
	}
	stopOnRunFinishedCache.Store(cacheKey, stopOnFinish)
	if !stopOnFinish {
		return
	}

	// Update is a read-modify-write on the whole object, so a concurrent write
	// to the same CR (for example a status update) can bump the resourceVersion
	// and make it fail with a Conflict. Re-read and retry a bounded number of
	// times instead of leaving the session running.
	const maxAttempts = 3
	for attempt := 1; ; attempt++ {
		annotations := obj.GetAnnotations()
		if annotations == nil {
			annotations = make(map[string]string)
		}
		annotations["ambient-code.io/desired-phase"] = "Stopped"
		annotations["ambient-code.io/stop-reason"] = "run-finished"
		obj.SetAnnotations(annotations)

		_, err = sessions.Update(ctx, obj, metav1.UpdateOptions{})
		if err == nil {
			break
		}

		// API errors expose their metav1.Status; only a Conflict is worth retrying.
		apiStatus, hasStatus := err.(interface{ Status() metav1.Status })
		isConflict := hasStatus && apiStatus.Status().Reason == metav1.StatusReasonConflict
		if !isConflict || attempt == maxAttempts {
			log.Printf("stopOnRunFinished: failed to update session %s/%s: %v", projectName, sessionName, err)
			return
		}

		obj, err = sessions.Get(ctx, sessionName, metav1.GetOptions{})
		if err != nil {
			log.Printf("stopOnRunFinished: failed to get session %s/%s: %v", projectName, sessionName, err)
			return
		}
	}

	log.Printf("stopOnRunFinished: session %s/%s set to Stopped after RUN_FINISHED", projectName, sessionName)
}

// updateLastActivityTime updates the lastActivityTime field on the AgenticSession CR status.
Expand Down
3 changes: 3 additions & 0 deletions components/manifests/base/crds/agenticsessions-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ spec:
type: integer
minimum: 0
description: "Seconds of inactivity before auto-stopping a session. 0 disables auto-shutdown. If omitted, falls back to project-level inactivityTimeoutSeconds, then 24h default."
stopOnRunFinished:
type: boolean
description: "When true, automatically stop the session when the agent completes its run (RUN_FINISHED event). Useful for one-shot tasks triggered by automation."
environmentVariables:
type: object
additionalProperties:
Expand Down
Loading