From 009b4d47b6c8812c32562f2e24c8e51cd38db3cb Mon Sep 17 00:00:00 2001 From: liyuqing Date: Wed, 10 Jun 2026 10:37:01 +0800 Subject: [PATCH] feat(sight): improve interruption detection and visualization - Fix interruption count not filtering by agent selection in dashboard - Add interruption marking in ATIF trace viewer (red pulse dot, severity border, type badges, expandable detail panel) - Tune dead_loop detection thresholds from 3 to 5 consecutive repeats - Add stats_filtered() to support agent_name filtering in interruption API - Embed call_id in ATIF step extra for precise interruption-to-step matching Signed-off-by: liyuqing --- .../dashboard/src/pages/AtifViewerPage.tsx | 137 ++++++++++++++++-- .../dashboard/src/pages/ConversationList.tsx | 2 +- src/agentsight/dashboard/src/types/index.ts | 2 +- .../dashboard/src/utils/apiClient.ts | 4 +- src/agentsight/src/atif/converter.rs | 4 +- .../src/interruption/loop_detector.rs | 37 ++++- src/agentsight/src/server/handlers.rs | 4 +- .../src/storage/sqlite/interruption.rs | 40 ++++- 8 files changed, 193 insertions(+), 37 deletions(-) diff --git a/src/agentsight/dashboard/src/pages/AtifViewerPage.tsx b/src/agentsight/dashboard/src/pages/AtifViewerPage.tsx index 7642faa85..83793d7f4 100644 --- a/src/agentsight/dashboard/src/pages/AtifViewerPage.tsx +++ b/src/agentsight/dashboard/src/pages/AtifViewerPage.tsx @@ -3,7 +3,11 @@ import { useNavigate, useSearchParams } from 'react-router-dom'; import type { AtifDocument, AtifStep, AtifToolCall, AtifObservation, AtifStepMetrics, } from '../types'; -import { fetchAtifBySession, fetchAtifByConversation } from '../utils/apiClient'; +import { + fetchAtifBySession, fetchAtifByConversation, + fetchSessionInterruptions, fetchConversationInterruptions, + InterruptionRecord, INTERRUPTION_TYPE_CN, +} from '../utils/apiClient'; // ─── Helpers ────────────────────────────────────────────────────────────────── @@ -115,15 +119,29 @@ const ExpandableText: React.FC<{ text: string; className?: string }> = ({ text, ); }; +// ─── Severity styling ───────────────────────────────────────────────────────── + +const SEVERITY_STYLES: Record = { + critical: { border: 'border-red-400', bg: 'bg-red-50', text: 'text-red-700', dot: 'bg-red-500', label: '严重' }, + high: { border: 'border-orange-400', bg: 'bg-orange-50', text: 'text-orange-700', dot: 'bg-orange-500', label: '高危' }, + medium: { border: 'border-yellow-400', bg: 'bg-yellow-50', text: 'text-yellow-700', dot: 'bg-yellow-500', label: '中危' }, + low: { border: 'border-blue-300', bg: 'bg-blue-50', text: 'text-blue-700', dot: 'bg-blue-400', label: '低危' }, +}; + +function getSeverityStyle(severity: string) { + return SEVERITY_STYLES[severity] ?? SEVERITY_STYLES.low; +} + // ─── StepCard ───────────────────────────────────────────────────────────────── interface StepCardProps { step: AtifStep; + interruptions: InterruptionRecord[]; expandedSections: Set; onToggleSection: (key: string) => void; } -const StepCard: React.FC = ({ step, expandedSections, onToggleSection }) => { +const StepCard: React.FC = ({ step, interruptions, expandedSections, onToggleSection }) => { const style = getSourceStyle(step.source); const sectionKey = (name: string) => `${step.step_id}-${name}`; const isOpen = (name: string) => expandedSections.has(sectionKey(name)); @@ -136,14 +154,23 @@ const StepCard: React.FC = ({ step, expandedSections, onToggleSec step.metrics.prompt_tokens != null || step.metrics.completion_tokens != null ); + const hasInterruptions = interruptions.length > 0; + // Pick the highest severity for border highlight + const highestSeverity = hasInterruptions + ? (['critical', 'high', 'medium', 'low'].find(s => interruptions.some(i => i.severity === s)) ?? 'low') + : null; return (
- {/* Timeline dot */} -
+ {/* Timeline dot — red pulsing if interrupted */} + {hasInterruptions ? ( +
+ ) : ( +
+ )} {/* Card */} -
+
{/* Header */}
@@ -158,6 +185,15 @@ const StepCard: React.FC = ({ step, expandedSections, onToggleSec {step.model_name} )} + {/* Interruption badges */} + {hasInterruptions && interruptions.map((intr) => { + const ss = getSeverityStyle(intr.severity); + return ( + + ⚠️ {INTERRUPTION_TYPE_CN[intr.interruption_type] ?? intr.interruption_type} ({ss.label}) + + ); + })}
{/* Body */} @@ -255,6 +291,44 @@ const StepCard: React.FC = ({ step, expandedSections, onToggleSec )} )} + + {/* Interruption details (shown for any step source) */} + {hasInterruptions && ( + toggle('interruptions')} + > +
+ {interruptions.map((intr) => { + const ss = getSeverityStyle(intr.severity); + return ( +
+
+ + + {INTERRUPTION_TYPE_CN[intr.interruption_type] ?? intr.interruption_type} + + + {ss.label} + + + {new Date(intr.occurred_at_ns / 1_000_000).toLocaleString('zh-CN')} + +
+ {intr.detail && ( +
+                          {intr.detail}
+                        
+ )} +
+ ); + })} +
+
+ )}
@@ -345,6 +419,7 @@ export const AtifViewerPage: React.FC = () => { // Data state const [doc, setDoc] = useState(null); + const [interruptions, setInterruptions] = useState([]); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); @@ -371,16 +446,25 @@ export const AtifViewerPage: React.FC = () => { setLoading(true); setError(null); setDoc(null); + setInterruptions([]); setExpandedSections(new Set()); try { let data: AtifDocument; + let intrs: InterruptionRecord[] = []; if (t === 'conversation') { - data = await fetchAtifByConversation(i.trim()); + [data, intrs] = await Promise.all([ + fetchAtifByConversation(i.trim()), + fetchConversationInterruptions(i.trim()).catch(() => [] as InterruptionRecord[]), + ]); } else { - data = await fetchAtifBySession(i.trim()); + [data, intrs] = await Promise.all([ + fetchAtifBySession(i.trim()), + fetchSessionInterruptions(i.trim()).catch(() => [] as InterruptionRecord[]), + ]); } setDoc(data); + setInterruptions(intrs); } catch (e: any) { setError(e.message ?? '加载失败'); } finally { @@ -606,6 +690,11 @@ export const AtifViewerPage: React.FC = () => { 共 {doc.steps.length} 步 + {interruptions.length > 0 && ( + + ⚠️ {interruptions.length} 个异常中断 + + )} {doc.steps.length === 0 ? ( @@ -618,14 +707,32 @@ export const AtifViewerPage: React.FC = () => { {/* Vertical line */}
- {doc.steps.map(step => ( - - ))} + {doc.steps.map(step => { + // Match interruptions to this step via call_id or timestamp + const stepCallId = step.extra?.call_id; + const stepInterruptions = interruptions.filter(intr => { + // Match by call_id if available + if (stepCallId && intr.call_id) { + return intr.call_id === stepCallId; + } + // Fallback: match by timestamp proximity (within same step window) + if (step.timestamp && intr.occurred_at_ns) { + const stepTs = new Date(step.timestamp).getTime() * 1_000_000; + // Within ±30 seconds of the step timestamp + return Math.abs(intr.occurred_at_ns - stepTs) < 30_000_000_000; + } + return false; + }); + return ( + + ); + })}
)}
diff --git a/src/agentsight/dashboard/src/pages/ConversationList.tsx b/src/agentsight/dashboard/src/pages/ConversationList.tsx index 63e498270..a90973625 100644 --- a/src/agentsight/dashboard/src/pages/ConversationList.tsx +++ b/src/agentsight/dashboard/src/pages/ConversationList.tsx @@ -898,7 +898,7 @@ export const ConversationList: React.FC = () => { ), fetchTimeseries(startNs, endNs, agent), fetchInterruptionCount(startNs, endNs, agent).catch(() => null), - fetchInterruptionStats(startNs, endNs).catch(() => [] as InterruptionTypeStat[]), + fetchInterruptionStats(startNs, endNs, agent).catch(() => [] as InterruptionTypeStat[]), fetchInterruptionSessionCounts(startNs, endNs).catch(() => [] as SessionInterruptionCount[]), fetchInterruptionConversationCounts(startNs, endNs).catch(() => [] as ConversationInterruptionCount[]), fetchTokenSavings(startNs, endNs, agent).catch(() => null), diff --git a/src/agentsight/dashboard/src/types/index.ts b/src/agentsight/dashboard/src/types/index.ts index afbc5de14..d75a7d20c 100644 --- a/src/agentsight/dashboard/src/types/index.ts +++ b/src/agentsight/dashboard/src/types/index.ts @@ -218,7 +218,7 @@ export interface AtifStep { tool_calls?: AtifToolCall[]; observation?: AtifObservation; metrics?: AtifStepMetrics; - extra?: any; + extra?: { call_id?: string; [key: string]: any }; } export interface AtifAgent { diff --git a/src/agentsight/dashboard/src/utils/apiClient.ts b/src/agentsight/dashboard/src/utils/apiClient.ts index 69f5c9bc9..de908cf05 100644 --- a/src/agentsight/dashboard/src/utils/apiClient.ts +++ b/src/agentsight/dashboard/src/utils/apiClient.ts @@ -335,11 +335,13 @@ export interface InterruptionTypeStat { */ export async function fetchInterruptionStats( startNs: number, - endNs: number + endNs: number, + agentName?: string ): Promise { const params = new URLSearchParams(); params.set('start_ns', String(startNs)); params.set('end_ns', String(endNs)); + if (agentName) params.set('agent_name', agentName); return apiFetch( `${API_BASE}/api/interruptions/stats?${params.toString()}` ); diff --git a/src/agentsight/src/atif/converter.rs b/src/agentsight/src/atif/converter.rs index 34bd57548..ab52cf50e 100644 --- a/src/agentsight/src/atif/converter.rs +++ b/src/agentsight/src/atif/converter.rs @@ -525,7 +525,9 @@ fn build_agent_step( }, observation, metrics, - extra: None, + extra: event.call_id.as_ref().map(|cid| { + serde_json::json!({ "call_id": cid }) + }), } } diff --git a/src/agentsight/src/interruption/loop_detector.rs b/src/agentsight/src/interruption/loop_detector.rs index 99c05c6ac..e98d190a1 100644 --- a/src/agentsight/src/interruption/loop_detector.rs +++ b/src/agentsight/src/interruption/loop_detector.rs @@ -30,10 +30,10 @@ pub struct LoopDetectorConfig { impl Default for LoopDetectorConfig { fn default() -> Self { Self { - tool_sequence_repeat_threshold: 3, - window_size: 10, + tool_sequence_repeat_threshold: 5, + window_size: 12, output_similarity_threshold: 0.85, - similar_output_repeat_threshold: 3, + similar_output_repeat_threshold: 5, } } } @@ -362,7 +362,11 @@ mod tests { #[test] fn test_tool_sequence_loop_detected() { - let detector = LoopDetector::default(); + let detector = LoopDetector::new(LoopDetectorConfig { + tool_sequence_repeat_threshold: 3, + similar_output_repeat_threshold: 3, + ..Default::default() + }); let calls = vec![ make_call(vec!["read_file", "search"], "output a", 100), make_call(vec!["read_file", "search"], "output b", 200), @@ -380,7 +384,11 @@ mod tests { #[test] fn test_tool_sequence_no_loop_different_tools() { - let detector = LoopDetector::default(); + let detector = LoopDetector::new(LoopDetectorConfig { + tool_sequence_repeat_threshold: 3, + similar_output_repeat_threshold: 3, + ..Default::default() + }); let calls = vec![ make_call(vec!["read_file", "search"], "output a", 100), make_call(vec!["write_file"], "output b", 200), @@ -394,7 +402,11 @@ mod tests { #[test] fn test_tool_sequence_loop_with_interleaved_text_calls() { // Simulates OpenClaw architecture: tool_call → text → tool_call → text → tool_call → text - let detector = LoopDetector::default(); + let detector = LoopDetector::new(LoopDetectorConfig { + tool_sequence_repeat_threshold: 3, + similar_output_repeat_threshold: 3, + ..Default::default() + }); let calls = vec![ make_call(vec!["read_file"], "reading file...", 100), make_call(vec![], "Here is the content of the file.", 200), @@ -414,7 +426,11 @@ mod tests { #[test] fn test_output_similarity_loop_detected() { - let detector = LoopDetector::default(); + let detector = LoopDetector::new(LoopDetectorConfig { + tool_sequence_repeat_threshold: 3, + similar_output_repeat_threshold: 3, + ..Default::default() + }); let calls = vec![ make_call(vec![], "The quick brown fox jumps over the lazy dog repeatedly", 100), make_call(vec![], "The quick brown fox jumps over the lazy dog repeatedly", 200), @@ -429,7 +445,11 @@ mod tests { #[test] fn test_output_similarity_no_loop_different_outputs() { - let detector = LoopDetector::default(); + let detector = LoopDetector::new(LoopDetectorConfig { + tool_sequence_repeat_threshold: 3, + similar_output_repeat_threshold: 3, + ..Default::default() + }); let calls = vec![ make_call(vec![], "completely different output alpha", 100), make_call(vec![], "totally unrelated text beta gamma", 200), @@ -443,6 +463,7 @@ mod tests { fn test_token_burn_detected() { let detector = LoopDetector::new(LoopDetectorConfig { tool_sequence_repeat_threshold: 5, // raise so rule 1 doesn't fire + similar_output_repeat_threshold: 3, ..Default::default() }); let output = "I will try to help you with this task using the available tools"; diff --git a/src/agentsight/src/server/handlers.rs b/src/agentsight/src/server/handlers.rs index fcb9da14e..3de489d90 100644 --- a/src/agentsight/src/server/handlers.rs +++ b/src/agentsight/src/server/handlers.rs @@ -614,7 +614,7 @@ pub async fn interruption_count( let end_ns = query.end_ns.unwrap_or_else(|| now_ns() as i64); let start_ns = query.start_ns.unwrap_or_else(|| end_ns - 86_400_000_000_000i64); - match istore.stats(start_ns, end_ns) { + match istore.stats_filtered(start_ns, end_ns, query.agent_name.as_deref()) { Ok(stats) => { let mut total = 0u64; let mut critical = 0u64; @@ -661,7 +661,7 @@ pub async fn interruption_stats( let end_ns = query.end_ns.unwrap_or_else(|| now_ns() as i64); let start_ns = query.start_ns.unwrap_or_else(|| end_ns - 86_400_000_000_000i64); - match istore.stats(start_ns, end_ns) { + match istore.stats_filtered(start_ns, end_ns, query.agent_name.as_deref()) { Ok(stats) => HttpResponse::Ok().json(stats), Err(e) => HttpResponse::InternalServerError() .json(serde_json::json!({"error": e.to_string()})), diff --git a/src/agentsight/src/storage/sqlite/interruption.rs b/src/agentsight/src/storage/sqlite/interruption.rs index 1a5ef5419..642a96512 100644 --- a/src/agentsight/src/storage/sqlite/interruption.rs +++ b/src/agentsight/src/storage/sqlite/interruption.rs @@ -402,16 +402,40 @@ impl InterruptionStore { &self, start_ns: i64, end_ns: i64, + ) -> Result, Box> { + self.stats_filtered(start_ns, end_ns, None) + } + + /// Like `stats` but optionally filtered by agent_name. + pub fn stats_filtered( + &self, + start_ns: i64, + end_ns: i64, + agent_name: Option<&str>, ) -> Result, Box> { let conn = self.conn.lock().unwrap(); - let mut stmt = conn.prepare( - "SELECT interruption_type, severity, COUNT(*) AS cnt - FROM interruption_events - WHERE occurred_at_ns BETWEEN ?1 AND ?2 - GROUP BY interruption_type - ORDER BY cnt DESC", - )?; - let rows = stmt.query_map(params![start_ns, end_ns], |row| { + let (sql, args): (String, Vec>) = if let Some(agent) = agent_name { + ( + "SELECT interruption_type, severity, COUNT(*) AS cnt + FROM interruption_events + WHERE occurred_at_ns BETWEEN ?1 AND ?2 AND agent_name = ?3 + GROUP BY interruption_type, severity + ORDER BY cnt DESC".to_string(), + vec![Box::new(start_ns), Box::new(end_ns), Box::new(agent.to_string())], + ) + } else { + ( + "SELECT interruption_type, severity, COUNT(*) AS cnt + FROM interruption_events + WHERE occurred_at_ns BETWEEN ?1 AND ?2 + GROUP BY interruption_type, severity + ORDER BY cnt DESC".to_string(), + vec![Box::new(start_ns), Box::new(end_ns)], + ) + }; + let mut stmt = conn.prepare(&sql)?; + let params_ref: Vec<&dyn rusqlite::types::ToSql> = args.iter().map(|a| a.as_ref()).collect(); + let rows = stmt.query_map(params_ref.as_slice(), |row| { Ok(InterruptionTypeStat { interruption_type: row.get(0)?, severity: row.get(1)?,