log message: reflection & backtracking

TataKKKL · TataKKKL · commit 60bc5cd3d736 · 2025-04-29T23:17:47.000-07:00
diff --git a/visual-tree-search-app/components/MessageLogPanelMCTS.tsx b/visual-tree-search-app/components/MessageLogPanelMCTS.tsx
@@ -109,6 +109,8 @@ const MessageLogPanelMCTS: React.FC<MessageLogPanelProps> = ({ messages, message
   const getCardStyle = (type: string) => {
     switch (type) {
       // System Status Messages
+      case 'reflection_backtracking':
+        return "bg-gradient-to-r from-blue-50 to-blue-100 dark:from-blue-900/20 dark:to-blue-800/20 border-blue-200 dark:border-blue-800";
       case 'server_connection':
         return "bg-gradient-to-r from-blue-50 to-blue-100 dark:from-blue-900/20 dark:to-blue-800/20 border-blue-200 dark:border-blue-800";
       case 'start_search':
@@ -208,6 +210,8 @@ const MessageLogPanelMCTS: React.FC<MessageLogPanelProps> = ({ messages, message
 
   const getIcon = (message: ParsedMessage) => {
     switch (message.type) {
+      case 'reflection_backtracking':
+        return <Brain className="h-4 w-4 text-blue-500" />;
       case 'server_connection':
         return <Globe className="h-4 w-4 text-green-500 animate-pulse" />;
       case 'start_search':
@@ -323,6 +327,8 @@ const MessageLogPanelMCTS: React.FC<MessageLogPanelProps> = ({ messages, message
   const getIconBgColor = (type: string) => {
     switch (type) {
       // System Status Messages
+      case 'reflection_backtracking':
+        return "bg-gradient-to-r from-blue-50 to-blue-100 dark:from-blue-900/20 dark:to-blue-800/20 border-blue-200 dark:border-blue-800";
       case 'start_search':
         return "bg-blue-100 dark:bg-blue-800/30 text-blue-600 dark:text-blue-400";
       case 'connection_established':
@@ -552,6 +558,32 @@ const MessageLogPanelMCTS: React.FC<MessageLogPanelProps> = ({ messages, message
           </div>
         );
 
+        case 'reflection_backtracking':
+          return (
+            <div className="flex items-center gap-2 animate-fadeIn">
+              {getIcon(message)}
+              <div className="animate-slideIn">
+                <div className="text-emerald-600 dark:text-emerald-400">
+                  Reflecting & backtracking | Node: {message.description}
+                </div>
+                {message.path && message.path.length > 0 && (
+                  <div className="mt-1">
+                    {message.path.map((step: PathStep, index: number) => (
+                      <div 
+                        key={index} 
+                        className="flex items-start gap-1 text-xs text-slate-500 dark:text-slate-400 animate-fadeIn"
+                        style={{ animationDelay: `${index * 100}ms` }}
+                      >
+                        <ArrowRight className="h-3 w-3 mt-0.5" />
+                        {step.natural_language_description}
+                      </div>
+                    ))}
+                  </div>
+                )}
+              </div>
+            </div>
+          );
+
       case 'search_complete':
         return (
           <div className="flex items-center gap-2 animate-fadeIn">
@@ -668,6 +700,7 @@ const MessageLogPanelMCTS: React.FC<MessageLogPanelProps> = ({ messages, message
           </div>
         );
 
+
       default:
         return (
           <div className="flex items-center gap-2 animate-fadeIn">
diff --git a/visual-tree-search-app/pages/LATSAgent.tsx b/visual-tree-search-app/pages/LATSAgent.tsx
@@ -33,7 +33,7 @@ const LATSAgent = () => {
     goal: 'search running shoes, click on the first result',
     maxDepth: 3,
     num_simulations: 1,
-    iterations: 1
+    iterations: 2
   });
 
   const [sessionId, setSessionId] = useState<string | null>(null);
diff --git a/visual-tree-search-app/pages/MCTSAgent.tsx b/visual-tree-search-app/pages/MCTSAgent.tsx
@@ -33,7 +33,7 @@ const MCTSAgent = () => {
     goal: 'search running shoes, click on the first result',
     maxDepth: 3,
     set_prior_value: false,
-    iterations: 1
+    iterations: 2
   });
 
   const [sessionId, setSessionId] = useState<string | null>(null);
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py
@@ -165,11 +165,13 @@ async def websocket_reflection_backtracking(self, path, selected_node, websocket
         if websocket:
             await websocket.send_json({
                 "type": "reflection_backtracking",
-                "path": [node.action for node in path if node.action is not None],
+                "path": [{
+                    "natural_language_description": node.natural_language_description,
+                    "action": node.action} for node in path if node.action is not None],
                 "node_id": id(selected_node),
-                "node_parent_id": id(selected_node.parent),
-                "node_action": selected_node.action,
-                "node_description": selected_node.natural_language_description,
+                "parent_id": id(selected_node.parent),
+                "action": selected_node.action,
+                "description": selected_node.natural_language_description,
                 "trajectory": selected_node.get_trajectory()
             })
 
@@ -304,80 +306,81 @@ async def mcts_search(self, websocket=None) -> Optional[LATSNode]:
 
             # Step 3: simulation using the current node, (generate a path using the current node, and score the path)
             # TODO: implement simulation using openai
-            print(f"{GREEN}Step 3: Simulation{RESET}")
-            await self.websocket_step_start(step=3, step_name="simulation", websocket=websocket)
-            path = self.get_path_to_root(selected_node)
-            # here score is the reward
-            score = await self.evaluate_selected_path(path)
-            # change to reward later?
-            if score > best_score:
-                best_score = score
-                best_path = path
-                best_node = selected_node
-                print(f"\nNew best path found!")
-                print(f"best score: {best_score:.3f}")
-                print(f"best node: {best_node.action}")
-                print(f"best node: {best_node.natural_language_description}")
-                print(f"best path: {best_path}")
-
-            # add websocket information, just use websocket here
-            if websocket:
-                await self.websocket_simulation_result(score, selected_node, websocket=websocket)
+            if selected_node != self.root_node:
+                print(f"{GREEN}Step 3: Simulation{RESET}")
+                await self.websocket_step_start(step=3, step_name="simulation", websocket=websocket)
+                path = self.get_path_to_root(selected_node)
+                # here score is the reward
+                score = await self.evaluate_selected_path(path)
+                # change to reward later?
+                if score > best_score:
+                    best_score = score
+                    best_path = path
+                    best_node = selected_node
+                    print(f"\nNew best path found!")
+                    print(f"best score: {best_score:.3f}")
+                    print(f"best node: {best_node.action}")
+                    print(f"best node: {best_node.natural_language_description}")
+                    print(f"best path: {best_path}")
 
+                # add websocket information, just use websocket here
+                if websocket:
+                    await self.websocket_simulation_result(score, selected_node, websocket=websocket)
 
-            ## Step 4: reflection backtracking
-            print(f"{GREEN}Step 4: Reflection Backtracking{RESET}")
-            await self.websocket_step_start(step=4, step_name="reflection_backtracking", websocket=websocket)
-            if score >= self.config.reflection_score:
-                # Convert path to serializable trajectory
-                # trajectory = [node.action for node in path if node.action is not None]
-                await self.websocket_search_complete("success", score, selected_node.get_trajectory(), websocket=websocket)
-                await self.playwright_manager.close()
-                return selected_node
 
-            print(f"path: {path}")
-            path, current_node = await self.reflection_backtracking(path)
-            print(f"path: {path}")
-            print(f"current_node: {current_node.action}")
-            print(f"current_node: {current_node.natural_language_description}")
+                ## Step 4: reflection backtracking
+                print(f"{GREEN}Step 4: Reflection Backtracking{RESET}")
+                await self.websocket_step_start(step=4, step_name="reflection_backtracking", websocket=websocket)
+                if score >= self.config.reflection_score:
+                    # Convert path to serializable trajectory
+                    # trajectory = [node.action for node in path if node.action is not None]
+                    await self.websocket_search_complete("success", score, selected_node.get_trajectory(), websocket=websocket)
+                    await self.playwright_manager.close()
+                    return selected_node
 
-            # add websocket information, just use websocket here
-            if websocket:
-                await self.websocket_reflection_backtracking(path, current_node, websocket=websocket)
+                print(f"path: {path}")
+                path, current_node = await self.reflection_backtracking(path)
+                print(f"path: {path}")
+                print(f"current_node: {current_node.action}")
+                print(f"current_node: {current_node.natural_language_description}")
 
-            # Step 5: backpropagation
-            print(f"{GREEN}Step 5: Backpropagation{RESET}")
-            await self.websocket_step_start(step=5, step_name="backpropagation", websocket=websocket)
-            for node in path:
-                if node != self.root_node:
-                    old_value = node.value
-                    node.visits += 1
-                    node.value += (score - node.value) / node.visits
-                    # consiste with lats backpropagation
-                    #node.value = (node.value * (node.visits - 1) + score) / node.visits
-                    print(f"Node {node.action}:")
-                    print(f"  Visits: {node.visits}")
-                    print(f"  Value: {old_value:.3f} -> {node.value:.3f}")
                 # add websocket information, just use websocket here
-                # if websocket:
-                #     await websocket.send_json({
-                #         "type": "backpropagation",
-                #         "node_id": id(node),
-                #         "node_parent_id": id(node.parent),
-                #         "node_action": node.action,
-                #         "node_value": node.value,
-                #         "node_visits": node.visits,
-                #         "node_old_value": old_value,
-                #         "node_description": node.natural_language_description,
-                #     })
+                if websocket:
+                    await self.websocket_reflection_backtracking(path, current_node, websocket=websocket)
 
-            tree_data = self._get_tree_data()
-            print_entire_tree(self.root_node)
-            print(tree_data)
-            if websocket:
-                await self.websocket_tree_update(type="tree_update_node_backpropagation", websocket=websocket, tree_data=tree_data)
-            else:
+                # Step 5: backpropagation
+                print(f"{GREEN}Step 5: Backpropagation{RESET}")
+                await self.websocket_step_start(step=5, step_name="backpropagation", websocket=websocket)
+                for node in path:
+                    if node != self.root_node:
+                        old_value = node.value
+                        node.visits += 1
+                        node.value += (score - node.value) / node.visits
+                        # consistent with LATS backpropagation
+                        #node.value = (node.value * (node.visits - 1) + score) / node.visits
+                        print(f"Node {node.action}:")
+                        print(f"  Visits: {node.visits}")
+                        print(f"  Value: {old_value:.3f} -> {node.value:.3f}")
+                    # add websocket information, just use websocket here
+                    # if websocket:
+                    #     await websocket.send_json({
+                    #         "type": "backpropagation",
+                    #         "node_id": id(node),
+                    #         "node_parent_id": id(node.parent),
+                    #         "node_action": node.action,
+                    #         "node_value": node.value,
+                    #         "node_visits": node.visits,
+                    #         "node_old_value": old_value,
+                    #         "node_description": node.natural_language_description,
+                    #     })
+
+                tree_data = self._get_tree_data()
                 print_entire_tree(self.root_node)
+                print(tree_data)
+                if websocket:
+                    await self.websocket_tree_update(type="tree_update_node_backpropagation", websocket=websocket, tree_data=tree_data)
+                else:
+                    print_entire_tree(self.root_node)
         if best_node:
              # Convert node to serializable trajectory
             # trajectory = [n.action for n in self.get_path_to_root(best_node) if n.action is not None]