diff --git a/tests/tools/test_browser_cleanup.py b/tests/tools/test_browser_cleanup.py new file mode 100644 index 000000000..9dfabe640 --- /dev/null +++ b/tests/tools/test_browser_cleanup.py @@ -0,0 +1,96 @@ +"""Regression tests for browser session cleanup and screenshot recovery.""" + +from unittest.mock import patch + + +class TestScreenshotPathRecovery: + def test_extracts_standard_absolute_path(self): + from tools.browser_tool import _extract_screenshot_path_from_text + + assert ( + _extract_screenshot_path_from_text("Screenshot saved to /tmp/foo.png") + == "/tmp/foo.png" + ) + + def test_extracts_quoted_absolute_path(self): + from tools.browser_tool import _extract_screenshot_path_from_text + + assert ( + _extract_screenshot_path_from_text( + "Screenshot saved to '/Users/david/.hermes/browser_screenshots/shot.png'" + ) + == "/Users/david/.hermes/browser_screenshots/shot.png" + ) + + +class TestBrowserCleanup: + def setup_method(self): + from tools import browser_tool + + self.browser_tool = browser_tool + self.orig_active_sessions = browser_tool._active_sessions.copy() + self.orig_session_last_activity = browser_tool._session_last_activity.copy() + self.orig_recording_sessions = browser_tool._recording_sessions.copy() + self.orig_cleanup_done = browser_tool._cleanup_done + + def teardown_method(self): + self.browser_tool._active_sessions.clear() + self.browser_tool._active_sessions.update(self.orig_active_sessions) + self.browser_tool._session_last_activity.clear() + self.browser_tool._session_last_activity.update(self.orig_session_last_activity) + self.browser_tool._recording_sessions.clear() + self.browser_tool._recording_sessions.update(self.orig_recording_sessions) + self.browser_tool._cleanup_done = self.orig_cleanup_done + + def test_cleanup_browser_clears_tracking_state(self): + browser_tool = self.browser_tool + browser_tool._active_sessions["task-1"] = { + "session_name": "sess-1", + "bb_session_id": None, + } + browser_tool._session_last_activity["task-1"] = 123.0 + + with ( + patch("tools.browser_tool._maybe_stop_recording") as mock_stop, + patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ) as mock_run, + patch("tools.browser_tool.os.path.exists", return_value=False), + ): + browser_tool.cleanup_browser("task-1") + + assert "task-1" not in browser_tool._active_sessions + assert "task-1" not in browser_tool._session_last_activity + mock_stop.assert_called_once_with("task-1") + mock_run.assert_called_once_with("task-1", "close", [], timeout=10) + + def test_browser_close_delegates_to_cleanup_browser(self): + import json + + browser_tool = self.browser_tool + browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"} + + with patch("tools.browser_tool.cleanup_browser") as mock_cleanup: + result = json.loads(browser_tool.browser_close("task-2")) + + assert result == {"success": True, "closed": True} + mock_cleanup.assert_called_once_with("task-2") + + def test_emergency_cleanup_clears_all_tracking_state(self): + browser_tool = self.browser_tool + browser_tool._cleanup_done = False + browser_tool._active_sessions["task-1"] = {"session_name": "sess-1"} + browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"} + browser_tool._session_last_activity["task-1"] = 1.0 + browser_tool._session_last_activity["task-2"] = 2.0 + browser_tool._recording_sessions.update({"task-1", "task-2"}) + + with patch("tools.browser_tool.cleanup_all_browsers") as mock_cleanup_all: + browser_tool._emergency_cleanup_all_sessions() + + mock_cleanup_all.assert_called_once_with() + assert browser_tool._active_sessions == {} + assert browser_tool._session_last_activity == {} + assert browser_tool._recording_sessions == set() + assert browser_tool._cleanup_done is True diff --git a/tools/browser_tool.py b/tools/browser_tool.py index b3516c4f2..5ff560cef 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -53,6 +53,7 @@ import json import logging import os +import re import signal import subprocess import shutil @@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions(): if not _active_sessions: return - logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions)) - - try: - if _is_local_mode(): - # Local mode: just close agent-browser sessions via CLI - for task_id, session_info in list(_active_sessions.items()): - session_name = session_info.get("session_name") - if session_name: - try: - browser_cmd = _find_agent_browser() - task_socket_dir = os.path.join( - _socket_safe_tmpdir(), - f"agent-browser-{session_name}" - ) - env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir} - subprocess.run( - browser_cmd.split() + ["--session", session_name, "--json", "close"], - capture_output=True, timeout=5, env=env, - ) - logger.info("Closed local session %s", session_name) - except Exception as e: - logger.debug("Error closing local session %s: %s", session_name, e) - else: - # Cloud mode: release Browserbase sessions via API - api_key = os.environ.get("BROWSERBASE_API_KEY") - project_id = os.environ.get("BROWSERBASE_PROJECT_ID") - - if not api_key or not project_id: - logger.warning("Cannot cleanup - missing BROWSERBASE credentials") - return + logger.info("Emergency cleanup: closing %s active session(s)...", + len(_active_sessions)) - for task_id, session_info in list(_active_sessions.items()): - bb_session_id = session_info.get("bb_session_id") - if bb_session_id: - try: - response = requests.post( - f"https://api.browserbase.com/v1/sessions/{bb_session_id}", - headers={ - "X-BB-API-Key": api_key, - "Content-Type": "application/json" - }, - json={ - "projectId": project_id, - "status": "REQUEST_RELEASE" - }, - timeout=5 # Short timeout for cleanup - ) - if response.status_code in (200, 201, 204): - logger.info("Closed session %s", bb_session_id) - else: - logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code) - except Exception as e: - logger.error("Error closing session %s: %s", bb_session_id, e) - - _active_sessions.clear() + try: + cleanup_all_browsers() except Exception as e: logger.error("Emergency cleanup error: %s", e) + finally: + with _cleanup_lock: + _active_sessions.clear() + _session_last_activity.clear() + _recording_sessions.clear() # Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM @@ -640,18 +596,13 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]: def _create_local_session(task_id: str) -> Dict[str, str]: - """Create a lightweight local browser session (no cloud API call). - - Returns the same dict shape as ``_create_browserbase_session`` so the rest - of the code can treat both modes uniformly. - """ import uuid - session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" - logger.info("Created local browser session %s", session_name) + session_name = f"h_{uuid.uuid4().hex[:10]}" + logger.info("Created local browser session %s for task %s", session_name, task_id) return { "session_name": session_name, - "bb_session_id": None, # Not applicable in local mode - "cdp_url": None, # Not applicable in local mode + "bb_session_id": None, + "cdp_url": None, "features": {"local": True}, } @@ -772,6 +723,27 @@ def _find_agent_browser() -> str: ) +def _extract_screenshot_path_from_text(text: str) -> Optional[str]: + """Extract a screenshot file path from agent-browser human-readable output.""" + if not text: + return None + + patterns = [ + r"Screenshot saved to ['\"](?P/[^'\"]+?\.png)['\"]", + r"Screenshot saved to (?P/\S+?\.png)(?:\s|$)", + r"(?P/\S+?\.png)(?:\s|$)", + ] + + for pattern in patterns: + match = re.search(pattern, text) + if match: + path = match.group("path").strip().strip("'\"") + if path: + return path + + return None + + def _run_browser_command( task_id: str, command: str, @@ -841,9 +813,22 @@ def _run_browser_command( command, task_id, task_socket_dir, len(task_socket_dir)) browser_env = {**os.environ} - # Ensure PATH includes standard dirs (systemd services may have minimal PATH) - if "/usr/bin" not in browser_env.get("PATH", "").split(":"): - browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}" + + # Prefer Hermes-managed Node, but still ensure standard dirs exist for + # service/minimal environments where PATH may be incomplete. + hermes_home = Path(os.environ.get( + "HERMES_HOME", Path.home() / ".hermes")) + hermes_node_bin = str(hermes_home / "node" / "bin") + + existing_path = browser_env.get("PATH", "") + path_parts = [p for p in existing_path.split(":") if p] + candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p] + + for part in reversed(candidate_dirs): + if os.path.isdir(part) and part not in path_parts: + path_parts.insert(0, part) + + browser_env["PATH"] = ":".join(path_parts) browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir result = subprocess.run( @@ -866,11 +851,11 @@ def _run_browser_command( command, " ".join(cmd_parts[:4]) + "...", (result.stderr or "")[:200]) - # Parse JSON output - if result.stdout.strip(): + stdout_text = result.stdout.strip() + + if stdout_text: try: - parsed = json.loads(result.stdout.strip()) - # Warn if snapshot came back empty (common sign of daemon/CDP issues) + parsed = json.loads(stdout_text) if command == "snapshot" and parsed.get("success"): snap_data = parsed.get("data", {}) if not snap_data.get("snapshot") and not snap_data.get("refs"): @@ -879,13 +864,33 @@ def _run_browser_command( "returncode=%s", result.returncode) return parsed except json.JSONDecodeError: - # Non-JSON output indicates agent-browser crash or version mismatch - raw = result.stdout.strip()[:500] + raw = stdout_text[:2000] logger.warning("browser '%s' returned non-JSON output (rc=%s): %s", - command, result.returncode, raw[:200]) + command, result.returncode, raw[:500]) + + if command == "screenshot": + stderr_text = (result.stderr or "").strip() + combined_text = "\n".join( + part for part in [stdout_text, stderr_text] if part + ) + recovered_path = _extract_screenshot_path_from_text(combined_text) + + if recovered_path and Path(recovered_path).exists(): + logger.info( + "browser 'screenshot' recovered file from non-JSON output: %s", + recovered_path, + ) + return { + "success": True, + "data": { + "path": recovered_path, + "raw": raw, + }, + } + return { - "success": True, - "data": {"raw": raw} + "success": False, + "error": f"Non-JSON output from agent-browser for '{command}': {raw}" } # Check for errors @@ -1258,38 +1263,19 @@ def browser_close(task_id: Optional[str] = None) -> str: JSON string with close result """ effective_task_id = task_id or "default" - - # Stop auto-recording before closing - _maybe_stop_recording(effective_task_id) - - result = _run_browser_command(effective_task_id, "close", []) - - # Close the backend session (Browserbase API in cloud mode, nothing extra in local mode) - session_key = task_id if task_id and task_id in _active_sessions else "default" - if session_key in _active_sessions: - session_info = _active_sessions[session_key] - bb_session_id = session_info.get("bb_session_id") - if bb_session_id: - # Cloud mode: release the Browserbase session via API - try: - config = _get_browserbase_config() - _close_browserbase_session(bb_session_id, config["api_key"], config["project_id"]) - except Exception as e: - logger.warning("Could not close BrowserBase session: %s", e) - del _active_sessions[session_key] - - if result.get("success"): - return json.dumps({ - "success": True, - "closed": True - }, ensure_ascii=False) - else: - # Even if close fails, session was released - return json.dumps({ - "success": True, - "closed": True, - "warning": result.get("error", "Session may not have been active") - }, ensure_ascii=False) + + with _cleanup_lock: + had_session = effective_task_id in _active_sessions + + cleanup_browser(effective_task_id) + + response = { + "success": True, + "closed": True, + } + if not had_session: + response["warning"] = "Session may not have been active" + return json.dumps(response, ensure_ascii=False) def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str: @@ -1481,9 +1467,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] _cleanup_old_screenshots(screenshots_dir, max_age_hours=24) # Take screenshot using agent-browser - screenshot_args = [str(screenshot_path)] + screenshot_args = [] if annotate: - screenshot_args.insert(0, "--annotate") + screenshot_args.append("--annotate") + screenshot_args.append("--full") + screenshot_args.append(str(screenshot_path)) result = _run_browser_command( effective_task_id, "screenshot", @@ -1498,7 +1486,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] "success": False, "error": f"Failed to take screenshot ({mode} mode): {error_detail}" }, ensure_ascii=False) - + + actual_screenshot_path = result.get("data", {}).get("path") + if actual_screenshot_path: + screenshot_path = Path(actual_screenshot_path) + # Check if screenshot file was created if not screenshot_path.exists(): mode = "local" if _is_local_mode() else "cloud"